1+ #include "Python.h"
2+ #include "../Parser/tokenizer.h"
3+
/* Forward declaration: _tokenize_get_state_by_type() below needs the module
   definition's address before it is fully defined at the bottom of the file. */
static struct PyModuleDef _tokenizemodule ;
5+
/* Per-module state (multi-phase init): holds a strong reference to the
   TokenizerIter heap type, created in tokenizemodule_exec(). */
typedef struct
{
PyTypeObject * TokenizerIter ;
} tokenize_state ;
10+
11+ static tokenize_state *
12+ get_tokenize_state (PyObject * module )
13+ {
14+ return (tokenize_state * )PyModule_GetState (module );
15+ }
16+
/* Look up the module state from a (heap) type object; used by the Argument
   Clinic class declaration below to reach TokenizerIter. */
#define _tokenize_get_state_by_type (type ) \
 get_tokenize_state(_PyType_GetModuleByDef(type, &_tokenizemodule))
19+
20+ #include "clinic/Python-tokenize.c.h"
21+
22+ /*[clinic input]
23+ module _tokenizer
24+ class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_type(type)->TokenizerIter"
25+ [clinic start generated code]*/
26+ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/
27+
/* Instance layout for _tokenizer.tokenizeriter: wraps a tokenizer state
   owned by this object (freed in tokenizeriter_dealloc). */
typedef struct
{
PyObject_HEAD struct tok_state * tok ;
} tokenizeriterobject ;
32+
33+ /*[clinic input]
34+ @classmethod
35+ _tokenizer.tokenizeriter.__new__ as tokenizeriter_new
36+
37+ source: str
38+ [clinic start generated code]*/
39+
40+ static PyObject *
41+ tokenizeriter_new_impl (PyTypeObject * type , const char * source )
42+ /*[clinic end generated code: output=7fd9f46cf9263cbb input=4384b368407375c6]*/
43+ {
44+ tokenizeriterobject * self = (tokenizeriterobject * )type -> tp_alloc (type , 0 );
45+ if (self == NULL ) {
46+ return NULL ;
47+ }
48+ PyObject * filename = PyUnicode_FromString ("<string>" );
49+ if (filename == NULL ) {
50+ return NULL ;
51+ }
52+ self -> tok = PyTokenizer_FromUTF8 (source , 1 );
53+ if (self -> tok == NULL ) {
54+ return NULL ;
55+ }
56+ self -> tok -> filename = filename ;
57+ return (PyObject * )self ;
58+ }
59+
60+ static PyObject *
61+ tokenizeriter_next (tokenizeriterobject * it )
62+ {
63+ const char * start ;
64+ const char * end ;
65+ int type = PyTokenizer_Get (it -> tok , & start , & end );
66+ if (type == ERRORTOKEN && PyErr_Occurred ()) {
67+ return NULL ;
68+ }
69+ if (type == ERRORTOKEN || type == ENDMARKER ) {
70+ PyErr_SetString (PyExc_StopIteration , "EOF" );
71+ return NULL ;
72+ }
73+ PyObject * str = NULL ;
74+ if (start == NULL || end == NULL ) {
75+ str = PyUnicode_FromStringAndSize (start , end - start );
76+ } else {
77+ str = PyUnicode_FromString ("" );
78+ }
79+ if (str == NULL ) {
80+ return NULL ;
81+ }
82+
83+ Py_ssize_t size = it -> tok -> inp - it -> tok -> buf ;
84+ PyObject * line = PyUnicode_DecodeUTF8 (it -> tok -> buf , size , "replace" );
85+ if (line == NULL ) {
86+ Py_DECREF (str );
87+ return NULL ;
88+ }
89+ const char * line_start = type == STRING ? it -> tok -> multi_line_start : it -> tok -> line_start ;
90+ int lineno = type == STRING ? it -> tok -> first_lineno : it -> tok -> lineno ;
91+ int end_lineno = it -> tok -> lineno ;
92+ int col_offset = -1 ;
93+ int end_col_offset = -1 ;
94+ if (start != NULL && start >= line_start ) {
95+ col_offset = (int )(start - line_start );
96+ }
97+ if (end != NULL && end >= it -> tok -> line_start ) {
98+ end_col_offset = (int )(end - it -> tok -> line_start );
99+ }
100+
101+ return Py_BuildValue ("(NiiiiiN)" , str , type , lineno , end_lineno , col_offset , end_col_offset , line );
102+ }
103+
104+ static void
105+ tokenizeriter_dealloc (tokenizeriterobject * it )
106+ {
107+ PyTypeObject * tp = Py_TYPE (it );
108+ PyTokenizer_Free (it -> tok );
109+ tp -> tp_free (it );
110+ }
111+
/* Type slots for _tokenize.TokenizerIter; the iterator protocol is
   PyObject_SelfIter + tokenizeriter_next. */
static PyType_Slot tokenizeriter_slots [] = {
 {Py_tp_new , tokenizeriter_new },
 {Py_tp_dealloc , tokenizeriter_dealloc },
 {Py_tp_getattro , PyObject_GenericGetAttr },
 {Py_tp_iter , PyObject_SelfIter },
 {Py_tp_iternext , tokenizeriter_next },
 {0 , NULL },
};
120+
/* Spec for the TokenizerIter heap type, instantiated per-module in
   tokenizemodule_exec(). */
static PyType_Spec tokenizeriter_spec = {
 .name = "_tokenize.TokenizerIter" ,
 .basicsize = sizeof (tokenizeriterobject ),
 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE ),
 .slots = tokenizeriter_slots ,
};
127+
128+
129+ static int
130+ tokenizemodule_exec (PyObject * m )
131+ {
132+ tokenize_state * state = get_tokenize_state (m );
133+ if (state == NULL ) {
134+ return -1 ;
135+ }
136+
137+ state -> TokenizerIter = (PyTypeObject * )PyType_FromModuleAndSpec (
138+ m , & tokenizeriter_spec , NULL );
139+ if (state -> TokenizerIter == NULL ) {
140+ return -1 ;
141+ }
142+ if (PyModule_AddType (m , state -> TokenizerIter ) < 0 ) {
143+ return -1 ;
144+ }
145+
146+ return 0 ;
147+ }
148+
/* The module exposes no functions of its own — only the TokenizerIter type. */
static PyMethodDef tokenize_methods [] = {
 {NULL , NULL , 0 , NULL } /* Sentinel */
};
152+
/* Multi-phase initialization slots (PEP 489). */
static PyModuleDef_Slot tokenizemodule_slots [] = {
 {Py_mod_exec , tokenizemodule_exec },
 {0 , NULL }
};
157+
158+ static int
159+ tokenizemodule_traverse (PyObject * m , visitproc visit , void * arg )
160+ {
161+ tokenize_state * state = get_tokenize_state (m );
162+ Py_VISIT (state -> TokenizerIter );
163+ return 0 ;
164+ }
165+
166+ static int
167+ tokenizemodule_clear (PyObject * m )
168+ {
169+ tokenize_state * state = get_tokenize_state (m );
170+ Py_CLEAR (state -> TokenizerIter );
171+ return 0 ;
172+ }
173+
174+ static void
175+ tokenizemodule_free (void * m )
176+ {
177+ tokenizemodule_clear ((PyObject * )m );
178+ }
179+
/* Module definition; m_size > 0 enables per-module state (tokenize_state). */
static struct PyModuleDef _tokenizemodule = {
 PyModuleDef_HEAD_INIT ,
 .m_name = "_tokenize" ,
 .m_size = sizeof (tokenize_state ),
 .m_slots = tokenizemodule_slots ,
 .m_methods = tokenize_methods ,
 .m_traverse = tokenizemodule_traverse ,
 .m_clear = tokenizemodule_clear ,
 .m_free = tokenizemodule_free ,
};
190+
/* Module entry point: multi-phase init — actual setup runs in the
   Py_mod_exec slot (tokenizemodule_exec). */
PyMODINIT_FUNC
PyInit__tokenize (void )
{
 return PyModuleDef_Init (& _tokenizemodule );
}