Search code, repositories, users, issues, pull requests...

bpo-39219: Fix SyntaxError attributes in the tokenizer. (GH-17828)

Oct 14, 1990

/* Tokenizer implementation */

Feb 12, 2020

Feb 12, 2020

#define PY_SSIZE_T_CLEAN

Apr 14, 2002

Mass checkin of universal newline support.

Apr 14, 2002

#include "Python.h"

Oct 12, 2021

bpo-45439: Move _PyObject_CallNoArgs() to pycore_call.h (GH-28895)

Oct 12, 2021

#include "pycore_call.h" // _PyObject_CallNoArgs()

Oct 14, 1990

Oct 14, 1990

#include <ctype.h>

Aug 4, 2002

Aug 4, 2002

#include <assert.h>

Oct 14, 1990

Oct 14, 1990

#include "tokenizer.h"

#include "errcode.h"

Aug 4, 2002

Renamed files bytesobject.[ch] and stringobject.[ch]

Aug 4, 2002

#include "unicodeobject.h"

May 26, 2008

May 26, 2008

#include "bytesobject.h"

Aug 4, 2002

tokenizer: Remove unused tabs options (#4422)

Aug 4, 2002

#include "fileobject.h"

#include "abstract.h"

Nov 17, 2017

Nov 17, 2017

/* Alternate tab spacing */

#define ALTTABSIZE 1

Jun 10, 2007

Make identifiers str (not str8) objects throughout.

Jun 10, 2007

/* True if C can begin an identifier: an ASCII letter, '_', or any value
   >= 128 (bytes of a non-ASCII character are accepted here without further
   checks).  C is parenthesized at every use so that an arbitrary expression
   may be passed; note that it is still evaluated several times, so it must
   not have side effects. */
#define is_potential_identifier_start(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || (c) == '_'\
               || ((c) >= 128))

Jun 10, 2007

Make identifiers str (not str8) objects throughout.

Jun 10, 2007

/* True if C can continue an identifier: an ASCII letter or digit, '_', or
   any value >= 128.  C is parenthesized at every use so that an arbitrary
   expression may be passed; it is still evaluated several times, so it must
   not have side effects. */
#define is_potential_identifier_char(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || ((c) >= '0' && (c) <= '9')\
               || (c) == '_'\
               || ((c) >= 128))

Jun 10, 2007

Make identifiers str (not str8) objects throughout.

Jun 10, 2007

Aug 29, 1994

* Parser/tokenizer.c: backup over illegal newline in string

Aug 29, 1994

Feb 26, 1992

Make tabs always 8 spaces wide -- it's more portable.

Feb 26, 1992

/* Don't ever change this -- it would break the portability of Python code */

Oct 14, 1990

Nuke all remaining occurrences of Py_PROTO and Py_FPROTO.

Oct 14, 1990

#define TABSIZE 8

Dec 20, 1990

"Compiling" version

Dec 20, 1990

/* Forward */

Jul 9, 2000

Jul 9, 2000

static struct tok_state *tok_new(void);

static int tok_nextc(struct tok_state *tok);

static void tok_backup(struct tok_state *tok, int c);

Dec 20, 1990

"Compiling" version

Dec 20, 1990

Oct 20, 2007

Plug a memory leak where a struct tok_state was not being freed.

Oct 20, 2007

Jan 31, 2019

bpo-35766: Merge typed_ast back into CPython (GH-11645)

Jan 31, 2019

/* Spaces in this constant are treated as "zero or more spaces or tabs" when

tokenizing. */

static const char* type_comment_prefix = "# type: ";

Oct 14, 1990

Oct 14, 1990

/* Create and initialize a new tok_state structure */

static struct tok_state *

Jul 22, 2000

Mass ANSIfication.

Jul 22, 2000

tok_new(void)

Oct 14, 1990

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Oct 14, 1990

{

Dec 1, 2020

Dec 1, 2020

struct tok_state *tok = (struct tok_state *)PyMem_Malloc(

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

sizeof(struct tok_state));

if (tok == NULL)

return NULL;

Feb 28, 2020

closes bpo-39721: Fix constness of members of tok_state struct. (GH-1…

Feb 28, 2020

tok->buf = tok->cur = tok->inp = NULL;

Mar 14, 2021

bpo-43410: Fix crash in the parser when producing syntax errors when …

Mar 14, 2021

tok->fp_interactive = 0;

tok->interactive_src_start = NULL;

tok->interactive_src_end = NULL;

Feb 28, 2020

closes bpo-39721: Fix constness of members of tok_state struct. (GH-1…

Feb 28, 2020

tok->start = NULL;

tok->end = NULL;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

tok->done = E_OK;

tok->fp = NULL;

tok->input = NULL;

tok->tabsize = TABSIZE;

tok->indent = 0;

tok->indstack[0] = 0;

tok->atbol = 1;

tok->pendin = 0;

tok->prompt = tok->nextprompt = NULL;

tok->lineno = 0;

tok->level = 0;

tok->altindstack[0] = 0;

tok->decoding_state = STATE_INIT;

tok->decoding_erred = 0;

tok->enc = NULL;

tok->encoding = NULL;

tok->cont_line = 0;

Apr 4, 2011

Issue #10785: Store the filename as Unicode in the Python parser.

Apr 4, 2011

tok->filename = NULL;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

tok->decoding_readline = NULL;

tok->decoding_buffer = NULL;

Jan 31, 2019

bpo-35766: Merge typed_ast back into CPython (GH-11645)

Jan 31, 2019

tok->type_comments = 0;

Mar 7, 2019

bpo-35975: Support parsing earlier minor versions of Python 3 (GH-12086)

Mar 7, 2019

tok->async_hacks = 0;

tok->async_def = 0;

tok->async_def_indent = 0;

tok->async_def_nl = 0;

May 22, 2021

bpo-44201: Avoid side effects of "invalid_*" rules in the REPL (GH-26…

May 22, 2021

tok->interactive_underflow = IUNDERFLOW_NORMAL;

Mar 7, 2019

bpo-35975: Support parsing earlier minor versions of Python 3 (GH-12086)

Mar 7, 2019

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

return tok;

Oct 14, 1990

Oct 14, 1990

}

Nov 13, 2009

check the return value of new_string() (closes #18470)

Nov 13, 2009

static char *

Jul 16, 2013

Jul 16, 2013

new_string(const char *s, Py_ssize_t len, struct tok_state *tok)

Nov 13, 2009

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Nov 13, 2009

{

Dec 1, 2020

Dec 1, 2020

char* result = (char *)PyMem_Malloc(len + 1);

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

if (!result) {

tok->done = E_NOMEM;

100

return NULL;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

101

}

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

102

memcpy(result, s, len);

103

result[len] = '\0';

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

104

return result;

Nov 13, 2009

Nov 13, 2009

105

}

106

Aug 4, 2002

Recorded merge of revisions 81029 via svnmerge from

Aug 4, 2002

107

static char *

108

error_ret(struct tok_state *tok) /* XXX */

109

{

May 9, 2010

May 9, 2010

110

tok->decoding_erred = 1;

Oct 13, 2021

bpo-45434: Mark the PyTokenizer C API as private (GH-28924)

Oct 13, 2021

111

if (tok->fp != NULL && tok->buf != NULL) /* see _PyTokenizer_Free */

Dec 1, 2020

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Dec 1, 2020

112

PyMem_Free(tok->buf);

Feb 28, 2020

closes bpo-39721: Fix constness of members of tok_state struct. (GH-1…

Feb 28, 2020

113

tok->buf = tok->cur = tok->inp = NULL;

114

tok->start = NULL;

115

tok->end = NULL;

Nov 14, 2015

Issue #25388: Fixed tokenizer crash when processing undecodable sourc…

Nov 14, 2015

116

tok->done = E_DECODE;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

117

return NULL; /* as if it were EOF */

Aug 4, 2002

Issue #25923: Added more const qualifiers to signatures of static and…

Aug 4, 2002

118

}

119

120

Dec 25, 2015

Dec 25, 2015

121

/* Map the common spellings of UTF-8 and Latin-1 to a canonical name.

   At most the first 12 characters of S are examined, lower-cased and with
   '_' replaced by '-'.  Returns the literal "utf-8" for "utf-8" and any
   "utf-8-..." variant; the literal "iso-8859-1" for "latin-1", "iso-8859-1",
   "iso-latin-1" and their "-..." variants; and S itself (the same pointer)
   for anything else, so callers can test `result != s` to detect a rewrite.

   Each byte is converted to unsigned char before being passed to tolower():
   passing a negative char value (any byte >= 0x80 where char is signed) is
   undefined behaviour. */
static const char *
get_normal_name(const char *s)  /* for utf-8 and latin-1 */
{
    char buf[13];
    int i;

    for (i = 0; i < 12 && s[i] != '\0'; i++) {
        unsigned char c = (unsigned char)s[i];
        buf[i] = (c == '_') ? '-' : (char)tolower(c);
    }
    buf[i] = '\0';

    if (strcmp(buf, "utf-8") == 0 ||
        strncmp(buf, "utf-8-", 6) == 0) {
        return "utf-8";
    }
    if (strcmp(buf, "latin-1") == 0 ||
        strcmp(buf, "iso-8859-1") == 0 ||
        strcmp(buf, "iso-latin-1") == 0 ||
        strncmp(buf, "latin-1-", 8) == 0 ||
        strncmp(buf, "iso-8859-1-", 11) == 0 ||
        strncmp(buf, "iso-latin-1-", 12) == 0) {
        return "iso-8859-1";
    }
    return s;
}

149

150

/* Return the coding spec in S, or NULL if none is found. */

151

Jul 16, 2013

Jul 16, 2013

152

static int

153

get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok)

Aug 4, 2002

Recorded merge of revisions 81029 via svnmerge from

Aug 4, 2002

154

{

May 9, 2010

May 9, 2010

155

Py_ssize_t i;

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

156

*spec = NULL;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

157

/* Coding spec must be in a comment, and that comment must be

158

* the only statement on the source code line. */

159

for (i = 0; i < size - 6; i++) {

160

if (s[i] == '#')

161

break;

162

if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

163

return 1;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

164

}

165

for (; i < size - 6; i++) { /* XXX inefficient search */

166

const char* t = s + i;

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

167

if (memcmp(t, "coding", 6) == 0) {

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

168

const char* begin = NULL;

169

t += 6;

170

if (t[0] != ':' && t[0] != '=')

171

continue;

172

do {

173

t++;

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

174

} while (t[0] == ' ' || t[0] == '\t');

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

175

176

begin = t;

177

while (Py_ISALNUM(t[0]) ||

178

t[0] == '-' || t[0] == '_' || t[0] == '.')

179

t++;

180

181

if (begin < t) {

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

182

char* r = new_string(begin, t - begin, tok);

Dec 25, 2015

Issue #25923: Added more const qualifiers to signatures of static and…

Dec 25, 2015

183

const char* q;

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

184

if (!r)

185

return 0;

Jul 16, 2013

move declaration to top of block

Jul 16, 2013

186

q = get_normal_name(r);

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

187

if (r != q) {

Dec 1, 2020

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Dec 1, 2020

188

PyMem_Free(r);

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

189

r = new_string(q, strlen(q), tok);

190

if (!r)

191

return 0;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

192

}

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

193

*spec = r;

Mar 20, 2016

Issue #26581: Use the first coding cookie on a line, not the last one.

Mar 20, 2016

194

break;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

195

}

196

}

197

}

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

198

return 1;

Aug 4, 2002

Recorded merge of revisions 81029 via svnmerge from

Aug 4, 2002

199

}

200

201

/* Check whether the line contains a coding spec. If it does,

202

invoke the set_readline function for the new encoding.

203

This function receives the tok_state and the new encoding.

204

Return 1 on success, 0 on failure. */

205

206

static int

Feb 15, 2006

Merge ssize_t branch.

Feb 15, 2006

207

check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,

May 9, 2010

May 9, 2010

208

int set_readline(struct tok_state *, const char *))

Aug 4, 2002

check the return value of new_string() (closes #18470)

Aug 4, 2002

209

{

Jul 16, 2013

Jul 16, 2013

210

char *cs;

Jan 9, 2014

Issue #18960: Fix bugs with Python source code encoding in the second…

Jan 9, 2014

211

if (tok->cont_line) {

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

212

/* It's a continuation line, so it can't be a coding spec. */

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

213

tok->decoding_state = STATE_NORMAL;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

214

return 1;

Jan 9, 2014

Issue #18960: Fix bugs with Python source code encoding in the second…

Jan 9, 2014

215

}

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

216

if (!get_coding_spec(line, &cs, size, tok)) {

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

217

return 0;

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

218

}

Jan 9, 2014

Issue #18960: Fix bugs with Python source code encoding in the second…

Jan 9, 2014

219

if (!cs) {

220

Py_ssize_t i;

221

for (i = 0; i < size; i++) {

222

if (line[i] == '#' || line[i] == '\n' || line[i] == '\r')

223

break;

224

if (line[i] != ' ' && line[i] != '\t' && line[i] != '\014') {

225

/* Stop checking coding spec after a line containing

226

* anything except a comment. */

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

227

tok->decoding_state = STATE_NORMAL;

Jan 9, 2014

Issue #18960: Fix bugs with Python source code encoding in the second…

Jan 9, 2014

228

break;

229

}

230

}

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

231

return 1;

Jan 9, 2014

Issue #18960: Fix bugs with Python source code encoding in the second…

Jan 9, 2014

232

}

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

233

tok->decoding_state = STATE_NORMAL;

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

234

if (tok->encoding == NULL) {

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

235

assert(tok->decoding_readline == NULL);

236

if (strcmp(cs, "utf-8") != 0 && !set_readline(tok, cs)) {

237

error_ret(tok);

238

PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);

239

PyMem_Free(cs);

240

return 0;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

241

}

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

242

tok->encoding = cs;

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

243

} else { /* then, compare cs with BOM */

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

244

if (strcmp(tok->encoding, cs) != 0) {

245

error_ret(tok);

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

246

PyErr_Format(PyExc_SyntaxError,

247

"encoding problem: %s with BOM", cs);

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

248

PyMem_Free(cs);

249

return 0;

250

}

Dec 1, 2020

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Dec 1, 2020

251

PyMem_Free(cs);

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

252

}

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

253

return 1;

Aug 4, 2002

Recorded merge of revisions 81029 via svnmerge from

Aug 4, 2002

254

}

255

256

/* See whether the file starts with a BOM. If it does,

257

invoke the set_readline function with the new encoding.

258

Return 1 on success, 0 on failure. */

259

260

static int

261

check_bom(int get_char(struct tok_state *),

May 9, 2010

May 9, 2010

262

void unget_char(int, struct tok_state *),

263

int set_readline(struct tok_state *, const char *),

264

struct tok_state *tok)

Aug 4, 2002

Recorded merge of revisions 81029 via svnmerge from

Aug 4, 2002

265

{

May 9, 2010

May 9, 2010

266

int ch1, ch2, ch3;

267

ch1 = get_char(tok);

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

268

tok->decoding_state = STATE_SEEK_CODING;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

269

if (ch1 == EOF) {

270

return 1;

271

} else if (ch1 == 0xEF) {

272

ch2 = get_char(tok);

273

if (ch2 != 0xBB) {

274

unget_char(ch2, tok);

275

unget_char(ch1, tok);

276

return 1;

277

}

278

ch3 = get_char(tok);

279

if (ch3 != 0xBF) {

280

unget_char(ch3, tok);

281

unget_char(ch2, tok);

282

unget_char(ch1, tok);

283

return 1;

284

}

Aug 4, 2002

Recorded merge of revisions 81029 via svnmerge from

Aug 4, 2002

285

#if 0

May 9, 2010

May 9, 2010

286

/* Disable support for UTF-16 BOMs until a decision

287

is made whether this needs to be supported. */

288

} else if (ch1 == 0xFE) {

289

ch2 = get_char(tok);

290

if (ch2 != 0xFF) {

291

unget_char(ch2, tok);

292

unget_char(ch1, tok);

293

return 1;

294

}

295

if (!set_readline(tok, "utf-16-be"))

296

return 0;

297

tok->decoding_state = STATE_NORMAL;

298

} else if (ch1 == 0xFF) {

299

ch2 = get_char(tok);

300

if (ch2 != 0xFE) {

301

unget_char(ch2, tok);

302

unget_char(ch1, tok);

303

return 1;

304

}

305

if (!set_readline(tok, "utf-16-le"))

306

return 0;

307

tok->decoding_state = STATE_NORMAL;

Aug 4, 2002

Recorded merge of revisions 81029 via svnmerge from

Aug 4, 2002

308

#endif

May 9, 2010

May 9, 2010

309

} else {

310

unget_char(ch1, tok);

311

return 1;

312

}

313

if (tok->encoding != NULL)

Dec 1, 2020

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Dec 1, 2020

314

PyMem_Free(tok->encoding);

Jul 16, 2013

check the return value of new_string() (closes #18470)

Jul 16, 2013

315

tok->encoding = new_string("utf-8", 5, tok);

316

if (!tok->encoding)

317

return 0;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

318

/* No need to set_readline: input is already utf-8 */

319

return 1;

Aug 4, 2002

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Aug 4, 2002

320

}

321

Mar 28, 2021

Mar 28, 2021

322

static int

323

tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {

Mar 14, 2021

bpo-43410: Fix crash in the parser when producing syntax errors when …

Mar 14, 2021

324

assert(tok->fp_interactive);

325

326

if (!line) {

327

return 0;

328

}

329

330

Py_ssize_t current_size = tok->interactive_src_end - tok->interactive_src_start;

331

Py_ssize_t line_size = strlen(line);

332

char* new_str = tok->interactive_src_start;

333

334

new_str = PyMem_Realloc(new_str, current_size + line_size + 1);

335

if (!new_str) {

336

if (tok->interactive_src_start) {

337

PyMem_Free(tok->interactive_src_start);

338

}

339

tok->interactive_src_start = NULL;

340

tok->interactive_src_end = NULL;

341

tok->done = E_NOMEM;

342

return -1;

343

}

344

strcpy(new_str + current_size, line);

345

346

tok->interactive_src_start = new_str;

347

tok->interactive_src_end = new_str + current_size + line_size;

348

return 0;

349

}

350

351

Aug 4, 2002

Apply SF patch #1101726: Fix buffer overrun in tokenizer.c when a sou…

Aug 4, 2002

352

/* Read a line of text from TOK into S, using the stream in TOK.
   Return NULL on failure, else S.

   On entry, tok->decoding_buffer will be one of:
     1) NULL: need to call tok->decoding_readline to get a new line
     2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and
           stored the result in tok->decoding_buffer
     3) PyByteArrayObject *: previous call to tok_readline_recode did not have
           enough room (in the s buffer) to copy entire contents of the line
           read by tok->decoding_readline.  tok->decoding_buffer has the
           overflow.  In this case, tok_readline_recode is called in a loop
           (with an expanded buffer) until the buffer ends with a '\n' (or
           until the end of the file is reached): see tok_nextc and its calls
           to tok_reserve_buf.
*/

Jul 12, 2005

Apply SF patch #1101726: Fix buffer overrun in tokenizer.c when a sou…

Jul 12, 2005

365

Aug 4, 2002

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Aug 4, 2002

366

Mar 28, 2021

Mar 28, 2021

367

static int

368

tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)

Aug 4, 2002

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Aug 4, 2002

369

{

Mar 28, 2021

Mar 28, 2021

370

Py_ssize_t cur = tok->cur - tok->buf;

371

Py_ssize_t oldsize = tok->inp - tok->buf;

372

Py_ssize_t newsize = oldsize + Py_MAX(size, oldsize >> 1);

373

if (newsize > tok->end - tok->buf) {

374

char *newbuf = tok->buf;

375

Py_ssize_t start = tok->start == NULL ? -1 : tok->start - tok->buf;

Jun 12, 2021

bpo-44396: Update multi-line-start location when reallocating tokeniz…

Jun 12, 2021

376

Py_ssize_t line_start = tok->start == NULL ? -1 : tok->line_start - tok->buf;

377

Py_ssize_t multi_line_start = tok->multi_line_start - tok->buf;

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

378

newbuf = (char *)PyMem_Realloc(newbuf, newsize);

379

if (newbuf == NULL) {

380

tok->done = E_NOMEM;

381

return 0;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

382

}

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

383

tok->buf = newbuf;

384

tok->cur = tok->buf + cur;

385

tok->inp = tok->buf + oldsize;

386

tok->end = tok->buf + newsize;

387

tok->start = start < 0 ? NULL : tok->buf + start;

Jun 12, 2021

bpo-44396: Update multi-line-start location when reallocating tokeniz…

Jun 12, 2021

388

tok->line_start = line_start < 0 ? NULL : tok->buf + line_start;

389

tok->multi_line_start = multi_line_start < 0 ? NULL : tok->buf + multi_line_start;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

390

}

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

391

return 1;

392

}

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

393

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

394

static int

395

tok_readline_recode(struct tok_state *tok) {

396

PyObject *line;

397

const char *buf;

398

Py_ssize_t buflen;

399

line = tok->decoding_buffer;

400

if (line == NULL) {

401

line = PyObject_CallNoArgs(tok->decoding_readline);

402

if (line == NULL) {

403

error_ret(tok);

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

404

goto error;

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

405

}

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

406

}

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

407

else {

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

408

tok->decoding_buffer = NULL;

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

409

}

410

buf = PyUnicode_AsUTF8AndSize(line, &buflen);

411

if (buf == NULL) {

412

error_ret(tok);

413

goto error;

414

}

415

if (!tok_reserve_buf(tok, buflen + 1)) {

416

goto error;

417

}

418

memcpy(tok->inp, buf, buflen);

419

tok->inp += buflen;

420

*tok->inp = '\0';

421

if (tok->fp_interactive &&

422

tok_concatenate_interactive_new_line(tok, buf) == -1) {

423

goto error;

424

}

425

Py_DECREF(line);

426

return 1;

Aug 12, 2007

Fix refleaks from execfile('file that contains a # coding: line')

Aug 12, 2007

427

error:

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

428

Py_XDECREF(line);

429

return 0;

Aug 4, 2002

restructure fp_setreadl so as to avoid refleaks (closes #27981)

Aug 4, 2002

430

}

431

432

/* Set the readline function for TOK to a StreamReader's

433

readline function. The StreamReader is named ENC.

434

435

This function is called from check_bom and check_coding_spec.

436

437

ENC is usually identical to the future value of tok->encoding,

438

except for the (currently unsupported) case of UTF-16.

439

440

Return 1 on success, 0 on failure. */

441

442

static int

443

fp_setreadl(struct tok_state *tok, const char* enc)

444

{

Sep 13, 2016

Sep 13, 2016

445

PyObject *readline, *io, *stream;

Oct 14, 2011

Rename _Py_identifier to _Py_IDENTIFIER.

Oct 14, 2011

446

_Py_IDENTIFIER(open);

447

_Py_IDENTIFIER(readline);

Oct 14, 2010

Issue #10095: fp_setreadl() doesn't reopen the file, reuse instead th…

Oct 14, 2010

448

int fd;

Jan 9, 2014

Issue #18960: Fix bugs with Python source code encoding in the second…

Jan 9, 2014

449

long pos;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

450

Oct 14, 2010

Issue #10095: fp_setreadl() doesn't reopen the file, reuse instead th…

Oct 14, 2010

451

fd = fileno(tok->fp);

Jan 9, 2014

Issue #18960: Fix bugs with Python source code encoding in the second…

Jan 9, 2014

452

/* Due to buffering the file offset for fd can be different from the file

Feb 28, 2014

Issue #20731: Properly position in source code files even if they

Feb 28, 2014

453

* position of tok->fp. If tok->fp was opened in text mode on Windows,

454

* its file position counts CRLF as one char and can't be directly mapped

455

* to the file offset for fd. Instead we step back one byte and read to

456

* the end of line.*/

Jan 9, 2014

Issue #18960: Fix bugs with Python source code encoding in the second…

Jan 9, 2014

457

pos = ftell(tok->fp);

Feb 28, 2014

Issue #20731: Properly position in source code files even if they

Feb 28, 2014

458

if (pos == -1 ||

459

lseek(fd, (off_t)(pos > 0 ? pos - 1 : pos), SEEK_SET) == (off_t)-1) {

Oct 14, 2010

Issue #10095: fp_setreadl() doesn't reopen the file, reuse instead th…

Oct 14, 2010

460

PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL);

Sep 13, 2016

restructure fp_setreadl so as to avoid refleaks (closes #27981)

Sep 13, 2016

461

return 0;

Oct 14, 2010

Issue #10095: fp_setreadl() doesn't reopen the file, reuse instead th…

Oct 14, 2010

462

}

463

Sep 13, 2016

restructure fp_setreadl so as to avoid refleaks (closes #27981)

Sep 13, 2016

464

io = PyImport_ImportModuleNoBlock("io");

465

if (io == NULL)

466

return 0;

467

Oct 9, 2011

Add API for static strings, primarily good for identifiers.

Oct 9, 2011

468

stream = _PyObject_CallMethodId(io, &PyId_open, "isisOOO",

Oct 14, 2010

Issue #10095: fp_setreadl() doesn't reopen the file, reuse instead th…

Oct 14, 2010

469

fd, "r", -1, enc, Py_None, Py_None, Py_False);

Sep 13, 2016

restructure fp_setreadl so as to avoid refleaks (closes #27981)

Sep 13, 2016

470

Py_DECREF(io);

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

471

if (stream == NULL)

Sep 13, 2016

restructure fp_setreadl so as to avoid refleaks (closes #27981)

Sep 13, 2016

472

return 0;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

473

Oct 10, 2011

Use identifier API for PyObject_GetAttrString.

Oct 10, 2011

474

readline = _PyObject_GetAttrId(stream, &PyId_readline);

Sep 13, 2016

restructure fp_setreadl so as to avoid refleaks (closes #27981)

Sep 13, 2016

475

Py_DECREF(stream);

476

if (readline == NULL)

477

return 0;

Apr 6, 2016

Issue #22570: Renamed Py_SETREF to Py_XSETREF.

Apr 6, 2016

478

Py_XSETREF(tok->decoding_readline, readline);

Sep 13, 2016

restructure fp_setreadl so as to avoid refleaks (closes #27981)

Sep 13, 2016

479

Feb 28, 2014

Issue #20731: Properly position in source code files even if they

Feb 28, 2014

480

if (pos > 0) {

Oct 11, 2021

bpo-45439: Rename _PyObject_CallNoArg() to _PyObject_CallNoArgs() (GH…

Oct 11, 2021

481

PyObject *bufobj = _PyObject_CallNoArgs(readline);

Sep 13, 2016

restructure fp_setreadl so as to avoid refleaks (closes #27981)

Sep 13, 2016

482

if (bufobj == NULL)

483

return 0;

484

Py_DECREF(bufobj);

Feb 28, 2014

Issue #20731: Properly position in source code files even if they

Feb 28, 2014

485

}

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

486

Sep 13, 2016

restructure fp_setreadl so as to avoid refleaks (closes #27981)

Sep 13, 2016

487

return 1;

Aug 4, 2002

Recorded merge of revisions 81029 via svnmerge from

Aug 4, 2002

488

}

489

490

/* Fetch the next byte from TOK. */

491

492

static int fp_getc(struct tok_state *tok) {

May 9, 2010

May 9, 2010

493

return getc(tok->fp);

Aug 4, 2002

Recorded merge of revisions 81029 via svnmerge from

Aug 4, 2002

494

}

495

496

/* Unfetch the last byte back into TOK. */

497

498

static void fp_ungetc(int c, struct tok_state *tok) {

May 9, 2010

May 9, 2010

499

ungetc(c, tok->fp);

Aug 4, 2002

Recorded merge of revisions 81029 via svnmerge from

Aug 4, 2002

500

}

501

Jul 29, 2007

Implement PEP 3120.

Jul 29, 2007

502

/* Check whether the characters at s start a valid
   UTF-8 sequence. Return the number of characters forming
   the sequence if yes, 0 if not.

   S must point into a NUL-terminated string.  Only well-formed UTF-8 as
   defined by RFC 3629 is accepted; the following are rejected:
     - stray continuation bytes (0x80-0xBF) and the lead bytes 0xC0/0xC1,
       which could only start overlong encodings of ASCII;
     - overlong three- and four-byte forms (E0 80-9F .., F0 80-8F ..);
     - encoded UTF-16 surrogates U+D800-U+DFFF (ED A0-BF ..);
     - code points above U+10FFFF (F4 90.. and lead bytes 0xF5-0xFF).

   Continuation bytes are examined front to back, so the scan stops at the
   first invalid byte (including the terminating NUL) and never reads past
   the end of a truncated sequence. */
static int valid_utf8(const unsigned char* s)
{
    const unsigned char lead = s[0];
    int expected;
    int i;

    if (lead < 0x80) {
        /* single-byte code */
        return 1;
    }
    if (lead < 0xC2) {
        /* continuation byte, or 0xC0/0xC1 (overlong encoding of ASCII) */
        return 0;
    }

    if (lead < 0xE0) {
        /* C2 80 - DF BF: U+0080 - U+07FF */
        expected = 1;
    }
    else if (lead < 0xF0) {
        /* E0 A0 80 - EF BF BF: U+0800 - U+FFFF, minus the surrogates.
           s[1] is readable because lead is not the terminating NUL. */
        if (lead == 0xE0 && s[1] < 0xA0) {
            return 0;           /* overlong encoding of U+0000 - U+07FF */
        }
        if (lead == 0xED && s[1] >= 0xA0) {
            return 0;           /* surrogate U+D800 - U+DFFF */
        }
        expected = 2;
    }
    else if (lead < 0xF5) {
        /* F0 90 80 80 - F4 8F BF BF: U+10000 - U+10FFFF */
        if (lead == 0xF0 && s[1] < 0x90) {
            return 0;           /* overlong encoding of U+0000 - U+FFFF */
        }
        if (lead == 0xF4 && s[1] >= 0x90) {
            return 0;           /* above U+10FFFF */
        }
        expected = 3;
    }
    else {
        /* 0xF5 - 0xFF can never appear in UTF-8 */
        return 0;
    }

    for (i = 1; i <= expected; i++) {
        if (s[i] < 0x80 || s[i] >= 0xC0) {
            return 0;
        }
    }
    return expected + 1;
}

529

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

530

static int

531

ensure_utf8(char *line, struct tok_state *tok)

Aug 4, 2002

Recorded merge of revisions 81029 via svnmerge from

Aug 4, 2002

532

{

May 9, 2010

May 9, 2010

533

int badchar = 0;

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

534

unsigned char *c;

535

int length;

536

for (c = (unsigned char *)line; *c; c += length) {

537

if (!(length = valid_utf8(c))) {

538

badchar = *c;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

539

break;

540

}

541

}

542

if (badchar) {

543

/* Need to add 1 to the line number, since this line

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

544

has not been counted, yet. */

Apr 25, 2011

Revert bb62908896fe, but keep the test

Apr 25, 2011

545

PyErr_Format(PyExc_SyntaxError,

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

546

"Non-UTF-8 code starting with '\\x%.2x' "

547

"in file %U on line %i, "

548

"but no encoding declared; "

Jul 30, 2021

Update URLs in comments and metadata to use HTTPS (GH-27458)

Jul 30, 2021

549

"see https://python.org/dev/peps/pep-0263/ for details",

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

550

badchar, tok->filename, tok->lineno + 1);

551

return 0;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

552

}

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

553

return 1;

Aug 4, 2002

Merge p3yk branch with the trunk up to revision 45595. This breaks a …

Aug 4, 2002

554

}

555

556

/* Fetch a byte from TOK, using the string buffer. */

557

Apr 21, 2006

Apr 21, 2006

558

static int

559

buf_getc(struct tok_state *tok) {

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

560

return Py_CHARMASK(*tok->str++);

Aug 4, 2002

Merge p3yk branch with the trunk up to revision 45595. This breaks a …

Aug 4, 2002

561

}

562

563

/* Unfetch a byte from TOK, using the string buffer. */

564

Apr 21, 2006

Apr 21, 2006

565

static void

566

buf_ungetc(int c, struct tok_state *tok) {

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

567

tok->str--;

568

assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */

Aug 4, 2002

Merge p3yk branch with the trunk up to revision 45595. This breaks a …

Aug 4, 2002

569

}

570

571

/* Set the readline function for TOK to ENC. For the string-based

572

tokenizer, this means to just record the encoding. */

573

Apr 21, 2006

Apr 21, 2006

574

static int

575

buf_setreadl(struct tok_state *tok, const char* enc) {

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

576

tok->enc = enc;

577

return 1;

Aug 4, 2002

Recorded merge of revisions 81029 via svnmerge from

Aug 4, 2002

578

}

579

580

/* Return a UTF-8 encoding Python string object from the

581

C byte string STR, which is encoded with ENC. */

582

583

static PyObject *

584

translate_into_utf8(const char* str, const char* enc) {

May 9, 2010

May 9, 2010

585

PyObject *utf8;

586

PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL);

587

if (buf == NULL)

588

return NULL;

589

utf8 = PyUnicode_AsUTF8String(buf);

590

Py_DECREF(buf);

591

return utf8;

Aug 4, 2002

Aug 4, 2002

592

}

593

Nov 13, 2009

Issue #9566: Fix compiler warning on Windows 64-bit

Nov 13, 2009

594

595

static char *

596

translate_newlines(const char *s, int exec_input, struct tok_state *tok) {

Jun 4, 2013

Jun 4, 2013

597

int skip_next_lf = 0;

598

size_t needed_length = strlen(s) + 2, final_length;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

599

char *buf, *current;

600

char c = '\0';

Dec 1, 2020

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Dec 1, 2020

601

buf = PyMem_Malloc(needed_length);

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

602

if (buf == NULL) {

603

tok->done = E_NOMEM;

604

return NULL;

605

}

606

for (current = buf; *s; s++, current++) {

607

c = *s;

608

if (skip_next_lf) {

609

skip_next_lf = 0;

610

if (c == '\n') {

611

c = *++s;

612

if (!c)

613

break;

614

}

615

}

616

if (c == '\r') {

617

skip_next_lf = 1;

618

c = '\n';

619

}

620

*current = c;

621

}

622

/* If this is exec input, add a newline to the end of the string if

623

there isn't one already. */

624

if (exec_input && c != '\n') {

625

*current = '\n';

626

current++;

627

}

628

*current = '\0';

629

final_length = current - buf + 1;

Mar 19, 2019

bpo-36367: Free buffer if realloc fails in tokenize.c (GH-12442)

Mar 19, 2019

630

if (final_length < needed_length && final_length) {

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

631

/* should never fail */

Dec 1, 2020

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Dec 1, 2020

632

char* result = PyMem_Realloc(buf, final_length);

Mar 19, 2019

bpo-36367: Free buffer if realloc fails in tokenize.c (GH-12442)

Mar 19, 2019

633

if (result == NULL) {

Dec 1, 2020

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Dec 1, 2020

634

PyMem_Free(buf);

Mar 19, 2019

bpo-36367: Free buffer if realloc fails in tokenize.c (GH-12442)

Mar 19, 2019

635

}

636

buf = result;

637

}

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

638

return buf;

Nov 13, 2009

Nov 13, 2009

639

}

640

Aug 4, 2002

closes bpo-39721: Fix constness of members of tok_state struct. (GH-1…

Aug 4, 2002

641

/* Decode a byte string STR for use as the buffer of TOK.

642

Look for encoding declarations inside STR, and record them

643

inside TOK. */

644

Feb 28, 2020

Feb 28, 2020

645

static char *

Nov 13, 2009

Nov 13, 2009

646

decode_str(const char *input, int single, struct tok_state *tok)

Aug 4, 2002

Recorded merge of revisions 81029 via svnmerge from

Aug 4, 2002

647

{

May 9, 2010

May 9, 2010

648

PyObject* utf8 = NULL;

Feb 28, 2020

closes bpo-39721: Fix constness of members of tok_state struct. (GH-1…

Feb 28, 2020

649

char *str;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

650

const char *s;

651

const char *newl[2] = {NULL, NULL};

652

int lineno = 0;

653

tok->input = str = translate_newlines(input, single, tok);

654

if (str == NULL)

655

return NULL;

656

tok->enc = NULL;

657

tok->str = str;

658

if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))

659

return error_ret(tok);

660

str = tok->str; /* string after BOM if any */

661

assert(str);

662

if (tok->enc != NULL) {

663

utf8 = translate_into_utf8(str, tok->enc);

664

if (utf8 == NULL)

665

return error_ret(tok);

666

str = PyBytes_AsString(utf8);

667

}

668

for (s = str;; s++) {

669

if (*s == '\0') break;

670

else if (*s == '\n') {

671

assert(lineno < 2);

672

newl[lineno] = s;

673

lineno++;

674

if (lineno == 2) break;

675

}

676

}

677

tok->enc = NULL;

678

/* need to check line 1 and 2 separately since check_coding_spec

679

assumes a single line as input */

680

if (newl[0]) {

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

681

if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) {

682

return NULL;

683

}

684

if (tok->enc == NULL && tok->decoding_state != STATE_NORMAL && newl[1]) {

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

685

if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],

686

tok, buf_setreadl))

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

687

return NULL;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

688

}

689

}

690

if (tok->enc != NULL) {

691

assert(utf8 == NULL);

692

utf8 = translate_into_utf8(str, tok->enc);

693

if (utf8 == NULL)

694

return error_ret(tok);

695

str = PyBytes_AS_STRING(utf8);

696

}

697

assert(tok->decoding_buffer == NULL);

698

tok->decoding_buffer = utf8; /* CAUTION */

699

return str;

Aug 4, 2002

Aug 4, 2002

700

}

701

Oct 14, 1990

bpo-45434: Mark the PyTokenizer C API as private (GH-28924)

Oct 14, 1990

702

/* Set up tokenizer for string */

703

704

struct tok_state *

Oct 13, 2021

Oct 13, 2021

705

_PyTokenizer_FromString(const char *str, int exec_input)

Oct 14, 1990

Recorded merge of revisions 81029 via svnmerge from

Oct 14, 1990

706

{

May 9, 2010

May 9, 2010

707

struct tok_state *tok = tok_new();

Feb 28, 2020

closes bpo-39721: Fix constness of members of tok_state struct. (GH-1…

Feb 28, 2020

708

char *decoded;

709

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

710

if (tok == NULL)

711

return NULL;

Feb 28, 2020

closes bpo-39721: Fix constness of members of tok_state struct. (GH-1…

Feb 28, 2020

712

decoded = decode_str(str, exec_input, tok);

713

if (decoded == NULL) {

Oct 13, 2021

bpo-45434: Mark the PyTokenizer C API as private (GH-28924)

Oct 13, 2021

714

_PyTokenizer_Free(tok);

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

715

return NULL;

716

}

717

Feb 28, 2020

closes bpo-39721: Fix constness of members of tok_state struct. (GH-1…

Feb 28, 2020

718

tok->buf = tok->cur = tok->inp = decoded;

719

tok->end = decoded;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

720

return tok;

Oct 14, 1990

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Oct 14, 1990

721

}

722

Mar 28, 2021

Mar 28, 2021

723

/* Set up tokenizer for UTF-8 string */

724

Mar 2, 2009

ignore the coding cookie in compile(), exec(), and eval() if the sour…

Mar 2, 2009

725

struct tok_state *

Oct 13, 2021

bpo-45434: Mark the PyTokenizer C API as private (GH-28924)

Oct 13, 2021

726

_PyTokenizer_FromUTF8(const char *str, int exec_input)

Mar 2, 2009

ignore the coding cookie in compile(), exec(), and eval() if the sour…

Mar 2, 2009

727

{

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

728

struct tok_state *tok = tok_new();

Feb 28, 2020

closes bpo-39721: Fix constness of members of tok_state struct. (GH-1…

Feb 28, 2020

729

char *translated;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

730

if (tok == NULL)

731

return NULL;

Feb 28, 2020

closes bpo-39721: Fix constness of members of tok_state struct. (GH-1…

Feb 28, 2020

732

tok->input = translated = translate_newlines(str, exec_input, tok);

733

if (translated == NULL) {

Oct 13, 2021

bpo-45434: Mark the PyTokenizer C API as private (GH-28924)

Oct 13, 2021

734

_PyTokenizer_Free(tok);

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

735

return NULL;

736

}

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

737

tok->decoding_state = STATE_NORMAL;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

738

tok->enc = NULL;

Feb 28, 2020

closes bpo-39721: Fix constness of members of tok_state struct. (GH-1…

Feb 28, 2020

739

tok->str = translated;

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

740

tok->encoding = new_string("utf-8", 5, tok);

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

741

if (!tok->encoding) {

Oct 13, 2021

bpo-45434: Mark the PyTokenizer C API as private (GH-28924)

Oct 13, 2021

742

_PyTokenizer_Free(tok);

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

743

return NULL;

744

}

745

Feb 28, 2020

closes bpo-39721: Fix constness of members of tok_state struct. (GH-1…

Feb 28, 2020

746

tok->buf = tok->cur = tok->inp = translated;

747

tok->end = translated;

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

748

return tok;

Mar 2, 2009

ignore the coding cookie in compile(), exec(), and eval() if the sour…

Mar 2, 2009

749

}

750

Jul 27, 1991

Completely ignore lines with only a newline token on them, except

Jul 27, 1991

751

/* Set up tokenizer for file */

Oct 14, 1990

bpo-45434: Mark the PyTokenizer C API as private (GH-28924)

Oct 14, 1990

752

753

struct tok_state *

Oct 13, 2021

Oct 13, 2021

754

_PyTokenizer_FromFile(FILE *fp, const char* enc,

755

const char *ps1, const char *ps2)

Oct 14, 1990

Recorded merge of revisions 81029 via svnmerge from

Oct 14, 1990

756

{

May 9, 2010

May 9, 2010

757

struct tok_state *tok = tok_new();

758

if (tok == NULL)

759

return NULL;

Dec 1, 2020

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Dec 1, 2020

760

if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {

Oct 13, 2021

bpo-45434: Mark the PyTokenizer C API as private (GH-28924)

Oct 13, 2021

761

_PyTokenizer_Free(tok);

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

762

return NULL;

763

}

764

tok->cur = tok->inp = tok->buf;

765

tok->end = tok->buf + BUFSIZ;

766

tok->fp = fp;

767

tok->prompt = ps1;

768

tok->nextprompt = ps2;

769

if (enc != NULL) {

770

/* Must copy encoding declaration since it

771

gets copied into the parse tree. */

Mar 28, 2021

bpo-25643: Refactor the C tokenizer into smaller, logical units (GH-2…

Mar 28, 2021

772

tok->encoding = new_string(enc, strlen(enc), tok);

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

773

if (!tok->encoding) {

Oct 13, 2021

bpo-45434: Mark the PyTokenizer C API as private (GH-28924)

Oct 13, 2021

774

_PyTokenizer_Free(tok);

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

775

return NULL;

776

}

777

tok->decoding_state = STATE_NORMAL;

778

}

779

return tok;

Oct 14, 1990

bpo-45434: Mark the PyTokenizer C API as private (GH-28924)

Oct 14, 1990

780

}

781

782

/* Free a tok_state structure */

783

784

void

Oct 13, 2021

Oct 13, 2021

785

_PyTokenizer_Free(struct tok_state *tok)

Oct 14, 1990

bpo-43410: Fix crash in the parser when producing syntax errors when …

Oct 14, 1990

786

{

Mar 14, 2021

Mar 14, 2021

787

if (tok->encoding != NULL) {

Dec 1, 2020

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Dec 1, 2020

788

PyMem_Free(tok->encoding);

Mar 14, 2021

bpo-43410: Fix crash in the parser when producing syntax errors when …

Mar 14, 2021

789

}

May 9, 2010

Recorded merge of revisions 81029 via svnmerge from

May 9, 2010

790

Py_XDECREF(tok->decoding_readline);

791

Py_XDECREF(tok->decoding_buffer);

Apr 4, 2011

Issue #10785: Store the filename as Unicode in the Python parser.

Apr 4, 2011

792

Py_XDECREF(tok->filename);

Mar 14, 2021

bpo-43410: Fix crash in the parser when producing syntax errors when …

Mar 14, 2021

793

if (tok->fp != NULL && tok->buf != NULL) {

Dec 1, 2020

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Dec 1, 2020

794

PyMem_Free(tok->buf);

Mar 14, 2021

bpo-43410: Fix crash in the parser when producing syntax errors when …

Mar 14, 2021

795

}

796

if (tok->input) {

Dec 1, 2020

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Dec 1, 2020

797

PyMem_Free(tok->input);

Mar 14, 2021

bpo-43410: Fix crash in the parser when producing syntax errors when …

Mar 14, 2021

798

}

799

if (tok->interactive_src_start != NULL) {

800

PyMem_Free(tok->interactive_src_start);

801

}

Dec 1, 2020

bpo-42519: Replace PyMem_MALLOC() with PyMem_Malloc() (GH-23586)

Dec 1, 2020

802

PyMem_Free(tok);

Oct 14, 1990