Skip to content

Commit c70ab02

Browse files
authored
bpo-36365: Rewrite structseq_repr() using _PyUnicodeWriter (GH-12440)
No longer limit repr(structseq) to 512 bytes. Use _PyUnicodeWriter for better performance and to write Unicode directly, rather than encoding the repr() value to UTF-8 and then decoding it from UTF-8.
1 parent fd23cfa commit c70ab02

File tree

2 files changed

+69
-58
lines changed

2 files changed

+69
-58
lines changed
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
repr(structseq) is no longer limited to 512 bytes.

Objects/structseq.c

Lines changed: 68 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -168,78 +168,88 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict)
168168
static PyObject *
169169
structseq_repr(PyStructSequence *obj)
170170
{
171-
/* buffer and type size were chosen well considered. */
172-
#define REPR_BUFFER_SIZE 512
173-
#define TYPE_MAXSIZE 100
174-
175171
PyTypeObject *typ = Py_TYPE(obj);
176-
Py_ssize_t i;
177-
int removelast = 0;
178-
Py_ssize_t len;
179-
char buf[REPR_BUFFER_SIZE];
180-
char *endofbuf, *pbuf = buf;
181-
182-
/* pointer to end of writeable buffer; safes space for "...)\0" */
183-
endofbuf= &buf[REPR_BUFFER_SIZE-5];
184-
185-
/* "typename(", limited to TYPE_MAXSIZE */
186-
len = strlen(typ->tp_name);
187-
len = Py_MIN(len, TYPE_MAXSIZE);
188-
memcpy(pbuf, typ->tp_name, len);
189-
pbuf += len;
190-
*pbuf++ = '(';
191-
192-
for (i=0; i < VISIBLE_SIZE(obj); i++) {
193-
PyObject *val, *repr;
194-
const char *cname, *crepr;
195-
196-
cname = typ->tp_members[i].name;
197-
if (cname == NULL) {
172+
_PyUnicodeWriter writer;
173+
174+
/* Write "typename(" */
175+
PyObject *type_name = PyUnicode_DecodeUTF8(typ->tp_name,
176+
strlen(typ->tp_name),
177+
NULL);
178+
if (type_name == NULL) {
179+
goto error;
180+
}
181+
182+
_PyUnicodeWriter_Init(&writer);
183+
writer.overallocate = 1;
184+
/* count 5 characters per item: "x=1, " */
185+
writer.min_length = (PyUnicode_GET_LENGTH(type_name) + 1
186+
+ VISIBLE_SIZE(obj) * 5 + 1);
187+
188+
if (_PyUnicodeWriter_WriteStr(&writer, type_name) < 0) {
189+
Py_DECREF(type_name);
190+
goto error;
191+
}
192+
Py_DECREF(type_name);
193+
194+
if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0) {
195+
goto error;
196+
}
197+
198+
for (Py_ssize_t i=0; i < VISIBLE_SIZE(obj); i++) {
199+
if (i > 0) {
200+
/* Write ", " */
201+
if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) {
202+
goto error;
203+
}
204+
}
205+
206+
/* Write "name=repr" */
207+
const char *name_utf8 = typ->tp_members[i].name;
208+
if (name_utf8 == NULL) {
198209
PyErr_Format(PyExc_SystemError, "In structseq_repr(), member %zd name is NULL"
199210
" for type %.500s", i, typ->tp_name);
200-
return NULL;
211+
goto error;
201212
}
202-
val = PyStructSequence_GET_ITEM(obj, i);
203-
repr = PyObject_Repr(val);
204-
if (repr == NULL)
205-
return NULL;
206-
crepr = PyUnicode_AsUTF8(repr);
207-
if (crepr == NULL) {
208-
Py_DECREF(repr);
209-
return NULL;
213+
214+
PyObject *name = PyUnicode_DecodeUTF8(name_utf8, strlen(name_utf8), NULL);
215+
if (name == NULL) {
216+
goto error;
217+
}
218+
if (_PyUnicodeWriter_WriteStr(&writer, name) < 0) {
219+
Py_DECREF(name);
220+
goto error;
210221
}
222+
Py_DECREF(name);
211223

212-
/* + 3: keep space for "=" and ", " */
213-
len = strlen(cname) + strlen(crepr) + 3;
214-
if ((pbuf+len) <= endofbuf) {
215-
strcpy(pbuf, cname);
216-
pbuf += strlen(cname);
217-
*pbuf++ = '=';
218-
strcpy(pbuf, crepr);
219-
pbuf += strlen(crepr);
220-
*pbuf++ = ',';
221-
*pbuf++ = ' ';
222-
removelast = 1;
223-
Py_DECREF(repr);
224+
if (_PyUnicodeWriter_WriteChar(&writer, '=') < 0) {
225+
goto error;
224226
}
225-
else {
226-
strcpy(pbuf, "...");
227-
pbuf += 3;
228-
removelast = 0;
227+
228+
PyObject *value = PyStructSequence_GET_ITEM(obj, i);
229+
assert(value != NULL);
230+
PyObject *repr = PyObject_Repr(value);
231+
if (repr == NULL) {
232+
goto error;
233+
}
234+
if (_PyUnicodeWriter_WriteStr(&writer, repr) < 0) {
229235
Py_DECREF(repr);
230-
break;
236+
goto error;
231237
}
238+
Py_DECREF(repr);
232239
}
233-
if (removelast) {
234-
/* overwrite last ", " */
235-
pbuf-=2;
240+
241+
if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0) {
242+
goto error;
236243
}
237-
*pbuf++ = ')';
238-
*pbuf = '\0';
239244

240-
return PyUnicode_FromString(buf);
245+
return _PyUnicodeWriter_Finish(&writer);
246+
247+
error:
248+
_PyUnicodeWriter_Dealloc(&writer);
249+
return NULL;
241250
}
242251

252+
243253
static PyObject *
244254
structseq_reduce(PyStructSequence* self, PyObject *Py_UNUSED(ignored))
245255
{

0 commit comments

Comments
 (0)