Skip to content

Commit 9c93a13

Browse files
Statically allocate and initialize the single character latin-1 unicode objects.
1 parent c7e7602 commit 9c93a13

4 files changed

Lines changed: 295 additions & 48 deletions

File tree

Include/internal/pycore_global_objects.h

Lines changed: 287 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,27 +36,40 @@ extern "C" {
3636

3737
/* unicode objects */
3838

39-
#define _PyASCII_INIT(len) \
39+
/* Static initializer for a PyASCIIObject header.
   `len` is stored in .length and `ASCII` in the .state.ascii flag; the
   object is marked as 1-byte kind, compact and ready, .hash starts at -1,
   and .ob_base comes from _PyObject_IMMORTAL_INIT(&PyUnicode_Type). */
#define _PyASCII_BYTE_INIT(len, ASCII) \
4040
{ \
4141
.ob_base = _PyObject_IMMORTAL_INIT(&PyUnicode_Type), \
4242
.length = len, \
4343
.hash = -1, \
4444
.state = { \
4545
.kind = PyUnicode_1BYTE_KIND, \
4646
.compact = 1, \
47-
.ascii = 1, \
47+
.ascii = ASCII, \
4848
.ready = 1, \
4949
}, \
5050
}
5151

5252
/* Declares a struct type made of a PyASCIIObject header followed by
   len + 1 bytes of character data (one extra byte beyond `len`).
   In the new version `len` is pasted into the struct tag, so it must be a
   single token that yields a valid identifier (e.g. 0 or 1). */
#define _PyASCIIObject_FULL(len) \
53-
struct _PyASCIIObject { \
53+
struct _PyASCIIObject_ ## len { \
5454
PyASCIIObject ascii; \
5555
uint8_t data[len + 1]; \
5656
}
5757
/* Initializer for a _PyASCIIObject_FULL(...) struct from a string literal.
   The length is Py_ARRAY_LENGTH(LITERAL) - 1 (the array length minus one),
   the ascii flag is set to 1, and the literal itself initializes .data. */
#define _PyASCIIObject_FULL_INIT(LITERAL) \
5858
{ \
59-
.ascii = _PyASCII_INIT(Py_ARRAY_LENGTH(LITERAL) - 1), \
59+
.ascii = _PyASCII_BYTE_INIT(Py_ARRAY_LENGTH((LITERAL)) - 1, 1), \
60+
.data = LITERAL, \
61+
}
62+
63+
/* Declares a struct type made of a PyCompactUnicodeObject header followed
   by len + 1 bytes of character data.  `len` is pasted into the struct
   tag, so it must be a single token that yields a valid identifier. */
#define _PyLatin1Object_FULL(len) \
64+
struct _PyLatin1Object_ ## len { \
65+
PyCompactUnicodeObject compact; \
66+
uint8_t data[len + 1]; \
67+
}
68+
/* Initializer for a _PyLatin1Object_FULL(...) struct from a string literal.
   Only the embedded ._base header is initialized explicitly, with length
   Py_ARRAY_LENGTH(LITERAL) - 1 and the ascii flag set to 0; the literal
   itself initializes .data. */
#define _PyLatin1Object_FULL_INIT(LITERAL) \
69+
{ \
70+
.compact = { \
71+
._base = _PyASCII_BYTE_INIT(Py_ARRAY_LENGTH((LITERAL)) - 1, 0), \
72+
}, \
6073
.data = LITERAL, \
6174
}
6275

@@ -91,7 +104,10 @@ struct _Py_global_objects {
91104

92105
// The empty Unicode object is a singleton to improve performance.
93106
_PyASCIIObject_FULL(0) unicode_empty;
94-
PyASCIIObject *unicode_latin1[256];
107+
/* Single character Unicode strings in the Latin-1 range are being
108+
shared as well. */
109+
_PyASCIIObject_FULL(1) unicode_ascii[128];
110+
_PyLatin1Object_FULL(1) unicode_latin1[128];
95111
} singletons;
96112
};
97113

@@ -363,13 +379,279 @@ struct _Py_global_objects {
363379
}, \
364380
\
365381
.unicode_empty = _PyASCIIObject_FULL_INIT(""), \
382+
.unicode_ascii = { \
383+
_PyASCIIObject_FULL_INIT("\x00"), \
384+
_PyASCIIObject_FULL_INIT("\x01"), \
385+
_PyASCIIObject_FULL_INIT("\x02"), \
386+
_PyASCIIObject_FULL_INIT("\x03"), \
387+
_PyASCIIObject_FULL_INIT("\x04"), \
388+
_PyASCIIObject_FULL_INIT("\x05"), \
389+
_PyASCIIObject_FULL_INIT("\x06"), \
390+
_PyASCIIObject_FULL_INIT("\x07"), \
391+
_PyASCIIObject_FULL_INIT("\x08"), \
392+
_PyASCIIObject_FULL_INIT("\x09"), \
393+
_PyASCIIObject_FULL_INIT("\x0a"), \
394+
_PyASCIIObject_FULL_INIT("\x0b"), \
395+
_PyASCIIObject_FULL_INIT("\x0c"), \
396+
_PyASCIIObject_FULL_INIT("\x0d"), \
397+
_PyASCIIObject_FULL_INIT("\x0e"), \
398+
_PyASCIIObject_FULL_INIT("\x0f"), \
399+
_PyASCIIObject_FULL_INIT("\x10"), \
400+
_PyASCIIObject_FULL_INIT("\x11"), \
401+
_PyASCIIObject_FULL_INIT("\x12"), \
402+
_PyASCIIObject_FULL_INIT("\x13"), \
403+
_PyASCIIObject_FULL_INIT("\x14"), \
404+
_PyASCIIObject_FULL_INIT("\x15"), \
405+
_PyASCIIObject_FULL_INIT("\x16"), \
406+
_PyASCIIObject_FULL_INIT("\x17"), \
407+
_PyASCIIObject_FULL_INIT("\x18"), \
408+
_PyASCIIObject_FULL_INIT("\x19"), \
409+
_PyASCIIObject_FULL_INIT("\x1a"), \
410+
_PyASCIIObject_FULL_INIT("\x1b"), \
411+
_PyASCIIObject_FULL_INIT("\x1c"), \
412+
_PyASCIIObject_FULL_INIT("\x1d"), \
413+
_PyASCIIObject_FULL_INIT("\x1e"), \
414+
_PyASCIIObject_FULL_INIT("\x1f"), \
415+
_PyASCIIObject_FULL_INIT("\x20"), \
416+
_PyASCIIObject_FULL_INIT("\x21"), \
417+
_PyASCIIObject_FULL_INIT("\x22"), \
418+
_PyASCIIObject_FULL_INIT("\x23"), \
419+
_PyASCIIObject_FULL_INIT("\x24"), \
420+
_PyASCIIObject_FULL_INIT("\x25"), \
421+
_PyASCIIObject_FULL_INIT("\x26"), \
422+
_PyASCIIObject_FULL_INIT("\x27"), \
423+
_PyASCIIObject_FULL_INIT("\x28"), \
424+
_PyASCIIObject_FULL_INIT("\x29"), \
425+
_PyASCIIObject_FULL_INIT("\x2a"), \
426+
_PyASCIIObject_FULL_INIT("\x2b"), \
427+
_PyASCIIObject_FULL_INIT("\x2c"), \
428+
_PyASCIIObject_FULL_INIT("\x2d"), \
429+
_PyASCIIObject_FULL_INIT("\x2e"), \
430+
_PyASCIIObject_FULL_INIT("\x2f"), \
431+
_PyASCIIObject_FULL_INIT("\x30"), \
432+
_PyASCIIObject_FULL_INIT("\x31"), \
433+
_PyASCIIObject_FULL_INIT("\x32"), \
434+
_PyASCIIObject_FULL_INIT("\x33"), \
435+
_PyASCIIObject_FULL_INIT("\x34"), \
436+
_PyASCIIObject_FULL_INIT("\x35"), \
437+
_PyASCIIObject_FULL_INIT("\x36"), \
438+
_PyASCIIObject_FULL_INIT("\x37"), \
439+
_PyASCIIObject_FULL_INIT("\x38"), \
440+
_PyASCIIObject_FULL_INIT("\x39"), \
441+
_PyASCIIObject_FULL_INIT("\x3a"), \
442+
_PyASCIIObject_FULL_INIT("\x3b"), \
443+
_PyASCIIObject_FULL_INIT("\x3c"), \
444+
_PyASCIIObject_FULL_INIT("\x3d"), \
445+
_PyASCIIObject_FULL_INIT("\x3e"), \
446+
_PyASCIIObject_FULL_INIT("\x3f"), \
447+
_PyASCIIObject_FULL_INIT("\x40"), \
448+
_PyASCIIObject_FULL_INIT("\x41"), \
449+
_PyASCIIObject_FULL_INIT("\x42"), \
450+
_PyASCIIObject_FULL_INIT("\x43"), \
451+
_PyASCIIObject_FULL_INIT("\x44"), \
452+
_PyASCIIObject_FULL_INIT("\x45"), \
453+
_PyASCIIObject_FULL_INIT("\x46"), \
454+
_PyASCIIObject_FULL_INIT("\x47"), \
455+
_PyASCIIObject_FULL_INIT("\x48"), \
456+
_PyASCIIObject_FULL_INIT("\x49"), \
457+
_PyASCIIObject_FULL_INIT("\x4a"), \
458+
_PyASCIIObject_FULL_INIT("\x4b"), \
459+
_PyASCIIObject_FULL_INIT("\x4c"), \
460+
_PyASCIIObject_FULL_INIT("\x4d"), \
461+
_PyASCIIObject_FULL_INIT("\x4e"), \
462+
_PyASCIIObject_FULL_INIT("\x4f"), \
463+
_PyASCIIObject_FULL_INIT("\x50"), \
464+
_PyASCIIObject_FULL_INIT("\x51"), \
465+
_PyASCIIObject_FULL_INIT("\x52"), \
466+
_PyASCIIObject_FULL_INIT("\x53"), \
467+
_PyASCIIObject_FULL_INIT("\x54"), \
468+
_PyASCIIObject_FULL_INIT("\x55"), \
469+
_PyASCIIObject_FULL_INIT("\x56"), \
470+
_PyASCIIObject_FULL_INIT("\x57"), \
471+
_PyASCIIObject_FULL_INIT("\x58"), \
472+
_PyASCIIObject_FULL_INIT("\x59"), \
473+
_PyASCIIObject_FULL_INIT("\x5a"), \
474+
_PyASCIIObject_FULL_INIT("\x5b"), \
475+
_PyASCIIObject_FULL_INIT("\x5c"), \
476+
_PyASCIIObject_FULL_INIT("\x5d"), \
477+
_PyASCIIObject_FULL_INIT("\x5e"), \
478+
_PyASCIIObject_FULL_INIT("\x5f"), \
479+
_PyASCIIObject_FULL_INIT("\x60"), \
480+
_PyASCIIObject_FULL_INIT("\x61"), \
481+
_PyASCIIObject_FULL_INIT("\x62"), \
482+
_PyASCIIObject_FULL_INIT("\x63"), \
483+
_PyASCIIObject_FULL_INIT("\x64"), \
484+
_PyASCIIObject_FULL_INIT("\x65"), \
485+
_PyASCIIObject_FULL_INIT("\x66"), \
486+
_PyASCIIObject_FULL_INIT("\x67"), \
487+
_PyASCIIObject_FULL_INIT("\x68"), \
488+
_PyASCIIObject_FULL_INIT("\x69"), \
489+
_PyASCIIObject_FULL_INIT("\x6a"), \
490+
_PyASCIIObject_FULL_INIT("\x6b"), \
491+
_PyASCIIObject_FULL_INIT("\x6c"), \
492+
_PyASCIIObject_FULL_INIT("\x6d"), \
493+
_PyASCIIObject_FULL_INIT("\x6e"), \
494+
_PyASCIIObject_FULL_INIT("\x6f"), \
495+
_PyASCIIObject_FULL_INIT("\x70"), \
496+
_PyASCIIObject_FULL_INIT("\x71"), \
497+
_PyASCIIObject_FULL_INIT("\x72"), \
498+
_PyASCIIObject_FULL_INIT("\x73"), \
499+
_PyASCIIObject_FULL_INIT("\x74"), \
500+
_PyASCIIObject_FULL_INIT("\x75"), \
501+
_PyASCIIObject_FULL_INIT("\x76"), \
502+
_PyASCIIObject_FULL_INIT("\x77"), \
503+
_PyASCIIObject_FULL_INIT("\x78"), \
504+
_PyASCIIObject_FULL_INIT("\x79"), \
505+
_PyASCIIObject_FULL_INIT("\x7a"), \
506+
_PyASCIIObject_FULL_INIT("\x7b"), \
507+
_PyASCIIObject_FULL_INIT("\x7c"), \
508+
_PyASCIIObject_FULL_INIT("\x7d"), \
509+
_PyASCIIObject_FULL_INIT("\x7e"), \
510+
_PyASCIIObject_FULL_INIT("\x7f"), \
511+
}, \
512+
.unicode_latin1 = { \
513+
_PyLatin1Object_FULL_INIT("\x80"), \
514+
_PyLatin1Object_FULL_INIT("\x81"), \
515+
_PyLatin1Object_FULL_INIT("\x82"), \
516+
_PyLatin1Object_FULL_INIT("\x83"), \
517+
_PyLatin1Object_FULL_INIT("\x84"), \
518+
_PyLatin1Object_FULL_INIT("\x85"), \
519+
_PyLatin1Object_FULL_INIT("\x86"), \
520+
_PyLatin1Object_FULL_INIT("\x87"), \
521+
_PyLatin1Object_FULL_INIT("\x88"), \
522+
_PyLatin1Object_FULL_INIT("\x89"), \
523+
_PyLatin1Object_FULL_INIT("\x8a"), \
524+
_PyLatin1Object_FULL_INIT("\x8b"), \
525+
_PyLatin1Object_FULL_INIT("\x8c"), \
526+
_PyLatin1Object_FULL_INIT("\x8d"), \
527+
_PyLatin1Object_FULL_INIT("\x8e"), \
528+
_PyLatin1Object_FULL_INIT("\x8f"), \
529+
_PyLatin1Object_FULL_INIT("\x90"), \
530+
_PyLatin1Object_FULL_INIT("\x91"), \
531+
_PyLatin1Object_FULL_INIT("\x92"), \
532+
_PyLatin1Object_FULL_INIT("\x93"), \
533+
_PyLatin1Object_FULL_INIT("\x94"), \
534+
_PyLatin1Object_FULL_INIT("\x95"), \
535+
_PyLatin1Object_FULL_INIT("\x96"), \
536+
_PyLatin1Object_FULL_INIT("\x97"), \
537+
_PyLatin1Object_FULL_INIT("\x98"), \
538+
_PyLatin1Object_FULL_INIT("\x99"), \
539+
_PyLatin1Object_FULL_INIT("\x9a"), \
540+
_PyLatin1Object_FULL_INIT("\x9b"), \
541+
_PyLatin1Object_FULL_INIT("\x9c"), \
542+
_PyLatin1Object_FULL_INIT("\x9d"), \
543+
_PyLatin1Object_FULL_INIT("\x9e"), \
544+
_PyLatin1Object_FULL_INIT("\x9f"), \
545+
_PyLatin1Object_FULL_INIT("\xa0"), \
546+
_PyLatin1Object_FULL_INIT("\xa1"), \
547+
_PyLatin1Object_FULL_INIT("\xa2"), \
548+
_PyLatin1Object_FULL_INIT("\xa3"), \
549+
_PyLatin1Object_FULL_INIT("\xa4"), \
550+
_PyLatin1Object_FULL_INIT("\xa5"), \
551+
_PyLatin1Object_FULL_INIT("\xa6"), \
552+
_PyLatin1Object_FULL_INIT("\xa7"), \
553+
_PyLatin1Object_FULL_INIT("\xa8"), \
554+
_PyLatin1Object_FULL_INIT("\xa9"), \
555+
_PyLatin1Object_FULL_INIT("\xaa"), \
556+
_PyLatin1Object_FULL_INIT("\xab"), \
557+
_PyLatin1Object_FULL_INIT("\xac"), \
558+
_PyLatin1Object_FULL_INIT("\xad"), \
559+
_PyLatin1Object_FULL_INIT("\xae"), \
560+
_PyLatin1Object_FULL_INIT("\xaf"), \
561+
_PyLatin1Object_FULL_INIT("\xb0"), \
562+
_PyLatin1Object_FULL_INIT("\xb1"), \
563+
_PyLatin1Object_FULL_INIT("\xb2"), \
564+
_PyLatin1Object_FULL_INIT("\xb3"), \
565+
_PyLatin1Object_FULL_INIT("\xb4"), \
566+
_PyLatin1Object_FULL_INIT("\xb5"), \
567+
_PyLatin1Object_FULL_INIT("\xb6"), \
568+
_PyLatin1Object_FULL_INIT("\xb7"), \
569+
_PyLatin1Object_FULL_INIT("\xb8"), \
570+
_PyLatin1Object_FULL_INIT("\xb9"), \
571+
_PyLatin1Object_FULL_INIT("\xba"), \
572+
_PyLatin1Object_FULL_INIT("\xbb"), \
573+
_PyLatin1Object_FULL_INIT("\xbc"), \
574+
_PyLatin1Object_FULL_INIT("\xbd"), \
575+
_PyLatin1Object_FULL_INIT("\xbe"), \
576+
_PyLatin1Object_FULL_INIT("\xbf"), \
577+
_PyLatin1Object_FULL_INIT("\xc0"), \
578+
_PyLatin1Object_FULL_INIT("\xc1"), \
579+
_PyLatin1Object_FULL_INIT("\xc2"), \
580+
_PyLatin1Object_FULL_INIT("\xc3"), \
581+
_PyLatin1Object_FULL_INIT("\xc4"), \
582+
_PyLatin1Object_FULL_INIT("\xc5"), \
583+
_PyLatin1Object_FULL_INIT("\xc6"), \
584+
_PyLatin1Object_FULL_INIT("\xc7"), \
585+
_PyLatin1Object_FULL_INIT("\xc8"), \
586+
_PyLatin1Object_FULL_INIT("\xc9"), \
587+
_PyLatin1Object_FULL_INIT("\xca"), \
588+
_PyLatin1Object_FULL_INIT("\xcb"), \
589+
_PyLatin1Object_FULL_INIT("\xcc"), \
590+
_PyLatin1Object_FULL_INIT("\xcd"), \
591+
_PyLatin1Object_FULL_INIT("\xce"), \
592+
_PyLatin1Object_FULL_INIT("\xcf"), \
593+
_PyLatin1Object_FULL_INIT("\xd0"), \
594+
_PyLatin1Object_FULL_INIT("\xd1"), \
595+
_PyLatin1Object_FULL_INIT("\xd2"), \
596+
_PyLatin1Object_FULL_INIT("\xd3"), \
597+
_PyLatin1Object_FULL_INIT("\xd4"), \
598+
_PyLatin1Object_FULL_INIT("\xd5"), \
599+
_PyLatin1Object_FULL_INIT("\xd6"), \
600+
_PyLatin1Object_FULL_INIT("\xd7"), \
601+
_PyLatin1Object_FULL_INIT("\xd8"), \
602+
_PyLatin1Object_FULL_INIT("\xd9"), \
603+
_PyLatin1Object_FULL_INIT("\xda"), \
604+
_PyLatin1Object_FULL_INIT("\xdb"), \
605+
_PyLatin1Object_FULL_INIT("\xdc"), \
606+
_PyLatin1Object_FULL_INIT("\xdd"), \
607+
_PyLatin1Object_FULL_INIT("\xde"), \
608+
_PyLatin1Object_FULL_INIT("\xdf"), \
609+
_PyLatin1Object_FULL_INIT("\xe0"), \
610+
_PyLatin1Object_FULL_INIT("\xe1"), \
611+
_PyLatin1Object_FULL_INIT("\xe2"), \
612+
_PyLatin1Object_FULL_INIT("\xe3"), \
613+
_PyLatin1Object_FULL_INIT("\xe4"), \
614+
_PyLatin1Object_FULL_INIT("\xe5"), \
615+
_PyLatin1Object_FULL_INIT("\xe6"), \
616+
_PyLatin1Object_FULL_INIT("\xe7"), \
617+
_PyLatin1Object_FULL_INIT("\xe8"), \
618+
_PyLatin1Object_FULL_INIT("\xe9"), \
619+
_PyLatin1Object_FULL_INIT("\xea"), \
620+
_PyLatin1Object_FULL_INIT("\xeb"), \
621+
_PyLatin1Object_FULL_INIT("\xec"), \
622+
_PyLatin1Object_FULL_INIT("\xed"), \
623+
_PyLatin1Object_FULL_INIT("\xee"), \
624+
_PyLatin1Object_FULL_INIT("\xef"), \
625+
_PyLatin1Object_FULL_INIT("\xf0"), \
626+
_PyLatin1Object_FULL_INIT("\xf1"), \
627+
_PyLatin1Object_FULL_INIT("\xf2"), \
628+
_PyLatin1Object_FULL_INIT("\xf3"), \
629+
_PyLatin1Object_FULL_INIT("\xf4"), \
630+
_PyLatin1Object_FULL_INIT("\xf5"), \
631+
_PyLatin1Object_FULL_INIT("\xf6"), \
632+
_PyLatin1Object_FULL_INIT("\xf7"), \
633+
_PyLatin1Object_FULL_INIT("\xf8"), \
634+
_PyLatin1Object_FULL_INIT("\xf9"), \
635+
_PyLatin1Object_FULL_INIT("\xfa"), \
636+
_PyLatin1Object_FULL_INIT("\xfb"), \
637+
_PyLatin1Object_FULL_INIT("\xfc"), \
638+
_PyLatin1Object_FULL_INIT("\xfd"), \
639+
_PyLatin1Object_FULL_INIT("\xfe"), \
640+
_PyLatin1Object_FULL_INIT("\xff"), \
641+
}, \
366642
}, \
367643
}
368644

369645
static inline void
370646
_Py_global_objects_reset(struct _Py_global_objects *objects)
371647
{
372648
_PyUnicode_reset(&objects->singletons.unicode_empty.ascii);
649+
for (int i = 0; i < 128 + 1; i++) {
650+
_PyUnicode_reset(&objects->singletons.unicode_ascii[i].ascii);
651+
}
652+
for (int i = 128; i < 256; i++) {
653+
_PyUnicode_reset(&objects->singletons.unicode_latin1[i].compact._base);
654+
}
373655
}
374656

375657
#ifdef __cplusplus

Include/internal/pycore_unicodeobject.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ extern "C" {
1212
/* runtime lifecycle */
1313

1414
extern void _PyUnicode_InitState(PyInterpreterState *);
15-
extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
1615
extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *);
1716
extern void _PyUnicode_Fini(PyInterpreterState *);
1817

0 commit comments

Comments
 (0)