@@ -36,27 +36,40 @@ extern "C" {
3636
3737/* unicode objects */
3838
/* Static initializer for the header of a compact, ready, 1-byte-kind
   unicode object.
     len   -- value stored in .length
     ASCII -- value stored in .state.ascii (users in this file pass 1 or 0)
   .hash starts at -1 and .ob_base is produced by
   _PyObject_IMMORTAL_INIT(&PyUnicode_Type).
   Note: there must be no space between the macro name and '(' or the
   preprocessor treats this as an object-like macro. */
#define _PyASCII_BYTE_INIT(len, ASCII) \
    { \
        .ob_base = _PyObject_IMMORTAL_INIT(&PyUnicode_Type), \
        .length = (len), \
        .hash = -1, \
        .state = { \
            .kind = PyUnicode_1BYTE_KIND, \
            .compact = 1, \
            .ascii = (ASCII), \
            .ready = 1, \
        }, \
    }
5151
/* Declares the struct type `struct _PyASCIIObject_<len>`: a PyASCIIObject
   header followed by an inline buffer of len + 1 bytes.
   `len` is pasted into the struct tag, so it must be a single token
   (e.g. an integer literal); each distinct length yields a distinct type. */
#define _PyASCIIObject_FULL(len) \
    struct _PyASCIIObject_ ## len { \
        PyASCIIObject ascii; \
        uint8_t data[(len) + 1]; \
    }
/* Static initializer for a struct declared with _PyASCIIObject_FULL().
   LITERAL must be a string literal (an array, not a pointer): its array
   length minus one becomes the stored length, the .ascii header is built
   with the ascii flag set to 1, and the literal itself initializes .data. */
#define _PyASCIIObject_FULL_INIT(LITERAL) \
    { \
        .ascii = _PyASCII_BYTE_INIT(Py_ARRAY_LENGTH((LITERAL)) - 1, 1), \
        .data = LITERAL, \
    }
62+
/* Declares the struct type `struct _PyLatin1Object_<len>`: a
   PyCompactUnicodeObject header followed by an inline buffer of len + 1
   bytes.  `len` is pasted into the struct tag, so it must be a single
   token (e.g. an integer literal). */
#define _PyLatin1Object_FULL(len) \
    struct _PyLatin1Object_ ## len { \
        PyCompactUnicodeObject compact; \
        uint8_t data[(len) + 1]; \
    }
/* Static initializer for a struct declared with _PyLatin1Object_FULL().
   LITERAL must be a string literal (an array, not a pointer): its array
   length minus one becomes the stored length, the embedded ._base header
   is built with the ascii flag set to 0, and the literal itself
   initializes .data.  Other members of .compact are left to default
   (zero) initialization. */
#define _PyLatin1Object_FULL_INIT(LITERAL) \
    { \
        .compact = { \
            ._base = _PyASCII_BYTE_INIT(Py_ARRAY_LENGTH((LITERAL)) - 1, 0), \
        }, \
        .data = LITERAL, \
    }
6275
@@ -91,7 +104,10 @@ struct _Py_global_objects {
91104
92105 // The empty Unicode object is a singleton to improve performance.
93106 _PyASCIIObject_FULL (0 ) unicode_empty ;
94- PyASCIIObject * unicode_latin1 [256 ];
107+ /* Single character Unicode strings in the Latin-1 range are being
108+ shared as well. */
109+ _PyASCIIObject_FULL (1 ) unicode_ascii [128 ];
110+ _PyLatin1Object_FULL (1 ) unicode_latin1 [128 ];
95111 } singletons ;
96112};
97113
@@ -363,13 +379,279 @@ struct _Py_global_objects {
363379 }, \
364380 \
365381 .unicode_empty = _PyASCIIObject_FULL_INIT(""), \
382+ .unicode_ascii = { \
383+ _PyASCIIObject_FULL_INIT("\x00"), \
384+ _PyASCIIObject_FULL_INIT("\x01"), \
385+ _PyASCIIObject_FULL_INIT("\x02"), \
386+ _PyASCIIObject_FULL_INIT("\x03"), \
387+ _PyASCIIObject_FULL_INIT("\x04"), \
388+ _PyASCIIObject_FULL_INIT("\x05"), \
389+ _PyASCIIObject_FULL_INIT("\x06"), \
390+ _PyASCIIObject_FULL_INIT("\x07"), \
391+ _PyASCIIObject_FULL_INIT("\x08"), \
392+ _PyASCIIObject_FULL_INIT("\x09"), \
393+ _PyASCIIObject_FULL_INIT("\x0a"), \
394+ _PyASCIIObject_FULL_INIT("\x0b"), \
395+ _PyASCIIObject_FULL_INIT("\x0c"), \
396+ _PyASCIIObject_FULL_INIT("\x0d"), \
397+ _PyASCIIObject_FULL_INIT("\x0e"), \
398+ _PyASCIIObject_FULL_INIT("\x0f"), \
399+ _PyASCIIObject_FULL_INIT("\x10"), \
400+ _PyASCIIObject_FULL_INIT("\x11"), \
401+ _PyASCIIObject_FULL_INIT("\x12"), \
402+ _PyASCIIObject_FULL_INIT("\x13"), \
403+ _PyASCIIObject_FULL_INIT("\x14"), \
404+ _PyASCIIObject_FULL_INIT("\x15"), \
405+ _PyASCIIObject_FULL_INIT("\x16"), \
406+ _PyASCIIObject_FULL_INIT("\x17"), \
407+ _PyASCIIObject_FULL_INIT("\x18"), \
408+ _PyASCIIObject_FULL_INIT("\x19"), \
409+ _PyASCIIObject_FULL_INIT("\x1a"), \
410+ _PyASCIIObject_FULL_INIT("\x1b"), \
411+ _PyASCIIObject_FULL_INIT("\x1c"), \
412+ _PyASCIIObject_FULL_INIT("\x1d"), \
413+ _PyASCIIObject_FULL_INIT("\x1e"), \
414+ _PyASCIIObject_FULL_INIT("\x1f"), \
415+ _PyASCIIObject_FULL_INIT("\x20"), \
416+ _PyASCIIObject_FULL_INIT("\x21"), \
417+ _PyASCIIObject_FULL_INIT("\x22"), \
418+ _PyASCIIObject_FULL_INIT("\x23"), \
419+ _PyASCIIObject_FULL_INIT("\x24"), \
420+ _PyASCIIObject_FULL_INIT("\x25"), \
421+ _PyASCIIObject_FULL_INIT("\x26"), \
422+ _PyASCIIObject_FULL_INIT("\x27"), \
423+ _PyASCIIObject_FULL_INIT("\x28"), \
424+ _PyASCIIObject_FULL_INIT("\x29"), \
425+ _PyASCIIObject_FULL_INIT("\x2a"), \
426+ _PyASCIIObject_FULL_INIT("\x2b"), \
427+ _PyASCIIObject_FULL_INIT("\x2c"), \
428+ _PyASCIIObject_FULL_INIT("\x2d"), \
429+ _PyASCIIObject_FULL_INIT("\x2e"), \
430+ _PyASCIIObject_FULL_INIT("\x2f"), \
431+ _PyASCIIObject_FULL_INIT("\x30"), \
432+ _PyASCIIObject_FULL_INIT("\x31"), \
433+ _PyASCIIObject_FULL_INIT("\x32"), \
434+ _PyASCIIObject_FULL_INIT("\x33"), \
435+ _PyASCIIObject_FULL_INIT("\x34"), \
436+ _PyASCIIObject_FULL_INIT("\x35"), \
437+ _PyASCIIObject_FULL_INIT("\x36"), \
438+ _PyASCIIObject_FULL_INIT("\x37"), \
439+ _PyASCIIObject_FULL_INIT("\x38"), \
440+ _PyASCIIObject_FULL_INIT("\x39"), \
441+ _PyASCIIObject_FULL_INIT("\x3a"), \
442+ _PyASCIIObject_FULL_INIT("\x3b"), \
443+ _PyASCIIObject_FULL_INIT("\x3c"), \
444+ _PyASCIIObject_FULL_INIT("\x3d"), \
445+ _PyASCIIObject_FULL_INIT("\x3e"), \
446+ _PyASCIIObject_FULL_INIT("\x3f"), \
447+ _PyASCIIObject_FULL_INIT("\x40"), \
448+ _PyASCIIObject_FULL_INIT("\x41"), \
449+ _PyASCIIObject_FULL_INIT("\x42"), \
450+ _PyASCIIObject_FULL_INIT("\x43"), \
451+ _PyASCIIObject_FULL_INIT("\x44"), \
452+ _PyASCIIObject_FULL_INIT("\x45"), \
453+ _PyASCIIObject_FULL_INIT("\x46"), \
454+ _PyASCIIObject_FULL_INIT("\x47"), \
455+ _PyASCIIObject_FULL_INIT("\x48"), \
456+ _PyASCIIObject_FULL_INIT("\x49"), \
457+ _PyASCIIObject_FULL_INIT("\x4a"), \
458+ _PyASCIIObject_FULL_INIT("\x4b"), \
459+ _PyASCIIObject_FULL_INIT("\x4c"), \
460+ _PyASCIIObject_FULL_INIT("\x4d"), \
461+ _PyASCIIObject_FULL_INIT("\x4e"), \
462+ _PyASCIIObject_FULL_INIT("\x4f"), \
463+ _PyASCIIObject_FULL_INIT("\x50"), \
464+ _PyASCIIObject_FULL_INIT("\x51"), \
465+ _PyASCIIObject_FULL_INIT("\x52"), \
466+ _PyASCIIObject_FULL_INIT("\x53"), \
467+ _PyASCIIObject_FULL_INIT("\x54"), \
468+ _PyASCIIObject_FULL_INIT("\x55"), \
469+ _PyASCIIObject_FULL_INIT("\x56"), \
470+ _PyASCIIObject_FULL_INIT("\x57"), \
471+ _PyASCIIObject_FULL_INIT("\x58"), \
472+ _PyASCIIObject_FULL_INIT("\x59"), \
473+ _PyASCIIObject_FULL_INIT("\x5a"), \
474+ _PyASCIIObject_FULL_INIT("\x5b"), \
475+ _PyASCIIObject_FULL_INIT("\x5c"), \
476+ _PyASCIIObject_FULL_INIT("\x5d"), \
477+ _PyASCIIObject_FULL_INIT("\x5e"), \
478+ _PyASCIIObject_FULL_INIT("\x5f"), \
479+ _PyASCIIObject_FULL_INIT("\x60"), \
480+ _PyASCIIObject_FULL_INIT("\x61"), \
481+ _PyASCIIObject_FULL_INIT("\x62"), \
482+ _PyASCIIObject_FULL_INIT("\x63"), \
483+ _PyASCIIObject_FULL_INIT("\x64"), \
484+ _PyASCIIObject_FULL_INIT("\x65"), \
485+ _PyASCIIObject_FULL_INIT("\x66"), \
486+ _PyASCIIObject_FULL_INIT("\x67"), \
487+ _PyASCIIObject_FULL_INIT("\x68"), \
488+ _PyASCIIObject_FULL_INIT("\x69"), \
489+ _PyASCIIObject_FULL_INIT("\x6a"), \
490+ _PyASCIIObject_FULL_INIT("\x6b"), \
491+ _PyASCIIObject_FULL_INIT("\x6c"), \
492+ _PyASCIIObject_FULL_INIT("\x6d"), \
493+ _PyASCIIObject_FULL_INIT("\x6e"), \
494+ _PyASCIIObject_FULL_INIT("\x6f"), \
495+ _PyASCIIObject_FULL_INIT("\x70"), \
496+ _PyASCIIObject_FULL_INIT("\x71"), \
497+ _PyASCIIObject_FULL_INIT("\x72"), \
498+ _PyASCIIObject_FULL_INIT("\x73"), \
499+ _PyASCIIObject_FULL_INIT("\x74"), \
500+ _PyASCIIObject_FULL_INIT("\x75"), \
501+ _PyASCIIObject_FULL_INIT("\x76"), \
502+ _PyASCIIObject_FULL_INIT("\x77"), \
503+ _PyASCIIObject_FULL_INIT("\x78"), \
504+ _PyASCIIObject_FULL_INIT("\x79"), \
505+ _PyASCIIObject_FULL_INIT("\x7a"), \
506+ _PyASCIIObject_FULL_INIT("\x7b"), \
507+ _PyASCIIObject_FULL_INIT("\x7c"), \
508+ _PyASCIIObject_FULL_INIT("\x7d"), \
509+ _PyASCIIObject_FULL_INIT("\x7e"), \
510+ _PyASCIIObject_FULL_INIT("\x7f"), \
511+ }, \
512+ .unicode_latin1 = { \
513+ _PyLatin1Object_FULL_INIT("\x80"), \
514+ _PyLatin1Object_FULL_INIT("\x81"), \
515+ _PyLatin1Object_FULL_INIT("\x82"), \
516+ _PyLatin1Object_FULL_INIT("\x83"), \
517+ _PyLatin1Object_FULL_INIT("\x84"), \
518+ _PyLatin1Object_FULL_INIT("\x85"), \
519+ _PyLatin1Object_FULL_INIT("\x86"), \
520+ _PyLatin1Object_FULL_INIT("\x87"), \
521+ _PyLatin1Object_FULL_INIT("\x88"), \
522+ _PyLatin1Object_FULL_INIT("\x89"), \
523+ _PyLatin1Object_FULL_INIT("\x8a"), \
524+ _PyLatin1Object_FULL_INIT("\x8b"), \
525+ _PyLatin1Object_FULL_INIT("\x8c"), \
526+ _PyLatin1Object_FULL_INIT("\x8d"), \
527+ _PyLatin1Object_FULL_INIT("\x8e"), \
528+ _PyLatin1Object_FULL_INIT("\x8f"), \
529+ _PyLatin1Object_FULL_INIT("\x90"), \
530+ _PyLatin1Object_FULL_INIT("\x91"), \
531+ _PyLatin1Object_FULL_INIT("\x92"), \
532+ _PyLatin1Object_FULL_INIT("\x93"), \
533+ _PyLatin1Object_FULL_INIT("\x94"), \
534+ _PyLatin1Object_FULL_INIT("\x95"), \
535+ _PyLatin1Object_FULL_INIT("\x96"), \
536+ _PyLatin1Object_FULL_INIT("\x97"), \
537+ _PyLatin1Object_FULL_INIT("\x98"), \
538+ _PyLatin1Object_FULL_INIT("\x99"), \
539+ _PyLatin1Object_FULL_INIT("\x9a"), \
540+ _PyLatin1Object_FULL_INIT("\x9b"), \
541+ _PyLatin1Object_FULL_INIT("\x9c"), \
542+ _PyLatin1Object_FULL_INIT("\x9d"), \
543+ _PyLatin1Object_FULL_INIT("\x9e"), \
544+ _PyLatin1Object_FULL_INIT("\x9f"), \
545+ _PyLatin1Object_FULL_INIT("\xa0"), \
546+ _PyLatin1Object_FULL_INIT("\xa1"), \
547+ _PyLatin1Object_FULL_INIT("\xa2"), \
548+ _PyLatin1Object_FULL_INIT("\xa3"), \
549+ _PyLatin1Object_FULL_INIT("\xa4"), \
550+ _PyLatin1Object_FULL_INIT("\xa5"), \
551+ _PyLatin1Object_FULL_INIT("\xa6"), \
552+ _PyLatin1Object_FULL_INIT("\xa7"), \
553+ _PyLatin1Object_FULL_INIT("\xa8"), \
554+ _PyLatin1Object_FULL_INIT("\xa9"), \
555+ _PyLatin1Object_FULL_INIT("\xaa"), \
556+ _PyLatin1Object_FULL_INIT("\xab"), \
557+ _PyLatin1Object_FULL_INIT("\xac"), \
558+ _PyLatin1Object_FULL_INIT("\xad"), \
559+ _PyLatin1Object_FULL_INIT("\xae"), \
560+ _PyLatin1Object_FULL_INIT("\xaf"), \
561+ _PyLatin1Object_FULL_INIT("\xb0"), \
562+ _PyLatin1Object_FULL_INIT("\xb1"), \
563+ _PyLatin1Object_FULL_INIT("\xb2"), \
564+ _PyLatin1Object_FULL_INIT("\xb3"), \
565+ _PyLatin1Object_FULL_INIT("\xb4"), \
566+ _PyLatin1Object_FULL_INIT("\xb5"), \
567+ _PyLatin1Object_FULL_INIT("\xb6"), \
568+ _PyLatin1Object_FULL_INIT("\xb7"), \
569+ _PyLatin1Object_FULL_INIT("\xb8"), \
570+ _PyLatin1Object_FULL_INIT("\xb9"), \
571+ _PyLatin1Object_FULL_INIT("\xba"), \
572+ _PyLatin1Object_FULL_INIT("\xbb"), \
573+ _PyLatin1Object_FULL_INIT("\xbc"), \
574+ _PyLatin1Object_FULL_INIT("\xbd"), \
575+ _PyLatin1Object_FULL_INIT("\xbe"), \
576+ _PyLatin1Object_FULL_INIT("\xbf"), \
577+ _PyLatin1Object_FULL_INIT("\xc0"), \
578+ _PyLatin1Object_FULL_INIT("\xc1"), \
579+ _PyLatin1Object_FULL_INIT("\xc2"), \
580+ _PyLatin1Object_FULL_INIT("\xc3"), \
581+ _PyLatin1Object_FULL_INIT("\xc4"), \
582+ _PyLatin1Object_FULL_INIT("\xc5"), \
583+ _PyLatin1Object_FULL_INIT("\xc6"), \
584+ _PyLatin1Object_FULL_INIT("\xc7"), \
585+ _PyLatin1Object_FULL_INIT("\xc8"), \
586+ _PyLatin1Object_FULL_INIT("\xc9"), \
587+ _PyLatin1Object_FULL_INIT("\xca"), \
588+ _PyLatin1Object_FULL_INIT("\xcb"), \
589+ _PyLatin1Object_FULL_INIT("\xcc"), \
590+ _PyLatin1Object_FULL_INIT("\xcd"), \
591+ _PyLatin1Object_FULL_INIT("\xce"), \
592+ _PyLatin1Object_FULL_INIT("\xcf"), \
593+ _PyLatin1Object_FULL_INIT("\xd0"), \
594+ _PyLatin1Object_FULL_INIT("\xd1"), \
595+ _PyLatin1Object_FULL_INIT("\xd2"), \
596+ _PyLatin1Object_FULL_INIT("\xd3"), \
597+ _PyLatin1Object_FULL_INIT("\xd4"), \
598+ _PyLatin1Object_FULL_INIT("\xd5"), \
599+ _PyLatin1Object_FULL_INIT("\xd6"), \
600+ _PyLatin1Object_FULL_INIT("\xd7"), \
601+ _PyLatin1Object_FULL_INIT("\xd8"), \
602+ _PyLatin1Object_FULL_INIT("\xd9"), \
603+ _PyLatin1Object_FULL_INIT("\xda"), \
604+ _PyLatin1Object_FULL_INIT("\xdb"), \
605+ _PyLatin1Object_FULL_INIT("\xdc"), \
606+ _PyLatin1Object_FULL_INIT("\xdd"), \
607+ _PyLatin1Object_FULL_INIT("\xde"), \
608+ _PyLatin1Object_FULL_INIT("\xdf"), \
609+ _PyLatin1Object_FULL_INIT("\xe0"), \
610+ _PyLatin1Object_FULL_INIT("\xe1"), \
611+ _PyLatin1Object_FULL_INIT("\xe2"), \
612+ _PyLatin1Object_FULL_INIT("\xe3"), \
613+ _PyLatin1Object_FULL_INIT("\xe4"), \
614+ _PyLatin1Object_FULL_INIT("\xe5"), \
615+ _PyLatin1Object_FULL_INIT("\xe6"), \
616+ _PyLatin1Object_FULL_INIT("\xe7"), \
617+ _PyLatin1Object_FULL_INIT("\xe8"), \
618+ _PyLatin1Object_FULL_INIT("\xe9"), \
619+ _PyLatin1Object_FULL_INIT("\xea"), \
620+ _PyLatin1Object_FULL_INIT("\xeb"), \
621+ _PyLatin1Object_FULL_INIT("\xec"), \
622+ _PyLatin1Object_FULL_INIT("\xed"), \
623+ _PyLatin1Object_FULL_INIT("\xee"), \
624+ _PyLatin1Object_FULL_INIT("\xef"), \
625+ _PyLatin1Object_FULL_INIT("\xf0"), \
626+ _PyLatin1Object_FULL_INIT("\xf1"), \
627+ _PyLatin1Object_FULL_INIT("\xf2"), \
628+ _PyLatin1Object_FULL_INIT("\xf3"), \
629+ _PyLatin1Object_FULL_INIT("\xf4"), \
630+ _PyLatin1Object_FULL_INIT("\xf5"), \
631+ _PyLatin1Object_FULL_INIT("\xf6"), \
632+ _PyLatin1Object_FULL_INIT("\xf7"), \
633+ _PyLatin1Object_FULL_INIT("\xf8"), \
634+ _PyLatin1Object_FULL_INIT("\xf9"), \
635+ _PyLatin1Object_FULL_INIT("\xfa"), \
636+ _PyLatin1Object_FULL_INIT("\xfb"), \
637+ _PyLatin1Object_FULL_INIT("\xfc"), \
638+ _PyLatin1Object_FULL_INIT("\xfd"), \
639+ _PyLatin1Object_FULL_INIT("\xfe"), \
640+ _PyLatin1Object_FULL_INIT("\xff"), \
641+ }, \
366642 }, \
367643}
368644
369645static inline void
370646_Py_global_objects_reset (struct _Py_global_objects * objects )
371647{
372648 _PyUnicode_reset (& objects -> singletons .unicode_empty .ascii );
649+ for (int i = 0 ; i < 128 + 1 ; i ++ ) {
650+ _PyUnicode_reset (& objects -> singletons .unicode_ascii [i ].ascii );
651+ }
652+ for (int i = 128 ; i < 256 ; i ++ ) {
653+ _PyUnicode_reset (& objects -> singletons .unicode_latin1 [i ].compact ._base );
654+ }
373655}
374656
375657#ifdef __cplusplus
0 commit comments