Skip to content

Commit 674d78f

Browse files
committed
py/mpprint: Use a padding buffer on the stack.
This reworks `mp_print_strn` to use a stack-allocated padding buffer rather than special-cased, hardcoded ROM strings, in order to reduce code size and improve string-formatting performance (fill characters other than space and zero were previously emitted one at a time). Note that the new code is at least as performant as the old dedicated ROM strings, even for zeroes and spaces! On my RP2350 Cortex-M33 hardware, spaces are about 1% faster for short-padding cases, and 3.4% faster for long-padding cases. I've done some cursory tests with alternate values of `PAD_BUF_SIZE`, but the results definitely won't generalize to other architectures, and probably not even to other implementations of the same architecture. The buffer size of 20 is chosen as the smallest size that easily admits a later implementation of #18092 to support padding with grouping characters, so as to avoid pessimizing the short-padding cases any more than required. I've also explored alternatives that use `alloca` for the padding buffer, but the conditionals and fallback logic needed to bound stack usage in the pathological cases end up pessimizing code size beyond what's reasonable for the very marginal additional speed gains. Signed-off-by: Anson Mansfield <[email protected]>
1 parent 718b28a commit 674d78f

File tree

1 file changed

+38
-46
lines changed

1 file changed

+38
-46
lines changed

py/mpprint.c

Lines changed: 38 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,6 @@
4040
#include "py/formatfloat.h"
4141
#endif
4242

43-
static const char pad_spaces[] = " ";
44-
static const char pad_zeroes[] = "0000000000000000";
4543

4644
static void plat_print_strn(void *env, const char *str, size_t len) {
4745
(void)env;
@@ -58,62 +56,56 @@ int mp_print_str(const mp_print_t *print, const char *str) {
5856
return len;
5957
}
6058

59+
// Efficiently print `count` chars looping over `str[len]`.
60+
static void print_strn_cycle(const mp_print_t *print, const char *str, size_t len, size_t count) {
61+
while (count > len) {
62+
print->print_strn(print->data, str, len);
63+
count -= len;
64+
}
65+
if (count > 0) {
66+
print->print_strn(print->data, str, count);
67+
}
68+
}
69+
70+
// lcm(4,5) to match stride for both comma and underscore grouping
71+
#define PAD_BUF_SIZE 20
72+
6173
int mp_print_strn(const mp_print_t *print, const char *str, size_t len, unsigned int flags, char fill, int width) {
6274
int left_pad = 0;
6375
int right_pad = 0;
6476
int pad = width - len;
65-
int pad_size;
6677
int total_chars_printed = 0;
67-
const char *pad_chars;
68-
69-
if (!fill || fill == ' ') {
70-
pad_chars = pad_spaces;
71-
pad_size = sizeof(pad_spaces) - 1;
72-
} else if (fill == '0') {
73-
pad_chars = pad_zeroes;
74-
pad_size = sizeof(pad_zeroes) - 1;
75-
} else {
76-
// Other pad characters are fairly unusual, so we'll take the hit
77-
// and output them 1 at a time.
78-
pad_chars = &fill;
79-
pad_size = 1;
80-
}
8178

82-
if (flags & PF_FLAG_CENTER_ADJUST) {
83-
left_pad = pad / 2;
84-
right_pad = pad - left_pad;
85-
} else if (flags & PF_FLAG_LEFT_ADJUST) {
86-
right_pad = pad;
87-
} else {
88-
left_pad = pad;
89-
}
79+
// alignment ensures fast memset
80+
char pad_buf[PAD_BUF_SIZE] __attribute__ ((aligned(4)));
81+
82+
if (pad > 0) {
83+
fill = fill ? fill : ' ';
84+
memset(pad_buf, fill, sizeof(pad_buf));
85+
86+
if (flags & PF_FLAG_CENTER_ADJUST) {
87+
left_pad = pad / 2;
88+
right_pad = pad - left_pad;
89+
} else if (flags & PF_FLAG_LEFT_ADJUST) {
90+
left_pad = 0;
91+
right_pad = pad;
92+
} else {
93+
left_pad = pad;
94+
right_pad = 0;
95+
}
9096

91-
if (left_pad > 0) {
97+
print_strn_cycle(print, pad_buf, sizeof(pad_buf), left_pad);
9298
total_chars_printed += left_pad;
93-
while (left_pad > 0) {
94-
int p = left_pad;
95-
if (p > pad_size) {
96-
p = pad_size;
97-
}
98-
print->print_strn(print->data, pad_chars, p);
99-
left_pad -= p;
100-
}
10199
}
102-
if (len) {
100+
101+
if (MP_LIKELY(len)) {
103102
print->print_strn(print->data, str, len);
104103
total_chars_printed += len;
105104
}
106-
if (right_pad > 0) {
107-
total_chars_printed += right_pad;
108-
while (right_pad > 0) {
109-
int p = right_pad;
110-
if (p > pad_size) {
111-
p = pad_size;
112-
}
113-
print->print_strn(print->data, pad_chars, p);
114-
right_pad -= p;
115-
}
116-
}
105+
106+
print_strn_cycle(print, pad_buf, sizeof(pad_buf), right_pad);
107+
total_chars_printed += right_pad;
108+
117109
return total_chars_printed;
118110
}
119111

0 commit comments

Comments
 (0)