Skip to content

Conversation

@Gadgetoid
Copy link
Contributor

Use pio_sm_set_pins_with_mask64 and pio_sm_set_pindirs_with_mask64 in lieu of manually shifting by the pin base.

These functions were added in SDK 2.1.0.

I matched, to a certain extent, the form of #16191

Somewhat related to #16199

Testing

I'll get 'round to it, I swear.

Trade-offs and Alternatives

None, these functions should be used for clarity and brevity. Though sheesh I don't like the uint64_t casts 🫠

Use pio_sm_set_pins_with_mask64 and pio_sm_set_pindirs_with_mask64
in lieu of manually shifting by the pin base.

These functions were added in SDK 2.1.0.

Signed-off-by: Phil Howard <[email protected]>
@Gadgetoid Gadgetoid force-pushed the patch-rp2-pio-maskpins branch from e5950ae to d452e14 Compare March 21, 2025 17:13
@github-actions
Copy link

Code size report:

   bare-arm:    +0 +0.000% 
minimal x86:    +0 +0.000% 
   unix x64:    +0 +0.000% standard
      stm32:    +0 +0.000% PYBV10
     mimxrt:    +0 +0.000% TEENSY40
        rp2:   +64 +0.007% RPI_PICO_W
       samd:    +0 +0.000% ADAFRUIT_ITSYBITSY_M4_EXPRESS
  qemu rv32:    +0 +0.000% VIRT_RV32

@Gadgetoid
Copy link
Contributor Author

rp2:   +64 +0.007% RPI_PICO_W

I guess I should have seen that coming 🤦

@AJMansfield
Copy link
Contributor

Out of curiosity I looked into why it's such a huge increase, and it's because all of those 64-bit operations are done as bl ____aeabi_llsl_veneer calls.

Compare the original disassembly:

1002f5d0 <asm_pio_init_gpio>:
1002f5d0:	b5f8      	push	{r3, r4, r5, r6, r7, lr}
1002f5d2:	2300      	movs	r3, #0
1002f5d4:	2601      	movs	r6, #1
1002f5d6:	56d3      	ldrsb	r3, [r2, r3]
1002f5d8:	0015      	movs	r5, r2
1002f5da:	7852      	ldrb	r2, [r2, #1]
1002f5dc:	0004      	movs	r4, r0
1002f5de:	4096      	lsls	r6, r2
1002f5e0:	3e01      	subs	r6, #1
1002f5e2:	409e      	lsls	r6, r3
1002f5e4:	68aa      	ldr	r2, [r5, #8]
1002f5e6:	000f      	movs	r7, r1
1002f5e8:	409a      	lsls	r2, r3
1002f5ea:	0033      	movs	r3, r6
1002f5ec:	f004 ffe8 	bl	100345c0 <pio_sm_set_pins_with_mask>
1002f5f0:	2300      	movs	r3, #0
1002f5f2:	686a      	ldr	r2, [r5, #4]
1002f5f4:	56eb      	ldrsb	r3, [r5, r3]
1002f5f6:	0020      	movs	r0, r4
1002f5f8:	409a      	lsls	r2, r3
1002f5fa:	0033      	movs	r3, r6
1002f5fc:	2600      	movs	r6, #0
1002f5fe:	0039      	movs	r1, r7
1002f600:	f005 f81a 	bl	10034638 <pio_sm_set_pindirs_with_mask>
1002f604:	4b08      	ldr	r3, [pc, #32]	; (1002f628 <asm_pio_init_gpio+0x58>)
1002f606:	18e4      	adds	r4, r4, r3
1002f608:	0d24      	lsrs	r4, r4, #20
1002f60a:	3406      	adds	r4, #6
1002f60c:	b2e4      	uxtb	r4, r4
1002f60e:	786b      	ldrb	r3, [r5, #1]
1002f610:	42b3      	cmp	r3, r6
1002f612:	d800      	bhi.n	1002f616 <asm_pio_init_gpio+0x46>
1002f614:	bdf8      	pop	{r3, r4, r5, r6, r7, pc}
1002f616:	2000      	movs	r0, #0
1002f618:	5628      	ldrsb	r0, [r5, r0]
1002f61a:	0021      	movs	r1, r4
1002f61c:	1980      	adds	r0, r0, r6
1002f61e:	f003 facb 	bl	10032bb8 <gpio_set_function>
1002f622:	3601      	adds	r6, #1
1002f624:	e7f3      	b.n	1002f60e <asm_pio_init_gpio+0x3e>
1002f626:	46c0      	nop			; (mov r8, r8)
1002f628:	afe00000 	.word	0xafe00000

versus the new disassembly:

1002f5d0 <asm_pio_init_gpio>:
1002f5d0:	b5f0      	push	{r4, r5, r6, r7, lr}
1002f5d2:	2500      	movs	r5, #0
1002f5d4:	b085      	sub	sp, #20
1002f5d6:	5755      	ldrsb	r5, [r2, r5]
1002f5d8:	0007      	movs	r7, r0
1002f5da:	0014      	movs	r4, r2
1002f5dc:	000e      	movs	r6, r1
1002f5de:	7852      	ldrb	r2, [r2, #1]
1002f5e0:	2001      	movs	r0, #1
1002f5e2:	2100      	movs	r1, #0
1002f5e4:	f00d fac0 	bl	1003cb68 <____aeabi_llsl_veneer>
1002f5e8:	2201      	movs	r2, #1
1002f5ea:	4252      	negs	r2, r2
1002f5ec:	17d3      	asrs	r3, r2, #31
1002f5ee:	1880      	adds	r0, r0, r2
1002f5f0:	4159      	adcs	r1, r3
1002f5f2:	002a      	movs	r2, r5
1002f5f4:	f00d fab8 	bl	1003cb68 <____aeabi_llsl_veneer>
1002f5f8:	002a      	movs	r2, r5
1002f5fa:	9002      	str	r0, [sp, #8]
1002f5fc:	9103      	str	r1, [sp, #12]
1002f5fe:	68a0      	ldr	r0, [r4, #8]
1002f600:	2100      	movs	r1, #0
1002f602:	f00d fab1 	bl	1003cb68 <____aeabi_llsl_veneer>
1002f606:	000b      	movs	r3, r1
1002f608:	9902      	ldr	r1, [sp, #8]
1002f60a:	0002      	movs	r2, r0
1002f60c:	9100      	str	r1, [sp, #0]
1002f60e:	9903      	ldr	r1, [sp, #12]
1002f610:	0038      	movs	r0, r7
1002f612:	9101      	str	r1, [sp, #4]
1002f614:	0031      	movs	r1, r6
1002f616:	f005 f82b 	bl	10034670 <pio_sm_set_pins_with_mask64>
1002f61a:	2200      	movs	r2, #0
1002f61c:	6860      	ldr	r0, [r4, #4]
1002f61e:	56a2      	ldrsb	r2, [r4, r2]
1002f620:	2100      	movs	r1, #0
1002f622:	f00d faa1 	bl	1003cb68 <____aeabi_llsl_veneer>
1002f626:	2500      	movs	r5, #0
1002f628:	000b      	movs	r3, r1
1002f62a:	9902      	ldr	r1, [sp, #8]
1002f62c:	0002      	movs	r2, r0
1002f62e:	9100      	str	r1, [sp, #0]
1002f630:	9903      	ldr	r1, [sp, #12]
1002f632:	0038      	movs	r0, r7
1002f634:	9101      	str	r1, [sp, #4]
1002f636:	0031      	movs	r1, r6
1002f638:	f005 f85e 	bl	100346f8 <pio_sm_set_pindirs_with_mask64>
1002f63c:	4b08      	ldr	r3, [pc, #32]	; (1002f660 <asm_pio_init_gpio+0x90>)
1002f63e:	18ff      	adds	r7, r7, r3
1002f640:	0d3f      	lsrs	r7, r7, #20
1002f642:	3706      	adds	r7, #6
1002f644:	b2ff      	uxtb	r7, r7
1002f646:	7863      	ldrb	r3, [r4, #1]
1002f648:	42ab      	cmp	r3, r5
1002f64a:	d801      	bhi.n	1002f650 <asm_pio_init_gpio+0x80>
1002f64c:	b005      	add	sp, #20
1002f64e:	bdf0      	pop	{r4, r5, r6, r7, pc}
1002f650:	2000      	movs	r0, #0
1002f652:	5620      	ldrsb	r0, [r4, r0]
1002f654:	0039      	movs	r1, r7
1002f656:	1940      	adds	r0, r0, r5
1002f658:	f003 faca 	bl	10032bf0 <gpio_set_function>
1002f65c:	3501      	adds	r5, #1
1002f65e:	e7f2      	b.n	1002f646 <asm_pio_init_gpio+0x76>
1002f660:	afe00000 	.word	0xafe00000

Copy link
Contributor

@AJMansfield AJMansfield left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That seems like an absurd size increase for a feature that adds literally no functionality. If this were the thing needed to add some missing corner-case to fully support upper bank pin PIO, that would be one thing (even if I'd still suggest gating it on #if PICO_PIO_USE_GPIO_BASE) -- but the previous code already offers complete support for the upper bank access.

Copy link
Contributor

@AJMansfield AJMansfield left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is a change we want, recommending to close.

@Gadgetoid
Copy link
Contributor Author

Looks like these same 64bit ops might be leaked into Pico 2 and Pico 2 W builds:

https://github.com/raspberrypi/pico-sdk/blob/bddd20f928ce76142793bef434d4f75f4af6e433/src/rp2_common/hardware_pio/pio.c#L261-L267

via:

static inline void mp_hal_pin_low(mp_hal_pin_obj_t pin) {
gpio_clr_mask64(UINT64_C(1) << pin);
}
static inline void mp_hal_pin_high(mp_hal_pin_obj_t pin) {
gpio_set_mask64(UINT64_C(1) << pin);
}

Gating them probably wouldn't help to accomplish the original goal of clarity, but I wouldn't consider 64 bytes "huge" enough to weigh so heavily in a correctness vs size tradeoff.

Would this even have been a debate if these SDK functions were available when the original PIO changes were made?

@AJMansfield
Copy link
Contributor

AJMansfield commented Mar 21, 2025

in a correctness vs size tradeoff.

The only correctness issue that this solves is one it replaces with a functional deficiency.

The hardware supports using pin ranges that wrap at one of the two possible 32-pin upper boundaries. By which I mean, it's completely valid and supported to use [30, 31, 0, 1] or [46, 47, 16, 17] as contiguous sets of PIO pins.

The previous 32-bit version had a mild correctness bug, where it would blindly truncate those bit masks, and as a result only configure [30, 31] or [46, 47]. That's not what it should do, but it's at least easy enough to work around; a user could still set up the rest of those pins themselves if they had to.

In the new code, though, that is no longer a possible configuration at all; an attempt to set up a configuration like that would instead fault on check_pio_pin_mask64; and that's not a recoverable fault in the context of micropython user code.

For the 64-bit version to even itself be correct, it would need to check that parameter and raise an appropriate python exception; but the actual correct version is to just use the 32-bit operations but replace the regular bitshifts with bit rotations.

EDIT: Errors of that sort will become recoverable if #16991 is merged, so there's an argument to be made that with that added, the 64-bit version does have the more correct behavior. It's still not ideal, though; the wrapping use-case ought to be supported.

@AJMansfield
Copy link
Contributor

AJMansfield commented Mar 21, 2025

Here's what I'd propose:

static inline uint32_t rotl32a(uint32_t x, uint32_t n) {
  return (x<<n) | (x>>(32-n));
}
static void asm_pio_init_gpio(PIO pio, uint32_t sm, asm_pio_config_t *config) {
    uint32_t gpio_base = pio_get_gpio_base(pio);
    uint32_t pinmask = rotl32a( ~(-1 << config->count), config->base - gpio_base);
    pio_sm_set_pins_with_mask(pio, sm, rotl32a(config->pinvals, config->base - gpio_base), pinmask);
    pio_sm_set_pindirs_with_mask(pio, sm, rotl32a(config->pindirs, config->base - gpio_base), pinmask);
    for (size_t i = 0; i < 32; ++i) {
        if (pinmask & (1<<i)) {
            gpio_set_function(gpio_base + i, GPIO_FUNC_PIO0 + pio_get_index(pio));
        }
    }
}

@dpgeorge
Copy link
Member

Regarding the code size and calls to ____aeabi_llsl_veneer: knowing about this now, I think it's actually worth revisiting the use of 64-bit numbers with GPIO to not use the 64-bit shifting on rp2040. Pin code should be as fast as possible (and also being smaller is better). I think on rp2350 it's not such a big deal because the 64-bit shift can be more easily done in hardware, so there's no call to a helper function.

Regarding this PR itself: I tend to agree with @AJMansfield above that it's probably a good idea to have the ability to wrap the pin configuration around back to pin 0 or pin 16. Would need to first check that that's actually allowed by all the other bits of MicroPython PIO code.

@sfe-SparkFro
Copy link
Contributor

Hi folks, I believe this would be a fix for #16199 according to my testing with this replication code: #16199 (comment)

Would appreciate this PR being re-evaluated. 64 bytes sounds like a reasonable trade-off to me for an important bug fix!

@jepler
Copy link
Contributor

jepler commented Sep 6, 2025

fwiw in circuitpython we made some typedefs and convenience macros to adapt to whichever situation...

// pio_pinmask_t can hold ANY pin masks, so it is used before selection of gpiobase
#if NUM_BANK0_GPIOS > 32
typedef struct { uint64_t value;
} pio_pinmask_t;
... #else
typedef struct { uint32_t value;
} pio_pinmask_t;
typedef uint32_t pio_pinmask_value_t;

this is vagely similar to mp's machine_pin_open_drain_mask; typedef.

here was my PR if you want more of the little details: adafruit#9901

We decided to punt on wraparound. I think this was based on discord discussion, but basically we found no evidence that wraparound had ever worked on rp2040 in circuitpython and that meant nobody had ever asked for it either.

@sfe-SparkFro
Copy link
Contributor

@dpgeorge @AJMansfield @Gadgetoid Can this PR please be re-evaluated? This appears to fix #16199 from my own testing. I'm working on a product that requires use of PIO on GPIO >=32, and the product won't function without #16199 being fixed.

Thanks!

@AJMansfield
Copy link
Contributor

I've been meaning to get some proper RP2350B hardware to test things like this on anyway...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants