Skip to content

Commit 890b96f

Browse files
committed
cmd/asm, cmd/internal/obj: generate proper atomic ops for riscv64
Go's memory model closely follows the approach of the C++ concurrency memory model (https://go.dev/ref/mem), and Go atomic "has the same semantics as C++'s sequentially consistent atomics".

Meanwhile, according to the RISC-V manual, A.6 "Mappings from C/C++ primitives to RISC-V primitives":
C/C++ atomic operations (memory_order_acq_rel) should be mapped to "amo<op>.{w|d}.aqrl".
LR/SC (memory_order_acq_rel) should be mapped to "lr.{w|d}.aq; <op>; sc.{w|d}.rl".

goos: linux
goarch: riscv64
pkg: runtime/internal/atomic
                │ atomic.old.bench │ atomic.new.bench │
                │ sec/op │ sec/op vs base │
AtomicLoad64-4    4.216n ± 1%   4.202n ± 0%   ~ (p=0.127 n=10)
AtomicStore64-4   5.040n ± 0%   6.718n ± 0%   +33.30% (p=0.000 n=10)
AtomicLoad-4      4.217n ± 0%   4.213n ± 0%   ~ (p=0.145 n=10)
AtomicStore-4     5.040n ± 0%   6.718n ± 0%   +33.30% (p=0.000 n=10)
And8-4            9.237n ± 0%   9.240n ± 0%   ~ (p=0.582 n=10)
And-4             5.878n ± 0%   6.719n ± 0%   +14.31% (p=0.000 n=10)
And8Parallel-4    28.44n ± 0%   28.46n ± 0%   +0.07% (p=0.000 n=10)
AndParallel-4     28.40n ± 0%   28.43n ± 0%   +0.11% (p=0.000 n=10)
Or8-4             8.399n ± 0%   8.398n ± 0%   ~ (p=0.357 n=10)
Or-4              5.879n ± 0%   6.718n ± 0%   +14.27% (p=0.000 n=10)
Or8Parallel-4     28.43n ± 0%   28.45n ± 0%   +0.09% (p=0.000 n=10)
OrParallel-4      28.40n ± 0%   28.43n ± 0%   +0.11% (p=0.000 n=10)
Xadd-4            30.05n ± 0%   30.10n ± 0%   +0.18% (p=0.000 n=10)
Xadd64-4          30.05n ± 0%   30.09n ± 0%   +0.12% (p=0.000 n=10)
Cas-4             60.48n ± 0%   61.13n ± 0%   +1.08% (p=0.000 n=10)
Cas64-4           62.28n ± 0%   62.34n ± 0%   ~ (p=0.810 n=10)
Xchg-4            30.05n ± 0%   30.09n ± 0%   +0.15% (p=0.000 n=10)
Xchg64-4          30.05n ± 0%   30.09n ± 0%   +0.13% (p=0.000 n=10)
geomean           15.42n        16.17n        +4.89%

Fixes #61295

Change-Id: I97b5325db50467eeec36fb079bded7b09a32330f
Reviewed-on: https://go-review.googlesource.com/c/go/+/508715
Reviewed-by: Austin Clements <[email protected]>
Reviewed-by: Joel Sing <[email protected]>
Run-TryBot: M Zhuo <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
Reviewed-by: Bryan Mills <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
1 parent 5fe3f0a commit 890b96f

2 files changed

Lines changed: 29 additions & 24 deletions

File tree

src/cmd/asm/internal/asm/testdata/riscv64.s

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -183,28 +183,28 @@ start:
183183
// 8.2: Load-Reserved/Store-Conditional
184184
LRW (X5), X6 // 2fa30214
185185
LRD (X5), X6 // 2fb30214
186-
SCW X5, (X6), X7 // af23531c
187-
SCD X5, (X6), X7 // af33531c
186+
SCW X5, (X6), X7 // af23531a
187+
SCD X5, (X6), X7 // af33531a
188188

189189
// 8.3: Atomic Memory Operations
190-
AMOSWAPW X5, (X6), X7 // af23530c
191-
AMOSWAPD X5, (X6), X7 // af33530c
192-
AMOADDW X5, (X6), X7 // af235304
193-
AMOADDD X5, (X6), X7 // af335304
194-
AMOANDW X5, (X6), X7 // af235364
195-
AMOANDD X5, (X6), X7 // af335364
196-
AMOORW X5, (X6), X7 // af235344
197-
AMOORD X5, (X6), X7 // af335344
198-
AMOXORW X5, (X6), X7 // af235324
199-
AMOXORD X5, (X6), X7 // af335324
200-
AMOMAXW X5, (X6), X7 // af2353a4
201-
AMOMAXD X5, (X6), X7 // af3353a4
202-
AMOMAXUW X5, (X6), X7 // af2353e4
203-
AMOMAXUD X5, (X6), X7 // af3353e4
204-
AMOMINW X5, (X6), X7 // af235384
205-
AMOMIND X5, (X6), X7 // af335384
206-
AMOMINUW X5, (X6), X7 // af2353c4
207-
AMOMINUD X5, (X6), X7 // af3353c4
190+
AMOSWAPW X5, (X6), X7 // af23530e
191+
AMOSWAPD X5, (X6), X7 // af33530e
192+
AMOADDW X5, (X6), X7 // af235306
193+
AMOADDD X5, (X6), X7 // af335306
194+
AMOANDW X5, (X6), X7 // af235366
195+
AMOANDD X5, (X6), X7 // af335366
196+
AMOORW X5, (X6), X7 // af235346
197+
AMOORD X5, (X6), X7 // af335346
198+
AMOXORW X5, (X6), X7 // af235326
199+
AMOXORD X5, (X6), X7 // af335326
200+
AMOMAXW X5, (X6), X7 // af2353a6
201+
AMOMAXD X5, (X6), X7 // af3353a6
202+
AMOMAXUW X5, (X6), X7 // af2353e6
203+
AMOMAXUD X5, (X6), X7 // af3353e6
204+
AMOMINW X5, (X6), X7 // af235386
205+
AMOMIND X5, (X6), X7 // af335386
206+
AMOMINUW X5, (X6), X7 // af2353c6
207+
AMOMINUD X5, (X6), X7 // af3353c6
208208

209209
// 10.1: Base Counters and Timers
210210
RDCYCLE X5 // f32200c0

src/cmd/internal/obj/riscv/obj.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2067,17 +2067,22 @@ func instructionsForProg(p *obj.Prog) []*instruction {
20672067
return instructionsForStore(p, ins.as, p.To.Reg)
20682068

20692069
case ALRW, ALRD:
2070-
// Set aq to use acquire access ordering, which matches Go's memory requirements.
2070+
// Set aq to use acquire access ordering
20712071
ins.funct7 = 2
20722072
ins.rs1, ins.rs2 = uint32(p.From.Reg), REG_ZERO
20732073

20742074
case AADDI, AANDI, AORI, AXORI:
20752075
inss = instructionsForOpImmediate(p, ins.as, p.Reg)
20762076

2077-
case ASCW, ASCD, AAMOSWAPW, AAMOSWAPD, AAMOADDW, AAMOADDD, AAMOANDW, AAMOANDD, AAMOORW, AAMOORD,
2077+
case ASCW, ASCD:
2078+
// Set release access ordering
2079+
ins.funct7 = 1
2080+
ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg)
2081+
2082+
case AAMOSWAPW, AAMOSWAPD, AAMOADDW, AAMOADDD, AAMOANDW, AAMOANDD, AAMOORW, AAMOORD,
20782083
AAMOXORW, AAMOXORD, AAMOMINW, AAMOMIND, AAMOMINUW, AAMOMINUD, AAMOMAXW, AAMOMAXD, AAMOMAXUW, AAMOMAXUD:
2079-
// Set aq to use acquire access ordering, which matches Go's memory requirements.
2080-
ins.funct7 = 2
2084+
// Set aqrl to use acquire & release access ordering
2085+
ins.funct7 = 3
20812086
ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg)
20822087

20832088
case AECALL, AEBREAK, ARDCYCLE, ARDTIME, ARDINSTRET:

0 commit comments

Comments
 (0)