Skip to content

Commit 092c1fc

Browse files
MacDue authored and c-rhodes committed
[AArch64][SME] Disable tail calls in new ZA/ZT0 functions (#177152)
Allowing this can result in invalid tail calls to shared ZA functions. It may be possible to limit this to the case where the caller is private ZA and the callee shares ZA, but for now it is generally disabled. (cherry picked from commit 10aca26)
1 parent 24367c9 commit 092c1fc

2 files changed

Lines changed: 80 additions & 1 deletion

File tree

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9328,7 +9328,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
9328 9328
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
9329 9329
CallAttrs.requiresPreservingAllZAState() ||
9330 9330
CallAttrs.requiresPreservingZT0() ||
9331-
CallAttrs.caller().hasStreamingBody())
9331+
CallAttrs.caller().hasStreamingBody() || CallAttrs.caller().isNewZA() ||
9332+
CallAttrs.caller().isNewZT0())
9332 9333
return false;
9333 9334

9334 9335
// Functions using the C or Fast calling convention that have an SVE signature
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -O3 -verify-machineinstrs < %s | FileCheck %s
3+
4+
declare void @inout_za_zt0() "aarch64_inout_za" "aarch64_inout_zt0"
5+
6+
define void @new_za_zt0() "aarch64_new_za" "aarch64_new_zt0" {
7+
; CHECK-LABEL: new_za_zt0:
8+
; CHECK: // %bb.0: // %entry
9+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
10+
; CHECK-NEXT: .cfi_def_cfa_offset 16
11+
; CHECK-NEXT: .cfi_offset w30, -16
12+
; CHECK-NEXT: mrs x8, TPIDR2_EL0
13+
; CHECK-NEXT: cbz x8, .LBB0_2
14+
; CHECK-NEXT: // %bb.1: // %entry
15+
; CHECK-NEXT: bl __arm_tpidr2_save
16+
; CHECK-NEXT: msr TPIDR2_EL0, xzr
17+
; CHECK-NEXT: zero {za}
18+
; CHECK-NEXT: zero { zt0 }
19+
; CHECK-NEXT: .LBB0_2: // %entry
20+
; CHECK-NEXT: smstart za
21+
; CHECK-NEXT: bl inout_za_zt0
22+
; CHECK-NEXT: smstop za
23+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
24+
; CHECK-NEXT: ret
25+
entry:
26+
tail call void @inout_za_zt0()
27+
ret void
28+
}
29+
30+
declare void @inout_za() "aarch64_inout_za"
31+
32+
define void @new_za() "aarch64_new_za" {
33+
; CHECK-LABEL: new_za:
34+
; CHECK: // %bb.0: // %entry
35+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
36+
; CHECK-NEXT: .cfi_def_cfa_offset 16
37+
; CHECK-NEXT: .cfi_offset w30, -16
38+
; CHECK-NEXT: mrs x8, TPIDR2_EL0
39+
; CHECK-NEXT: cbz x8, .LBB1_2
40+
; CHECK-NEXT: // %bb.1: // %entry
41+
; CHECK-NEXT: bl __arm_tpidr2_save
42+
; CHECK-NEXT: msr TPIDR2_EL0, xzr
43+
; CHECK-NEXT: zero {za}
44+
; CHECK-NEXT: .LBB1_2: // %entry
45+
; CHECK-NEXT: smstart za
46+
; CHECK-NEXT: bl inout_za
47+
; CHECK-NEXT: smstop za
48+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
49+
; CHECK-NEXT: ret
50+
entry:
51+
tail call void @inout_za()
52+
ret void
53+
}
54+
55+
declare void @inout_zt0() "aarch64_inout_zt0"
56+
57+
define void @new_zt0() "aarch64_new_zt0" {
58+
; CHECK-LABEL: new_zt0:
59+
; CHECK: // %bb.0: // %entry
60+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
61+
; CHECK-NEXT: .cfi_def_cfa_offset 16
62+
; CHECK-NEXT: .cfi_offset w30, -16
63+
; CHECK-NEXT: mrs x8, TPIDR2_EL0
64+
; CHECK-NEXT: cbz x8, .LBB2_2
65+
; CHECK-NEXT: // %bb.1: // %entry
66+
; CHECK-NEXT: bl __arm_tpidr2_save
67+
; CHECK-NEXT: msr TPIDR2_EL0, xzr
68+
; CHECK-NEXT: zero { zt0 }
69+
; CHECK-NEXT: .LBB2_2: // %entry
70+
; CHECK-NEXT: smstart za
71+
; CHECK-NEXT: bl inout_zt0
72+
; CHECK-NEXT: smstop za
73+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
74+
; CHECK-NEXT: ret
75+
entry:
76+
tail call void @inout_zt0()
77+
ret void
78+
}

0 commit comments

Comments
 (0)