[Clang][LLVM] Disable NonLazyBind when pointer authentication is enabled#188638
[Clang][LLVM] Disable NonLazyBind when pointer authentication is enabled#188638oskarwirga wants to merge 9 commits intollvm:mainfrom
Conversation
|
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-clang-codegen Author: Oskar Wirga (oskarwirga) ChangesThis is part of work being done in #188378.
Conditionally disable NonLazyBind in all callsites:
The clang-side checks use PointerAuth.FunctionPointers.isEnabled(). The LLVM-side check uses Triple.isArm64e() because CodeGenOptions are not available at that level of the pipeline. AI Disclaimer: 6 Files Affected:
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index b57802ebfced8..f67e34ebd8b46 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2687,7 +2687,10 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
// is used.
// FIXME: what if we just haven't processed the function definition
// yet, or if it's an external definition like C99 inline?
- if (CodeGenOpts.NoPLT) {
+ // Suppress NonLazyBind when ptrauth is enabled: inline GOT loads
+ // bypass authentication stubs.
+ if (CodeGenOpts.NoPLT &&
+ !CodeGenOpts.PointerAuth.FunctionPointers.isEnabled()) {
if (auto *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
if (!Fn->isDefined() && !AttrOnCallSite) {
FuncAttrs.addAttribute(llvm::Attribute::NonLazyBind);
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 10aad2e26938d..0a105b761ac44 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -2299,10 +2299,12 @@ static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF,
llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrTy, false);
fn = CGF.CGM.CreateRuntimeFunction(fnType, fnName);
- // We have Native ARC, so set nonlazybind attribute for performance
- if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee()))
- if (fnName == "objc_retain")
- f->addFnAttr(llvm::Attribute::NonLazyBind);
+ // We have Native ARC, so set nonlazybind attribute for performance.
+ // Suppress when ptrauth is enabled (inline GOT loads bypass auth stubs).
+ if (!CGF.CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled())
+ if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee()))
+ if (fnName == "objc_retain")
+ f->addFnAttr(llvm::Attribute::NonLazyBind);
}
// Cast the argument to 'id'.
@@ -2875,9 +2877,11 @@ void CodeGenFunction::EmitObjCRelease(llvm::Value *value,
llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false);
fn = CGM.CreateRuntimeFunction(fnType, "objc_release");
setARCRuntimeFunctionLinkage(CGM, fn);
- // We have Native ARC, so set nonlazybind attribute for performance
- if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee()))
- f->addFnAttr(llvm::Attribute::NonLazyBind);
+ // We have Native ARC, so set nonlazybind attribute for performance.
+ // Suppress when ptrauth is enabled (inline GOT loads bypass auth stubs).
+ if (!CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled())
+ if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee()))
+ f->addFnAttr(llvm::Attribute::NonLazyBind);
}
// Cast the argument to 'id'.
diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp
index e6c244547cefd..296c7d5cd5309 100644
--- a/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/clang/lib/CodeGen/CGObjCMac.cpp
@@ -63,12 +63,17 @@ class ObjCCommonTypesHelper {
llvm::FunctionCallee getMessageSendFn() const {
// Add the non-lazy-bind attribute, since objc_msgSend is likely to
// be called a lot.
+ // Suppress when ptrauth is enabled (inline GOT loads bypass auth stubs).
llvm::Type *params[] = {ObjectPtrTy, SelectorPtrTy};
+ llvm::AttributeList Attrs;
+ if (!CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled()) {
+ Attrs = llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NonLazyBind);
+ }
return CGM.CreateRuntimeFunction(
llvm::FunctionType::get(ObjectPtrTy, params, true), "objc_msgSend",
- llvm::AttributeList::get(CGM.getLLVMContext(),
- llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NonLazyBind));
+ Attrs);
}
/// void objc_msgSend_stret (id, SEL, ...)
@@ -551,11 +556,14 @@ class ObjCTypesHelper : public ObjCCommonTypesHelper {
llvm::FunctionCallee getSetJmpFn() {
// This is specifically the prototype for x86.
llvm::Type *params[] = {CGM.DefaultPtrTy};
+ llvm::AttributeList Attrs;
+ if (!CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled()) {
+ Attrs = llvm::AttributeList::get(CGM.getLLVMContext(),
+ llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NonLazyBind);
+ }
return CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(CGM.Int32Ty, params, false), "_setjmp",
- llvm::AttributeList::get(CGM.getLLVMContext(),
- llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NonLazyBind));
+ llvm::FunctionType::get(CGM.Int32Ty, params, false), "_setjmp", Attrs);
}
public:
@@ -700,13 +708,14 @@ class ObjCNonFragileABITypesHelper : public ObjCCommonTypesHelper {
// classref except by calling this function.
llvm::Type *params[] = {Int8PtrPtrTy};
llvm::LLVMContext &C = CGM.getLLVMContext();
- llvm::AttributeSet AS = llvm::AttributeSet::get(
- C, {
- llvm::Attribute::get(C, llvm::Attribute::NonLazyBind),
- llvm::Attribute::getWithMemoryEffects(
- C, llvm::MemoryEffects::none()),
- llvm::Attribute::get(C, llvm::Attribute::NoUnwind),
- });
+ llvm::SmallVector<llvm::Attribute, 3> AttrVec;
+ if (!CGM.getCodeGenOpts().PointerAuth.FunctionPointers.isEnabled()) {
+ AttrVec.push_back(llvm::Attribute::get(C, llvm::Attribute::NonLazyBind));
+ }
+ AttrVec.push_back(
+ llvm::Attribute::getWithMemoryEffects(C, llvm::MemoryEffects::none()));
+ AttrVec.push_back(llvm::Attribute::get(C, llvm::Attribute::NoUnwind));
+ llvm::AttributeSet AS = llvm::AttributeSet::get(C, AttrVec);
llvm::FunctionCallee F = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(ClassnfABIPtrTy, params, false),
"objc_loadClassref",
diff --git a/clang/test/CodeGen/ptrauth-suppress-nonlazybind.c b/clang/test/CodeGen/ptrauth-suppress-nonlazybind.c
new file mode 100644
index 0000000000000..cd0dab8a01468
--- /dev/null
+++ b/clang/test/CodeGen/ptrauth-suppress-nonlazybind.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple arm64e-apple-ios -fptrauth-calls -fno-plt -emit-llvm %s -o - | FileCheck %s --check-prefix=PTRAUTH
+// RUN: %clang_cc1 -triple arm64-apple-ios -fno-plt -emit-llvm %s -o - | FileCheck %s --check-prefix=NOPTRAUTH
+//
+// When pointer authentication is enabled (-fptrauth-calls), NonLazyBind
+// must NOT be applied. NonLazyBind causes inline GOT loads that bypass
+// the linker's authentication stubs, leading to crashes on arm64e where
+// GOT entries are PAC-signed.
+
+void external_function(void);
+
+void caller(void) {
+ external_function();
+}
+
+// With ptrauth enabled, the declaration should NOT have nonlazybind.
+// PTRAUTH: declare{{.*}} void @external_function()
+// PTRAUTH-NOT: nonlazybind
+
+// Without ptrauth, -fno-plt adds nonlazybind normally.
+// NOPTRAUTH: ; Function Attrs:{{.*}}nonlazybind
+// NOPTRAUTH: declare{{.*}} void @external_function()
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 0544995f979f7..c96a8c337c9f8 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -37,6 +37,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
@@ -169,7 +170,10 @@ static bool lowerObjCCall(Function &F, RTLIB::LibcallImpl NewFn,
if (setNonLazyBind && !Fn->isWeakForLinker()) {
// If we have Native ARC, set nonlazybind attribute for these APIs for
// performance.
- Fn->addFnAttr(Attribute::NonLazyBind);
+ // Suppress on arm64e: inline GOT loads bypass auth stubs.
+ Triple TT(M->getTargetTriple());
+ if (!TT.isArm64e())
+ Fn->addFnAttr(Attribute::NonLazyBind);
}
}
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/ptrauth-objc-arc.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/ptrauth-objc-arc.ll
new file mode 100644
index 0000000000000..4c5c951054fdc
--- /dev/null
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/ptrauth-objc-arc.ll
@@ -0,0 +1,29 @@
+; RUN: opt -mtriple=arm64e-apple-ios -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s --check-prefix=ARM64E
+; RUN: opt -mtriple=arm64-apple-ios -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s --check-prefix=ARM64
+;
+; Test that objc_retain and objc_release do not get nonlazybind on arm64e.
+
+define ptr @test_objc_retain(ptr %arg0) {
+entry:
+ %0 = call ptr @llvm.objc.retain(ptr %arg0)
+ ret ptr %0
+}
+
+define void @test_objc_release(ptr %arg0) {
+entry:
+ call void @llvm.objc.release(ptr %arg0)
+ ret void
+}
+
+declare void @llvm.objc.release(ptr)
+declare ptr @llvm.objc.retain(ptr)
+
+; arm64e: retain and release should NOT have nonlazybind.
+; ARM64E-DAG: declare void @objc_release(ptr)
+; ARM64E-DAG: declare ptr @objc_retain(ptr)
+; ARM64E-NOT: nonlazybind
+
+; arm64: retain and release should have nonlazybind.
+; ARM64-DAG: declare void @objc_release(ptr) [[NLB:#[0-9]+]]
+; ARM64-DAG: declare ptr @objc_retain(ptr) [[NLB]]
+; ARM64: attributes [[NLB]] = { nonlazybind }
|
NonLazyBind causes the compiler to emit inline GOT loads that bypass the linker's authentication stubs. On arm64e, calls must go through stubs that load from __auth_got and authenticate via braa; NonLazyBind skips this by loading directly from __got. Conditionally disable NonLazyBind in all callsites: - CGCall.cpp: general -fno-plt path - CGObjC.cpp: objc_retain, objc_release (Native ARC) - CGObjCMac.cpp: objc_msgSend, _setjmp, objc_loadClassref - PreISelIntrinsicLowering.cpp: ObjC runtime lowering The clang-side checks use PointerAuth.FunctionPointers.isEnabled(). The LLVM-side check uses Triple.isArm64e() because CodeGenOptions are not available at that level of the pipeline.
3a5e904 to
afc90ae
Compare
Oh that's embarrassing! It doesn't even make sense in the context of this diff given Apologies for any thrash :( if theres anything else I can do to reduce review burden on these PRs I am happy to do so. I really want to unblock PAC in Rust so we can land it, but I want to do so the right way :) |
efriedma-quic
left a comment
There was a problem hiding this comment.
This fix doesn't seem right. If the backend miscompiles NonLazyBind, the right response isn't to avoid using NonLazyBind; we should just fix NonLazyBind so it works.
(From your description, it doesn't seem like there's any fundamental reason NonLazyBind can't work.)
I double checked and yeah no fundamental reason it can't work! Time for me to be "non lazy" and "bind" myself to fixing this (sorry had to) One caveat is that making NonLazyBind arm64e aware will make this PR a dependent of #188378 |
kovdan01
left a comment
There was a problem hiding this comment.
Let me highlight several important points you should consider when addressing comments from other folks. Note that I do not know all the Apple-specific things, but I am familiar with PAuth in terms of Linux (pauthtest ABI) and target-neutral things.
-
Please do not mix the term "arm64e" and the
PointerAuth.FunctionPointerscodegen opt. These are not identical. Arm64e is a full-fledged Apple-specific PAuth ABI including many options, includingPointerAuth.FunctionPointers, but not limited to. This patch is definitely Apple-specific and it's not about function pointer signing, but about how arm64e PAuth ABI affects GOT. So, I'm 99% sure that you need to check against arm64e. -
As mentioned above, function pointers signing is part of arm64e. So,
-fptrauth-callsis implicitly enabled for arm64e targets. You do not need to pass that manually (as you do in your tests). -
It might be worth including "arm64e" in test names so it's clear that they are arm64e-specific.
Please note that there is a Rust project goal dedicated to pauthtest Linux ABI adoption for Rust https://github.com/rust-lang/rust-project-goals/blob/main/src/2026/aarch64_pointer_authentication_pauthtest.md We will start submitting patches under the umbrella of this goal soon. CC @jchlanda |
I appreciate all of these, thank you! Going from "technically working" to upstream ready has been hard and your points help.
Yes I saw rust-lang/rust#148640, particularly I wanted to unblock the gap @asl identified as my use case is focused on arm64e:
I have been relying on hacks in rustc to get it working, further discussion was here: rust-lang/rust#73628 (comment) |
…h.FunctionPointers Address reviewer feedback: - Check getTriple().isArm64e() instead of PointerAuth.FunctionPointers.isEnabled() in all Clang CodeGen sites. This is about how the arm64e ABI affects GOT, not about function pointer signing generically. - Remove redundant -fptrauth-calls from test (implicit for arm64e). - Rename tests to include "arm64e" in filenames. - Update CHECK prefixes from PTRAUTH/NOPTRAUTH to ARM64E/ARM64.
|
Turns out there is a fundamental reason NonLazyBind can't work on arm64e today: the MachO ABI has no relocation type for compiler-emitted auth GOT loads. Apple's own toolchain actually has this same behavior! Xcode emits unsigned GOT loads when using GlobalISel (aka when using // repro.m
@interface NSObject
+ (instancetype)alloc;
- (instancetype)init;
@end
@interface Foo : NSObject
@end
Foo *retain_test(Foo *obj) {
return obj;
}
$ xcrun --sdk iphoneos clang -target arm64e-apple-ios16 -fobjc-arc -S -o - repro.m | grep -A3 blr -B3
ldur x0, [x29, #-8]
adrp x8, _objc_retain@GOTPAGE
ldr x8, [x8, _objc_retain@GOTPAGEOFF]
blr x8 <---- This is bad
ldr x1, [sp] ; 8-byte Folded Reload
mov x2, x0
ldr x0, [sp, #8] ; 8-byte Folded ReloadI think we keep this PR which suppresses NonLazyBind on arm64e. I've also updated the tests and gating to use arm64e naming. EDIT: Included a better repro that doesn't rely on |
|
If one were to make nonlazybind use a signed pointer, that could be one way to do it. A better way would be something like LOADgotPAC, performing a call rather than a load, i.e., an atomic GOT-ADRP-LDR-BLR using x16 instead of a GPR. But anyway, more to the point: what the toolchain indeed doesn't support is |
Yeah should I just add that to this PR or make a separate one?
No it seems like we do need to outright disable NonLazyBind on arm64e, particularly because something liek llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp Lines 169 to 172 in ede5661 |
|
If there's really no way to satisfy the proper security properties with nonlazybind, we should fix the AArch64 backend to reject it. Otherwise, we should make it work. I don't have a strong opinion on what we should do if the user passes -fno-plt. |
Unauthenticated indirect branches (blr) bypass pointer authentication on arm64e. Drop NonLazyBind in GlobalISel CallLowering and RtLibUseGOT in both SelectionDAG and GlobalISel when targeting arm64e. This is defense-in-depth for the Clang-level suppression: even if NonLazyBind or RtLibUseGOT leaks into IR, the backend will not emit unsigned indirect calls on arm64e.
Good call because in the process of adding tests I found another place where un-auth'd branches were being emitted when targeting arm64e. |
…rAuth (#189474) This is part of work being done in #188378 and #188638, split out from #188650. `lowerConstantPtrAuth` silently miscompiled `ConstantPtrAuth` constants with non-`GlobalValue` pointer bases — emitting `0@AUTH(da,0)` instead of erroring. Changes: - Handle `ConstantPointerNull` bases explicitly - Error via `reportFatalUsageError` on any remaining unresolved base (e.g. nested ptrauth) instead of silently miscompiling This PR was mostly developed with LLM assistance
- Added RTLCI getter to LibcallLoweringModuleAnalysisResult - Replaced setNonLazyBind with RTLCI pointer - use getFunctionTy to move target specific conditions out into RuntimeLibcalls.cpp
There was a problem hiding this comment.
Can you cover this in test/Transforms/Utils/DeclareRuntimeLibcalls
There was a problem hiding this comment.
It turns out these functions don't get declared for any target so DeclareRuntimeLibcalls never processes them, should I leave it as is and just have arm64e-objc-arc.ll cover this or should we make this impls for darwin targets?
| if (!CGF.CGM.getTriple().isArm64e()) | ||
| if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee())) | ||
| if (fnName == "objc_retain") | ||
| f->addFnAttr(llvm::Attribute::NonLazyBind); |
There was a problem hiding this comment.
In theory the RuntimeLibcalls attributes should be auto-applied, but that won't happen with the current state
efriedma-quic
left a comment
There was a problem hiding this comment.
Are you planning to add code to reject nonlazybind in the backend in this patch?
Unless I misunderstood, I believe I already did so in bde6719 |
It looks like that's silently suppressing the nonlazybind attribute. |
OK so IIUC we should just error on it? |
|
Yes |
Updated the test to match!
…rAuth (llvm#189474) This is part of work being done in llvm#188378 and llvm#188638, split out from llvm#188650. `lowerConstantPtrAuth` silently miscompiled `ConstantPtrAuth` constants with non-`GlobalValue` pointer bases — emitting `0@AUTH(da,0)` instead of erroring. Changes: - Handle `ConstantPointerNull` bases explicitly - Error via `reportFatalUsageError` on any remaining unresolved base (e.g. nested ptrauth) instead of silently miscompiling This PR was mostly developed with LLM assistance
…rAuth (llvm#189474) This is part of work being done in llvm#188378 and llvm#188638, split out from llvm#188650. `lowerConstantPtrAuth` silently miscompiled `ConstantPtrAuth` constants with non-`GlobalValue` pointer bases — emitting `0@AUTH(da,0)` instead of erroring. Changes: - Handle `ConstantPointerNull` bases explicitly - Error via `reportFatalUsageError` on any remaining unresolved base (e.g. nested ptrauth) instead of silently miscompiling This PR was mostly developed with LLM assistance
|
I think we're in a decent spot so I plan to ship it this week, I'll give a couple more days for any review before merging. |
efriedma-quic
left a comment
There was a problem hiding this comment.
It looks like you added the diagnostic to globalisel, but not selectiondag isel/fast isel?
GlobalISel already emits a DiagnosticInfoUnsupported when a callee has the nonlazybind attribute on arm64e. Mirror that in the SelectionDAG and FastISel call lowering paths so the error is consistent regardless of which ISel handles the call. Cover all three paths in a new focused test.
Added that + a new test because the existing one has |
efriedma-quic
left a comment
There was a problem hiding this comment.
I don't think I have any further concerns. (I'll let an arm64e reviewer approve.)
This is part of work being done in #188378.
NonLazyBind causes the compiler to emit inline GOT loads that bypass the linker's authentication stubs. On arm64e, calls must go through stubs that load from __auth_got and authenticate via braa; NonLazyBind skips this by loading directly from __got which when it hits the
braacauses a PAC crash.Conditionally disable NonLazyBind in all callsites:
All checks use Triple::isArm64e().
AI Disclaimer:
I used Claude to root-cause and fix this after observing
EXC_ARM_PAC_FAILcrashes in Objective-C ARC calls. This fix has been tested on arm64e hardware.