Skip to content

ppc64le: rhel-9: crash when applying klp-relocation #1228

@joe-lawrence

Description

@joe-lawrence

The following stripped-down module.patch crashes on rhel-9-beta ppc64le:

--- src.old/fs/nfsd/export.c	2021-09-28 17:00:09.657021230 -0400
+++ src/fs/nfsd/export.c	2021-09-28 17:31:52.919892206 -0400
@@ -1294,6 +1294,8 @@ static void exp_flags(struct seq_file *m
 	}
 }
 
+extern char *kpatch_string(void);
+__attribute__((optimize("-fno-optimize-sibling-calls")))
 static int e_show(struct seq_file *m, void *p)
 {
 	struct cache_head *cp = p;
@@ -1307,6 +1309,7 @@ static int e_show(struct seq_file *m, vo
 			seq_puts(m, "# Path Client Start-time\n#\tStats\n");
 		else
 			seq_puts(m, "# Path Client(Flags) # IPs\n");
+		seq_puts(m, kpatch_string());
 		return 0;
 	}
 
--- src.old/net/netlink/af_netlink.c	2021-09-28 17:00:12.176918995 -0400
+++ src/net/netlink/af_netlink.c	2021-09-28 17:02:46.280666828 -0400
@@ -2908,4 +2908,9 @@ panic:
 	panic("netlink_init: Cannot allocate nl_table\n");
 }
 
+char *kpatch_string(void)
+{
+	return "# kpatch\n";
+}
+
 core_initcall(netlink_proto_init);

The crash occurs when the nfsd module is loaded and the kernel tries to write the klp-relocations while loading the livepatch module:

$ echo 'file arch/powerpc/kernel/module_64.c +p' > /sys/kernel/debug/dynamic_debug/control
$ modprobe nfsd
$ insmod livepatch-minimal.ko

...
module_64: Found relocations in section 91
module_64: Ptr: 0000000045b1e312.  Number: 2
module_64: Looks like a total of 31 stubs, max
...
module_64: Applying ADD relocate section 91 to 13
module_64: RELOC at 0000000005969497: 10-type as .klp.sym.sunrpc.cache_check,0 (0xc008000002758418) + 0
BUG: Unable to handle kernel data access on write at 0xc008000000aa0e28
Faulting instruction address: 0xc000000000056d38
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
Modules linked in: livepatch_minimal(OEK+) nfsd auth_rpcgss nfs_acl lockd grace sunrpc bonding tls rfkill pseries_rng drm drm_panel_orientation_q libcrc32c sd_mod t10_pi sg ibmvscsi ibmveth scsi_transport_srp vmx_crypto dm_mirror dm_region_hash dm_log dm_mod
CPU: 7 PID: 1460 Comm: insmod Kdump: loaded Tainted: G           OE K  --------- ---  5.14.0-3.el9.ppc64le #1
NIP:  c000000000056d38 LR: c000000000056cfc CTR: 0000000000000014
REGS: c000000048ec3450 TRAP: 0300   Tainted: G           OE K  --------- ---   (5.14.0-3.el9.ppc64le)
MSR:  800000000280b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE>  CR: 28008284  XER: 20040007
CFAR: c000000000056c48 DAR: c008000000aa0e28 DSISR: 0a000000 IRQMASK: 0
GPR00: c000000000056cfc c000000048ec36f0 c000000002a45900 0000000000000000
GPR04: c008000000aa0e28 396b00003d620000 e98b0020f8410018 00000000ffffffff
GPR08: 4e8004207d8903a6 0000000080000000 c008000000a98e28 000000000000001f
GPR12: 0000000000008000 c000000007fc7700 c00000004b1ee000 c008000000ad1020
GPR16: c008000000ab1028 c00000004b1ef6c0 00000000ffffffff c0000000011286c0
GPR20: c00000000299e8a0 c008000000ac0500 c008000000ad1038 aaaaaaaaaaaaaaab
GPR24: c00000004b1ef580 c008000000ad0000 c008000000aa0890 000000000000024c
GPR28: c008000000ac0500 c008000002758418 c00000004b1ee000 c008000000aa0e28
NIP [c000000000056d38] create_stub+0x78/0x240
LR [c000000000056cfc] create_stub+0x3c/0x240
Call Trace:
[c000000048ec36f0] [c00000004b1ef580] 0xc00000004b1ef580 (unreliable)
[c000000048ec3770] [c000000000056fec] stub_for_addr+0xec/0x120
[c000000048ec37a0] [c0000000000580f4] apply_relocate_add+0x814/0x9a0
[c000000048ec38a0] [c00000000022cfd8] klp_apply_section_relocs+0x208/0x2d0
[c000000048ec3990] [c00000000022d130] klp_init_object_loaded+0x90/0x1d0
[c000000048ec3a20] [c00000000022d87c] klp_enable_patch+0x32c/0x540
[c000000048ec3ae0] [c008000000aa07d0] patch_init+0x228/0x298 [livepatch_minimal]
[c000000048ec3b30] [c000000000012330] do_one_initcall+0x60/0x2c0
[c000000048ec3c00] [c00000000027080c] do_init_module+0x7c/0x3b0
[c000000048ec3c90] [c000000000273254] __do_sys_finit_module+0xd4/0x160
[c000000048ec3db0] [c000000000030764] system_call_exception+0x144/0x280
[c000000048ec3e10] [c00000000000c170] system_call_vectored_common+0xf0/0x280
--- interrupt: 3000 at 0x7fff8274691c
NIP:  00007fff8274691c LR: 0000000000000000 CTR: 0000000000000000
REGS: c000000048ec3e80 TRAP: 3000   Tainted: G           OE K  --------- ---   (5.14.0-3.el9.ppc64le)
MSR:  800000000000f033 <SF,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 28008244  XER: 00000000
IRQMASK: 0
GPR00: 0000000000000161 00007fffdad8d5a0 00007fff82847100 0000000000000003
GPR04: 0000000106d16ca0 0000000000000000 0000000000000003 0000000000000000
GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR12: 0000000000000000 00007fff8300c380 0000000000000000 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20: 0000000000000000 0000000000000002 0000000000000000 00007fffdad8f1df
GPR24: 00007fffdad8dcb0 00007fffdad8f218 0000010000611980 0000000000000000
GPR28: 0000010000611820 0000000106d16ca0 0000000000000000 0000010000611860
NIP [00007fff8274691c] 0x7fff8274691c
LR [0000000000000000] 0x0
--- interrupt: 3000
Instruction dump:
3d02ffb1 395f8000 3d208000 3ce0ffff 38c6e370 3908e384 79290020 60e7ffff
e8a60014 e8c80008 e9080010 78e70020 <f8bf0000> f8df0008 f91f0010 811c0224
---[ end trace 5ca3b0c8409a2784 ]---

This corresponds to the last klp-relocation section (section 91, .klp.rela.nfsd..text.e_show):

  [90] .klp.rela.nfsd..toc RELA            0000000000000000 1d0318 000090 18 AIo 86  52  8
  [91] .klp.rela.nfsd..text.e_show RELA            0000000000000000 1d03a8 000030 18 AIo 86  13  8

Relocation section '.klp.rela.nfsd..toc' at offset 0x1d0318 contains 6 entries:
    Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
0000000000000268  0000007e00000026 R_PPC64_ADDR64         0000000000000000 .klp.sym.nfsd.expflags,1 + 0
00000000000002d8  000000a900000026 R_PPC64_ADDR64         0000000000000000 .klp.sym.nfsd.nfsd_mutex,0 + 0
0000000000000380  000000aa00000026 R_PPC64_ADDR64         0000000000000000 .klp.sym.nfsd.nfsd_net_id,0 + 0
00000000000003a0  000000ab00000026 R_PPC64_ADDR64         0000000000000000 .klp.sym.sunrpc.nfsd_debug,0 + 0
0000000000000438  0000007f00000026 R_PPC64_ADDR64         0000000000000000 .klp.sym.nfsd.svc_export_cache_template,1 + 0
0000000000000440  0000008000000026 R_PPC64_ADDR64         0000000000000000 .klp.sym.nfsd.svc_expkey_cache_template,1 + 0

Relocation section '.klp.rela.nfsd..text.e_show' at offset 0x1d03a8 contains 2 entries:
    Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
000000000000024c  000000ac0000000a R_PPC64_REL24          0000000000000000 .klp.sym.sunrpc.cache_check,0 + 0
00000000000002b8  000000810000000a R_PPC64_REL24          0000000000000000 .klp.sym.nfsd.svc_export_show,1 + 0

The crash is in create_stub():

/usr/src/debug/kernel-5.14.0-3/linux-5.14.0-3.el9.ppc64le/arch/powerpc/kernel/module_64.c: 423
...
/usr/src/debug/kernel-5.14.0-3/linux-5.14.0-3.el9.ppc64le/arch/powerpc/kernel/module_64.c: 426
...
/usr/src/debug/kernel-5.14.0-3/linux-5.14.0-3.el9.ppc64le/./include/linux/fortify-string.h: 191
0xc000000000056d04 <create_stub+0x44>:  addis   r6,r2,-79
0xc000000000056d08 <create_stub+0x48>:  addis   r8,r2,-79
0xc000000000056d0c <create_stub+0x4c>:  addi    r10,r31,-32768
0xc000000000056d10 <create_stub+0x50>:  lis     r9,-32768
0xc000000000056d14 <create_stub+0x54>:  lis     r7,-1
0xc000000000056d18 <create_stub+0x58>:  addi    r6,r6,-7312
0xc000000000056d1c <create_stub+0x5c>:  addi    r8,r8,-7292
0xc000000000056d20 <create_stub+0x60>:  clrldi  r9,r9,32
0xc000000000056d24 <create_stub+0x64>:  ori     r7,r7,65535
0xc000000000056d28 <create_stub+0x68>:  ld      r5,20(r6)
0xc000000000056d2c <create_stub+0x6c>:  ld      r6,8(r8)
0xc000000000056d30 <create_stub+0x70>:  ld      r8,16(r8)
0xc000000000056d34 <create_stub+0x74>:  clrldi  r7,r7,32
0xc000000000056d38 <create_stub+0x78>:  std     r5,0(r31)

More specifically, it is in the memcpy() call inside create_stub():

417 /* Patch stub to reference function and correct r2 value. */
418 static inline int create_stub(const Elf64_Shdr *sechdrs,
419                               struct ppc64_stub_entry *entry,
420                               unsigned long addr,
421                               struct module *me,
422                               const char *name)
423 {
424         long reladdr;
425 
426         if (is_mprofile_ftrace_call(name))
427                 return create_ftrace_stub(entry, addr, me);
428 
429         memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns));

Note that R_PPC_REL24 relocations seem to be special: unlike other relocation types, they operate through "stubs". module_64.c also specifically handles sym->st_shndx == SHN_LIVEPATCH, and there has been some recent activity in this code block (torvalds/linux@bd55e792de084), but that's as far as I've progressed in tracing the crash on the kernel side.

ppc64le-crash.tar.gz

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions