tl;dr my personal fix
Extend the lifetime of the arrays assigned to argv to cover its usage.
--- a/src/nvc_ldcache.c
+++ b/src/nvc_ldcache.c
@@ -472,6 +472,9 @@
if (validate_args(ctx, cnt != NULL) < 0)
return (-1);
+ char *argv1[] = {cnt->cfg.ldconfig, "-f", "/etc/ld.so.conf", "-C", "/etc/ld.so.cache", cnt->cuda_compat_dir, cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL};
+ char *argv2[] = {cnt->cfg.ldconfig, "-f", "/etc/ld.so.conf", "-C", "/etc/ld.so.cache", cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL};
+
if (cnt->flags & OPT_CUDA_COMPAT_MODE_LDCONFIG && cnt->cuda_compat_dir != NULL) {
/*
* We include the cuda_compat_dir directory on the ldconfig
@@ -480,9 +483,9 @@
* libraries in the standard library paths (libs_dir and
* libs32_dir).
* */
- argv = (char * []){cnt->cfg.ldconfig, "-f", "/etc/ld.so.conf", "-C", "/etc/ld.so.cache", cnt->cuda_compat_dir, cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL};
+ argv = argv1;
} else {
- argv = (char * []){cnt->cfg.ldconfig, "-f", "/etc/ld.so.conf", "-C", "/etc/ld.so.cache", cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL};
+ argv = argv2;
}
if (*argv[0] == '@') {
I would have created a PR if it weren't for the real name requirement.
Bug introduced in commit 8ed5824:
if ((cnt->flags & OPT_CUDA_COMPAT_MODE_LDCONFIG) && (cnt->cuda_compat_dir != NULL)) {
    /*
     * We include the cuda_compat_dir directory on the ldconfig
     * command line. This ensures that the CUDA Forward compat
     * libraries take precendence over the user-mode driver
     * libraries in the standard library paths (libs_dir and
     * libs32_dir).
     * */
    log_info("prefering CUDA Forward Compatibility dir when running ldconfig");
    argv = (char * []){cnt->cfg.ldconfig, "-f", "/etc/ld.so.conf", "-C", "/etc/ld.so.cache", cnt->cuda_compat_dir, cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL};
} else {
    argv = (char * []){cnt->cfg.ldconfig, "-f", "/etc/ld.so.conf", "-C", "/etc/ld.so.cache", cnt->cfg.libs_dir, cnt->cfg.libs32_dir, NULL};
}

if (*argv[0] == '@') {
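The right-hand sides of the argv assignments are temporary array compound literals, which do not outlive the block (the if or else branch) that contains them, so argv is left pointing at dead storage once the branch ends. By the C11 language standard section 6.2.4.2: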
The value of a pointer becomes indeterminate when the object it points to (or just past) reaches the end of its lifetime.
and 6.2.4.8:
A non-lvalue expression with structure or union type, where the structure or union contains a member with array type ... Its lifetime ends when the evaluation of the containing full expression or full declarator ends.
Section J.2 states that:
The behavior is undefined in the following circumstances: ... An object is referred to outside of its lifetime (6.2.4).
For this reason, the compiler (both GCC and Clang) omitted the assignments to argv inside both conditional branches, leaving argv uninitialized. Then the following argv[0] expression returns junk, which happens to be NULL. Therefore the test *argv[0] == '@' dereferences that NULL and segfaults.
This bug manifests as program crashes as reported in several places:

tl;dr my personal fix
Extend the lifetime of the arrays assigned to argv to cover its usage.
I would have created a PR if it weren't for the real name requirement.
Bug introduced in commit 8ed5824:
libnvidia-container/src/nvc_ldcache.c
Lines 475 to 489 in caf057b
The right-hand sides of the argv assignments are temporary array compound literals. By the C11 language standard section 6.2.4.2:
and 6.2.4.8:
Section J.2 states that:
For this reason, the compiler (both GCC and Clang) omitted the assignments to argv inside both conditional branches, leaving argv uninitialized. Then the following argv[0] expression returns junk, which happens to be NULL. Therefore the test *argv[0] == '@' dereferences that NULL and segfaults.
This bug manifests as program crashes as reported in several places: