cudaPackages: support multiple platforms#256324
Conversation
e13c68e to
59c6666
Compare
yannham
left a comment
There was a problem hiding this comment.
I hope it's not too noisy, but I'm trying out this PR on a Jetson device and I will comment here as I find stuff that doesn't work or evaluate.
|
@yannham @SomeoneSerge if you could figure out why the latest commit (c0bad78) gives an infinite recursion error when trying to evaluate any member of nixpkgs/pkgs/top-level/cuda-packages.nix Line 94 in c0bad78 and is fine when it is commented out, that'd be much appreciated. From what I've seen in the CLI, it seems like the issue has to do with the cuDNN extension's use of the […]. Here's just trying to evaluate the entire cudaPackages:
Details
$ nix repl --impure . --show-trace
Welcome to Nix 2.18.1. Type :? for help.
Loading installable 'git+file:///home/connorbaker/nixpkgs#'...
Added 5 variables.
nix-repl> legacyPackages.x86_64-linux.cudaPackages
error:
… while evaluating the attribute 'x86_64-linux.cudaPackages'
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/pkgs/top-level/all-packages.nix:7281:3:
7280| # such cuDNN release as of 2023-01-10.
7281| cudaPackages = cudaPackages_11;
| ^
7282|
… from call site
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/pkgs/top-level/all-packages.nix:7271:23:
7270| cudaPackages_11_7 = recurseIntoAttrs (callPackage ./cuda-packages.nix { cudaVersion = "11.7"; });
7271| cudaPackages_11_8 = recurseIntoAttrs (callPackage ./cuda-packages.nix { cudaVersion = "11.8"; });
| ^
7272| cudaPackages_11 = cudaPackages_11_8;
… while calling 'recurseIntoAttrs'
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/attrsets.nix:1018:5:
1017| # An attribute set to scan for derivations.
1018| attrs:
| ^
1019| attrs // { recurseForDerivations = true; };
… from call site
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/pkgs/top-level/all-packages.nix:7271:41:
7270| cudaPackages_11_7 = recurseIntoAttrs (callPackage ./cuda-packages.nix { cudaVersion = "11.7"; });
7271| cudaPackages_11_8 = recurseIntoAttrs (callPackage ./cuda-packages.nix { cudaVersion = "11.8"; });
| ^
7272| cudaPackages_11 = cudaPackages_11_8;
… while calling 'callPackageWith'
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/customisation.nix:122:35:
121| */
122| callPackageWith = autoArgs: fn: args:
| ^
123| let
… from call site
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/customisation.nix:173:34:
172|
173| in if missingArgs == [] then makeOverridable f allArgs else abort error;
| ^
174|
… while calling 'makeOverridable'
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/customisation.nix:72:24:
71| */
72| makeOverridable = f: origArgs:
| ^
73| let
… from call site
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/customisation.nix:74:16:
73| let
74| result = f origArgs;
| ^
75|
… while calling anonymous lambda
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/pkgs/top-level/cuda-packages.nix:1:1:
1| {
| ^
2| config,
… from call site
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/pkgs/top-level/cuda-packages.nix:101:18:
100|
101| cudaPackages = customisation.makeScope newScope (fixedPoints.extends composedExtension passthruFunction);
| ^
102| in
… while calling 'makeScope'
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/customisation.nix:268:25:
267| which can form the parent scope for later package sets. */
268| makeScope = newScope: f:
| ^
269| let self = f self // {
… from call site
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/customisation.nix:269:16:
268| makeScope = newScope: f:
269| let self = f self // {
| ^
270| newScope = scope: newScope (self // scope);
… while calling 'extends'
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/fixed-points.nix:91:24:
90| */
91| extends = f: rattrs: self: let super = rattrs self; in super // f self super;
| ^
92|
… from call site
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/fixed-points.nix:91:67:
90| */
91| extends = f: rattrs: self: let super = rattrs self; in super // f self super;
| ^
92|
… while calling 'composeExtensions'
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/fixed-points.nix:99:18:
98| composeExtensions =
99| f: g: final: prev:
| ^
100| let fApplied = f final prev;
… from call site
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/fixed-points.nix:102:22:
101| prev' = prev // fApplied;
102| in fApplied // g final prev';
| ^
103|
… while calling 'composeExtensions'
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/fixed-points.nix:99:18:
98| composeExtensions =
99| f: g: final: prev:
| ^
100| let fApplied = f final prev;
… from call site
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/fixed-points.nix:102:22:
101| prev' = prev // fApplied;
102| in fApplied // g final prev';
| ^
103|
… while calling 'composeExtensions'
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/fixed-points.nix:99:18:
98| composeExtensions =
99| f: g: final: prev:
| ^
100| let fApplied = f final prev;
… from call site
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/fixed-points.nix:102:22:
101| prev' = prev // fApplied;
102| in fApplied // g final prev';
| ^
103|
… while calling 'composeExtensions'
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/fixed-points.nix:99:18:
98| composeExtensions =
99| f: g: final: prev:
| ^
100| let fApplied = f final prev;
… from call site
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/fixed-points.nix:100:22:
99| f: g: final: prev:
100| let fApplied = f final prev;
| ^
101| prev' = prev // fApplied;
… while calling anonymous lambda
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/pkgs/development/cuda-modules/cudnn/extension.nix:3:8:
2| # https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-880/support-matrix/index.html
3| final: prev: let
| ^
4| inherit (final) callPackage;
… from call site
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/pkgs/development/cuda-modules/cudnn/extension.nix:43:33:
42| # newestToOldestCudnnPackages :: List (AttrSet Packages)
43| newestToOldestCudnnPackages = lists.reverseList cudnnPackages;
| ^
44|
… while calling 'reverseList'
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/lib/lists.nix:510:17:
509| */
510| reverseList = xs:
| ^
511| let l = length xs; in genList (n: elemAt xs (l - n - 1)) l;
error: infinite recursion encountered
at /nix/store/hpalq6ph2zy2hxx5s1n1mkj7gbky6crn-source/pkgs/top-level/cuda-packages.nix:47:13:
46| nvccCompatibilities = builtins.import ../development/cuda-modules/nvccCompatibilities.nix;
47| flags = final.callPackage ../development/cuda-modules/flags.nix {};
| ^
48|
|
|
@ConnorBaker Oh I see you're using a lot of […]. I see you did that to get around infinite recursion errors though. The reason you got those in the first place is that the attribute names of the overlay need to be computable without relying on […]. My recommendation is to pass all data that is needed to generate the overlay attribute set names via function arguments outside of the overlay. |
|
@infinisil so should I […]? I fear I may have misunderstood how to use overlays / extensions. Is there a Nix hour you’d recommend? |
c6ad5a3 to
222ba41
Compare
Nah,
final: prev: {
# Both assigning and accessing `foo` -> use `prev`
foo = prev.foo.override {
# Not assigning anything for the overlay -> use `final`
bar = final.bar_2;
};
# Assigning `baz` but accessing `baz_2` -> use `final`
baz = final.baz_2;
}
The last change looks like the right direction to me, and indeed I see you don't even need the […]. In fact if in all the internal overlays you never need […] |
222ba41 to
e728935
Compare
1c7ab5e to
e77263d
Compare
0989906 to
c54efcd
Compare
|
I was able to build PyTorch with CUDA for the Jetson! Still need to verify it works, but it built! |
c54efcd to
4022705
Compare
737ddb3 to
d761aed
Compare
|
Log |
ce282e2 to
c4a4f59
Compare
063380e to
6179d88
Compare
It's CAFFE -- it doesn't have any maintainers and hasn't been updated in about six years https://github.com/BVLC/caffe. |
|
Backport failed for release-23.11. Please cherry-pick the changes locally and resolve any conflicts.
git fetch origin release-23.11
git worktree add -d .worktree/backport-256324-to-release-23.11 origin/release-23.11
cd .worktree/backport-256324-to-release-23.11
git switch --create backport-256324-to-release-23.11
git cherry-pick -x 4a25023c2ef6f05904d27867aab8e71c034198a9 397d95d07fd095a3fba459a694bd284be3c47899 8e800cedaf24f5ad9717463b809b0beef7677000 bfaefd0873a91aaffaae4254da5734f2fb311f48 0a7dacf94d879f7040c67ff7c3b0540ffe8a8782 aaf735eac97989ee924f2b3296d0d5200a6d734c 6179d88b7d98b7114840facfafb709b1f37b9aa8 |
But did you fix the eval? |
I didn't think to because it's marked as broken if CUDA support is enabled. I'll make a followup PR to handle it. EDIT: Tracked in #272761. |
|
Backport is tracked in #272784. |
|
This change seems to break torch on darwin. I'm not sure what is going on at first glance, could you please check? short log full log |
|
A quick comment. There are two problems at play:
|
|
This pull request has been mentioned on NixOS Discourse. There might be relevant details there: |
|
This pull request has been mentioned on NixOS Discourse. There might be relevant details there: |
Important
This PR is to be merged after:
Note
For reviewers: Please, please review by commit instead of viewing the diff all at once. The first two commits move the JSON and Nix configuration expressions we need for build systems or to avoid IFD; they can largely be ignored. The commits with changes to Nix's handling of CUDA packages come after those two.
The important bits are less than 2,000 lines of Nix.
Note
This PR was formatted with piegamesde/nixfmt@21ef16e.
Description of changes
cuda-modules, and added some documentation in the form of a README.md there.
cudatoolkit's versions.toml to a Nix expression, releases.nix.
nvccCompatibilities.nix nvccCompatibilities.nix.
cuda-redist-find-features (ConnorBaker/cuda-redist-find-features@603407b).
Things to be done
Future Work
Tracked in #271217.
Things done
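For non-Linux: Is sandbox = true set in nix.conf? (See Nix manual)
Tested compilation of all packages that depend on this change using nix-shell -p nixpkgs-review --run "nixpkgs-review rev HEAD". Note: all changes have to be committed, also see nixpkgs-review usage
Tested basic functionality of all binary files (usually in ./result/bin/)
nixpkgs-review logs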
Note
Logs for runs below are available here: https://drive.google.com/drive/folders/1y_-NOIMiIoBRmIbVqnnTMdZLISQDj7fa?usp=share_link
x86_64-linux
Log archive name:
x86_64-linux-cap-7_5-pr-256324-36-logs.tar.zst
Result of nixpkgs-review pr 256324 --extra-nixpkgs-config '{ allowUnfree = true; allowBroken = false; cudaSupport = true; cudaCapabilities = [ "7.5" ]; }' run on x86_64-linux
1144 packages failed to build:
1715 packages built:
aarch64-linux
Jetson
Log archive name:
aarch64-linux-cap-7_2-pr-256324-36-logs.tar.zst
Result of nixpkgs-review pr 256324 --extra-nixpkgs-config '{ allowUnfree = true; allowBroken = false; cudaSupport = true; cudaCapabilities = [ "7.2" ]; }' run on aarch64-linux
1124 packages failed to build:
705 packages built:
Non-Jetson
Log archive name:
aarch64-linux-cap-7_5-pr-256324-33-logs.tar.zst.
Result of nixpkgs-review pr 256324 --extra-nixpkgs-config '{ allowUnfree = true; allowBroken = false; cudaSupport = true; cudaCapabilities = [ "7.5" ]; }' run on aarch64-linux
1112 packages failed to build:
729 packages built: