-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Description
I started to use this file here:
# The function call
#
#   gitSource ./toplevel subpath
#
# creates a Nix store path of ./toplevel/subpath that includes only those files
# tracked by git. More precisely: mentioned in the git index (i.e. git add is enough
# to get them to be included, you do not have to commit).
#
# NOTE: in this version of the file the repository root is fixed to `../.`
# (relative to this file), and the expression evaluates to a function of a
# single argument `subdir`, which may be a path or a string naming a
# subdirectory relative to the repository root.
#
# This is a whitelist-based alternative to manually listing files or using
# nix-gitignore.
#
# Internally, it works by calling git ls-files at evaluation time. To
# avoid copying all of `.git` to the Nix store, it only copies the least amount
# of files necessary for `git ls-files` to work; this is a bit fragile, but
# very fast.
with builtins;

# We read the git index once, before getting the subdir parameter, so that it
# is shared among multiple invocations of gitSource:
let
  # filter_from_list root files
  #
  # Builds a source filter (a `path: type: bool` function) that accepts exactly
  # the entries of `files` (paths relative to `root`) and every ancestor
  # directory of those entries. The ancestors must be accepted as well,
  # otherwise the directory traversal would never reach the whitelisted files.
  filter_from_list = root: files:
    let
      # The path itself followed by all of its parent directories, stopping
      # when `dirOf` reaches "." or "/".
      all_paren_dirs = p:
        if p == "." || p == "/"
        then []
        else [ p ] ++ all_paren_dirs (dirOf p);

      # Attribute set used as a string set: one attribute per accepted
      # absolute path, so that membership is a single `hasAttr` lookup.
      whitelist_set = listToAttrs (
        concatMap (p:
          let full_path = toString (root + "/${p}"); in
          map (p': { name = p'; value = true; }) (all_paren_dirs full_path)
        ) files
      );
    in
    # The file type argument `t` is ignored; only the path decides.
    p: t: hasAttr (toString p) whitelist_set;

  # True iff the string `s` starts with `prefix`.
  has_prefix = prefix: s:
    prefix == builtins.substring 0 (builtins.stringLength prefix) s;

  # Drops the first `stringLength prefix` characters of `s`. It does not check
  # that `s` actually starts with `prefix`; callers test with has_prefix first.
  remove_prefix = prefix: s:
    builtins.substring
      (builtins.stringLength prefix)
      (builtins.stringLength s - builtins.stringLength prefix)
      s;

  # Splits `s` at newlines and returns the non-empty lines. `split` interleaves
  # a (here empty) list of match groups between the pieces; those and the empty
  # strings are filtered out.
  lines = s: filter (x: x != [] && x != "") (split "\n" s);
in

if builtins.pathExists ../.git
then
  let
    # Project-local nixpkgs pin; provides `runCommand` and `git` below.
    nixpkgs = (import ./nixpkgs.nix).nixpkgs {};

    # Directory that contains the git `index` file.
    git_dir =
      if builtins.pathExists ../.git/index
      then ../.git
      else # likely a git worktree, so follow the indirection
        let
          # In this case `.git` is expected to be a file with the single line
          # "gitdir: <path>".
          git_content = lines (readFile ./../.git);
          first_line = head git_content;
          prefix = "gitdir: ";
          ok = length git_content == 1 && has_prefix prefix first_line;
        in
          if ok
          then /. + remove_prefix prefix first_line
          else abort "gitSource.nix: Cannot parse ${toString ./../.git}";

    # Derivation whose output is the list of tracked files, one per line.
    # Only the index is copied into the build; HEAD, objects/ and refs/ are
    # stubbed out so that git accepts the directory as a git dir.
    #
    # core.quotePath is disabled because by default git prints paths containing
    # non-ASCII bytes in a C-quoted form ("caf\303\251"), which would never
    # match the real path and silently drop the file from the source. Paths
    # containing double quotes, backslashes or control characters (including
    # newlines) are still quoted by git and therefore remain unsupported.
    whitelist_file =
      nixpkgs.runCommand "git-ls-files" {} ''
        cp ${git_dir + "/index"} index
        echo "ref: refs/heads/master" > HEAD
        mkdir objects refs
        ${nixpkgs.git}/bin/git -c core.quotePath=false --git-dir . ls-files > $out
      '';

    # Reading the derivation output forces it to be built during evaluation
    # (import from derivation).
    whitelist = lines (readFile (whitelist_file.out));

    # Named `git_filter` rather than `filter` so that it does not shadow
    # `builtins.filter` inside this `let`.
    git_filter = filter_from_list ../. whitelist;
  in
    subdir: path {
      name = baseNameOf (toString subdir);
      # A string is interpreted relative to the repository root; a path is
      # used as it is.
      path = if isString subdir then (../. + "/${subdir}") else subdir;
      filter = git_filter;
    }
else
  # No `.git` next to the sources: copy the requested directory unfiltered.
  trace "gitSource.nix: ${toString ../.} does not seem to be a git repository,\nassuming it is a clean checkout." (
    subdir: path {
      name = baseNameOf (toString subdir);
      path = if isString subdir then (../. + "/${subdir}") else subdir;
    }
  )
This is essentially doing what this code fragment from builtins.fetchGit does:
nix/src/libexpr/primops/fetchGit.cc
Lines 51 to 73 in fb0ad89
| GitInfo gitInfo; | |
| gitInfo.rev = "0000000000000000000000000000000000000000"; | |
| gitInfo.shortRev = std::string(gitInfo.rev, 0, 7); | |
| auto files = tokenizeString<std::set<std::string>>( | |
| runProgram("git", true, { "-C", uri, "ls-files", "-z" }), "\0"s); | |
| PathFilter filter = [&](const Path & p) -> bool { | |
| assert(hasPrefix(p, uri)); | |
| std::string file(p, uri.size() + 1); | |
| auto st = lstat(p); | |
| if (S_ISDIR(st.st_mode)) { | |
| auto prefix = file + "/"; | |
| auto i = files.lower_bound(prefix); | |
| return i != files.end() && hasPrefix(*i, prefix); | |
| } | |
| return files.count(file); | |
| }; | |
| gitInfo.storePath = store->addToStore("source", uri, true, htSHA256, filter); |
The main differences from using something like builtins.fetchGit are:
- You can get a subdirectory, and files outside the directory never reach the store
- Even if the working directory is not dirty, you don’t go through the
git fetch / git archive / tar dance
And JFTR, the main benefits over approaches that parse .gitignore, or regex-based whitelists, are:
- It does not pick up random files that you happen to have dropped in the repository (e.g. gperf.out), which may be large or contain sensitive data that you don’t want to reach /nix/store.
- It does not pick up files that you have not yet added to git, avoiding the case where a local nix-build works for you but not for your colleagues, because you forgot to add source files.
But obviously there is a hack here, with this import from derivation, and it would be much cleaner if there was a
builtins.gitLsFiles that would take a path (which can be a subdirectory of the repository), run
auto files = tokenizeString<std::set<std::string>>(runProgram("git", true, { "-C", uri, "ls-files", "-z" }), "\0"s);
and return that as a list of strings. This would also allow useful things like combining it with further filters.
Since the code is essentially already there, as part of builtins.fetchGit, would it be feasible to expose it as a builtin?