Skip to content

Commit a327e8e

Browse files
committed
devtools: Make github-merge compute SHA512 from git, instead of worktree
This changes tree_sha512sum() to request the objects for hashing from git instead of from the working tree. The change should make the process more deterministic (it hashes what will be pushed) and hopefully avoids the frequent miscomputed SHA512s that happen now.
1 parent 8040ae6 commit a327e8e

File tree

1 file changed

+37
-8
lines changed

1 file changed

+37
-8
lines changed

contrib/devtools/github-merge.py

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,24 +78,53 @@ def get_symlink_files():
7878
ret.append(f.decode('utf-8').split("\t")[1])
7979
return ret
8080

def tree_sha512sum(commit='HEAD'):
    """Return the SHA512 hex digest summarizing every file in a commit's tree.

    File contents are requested from git (``git ls-tree`` for the listing,
    ``git cat-file --batch`` for the data) rather than read from the working
    tree, so the result reflects exactly what is stored in ``commit``.

    The overall digest is the SHA512 of the concatenation, over all file
    names in sorted (bytewise) order, of
    ``<sha512 hex of file content> <file name>\\n``.

    Args:
        commit: revision passed to ``git ls-tree``; defaults to ``'HEAD'``.

    Returns:
        The overall SHA512 as a hexadecimal string.

    Raises:
        subprocess.CalledProcessError: if ``git ls-tree`` exits non-zero.
        IOError: if the tree contains a non-blob entry, if ``git cat-file``
            sends an unexpected or truncated reply, or if it exits non-zero.
    """
    # Request metadata for the entire tree, recursively.
    blob_by_name = {}
    listing = subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit])
    for line in listing.splitlines():
        # Each line is "<perms> <type> <object id>\t<name>".
        name_sep = line.index(b'\t')
        metadata = line[:name_sep].split()
        # Validate with an explicit raise: a bare assert would vanish under
        # `python -O` and let malformed entries through.
        if len(metadata) != 3 or metadata[1] != b'blob':
            raise IOError('Unexpected git ls-tree entry: %r' % (line,))
        blob_by_name[line[name_sep + 1:]] = metadata[2]

    # Open a connection to git-cat-file in batch mode to request data for all
    # blobs; this is much faster than launching it once per file.
    p = subprocess.Popen([GIT, 'cat-file', '--batch'],
                         stdout=subprocess.PIPE, stdin=subprocess.PIPE)
    try:
        overall = hashlib.sha512()
        for name in sorted(blob_by_name):
            blob = blob_by_name[name]
            # Request the blob.
            p.stdin.write(blob + b'\n')
            p.stdin.flush()
            # Read the header: "<object id> blob <size>".
            reply = p.stdout.readline().split()
            if len(reply) != 3 or reply[0] != blob or reply[1] != b'blob':
                raise IOError('Unexpected git cat-file reply for object %r: %r'
                              % (blob, reply))
            size = int(reply[2])
            # Hash the blob data in bounded chunks.
            intern = hashlib.sha512()
            ptr = 0
            while ptr < size:
                bs = min(65536, size - ptr)
                piece = p.stdout.read(bs)
                if len(piece) != bs:
                    raise IOError('Premature EOF reading git cat-file output')
                intern.update(piece)
                ptr += bs
            # The blob data is followed by a single LF. This read must always
            # happen (it keeps the stream aligned with the next header), so it
            # cannot live inside an assert.
            if p.stdout.read(1) != b'\n':
                raise IOError('Missing LF after blob data in git cat-file output')
            # Update the overall hash with "<file hash> <file name>\n".
            overall.update(intern.hexdigest().encode("utf-8"))
            overall.update(b" ")
            overall.update(name)
            overall.update(b"\n")
        # Closing stdin signals end of requests so git cat-file can exit.
        p.stdin.close()
        if p.wait():
            raise IOError('Non-zero return value executing git cat-file')
        return overall.hexdigest()
    finally:
        # If an error interrupted the exchange, do not leave git running.
        if p.poll() is None:
            p.kill()
            p.wait()
        p.stdout.close()
        try:
            p.stdin.close()  # no-op when already closed on the success path
        except (IOError, OSError):
            pass  # pipe already broken; there is nothing left to flush
100129

101130

0 commit comments

Comments
 (0)