Skip to content

Commit 1049fe8

Browse files
ThomasCJY authored and copybara-github committed
[Bazel] Speed up mobile-install incremental manifest generation by computing dex checksums in multiple threads
**Background**

While using `mobile-install`, we noticed that it consistently takes longer than `bazel build` + `adb install` on incremental builds. Taking our app as an example, the incremental build metrics for a single-line Kotlin code change look like this:

| Command                   | Time |
|---------------------------|------|
| bazel build + adb install | 63s  |
| mobile-install            | 91s  |

After digging into it, I found that the bottleneck is the "Incremental Manifest Generating" action, which takes a lot of time (35+ sec) for a multidex build. The time is spent on the checksum calculation for all dex files. The SHA-256 checksum for each dex file takes around 1-2 sec. Our app currently has 10 dex shards and each dex zip contains 2-3 dex files, so processing them sequentially takes more than 30 seconds.

**Change**

In this PR, I added multithreading support to this script so that the checksum calculations run concurrently. This reduced the "Incremental Manifest Generating" action to 6 seconds (an 80%+ improvement).

**Result & Test**

After applying this change, the total incremental build time is reduced to 43 seconds, with a 30s+ improvement coming from the `Incremental Manifest Generating` step.
Before: <img width=400 src="https://user-images.githubusercontent.com/6951238/92814439-f678ef80-f377-11ea-967f-92767a08587e.png">

After: <img width=400 src="https://user-images.githubusercontent.com/6951238/92814445-fb3da380-f377-11ea-8de6-ff8c6b77c3f8.png">

You can also easily verify this from the command line:

```
jchen tmp % time python ../build_incremental_dexmanifest.py ../output/outmanifest.txt shard1.dex.zip shard10.dex.zip shard2.dex.zip shard3.dex.zip shard4.dex.zip shard5.dex.zip shard6.dex.zip shard7.dex.zip shard8.dex.zip shard9.dex.zip
python ../build_incremental_dexmanifest.py ../output/outmanifest.txt  0.70s user 0.72s system 31% cpu 4.583 total

jchen tmp % time python ../build_incremental_dexmanifest_before.py ../output/outmanifest.txt shard1.dex.zip shard10.dex.zip shard2.dex.zip shard3.dex.zip shard4.dex.zip shard5.dex.zip shard6.dex.zip shard7.dex.zip shard8.dex.zip shard9.dex.zip
python ../build_incremental_dexmanifest_before.py ../output/outmanifest.txt  0.65s user 0.64s system 3% cpu 37.908 total
```

Closes #12085.

PiperOrigin-RevId: 340996883
1 parent f9f8ce7 commit 1049fe8

1 file changed

Lines changed: 53 additions & 21 deletions

File tree

tools/android/build_incremental_dexmanifest.py

Lines changed: 53 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,11 @@
3232

3333
import hashlib
3434
import os
35+
from queue import Queue
3536
import shutil
3637
import sys
3738
import tempfile
39+
from threading import Thread
3840
import zipfile
3941

4042

@@ -47,6 +49,8 @@ def __init__(self):
4749
self.output_dex_counter = 1
4850
self.checksums = set()
4951
self.tmpdir = None
52+
self.queue = Queue()
53+
self.threads_list = list()
5054

5155
def __enter__(self):
5256
self.tmpdir = tempfile.mkdtemp()
@@ -55,8 +59,14 @@ def __enter__(self):
5559
def __exit__(self, unused_type, unused_value, unused_traceback):
5660
shutil.rmtree(self.tmpdir, True)
5761

58-
def Checksum(self, filename):
59-
"""Compute the SHA-256 checksum of a file."""
62+
def Checksum(self, filename, input_dex_or_zip, zippath):
63+
"""Compute the SHA-256 checksum of a file.
64+
65+
This method could be invoked concurrently.
66+
67+
Therefore we need to include other metadata like input_dex_or_zip to
68+
keep the context.
69+
"""
6070
h = hashlib.sha256()
6171
with open(filename, "rb") as f:
6272
while True:
@@ -66,30 +76,42 @@ def Checksum(self, filename):
6676

6777
h.update(data)
6878

69-
return h.hexdigest()
79+
return h.hexdigest(), input_dex_or_zip, zippath
7080

71-
def AddDex(self, input_dex_or_zip, zippath, dex):
72-
"""Adds a dex file to the output.
81+
def AddDexes(self, dex_metadata_list):
  """Adds all dex files to the output in a deterministic order.

  The entries are sorted by (input file, zip path) so that the manifest lines
  and the incremental_classesN.dex numbering are the same for the same input,
  independent of the order in which the entries were collected.

  A dex file whose checksum has already been recorded in self.checksums is
  skipped; all other entries are still processed.

  Args:
    dex_metadata_list: A list of [fs_checksum, input_dex_or_zip, zippath],
      where fs_checksum is the SHA-256 checksum of the dex file,
      input_dex_or_zip is the input file written to the manifest, and zippath
      is the zip path written to the manifest or None if the input file is
      not a .zip.

  Returns:
    None.
  """
  # A missing zippath (None) is ordered as "" so that entries which share an
  # input file never end up comparing None with a string (or with None),
  # which would raise TypeError on Python 3.
  dex_metadata_list_sorted = sorted(
      dex_metadata_list, key=lambda x: (x[1], x[2] or ""))
  for fs_checksum, input_dex_or_zip, zippath in dex_metadata_list_sorted:
    if fs_checksum in self.checksums:
      # Skip only this duplicate. Returning here would silently drop every
      # remaining dex file from the manifest.
      continue
    self.checksums.add(fs_checksum)
    zip_dex = "incremental_classes%d.dex" % self.output_dex_counter
    self.output_dex_counter += 1
    self.manifest_lines.append(
        "%s %s %s %s" %
        (input_dex_or_zip, zippath if zippath else "-", zip_dex, fs_checksum))
108+
109+
def ComputeChecksumConcurrently(self, input_dex_or_zip, zippath, dex):
  """Starts a thread that checksums one dex file and queues the result.

  The thread calls self.Checksum(dex, input_dex_or_zip, zippath) and puts the
  returned value on self.queue. The started thread is appended to
  self.threads_list.

  Args:
    input_dex_or_zip: the input file name, passed through to Checksum.
    zippath: the zip path, passed through to Checksum.
    dex: the dex file whose checksum is computed.

  Returns:
    None.
  """

  def _ChecksumToQueue(results, dex_path, source, path_in_zip):
    # Runs on the worker thread: compute the checksum and publish it.
    results.put(self.Checksum(dex_path, source, path_in_zip))

  worker = Thread(
      target=_ChecksumToQueue,
      args=(self.queue, dex, input_dex_or_zip, zippath))
  worker.start()
  self.threads_list.append(worker)
93115

94116
def Run(self, argv):
95117
"""Creates a dex manifest."""
@@ -116,9 +138,19 @@ def Run(self, argv):
116138

117139
input_dex_zip.extract(input_dex_dex, input_dex_dir)
118140
fs_dex = input_dex_dir + "/" + input_dex_dex
119-
self.AddDex(input_filename, input_dex_dex, fs_dex)
141+
self.ComputeChecksumConcurrently(input_filename, input_dex_dex,
142+
fs_dex)
120143
elif input_filename.endswith(".dex"):
121-
self.AddDex(input_filename, None, input_filename)
144+
self.ComputeChecksumConcurrently(input_filename, None, input_filename)
145+
# Collect results from all threads
146+
for t in self.threads_list:
147+
t.join()
148+
149+
results = []
150+
while not self.queue.empty():
151+
fs_checksum, input_dex_or_zip, zippath = self.queue.get()
152+
results.append([fs_checksum, input_dex_or_zip, zippath])
153+
self.AddDexes(results)
122154

123155
with open(argv[0], "wb") as manifest:
124156
manifest.write(("\n".join(self.manifest_lines)).encode("utf-8"))

0 commit comments

Comments
 (0)