Skip to content

Commit eebb110

Browse files
committed
Update on "compile time benchmarks for AOTDispatcher (partitioner)"
Compile time benchmark for the min cut partitioner. I'm hoping that this is a reasonable benchmark because: (1) it consists of a single input + many weights that are used sequentially, (2) it contains a mix of recomputed vs. non-recomputed ops (matmul + sin), and (3) it is relatively simple.

From running locally:
```
collecting compile time instruction count for aotdispatcher_partitioner_cpu
compile time instruction count for iteration 0 is 21764219181
compile time instruction count for iteration 1 is 12475020009
compile time instruction count for iteration 2 is 12463710140
compile time instruction count for iteration 3 is 12455676489
compile time instruction count for iteration 4 is 12451344330
```

cc voznesenskym penguinwu EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng wenzhe-nrv jiayisunx chenyang78 kadeng chauhang amjames rec [ghstack-poisoned]
2 parents fff0de3 + 2927306 commit eebb110

File tree

229 files changed

+5045
-2411
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

229 files changed

+5045
-2411
lines changed

.ci/docker/build.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ case "$image" in
379379
GCC_VERSION=11
380380
CONDA_CMAKE=yes
381381
HALIDE=yes
382+
TRITON=yes
382383
;;
383384
pytorch-linux-focal-linter)
384385
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
5fe38ffd73c2ac6ed6323b554205186696631c6f
1+
cf34004b8a67d290a962da166f5aa2fc66751326

.ci/pytorch/test.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ test_inductor_cpp_wrapper_abi_compatible() {
376376

377377
echo "Testing Inductor cpp wrapper mode with TORCHINDUCTOR_ABI_COMPATIBLE=1"
378378
PYTORCH_TESTING_DEVICE_ONLY_FOR="" python test/run_test.py --include inductor/test_cpu_cpp_wrapper
379-
python test/run_test.py --include inductor/test_cuda_cpp_wrapper inductor/test_cpu_repro
379+
python test/run_test.py --include inductor/test_cuda_cpp_wrapper inductor/test_cpu_repro inductor/test_extension_backend
380380

381381
TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \
382382
--training --inductor --disable-cudagraphs --only vit_base_patch16_224 \
@@ -403,7 +403,7 @@ pr_time_benchmarks() {
403403
PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "benchmarks/dynamo/pr_time_benchmarks/benchmarks"
404404
echo "benchmark results on current PR: "
405405
cat "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv"
406-
406+
PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks python benchmarks/dynamo/pr_time_benchmarks/check_results.py "benchmarks/dynamo/pr_time_benchmarks/expected_results.csv" "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv"
407407
}
408408

409409
if [[ "${TEST_CONFIG}" == *pr_time_benchmarks* ]]; then

.github/ISSUE_TEMPLATE.md

Lines changed: 0 additions & 38 deletions
This file was deleted.

.github/ISSUE_TEMPLATE/ci-sev.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ about: Tracking incidents for PyTorch's CI infra.
55

66
> NOTE: Remember to label this issue with "`ci: sev`"
77
8-
**MERGE BLOCKING** <!-- remove this line if you don't want this SEV to block merges -->
8+
<!-- uncomment the below line if you don't want this SEV to block merges -->
9+
<!-- **MERGE BLOCKING** -->
910

1011
## Current Status
1112
*Status could be: preemptive, ongoing, mitigated, closed. Also tell people if they need to take action to fix it (i.e. rebase)*.

.github/scripts/runner_determinator.py

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ class Experiment(NamedTuple):
7979
rollout_perc: float = (
8080
0 # Percentage of workflows to experiment on when user is not opted-in.
8181
)
82+
all_branches: bool = (
83+
False # If True, the experiment is also enabled on the exception branches
84+
)
8285

8386
# Add more fields as needed
8487

@@ -212,7 +215,7 @@ def get_potential_pr_author(
212215

213216
def is_exception_branch(branch: str) -> bool:
214217
"""
215-
Branches that get opted out of all experiments and should always use Meta runners
218+
Branches that get opted out of experiments by default, until they're explicitly enabled.
216219
"""
217220
return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}
218221

@@ -338,7 +341,10 @@ def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) -
338341

339342

340343
def get_runner_prefix(
341-
rollout_state: str, workflow_requestors: Iterable[str], is_canary: bool = False
344+
rollout_state: str,
345+
workflow_requestors: Iterable[str],
346+
branch: str,
347+
is_canary: bool = False,
342348
) -> str:
343349
settings = parse_settings(rollout_state)
344350
user_optins = parse_users(rollout_state)
@@ -348,6 +354,12 @@ def get_runner_prefix(
348354
for experiment_name, experiment_settings in settings.experiments.items():
349355
enabled = False
350356

357+
if not experiment_settings.all_branches and is_exception_branch(branch):
358+
log.info(
359+
f"Branch {branch} is an exception branch. Not enabling experiment {experiment_name}."
360+
)
361+
continue
362+
351363
# Is any workflow_requestor opted in to this experiment?
352364
opted_in_users = [
353365
requestor
@@ -407,35 +419,34 @@ def get_rollout_state_from_issue(github_token: str, repo: str, issue_num: int) -
407419
def main() -> None:
408420
args = parse_args()
409421

410-
if args.github_ref_type == "branch" and is_exception_branch(args.github_branch):
411-
log.info(
412-
f"Exception branch: '{args.github_branch}', using Meta runners and no experiments."
422+
runner_label_prefix = DEFAULT_LABEL_PREFIX
423+
424+
try:
425+
rollout_state = get_rollout_state_from_issue(
426+
args.github_token, args.github_issue_repo, args.github_issue
413427
)
414-
runner_label_prefix = DEFAULT_LABEL_PREFIX
415-
else:
416-
try:
417-
rollout_state = get_rollout_state_from_issue(
418-
args.github_token, args.github_issue_repo, args.github_issue
419-
)
420428

421-
username = get_potential_pr_author(
422-
args.github_token,
423-
args.github_repo,
424-
args.github_actor,
425-
args.github_ref_type,
426-
args.github_branch,
427-
)
429+
username = get_potential_pr_author(
430+
args.github_token,
431+
args.github_repo,
432+
args.github_actor,
433+
args.github_ref_type,
434+
args.github_branch,
435+
)
428436

429-
is_canary = args.github_repo == "pytorch/pytorch-canary"
437+
is_canary = args.github_repo == "pytorch/pytorch-canary"
430438

431-
runner_label_prefix = get_runner_prefix(
432-
rollout_state, (args.github_issue_owner, username), is_canary
433-
)
439+
runner_label_prefix = get_runner_prefix(
440+
rollout_state,
441+
(args.github_issue_owner, username),
442+
args.github_branch,
443+
is_canary,
444+
)
434445

435-
except Exception as e:
436-
log.error(
437-
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
438-
)
446+
except Exception as e:
447+
log.error(
448+
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
449+
)
439450

440451
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)
441452

.github/scripts/test_runner_determinator.py

Lines changed: 80 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
import runner_determinator as rd
55

66

7+
USER_BRANCH = "somebranch"
8+
EXCEPTION_BRANCH = "main"
9+
10+
711
class TestRunnerDeterminatorIssueParser(TestCase):
812
def test_parse_settings(self) -> None:
913
settings_text = """
@@ -66,6 +70,40 @@ def test_parse_settings_in_code_block(self) -> None:
6670
"otherExp settings not parsed correctly",
6771
)
6872

73+
def test_parse_all_branches_setting(self) -> None:
74+
settings_text = """
75+
```
76+
experiments:
77+
lf:
78+
rollout_perc: 25
79+
all_branches: true
80+
otherExp:
81+
all_branches: True
82+
rollout_perc: 0
83+
```
84+
85+
---
86+
87+
Users:
88+
@User1,lf
89+
@User2,lf,otherExp
90+
91+
"""
92+
93+
settings = rd.parse_settings(settings_text)
94+
95+
self.assertTupleEqual(
96+
rd.Experiment(rollout_perc=25, all_branches=True),
97+
settings.experiments["lf"],
98+
"lf settings not parsed correctly",
99+
)
100+
self.assertTrue(settings.experiments["otherExp"].all_branches)
101+
self.assertTupleEqual(
102+
rd.Experiment(rollout_perc=0, all_branches=True),
103+
settings.experiments["otherExp"],
104+
"otherExp settings not parsed correctly",
105+
)
106+
69107
def test_parse_users(self) -> None:
70108
settings_text = """
71109
experiments:
@@ -119,7 +157,7 @@ def test_opted_in_user(self) -> None:
119157
@User2,lf,otherExp
120158
121159
"""
122-
prefix = rd.get_runner_prefix(settings_text, ["User1"])
160+
prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
123161
self.assertEqual("lf.", prefix, "Runner prefix not correct for User1")
124162

125163
def test_opted_in_user_two_experiments(self) -> None:
@@ -136,7 +174,7 @@ def test_opted_in_user_two_experiments(self) -> None:
136174
@User2,lf,otherExp
137175
138176
"""
139-
prefix = rd.get_runner_prefix(settings_text, ["User2"])
177+
prefix = rd.get_runner_prefix(settings_text, ["User2"], USER_BRANCH)
140178
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for User2")
141179

142180
@patch("random.uniform", return_value=50)
@@ -154,7 +192,7 @@ def test_opted_out_user(self, mock_uniform: Mock) -> None:
154192
@User2,lf,otherExp
155193
156194
"""
157-
prefix = rd.get_runner_prefix(settings_text, ["User3"])
195+
prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
158196
self.assertEqual("", prefix, "Runner prefix not correct for user")
159197

160198
@patch("random.uniform", return_value=10)
@@ -174,7 +212,7 @@ def test_opted_out_user_was_pulled_in_by_rollout(self, mock_uniform: Mock) -> No
174212
"""
175213

176214
# User3 is opted out, but is pulled into both experiments by the 10% rollout
177-
prefix = rd.get_runner_prefix(settings_text, ["User3"])
215+
prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
178216
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
179217

180218
def test_lf_prefix_always_comes_first(self) -> None:
@@ -192,7 +230,7 @@ def test_lf_prefix_always_comes_first(self) -> None:
192230
193231
"""
194232

195-
prefix = rd.get_runner_prefix(settings_text, ["User2"])
233+
prefix = rd.get_runner_prefix(settings_text, ["User2"], USER_BRANCH)
196234
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
197235

198236
def test_ignores_commented_users(self) -> None:
@@ -210,7 +248,7 @@ def test_ignores_commented_users(self) -> None:
210248
211249
"""
212250

213-
prefix = rd.get_runner_prefix(settings_text, ["User1"])
251+
prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
214252
self.assertEqual("", prefix, "Runner prefix not correct for user")
215253

216254
def test_ignores_extra_experiments(self) -> None:
@@ -229,9 +267,44 @@ def test_ignores_extra_experiments(self) -> None:
229267
230268
"""
231269

232-
prefix = rd.get_runner_prefix(settings_text, ["User1"])
270+
prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
233271
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
234272

273+
def test_disables_experiment_on_exception_branches_when_not_explicitly_opted_in(
274+
self,
275+
) -> None:
276+
settings_text = """
277+
experiments:
278+
lf:
279+
rollout_perc: 100
280+
---
281+
282+
Users:
283+
@User,lf,otherExp
284+
285+
"""
286+
287+
prefix = rd.get_runner_prefix(settings_text, ["User1"], EXCEPTION_BRANCH)
288+
self.assertEqual("", prefix, "Runner prefix not correct for user")
289+
290+
def test_allows_experiment_on_exception_branches_when_explicitly_opted_in(
291+
self,
292+
) -> None:
293+
settings_text = """
294+
experiments:
295+
lf:
296+
rollout_perc: 100
297+
all_branches: true
298+
---
299+
300+
Users:
301+
@User,lf,otherExp
302+
303+
"""
304+
305+
prefix = rd.get_runner_prefix(settings_text, ["User1"], EXCEPTION_BRANCH)
306+
self.assertEqual("lf.", prefix, "Runner prefix not correct for user")
307+
235308

236309
if __name__ == "__main__":
237310
main()

0 commit comments

Comments
 (0)