Skip to content

Commit 01d982b

Browse files
committed
Update on "[export] Move swap to a different file"
Refactor so that the unflattener doesn't become too messy. Differential Revision: [D63719648](https://our.internmc.facebook.com/intern/diff/D63719648/) [ghstack-poisoned]
2 parents 9402d19 + 0317c33 commit 01d982b

File tree

423 files changed

+9555
-4082
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

423 files changed

+9555
-4082
lines changed

.ci/docker/build.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,12 @@ case "$image" in
355355
CONDA_CMAKE=yes
356356
VISION=yes
357357
;;
358+
pytorch-linux-jammy-py3-clang18-asan)
359+
ANACONDA_PYTHON_VERSION=3.10
360+
CLANG_VERSION=18
361+
CONDA_CMAKE=yes
362+
VISION=yes
363+
;;
358364
pytorch-linux-jammy-py3.9-gcc11)
359365
ANACONDA_PYTHON_VERSION=3.9
360366
GCC_VERSION=11
@@ -381,6 +387,13 @@ case "$image" in
381387
HALIDE=yes
382388
TRITON=yes
383389
;;
390+
pytorch-linux-jammy-py3.12-triton-cpu)
391+
CUDA_VERSION=12.4
392+
ANACONDA_PYTHON_VERSION=3.12
393+
GCC_VERSION=11
394+
CONDA_CMAKE=yes
395+
TRITON_CPU=yes
396+
;;
384397
pytorch-linux-focal-linter)
385398
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
386399
# We will need to update mypy version eventually, but that's for another day. The task
@@ -510,6 +523,7 @@ docker build \
510523
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
511524
--build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
512525
--build-arg "TRITON=${TRITON}" \
526+
--build-arg "TRITON_CPU=${TRITON_CPU}" \
513527
--build-arg "ONNX=${ONNX}" \
514528
--build-arg "DOCS=${DOCS}" \
515529
--build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
6a333f1b05671f6fada4ba7bbfae4a02a9d96f4f

.ci/docker/common/install_clang.sh

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,17 @@ if [ -n "$CLANG_VERSION" ]; then
1313
elif [[ $UBUNTU_VERSION == 22.04 ]]; then
1414
# work around ubuntu apt-get conflicts
1515
sudo apt-get -y -f install
16+
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
17+
if [[ $CLANG_VERSION == 18 ]]; then
18+
apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
19+
fi
1620
fi
1721

1822
sudo apt-get update
19-
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
20-
apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"
23+
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
24+
if [[ $CLANG_VERSION == 18 ]]; then
25+
apt-get install -y --no-install-recommends libomp-18-dev
26+
fi
2127

2228
# Install dev version of LLVM.
2329
if [ -n "$LLVMDEV" ]; then

.ci/docker/common/install_triton.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@ conda_reinstall() {
1515
if [ -n "${XPU_VERSION}" ]; then
1616
TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
1717
TRITON_TEXT_FILE="triton-xpu"
18+
elif [ -n "${TRITON_CPU}" ]; then
19+
TRITON_REPO="https://github.com/triton-lang/triton-cpu"
20+
TRITON_TEXT_FILE="triton-cpu"
1821
else
19-
TRITON_REPO="https://github.com/openai/triton"
22+
TRITON_REPO="https://github.com/triton-lang/triton"
2023
TRITON_TEXT_FILE="triton"
2124
fi
2225

@@ -44,9 +47,10 @@ chown -R jenkins /var/lib/jenkins/triton
4447
chgrp -R jenkins /var/lib/jenkins/triton
4548
pushd /var/lib/jenkins/
4649

47-
as_jenkins git clone ${TRITON_REPO} triton
50+
as_jenkins git clone --recursive ${TRITON_REPO} triton
4851
cd triton
4952
as_jenkins git checkout ${TRITON_PINNED_COMMIT}
53+
as_jenkins git submodule update --init --recursive
5054
cd python
5155

5256
# TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527

.ci/docker/ubuntu/Dockerfile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,13 @@ COPY ci_commit_pins/triton.txt triton.txt
147147
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
148148
RUN rm install_triton.sh common_utils.sh triton.txt
149149

150+
ARG TRITON_CPU
151+
COPY ./common/install_triton.sh install_triton.sh
152+
COPY ./common/common_utils.sh common_utils.sh
153+
COPY ci_commit_pins/triton-cpu.txt triton-cpu.txt
154+
RUN if [ -n "${TRITON_CPU}" ]; then bash ./install_triton.sh; fi
155+
RUN rm install_triton.sh common_utils.sh triton-cpu.txt
156+
150157
ARG EXECUTORCH
151158
# Build and install executorch
152159
COPY ./common/install_executorch.sh install_executorch.sh

.ci/pytorch/common_utils.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,9 +191,22 @@ function install_torchrec_and_fbgemm() {
191191
pip_uninstall torchrec-nightly
192192
pip_uninstall fbgemm-gpu-nightly
193193
pip_install setuptools-git-versioning scikit-build pyre-extensions
194+
195+
# TODO (huydhn): I still have no clue why sccache doesn't work with only fbgemm_gpu here, but it
196+
# seems to be an sccache-related issue
197+
if [[ "$IS_A100_RUNNER" == "1" ]]; then
198+
unset CMAKE_CUDA_COMPILER_LAUNCHER
199+
sudo mv /opt/cache/bin /opt/cache/bin-backup
200+
fi
201+
194202
# See https://github.com/pytorch/pytorch/issues/106971
195203
CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
196204
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
205+
206+
if [[ "$IS_A100_RUNNER" == "1" ]]; then
207+
export CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache
208+
sudo mv /opt/cache/bin-backup /opt/cache/bin
209+
fi
197210
}
198211

199212
function clone_pytorch_xla() {

.ci/pytorch/test.sh

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,11 @@ test_inductor_halide() {
606606
assert_git_not_dirty
607607
}
608608

609+
test_inductor_triton_cpu() {
610+
python test/run_test.py --include inductor/test_triton_cpu_backend.py --verbose
611+
assert_git_not_dirty
612+
}
613+
609614
test_dynamo_benchmark() {
610615
# Usage: test_dynamo_benchmark huggingface 0
611616
TEST_REPORTS_DIR=$(pwd)/test/test-reports
@@ -660,15 +665,6 @@ test_inductor_torchbench_smoketest_perf() {
660665
# The threshold value needs to be actively maintained to make this check useful
661666
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4
662667

663-
TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \
664-
--export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv"
665-
# The threshold value needs to be actively maintained to make this check useful
666-
# The perf number of nanogpt seems not very stable, e.g.
667-
# https://github.com/pytorch/pytorch/actions/runs/7158691360/job/19491437314,
668-
# and thus we lower its threshold to reduce flakiness. If this continues to be a problem,
669-
# we switch to use some other model.
670-
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.9
671-
672668
# Check memory compression ratio for a few models
673669
for test in hf_Albert timm_vision_transformer; do
674670
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
@@ -1439,6 +1435,8 @@ elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
14391435
test_inductor_distributed
14401436
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
14411437
test_inductor_halide
1438+
elif [[ "${TEST_CONFIG}" == *inductor-triton-cpu* ]]; then
1439+
test_inductor_triton_cpu
14421440
elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then
14431441
test_inductor_micro_benchmark
14441442
elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
@@ -1462,7 +1460,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
14621460
# https://github.com/opencv/opencv-python/issues/885
14631461
pip_install opencv-python==4.8.0.74
14641462
if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
1465-
checkout_install_torchbench hf_Bert hf_Albert nanogpt timm_vision_transformer
1463+
checkout_install_torchbench hf_Bert hf_Albert timm_vision_transformer
14661464
PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
14671465
elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
14681466
checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_edgecnn \

.ci/pytorch/win-build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ fi
2626
export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers
2727

2828
set +ex
29-
grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=eval_frame.c torch/
29+
grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=pythoncapi_compat.h --exclude=eval_frame.c torch/
3030
PYLONG_API_CHECK=$?
3131
if [[ $PYLONG_API_CHECK == 0 ]]; then
3232
echo "Usage of PyLong_{From,As}{Unsigned}Long API may lead to overflow errors on Windows"

.circleci/scripts/binary_linux_test.sh

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,11 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
2727
source activate testenv >/dev/null
2828
elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
2929
python_path="/opt/python/cp\$python_nodot-cp\${python_nodot}"
30-
# Prior to Python 3.8 paths were suffixed with an 'm'
31-
if [[ -d "\${python_path}/bin" ]]; then
32-
export PATH="\${python_path}/bin:\$PATH"
33-
elif [[ -d "\${python_path}m/bin" ]]; then
34-
export PATH="\${python_path}m/bin:\$PATH"
30+
if [[ "\$python_nodot" = *t ]]; then
31+
python_digits="\$(echo $DESIRED_PYTHON | tr -cd [:digit:])"
32+
python_path="/opt/python/cp\$python_digits-cp\${python_digits}t"
3533
fi
34+
export PATH="\${python_path}/bin:\$PATH"
3635
fi
3736
3837
EXTRA_CONDA_FLAGS=""

.clang-format

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ ContinuationIndentWidth: 4
4444
Cpp11BracedListStyle: true
4545
DerivePointerAlignment: false
4646
DisableFormat: false
47-
ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ]
47+
ForEachMacros:
48+
- FOR_EACH_RANGE
49+
- FOR_EACH
4850
IncludeCategories:
4951
- Regex: '^<.*\.h(pp)?>'
5052
Priority: 1
@@ -58,6 +60,24 @@ IndentWrappedFunctionNames: false
5860
KeepEmptyLinesAtTheStartOfBlocks: false
5961
MacroBlockBegin: ''
6062
MacroBlockEnd: ''
63+
Macros:
64+
- >-
65+
PyObject_HEAD_INIT(type)={
66+
/* this does not exactly match PyObject_HEAD_INIT in the Python source code
67+
* but it is enough for clang-format */
68+
{ 0xFFFFFFFF },
69+
(type)
70+
},
71+
- >-
72+
PyVarObject_HEAD_INIT(type, size)={
73+
{
74+
/* manually expand PyObject_HEAD_INIT(type) above
75+
* because clang-format does not support recursive expansion */
76+
{ 0xFFFFFFFF },
77+
(type)
78+
},
79+
(size)
80+
},
6181
MaxEmptyLinesToKeep: 1
6282
NamespaceIndentation: None
6383
PenaltyBreakBeforeFirstCallParameter: 1
@@ -79,7 +99,11 @@ SpacesInContainerLiterals: true
7999
SpacesInCStyleCastParentheses: false
80100
SpacesInParentheses: false
81101
SpacesInSquareBrackets: false
82-
Standard: Cpp11
102+
Standard: c++17
103+
StatementMacros:
104+
- PyObject_HEAD
105+
- PyObject_VAR_HEAD
106+
- PyException_HEAD
83107
TabWidth: 8
84108
UseTab: Never
85109
---

0 commit comments

Comments
 (0)