Skip to content

Commit fad661f

Browse files
committed
Update on "[ONNX] Enable _jit_pass_onnx_fold_if only when dynamic_axes is None (#50582)"
Fixing pytorch/vision#3251 (PR #49410 triggers the torchvision test build failure on three tests: test_faster_rcnn, test_mask_rcnn, and test_keypoint_rcnn.) The offending PR is fine on the PyTorch unit tests, because the torchvision and PyTorch tests have a gap when we merge them — we are using different test APIs on the two sides, which causes some discrepancy. This PR bridges the gap for the above three tests, and disables the _jit_pass_onnx_fold_if pass until it gets fixed, allowing _jit_pass_onnx_fold_if only when dynamic_axes is None. Differential Revision: [D26050886](https://our.internmc.facebook.com/intern/diff/D26050886) [ghstack-poisoned]
2 parents 1be4d44 + 5023805 commit fad661f

File tree

98 files changed

+2985
-572
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

98 files changed

+2985
-572
lines changed

.github/workflows/lint.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@ jobs:
170170
# FunctionsManual.cpp is excluded to keep this diff clean. It will be fixed
171171
# in a follow up PR.
172172
# /torch/csrc/generic/*.cpp is excluded because those files aren't actually built.
173+
# deploy/interpreter files are excluded due to using macros and other techniques
174+
# that are not easily converted to accepted c++
173175
python tools/clang_tidy.py \
174176
--verbose \
175177
--paths torch/csrc/ \
@@ -186,6 +188,10 @@ jobs:
186188
-g"-torch/csrc/autograd/FunctionsManual.cpp" \
187189
-g"-torch/csrc/generic/*.cpp" \
188190
-g"-torch/csrc/jit/codegen/cuda/runtime/*" \
191+
-g"-torch/csrc/deploy/interpreter/interpreter.cpp" \
192+
-g"-torch/csrc/deploy/interpreter/interpreter.h" \
193+
-g"-torch/csrc/deploy/interpreter/interpreter_impl.h" \
194+
-g"-torch/csrc/deploy/interpreter/test_main.cpp" \
189195
"$@" > ${GITHUB_WORKSPACE}/clang-tidy-output.txt
190196
191197
cat ${GITHUB_WORKSPACE}/clang-tidy-output.txt

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ torch/csrc/autograd/generated/*
6666
torch/testing/_internal/generated/annotated_fn_args.py
6767
torch/testing/_internal/data/*.pt
6868
torch/csrc/cudnn/cuDNN.cpp
69+
torch/csrc/deploy/interpreter/cpython
70+
torch/csrc/deploy/interpreter/frozen
71+
torch/csrc/deploy/interpreter/third_party/typing_extensions.py
6972
torch/csrc/generated
7073
torch/csrc/generic/TensorMethods.cpp
7174
torch/csrc/jit/generated/*

.jenkins/pytorch/build.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,17 @@ if [[ "$BUILD_ENVIRONMENT" == *-mobile-code-analysis* ]]; then
2323
exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile-code-analysis.sh" "$@"
2424
fi
2525

26+
if [[ "$BUILD_ENVIRONMENT" == *linux-xenial-cuda10.2-cudnn7-py3-gcc7* ]]; then
27+
# Enabling DEPLOY build (embedded torch python interpreter, experimental)
28+
# only on one config for now, can expand later
29+
export USE_DEPLOY=ON
30+
31+
# Deploy feature builds cpython. It requires these packages.
32+
# TODO move this to dockerfile?
33+
sudo apt-get -qq update
34+
sudo apt-get -qq install libffi-dev libbz2-dev libreadline-dev libncurses5-dev libncursesw5-dev libgdbm-dev libsqlite3-dev uuid-dev tk-dev
35+
fi
36+
2637
echo "Python version:"
2738
python --version
2839

.jenkins/pytorch/test.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,11 @@ test_vec256() {
354354
fi
355355
}
356356

357+
test_torch_deploy() {
358+
SIMPLE_MODEL_PATH=torch/csrc/deploy/example/simple.pt LIBINTERPRETER_PATH=build/lib/libinterpreter.so build/bin/interpreter_test
359+
assert_git_not_dirty
360+
}
361+
357362
if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
358363
(cd test && python -c "import torch; print(torch.__config__.show())")
359364
(cd test && python -c "import torch; print(torch.__config__.parallel_info())")
@@ -371,6 +376,9 @@ elif [[ "${BUILD_ENVIRONMENT}" == *libtorch* ]]; then
371376
# TODO: run some C++ tests
372377
echo "no-op at the moment"
373378
elif [[ "${BUILD_ENVIRONMENT}" == *-test1 || "${JOB_BASE_NAME}" == *-test1 ]]; then
379+
if [[ "${BUILD_ENVIRONMENT}" == pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-test1 ]]; then
380+
test_torch_deploy
381+
fi
374382
install_torchvision
375383
test_python_shard1
376384
elif [[ "${BUILD_ENVIRONMENT}" == *-test2 || "${JOB_BASE_NAME}" == *-test2 ]]; then

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,3 +919,8 @@ endif()
919919

920920
include(cmake/Summary.cmake)
921921
caffe2_print_configuration_summary()
922+
923+
# ---[ Torch Deploy
924+
if(USE_DEPLOY)
925+
add_subdirectory(torch/csrc/deploy)
926+
endif()

aten/src/ATen/native/BatchLinearAlgebra.cpp

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,22 @@ extern "C" void ssyevd_(char *jobz, char *uplo, int *n, float *a, int *lda, floa
8282
// geev
8383
extern "C" void dgeev_(char *jobvl, char *jobvr, int *n, double *a, int *lda, double *wr, double *wi, double* vl, int *ldvl, double *vr, int *ldvr, double *work, int *lwork, int *info);
8484
extern "C" void sgeev_(char *jobvl, char *jobvr, int *n, float *a, int *lda, float *wr, float *wi, float* vl, int *ldvl, float *vr, int *ldvr, float *work, int *lwork, int *info);
85+
extern "C" void cgeev_(char *jobvl, char *jobvr, int *n,
86+
std::complex<float> *a, int *lda,
87+
std::complex<float> *w,
88+
std::complex<float> *vl, int *ldvl,
89+
std::complex<float> *vr, int *ldvr,
90+
std::complex<float> *work, int *lwork,
91+
float *rwork,
92+
int *info);
93+
extern "C" void zgeev_(char *jobvl, char *jobvr, int *n,
94+
std::complex<double> *a, int *lda,
95+
std::complex<double> *w,
96+
std::complex<double> *vl, int *ldvl,
97+
std::complex<double> *vr, int *ldvr,
98+
std::complex<double> *work, int *lwork,
99+
double *rwork,
100+
int *info);
85101

86102
// gesdd
87103
extern "C" void zgesdd_(char *jobz, int *m, int *n, std::complex<double> *a, int *lda,
@@ -307,14 +323,44 @@ template<> void lapackSyevd<float>(char jobz, char uplo, int n, float *a, int ld
307323
ssyevd_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, iwork, &liwork, info);
308324
}
309325

310-
template<> void lapackEig<double>(char jobvl, char jobvr, int n, double *a, int lda, double *wr, double *wi, double* vl, int ldvl, double *vr, int ldvr, double *work, int lwork, int *info) {
326+
template<> void lapackEig<double>(char jobvl, char jobvr, int n, double *a, int lda, double *w, double* vl, int ldvl, double *vr, int ldvr, double *work, int lwork, double *rwork, int *info) {
327+
// lapack [sd]geev wants to separate output arrays: wr and wi for the real
328+
// and imaginary parts
329+
double *wr = w;
330+
double *wi = w + n;
331+
(void)rwork; // unused
311332
dgeev_(&jobvl, &jobvr, &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, work, &lwork, info);
312333
}
313334

314-
template<> void lapackEig<float>(char jobvl, char jobvr, int n, float *a, int lda, float *wr, float *wi, float* vl, int ldvl, float *vr, int ldvr, float *work, int lwork, int *info) {
335+
template<> void lapackEig<float>(char jobvl, char jobvr, int n, float *a, int lda, float *w, float* vl, int ldvl, float *vr, int ldvr, float *work, int lwork, float *rwork, int *info) {
336+
// lapack [sd]geev wants to separate output arrays: wr and wi for the real
337+
// and imaginary parts
338+
float *wr = w;
339+
float *wi = w + n;
340+
(void)rwork; // unused
315341
sgeev_(&jobvl, &jobvr, &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, work, &lwork, info);
316342
}
317343

344+
template<> void lapackEig<c10::complex<double>, double>(char jobvl, char jobvr, int n, c10::complex<double> *a, int lda, c10::complex<double> *w, c10::complex<double> *vl, int ldvl, c10::complex<double> *vr, int ldvr, c10::complex<double> *work, int lwork, double *rwork, int *info) {
345+
zgeev_(&jobvl, &jobvr, &n,
346+
reinterpret_cast<std::complex<double>*>(a), &lda,
347+
reinterpret_cast<std::complex<double>*>(w),
348+
reinterpret_cast<std::complex<double>*>(vl), &ldvl,
349+
reinterpret_cast<std::complex<double>*>(vr), &ldvr,
350+
reinterpret_cast<std::complex<double>*>(work), &lwork,
351+
rwork, info);
352+
}
353+
354+
template<> void lapackEig<c10::complex<float>, float>(char jobvl, char jobvr, int n, c10::complex<float> *a, int lda, c10::complex<float> *w, c10::complex<float> *vl, int ldvl, c10::complex<float> *vr, int ldvr, c10::complex<float> *work, int lwork, float *rwork, int *info) {
355+
cgeev_(&jobvl, &jobvr, &n,
356+
reinterpret_cast<std::complex<float>*>(a), &lda,
357+
reinterpret_cast<std::complex<float>*>(w),
358+
reinterpret_cast<std::complex<float>*>(vl), &ldvl,
359+
reinterpret_cast<std::complex<float>*>(vr), &ldvr,
360+
reinterpret_cast<std::complex<float>*>(work), &lwork,
361+
rwork, info);
362+
}
363+
318364
template<> void lapackSvd<c10::complex<double>, double>(char jobz, int m, int n, c10::complex<double> *a, int lda,
319365
double *s, c10::complex<double> *u, int ldu, c10::complex<double> *vt, int ldvt, c10::complex<double> *work, int lwork, double *rwork, int *iwork, int *info) {
320366
zgesdd_(&jobz, &m, &n, reinterpret_cast<std::complex<double>*>(a), &lda, s, reinterpret_cast<std::complex<double>*>(u), &ldu,
@@ -1441,7 +1487,11 @@ std::tuple<Tensor&, Tensor&> eig_out(Tensor& e, Tensor& v, const Tensor& self, b
14411487
TORCH_CHECK(v.dtype() == self.dtype(), "Expected 'v' to have dtype ", self.dtype(), " but got ", v.dtype());
14421488
int64_t n = self.size(-1);
14431489

1444-
at::native::resize_output(e, {n, 2});
1490+
if (isComplexType(at::typeMetaToScalarType(self.dtype()))) {
1491+
at::native::resize_output(e, {n});
1492+
} else {
1493+
at::native::resize_output(e, {n, 2});
1494+
}
14451495
if (eigenvectors) {
14461496
at::native::resize_output(v, self.sizes());
14471497
}
@@ -1566,6 +1616,8 @@ std::tuple<Tensor, Tensor, Tensor> _svd_helper_cpu(const Tensor& self, bool some
15661616
VT_working_copy.zero_();
15671617
}
15681618
// so far we have computed VT, but torch.svd returns V instead. Adjust accordingly.
1619+
// Note that the 'apply_svd' routine returns VT = V^T (for real inputs) or VT = V^H (for complex inputs), not V.
1620+
VT_working_copy = VT_working_copy.conj();
15691621
VT_working_copy.transpose_(-2, -1);
15701622
return std::make_tuple(U_working_copy, S_working_copy, VT_working_copy);
15711623
}
@@ -1596,8 +1648,8 @@ std::tuple<Tensor&, Tensor&, Tensor&> svd_out(Tensor& U, Tensor& S, Tensor& V,
15961648
1. the 2nd parameter is bool some=True, which is effectively the opposite
15971649
of full_matrices=True
15981650
1599-
2. svd returns V, while linalg.svd returns VT. To accommodate the
1600-
difference, we transpose() V upon return
1651+
2. svd returns V, while linalg.svd returns VT = V^T (for real inputs) or VT = V^H (for complex inputs).
1652+
To accommodate the difference, we transpose() and conj() V upon return
16011653
*/
16021654

16031655
std::tuple<Tensor, Tensor, Tensor> linalg_svd(const Tensor& self, bool full_matrices, bool compute_uv) {
@@ -1608,7 +1660,7 @@ std::tuple<Tensor, Tensor, Tensor> linalg_svd(const Tensor& self, bool full_matr
16081660
Tensor U, S, V;
16091661
std::tie(U, S, V) = at::_svd_helper(self, some, compute_uv);
16101662
if (compute_uv) {
1611-
Tensor VT = V.transpose(-2, -1);
1663+
Tensor VT = V.conj().transpose(-2, -1);
16121664
return std::make_tuple(U, S, VT);
16131665
} else {
16141666
Tensor empty_U = at::empty({0}, self.options());

aten/src/ATen/native/BatchLinearAlgebra.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ namespace at { namespace native {
1414
// Define per-batch functions to be used in the implementation of batched
1515
// linear algebra operations
1616

17-
template<class scalar_t>
18-
void lapackEig(char jobvl, char jobvr, int n, scalar_t *a, int lda, scalar_t *wr, scalar_t *wi, scalar_t* vl, int ldvl, scalar_t *vr, int ldvr, scalar_t *work, int lwork, int *info);
17+
template<class scalar_t, class value_t=scalar_t>
18+
void lapackEig(char jobvl, char jobvr, int n, scalar_t *a, int lda, scalar_t *w, scalar_t* vl, int ldvl, scalar_t *vr, int ldvr, scalar_t *work, int lwork, value_t *rwork, int *info);
1919

2020
template<class scalar_t>
2121
void lapackOrgqr(int m, int n, int k, scalar_t *a, int lda, scalar_t *tau, scalar_t *work, int lwork, int *info);

aten/src/ATen/native/BatchLinearAlgebraKernel.cpp

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include <ATen/Dispatch.h>
33
#include <ATen/native/BatchLinearAlgebra.h>
44
#include <ATen/native/LinearAlgebraUtils.h>
5+
#include <ATen/native/cpu/zmath.h>
56

67
#include <TH/TH.h> // for USE_LAPACK
78

@@ -15,29 +16,38 @@ void apply_eig(const Tensor& self, bool eigenvectors, Tensor& vals_, Tensor& vec
1516
TORCH_CHECK(false, "Calling torch.eig on a CPU tensor requires compiling ",
1617
"PyTorch with LAPACK. Please use PyTorch built with LAPACK support.");
1718
#else
19+
using value_t = typename c10::scalar_value_type<scalar_t>::type;
20+
1821
char jobvr = eigenvectors ? 'V' : 'N';
1922
int64_t n = self.size(-1);
2023
auto self_data = self.data_ptr<scalar_t>();
2124

2225
auto vals_data = vals_.data_ptr<scalar_t>();
2326
scalar_t* wr = vals_data;
24-
scalar_t* wi = vals_data + n;
2527

2628
scalar_t* vecs_data = eigenvectors ? vecs_.data_ptr<scalar_t>() : nullptr;
2729
int ldvr = eigenvectors ? n : 1;
2830

31+
Tensor rwork;
32+
value_t* rwork_data = nullptr;
33+
if (self.is_complex()) {
34+
ScalarType real_dtype = toValueType(typeMetaToScalarType(self.dtype()));
35+
rwork = at::empty({n*2}, self.options().dtype(real_dtype));
36+
rwork_data = rwork.data_ptr<value_t>();
37+
}
38+
2939
if (n > 0) {
3040
// call lapackEig once to get the optimal size for work data
3141
scalar_t wkopt;
3242
int info;
33-
lapackEig<scalar_t>('N', jobvr, n, self_data, n, wr, wi,
34-
nullptr, 1, vecs_data, ldvr, &wkopt, -1, &info);
35-
int lwork = static_cast<int>(wkopt);
43+
lapackEig<scalar_t, value_t>('N', jobvr, n, self_data, n, wr,
44+
nullptr, 1, vecs_data, ldvr, &wkopt, -1, rwork_data, &info);
45+
int lwork = static_cast<int>(real_impl<scalar_t, value_t>(wkopt));
3646

3747
// call again to do the actual work
3848
Tensor work = at::empty({lwork}, self.dtype());
39-
lapackEig<scalar_t>('N', jobvr, n, self_data, n, wr, wi,
40-
nullptr, 1, vecs_data, ldvr, work.data_ptr<scalar_t>(), lwork, &info);
49+
lapackEig<scalar_t, value_t>('N', jobvr, n, self_data, n, wr,
50+
nullptr, 1, vecs_data, ldvr, work.data_ptr<scalar_t>(), lwork, rwork_data, &info);
4151
*info_ptr = info;
4252
}
4353
#endif
@@ -55,13 +65,23 @@ std::tuple<Tensor, Tensor> eig_kernel_impl(const Tensor& self, bool& eigenvector
5565
self_.copy_(self);
5666

5767
auto options = self.options().memory_format(LEGACY_CONTIGUOUS_MEMORY_FORMAT);
58-
Tensor vals_ = at::empty_strided({n, 2}, {1, n}, options);
68+
69+
// the API is slightly different for the complex vs real case: if the input
70+
// is complex, eigenvals will be a vector of complex. If the input is real,
71+
// eigenvals will be a (n, 2) matrix containing the real and imaginary parts
72+
// in each column
73+
Tensor vals_;
74+
if (self.is_complex()) {
75+
vals_ = at::empty({n}, options);
76+
} else {
77+
vals_ = at::empty_strided({n, 2}, {1, n}, options);
78+
}
5979
Tensor vecs_ = eigenvectors
6080
? at::empty_strided({n, n}, {1, n}, options)
6181
: Tensor();
6282

6383
int64_t info;
64-
AT_DISPATCH_FLOATING_TYPES(self.scalar_type(), "eig_cpu", [&]{
84+
AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES(self.scalar_type(), "eig_cpu", [&]{
6585
apply_eig<scalar_t>(self_, eigenvectors, vals_, vecs_, &info);
6686
});
6787
singleCheckErrors(info, "eig_cpu");

aten/src/ATen/native/LinearAlgebra.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -147,16 +147,14 @@ Tensor linalg_pinv(const Tensor& input, const Tensor& rcond, bool hermitian) {
147147

148148
// If not Hermitian use singular value decomposition, else use eigenvalue decomposition
149149
if (!hermitian) {
150-
// until https://github.com/pytorch/pytorch/issues/45821 is resolved
151-
// svd() returns conjugated V for complex-valued input
152-
Tensor U, S, V_conj;
150+
Tensor U, S, V;
153151
// TODO: replace input.svd with linalg_svd
154-
std::tie(U, S, V_conj) = input.svd();
152+
// using linalg_svd breaks pytorch/xla, see https://github.com/pytorch/xla/issues/2755
153+
std::tie(U, S, V) = input.svd();
155154
Tensor max_val = at::narrow(S, /*dim=*/-1, /*start=*/0, /*length=*/1); // singular values are sorted in descending order
156155
Tensor S_pseudoinv = at::where(S > (rcond.unsqueeze(-1) * max_val), S.reciprocal(), at::zeros({}, S.options())).to(input.dtype());
157-
// computes V @ diag(S_pseudoinv) @ U.T.conj()
158-
// TODO: replace V_conj.conj() -> V once https://github.com/pytorch/pytorch/issues/45821 is resolved
159-
return at::matmul(V_conj.conj() * S_pseudoinv.unsqueeze(-2), U.conj().transpose(-2, -1));
156+
// computes V @ diag(S_pseudoinv) @ U.conj().T
157+
return at::matmul(V * S_pseudoinv.unsqueeze(-2), U.conj().transpose(-2, -1));
160158
} else {
161159
Tensor S, U;
162160
std::tie(S, U) = at::linalg_eigh(input);

0 commit comments

Comments
 (0)