Skip to content

Commit 6a4b51a

Browse files
jianyuhfacebook-github-bot
authored and committed
Add the intra-op parallelism for equal operator (#28810)
Summary: Pull Request resolved: #28810 Similar to #28464 and #28477, we would like to enable intra-op parallelism for the equal operator. This should translate into a parallel performance win for the BERT/RoBERTa model. Test Plan: CI Differential Revision: D18165752 fbshipit-source-id: 354cede4c36893acbd69711f49aa6a51dc94397f
1 parent 9ae6fd2 commit 6a4b51a

File tree

1 file changed

+17
-5
lines changed

1 file changed

+17
-5
lines changed

aten/src/TH/generic/THTensorMoreMath.cpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ptrdiff_t THTensor_(numel)(THTensor *t)
1919

2020
int THTensor_(equal)(THTensor *ta, THTensor* tb)
2121
{
22-
int equal = 1;
22+
std::atomic<int> equal{1};
2323
if(!THTensor_(isSameSizeAs)(ta, tb))
2424
return 0;
2525

@@ -28,9 +28,21 @@ int THTensor_(equal)(THTensor *ta, THTensor* tb)
2828
scalar_t *tbp = tb->data<scalar_t>();
2929
ptrdiff_t sz = THTensor_(nElement)(ta);
3030
ptrdiff_t i;
31-
for (i=0; i<sz; ++i){
32-
if(tap[i] != tbp[i]) return 0;
33-
}
31+
at::parallel_for(
32+
0,
33+
sz,
34+
TH_OMP_OVERHEAD_THRESHOLD,
35+
[&](int64_t begin, int64_t end) {
36+
for (auto iter = begin; iter < end; iter++) {
37+
if (!equal) {
38+
break;
39+
}
40+
if (tap[iter] != tbp[iter]) {
41+
equal = 0;
42+
break;
43+
}
44+
}
45+
});
3446
} else {
3547
// Short-circuit the apply function on inequality
3648
TH_TENSOR_APPLY2(scalar_t, ta, scalar_t, tb,
@@ -39,7 +51,7 @@ int THTensor_(equal)(THTensor *ta, THTensor* tb)
3951
TH_TENSOR_APPLY_hasFinished = 1; break;
4052
})
4153
}
42-
return equal;
54+
return equal.load();
4355
}
4456

4557
// Helper function to be used in a reduction operation.

0 commit comments

Comments
 (0)