Skip to content

Commit b8ff29c

Browse files
sparker-armV8 LUCI CQ
authored andcommitted
[compiler][arm64] Fix Dup and Shuffle recognition
The 'dup and shuffle' optimization worked on the assumption that, for swizzles, we could drop the second argument and replace it with the result of the dup. This would result in the dup producing the eight most significant bytes and the canonical shuffle would produce the other eight. However, because we are dealing with swizzles, it may be that the instruction used to implement the canonical shuffle doesn't read from two registers. In this case the dup is never read and so the top bytes are not what we expect. Bug: 455592080 Change-Id: Ic205b01dec1a82631b1de1fce2d98e82d9af3804 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7095236 Reviewed-by: Darius Mercadier <[email protected]> Commit-Queue: Darius Mercadier <[email protected]> Reviewed-by: Marja Hölttä <[email protected]> Cr-Commit-Position: refs/heads/main@{#103394}
1 parent 751d0ff commit b8ff29c

3 files changed

Lines changed: 31 additions & 28 deletions

File tree

src/compiler/backend/arm64/instruction-selector-arm64.cc

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5819,19 +5819,14 @@ void InstructionSelector::VisitI8x16Shuffle(OpIndex node) {
58195819
int lane_size = kBitsPerByte * kSimd128Size / lanes;
58205820
Emit(kArm64S128Dup | LaneSizeField::encode(lane_size), dup,
58215821
g.UseRegister(dup_input), g.UseImmediate(dup_index));
5822-
if (is_swizzle) {
5823-
Emit(shuffle_op, g.DefineAsRegister(node), g.UseRegister(input0), dup);
5824-
return;
5825-
} else {
5826-
// For non-swizzles, we first need to perform the shuffles with the two
5827-
// original inputs, into a temp register.
5828-
InstructionOperand temp = g.TempSimd128Register();
5829-
Emit(shuffle_op, temp, g.UseRegister(input0), g.UseRegister(input1));
5830-
// Then we need to move the dup result into the top 8 bytes.
5831-
Emit(kArm64S128MoveLane | LaneSizeField::encode(64),
5832-
g.DefineSameAsFirst(node), temp, dup, g.UseImmediate(1),
5833-
g.UseImmediate(1));
5834-
}
5822+
// First need to perform the shuffles with the two original inputs, into a
5823+
// temp register.
5824+
InstructionOperand temp = g.TempSimd128Register();
5825+
Emit(shuffle_op, temp, g.UseRegister(input0), g.UseRegister(input1));
5826+
// Then we need to move the dup result into the top 8 bytes.
5827+
Emit(kArm64S128MoveLane | LaneSizeField::encode(64),
5828+
g.DefineSameAsFirst(node), temp, dup, g.UseImmediate(1),
5829+
g.UseImmediate(1));
58355830
};
58365831

58375832
std::array<uint8_t, kSimd128HalfSize> bottom_shuffle;

test/mjsunit/wasm/half-dup-shuffles.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,14 @@ function Test(config) {
5454

5555
(function SplatAndShuffleTest(config) {
5656
const dup_byte_0 = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ]
57+
const dup_half_0 = [0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01 ]
5758
const dup_half_8 = [0x10, 0x11, 0x10, 0x11, 0x10, 0x11, 0x10, 0x11 ]
5859
const dup_word_1 = [0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07 ]
5960

6061
const even_bytes = [0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e ]
6162
const interleave_high_halves = [0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b ]
6263
const transpose_odd_words = [0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17 ]
64+
const reverse_16x2 = [0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05 ]
6365

6466
const splat_and_shuffle_tests =[
6567
{
@@ -77,6 +79,11 @@ function Test(config) {
7779
dup_shuffle: dup_word_1,
7880
shuffle: transpose_odd_words,
7981
},
82+
{
83+
name: "dup and reverse 16x2",
84+
dup_shuffle: dup_half_0,
85+
shuffle: reverse_16x2,
86+
},
8087
];
8188

8289
for (const config of splat_and_shuffle_tests) {

test/unittests/compiler/arm64/turboshaft-instruction-selector-arm64-unittest.cc

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3627,23 +3627,23 @@ std::ostream& operator<<(std::ostream& os, const DupAndShuffleInst& inst) {
36273627
const DupAndShuffleInst kDupAndShuffles[] = {
36283628
{"Dup 0 and UnzipLeft",
36293629
kArm64S128UnzipLeft,
3630-
2,
3630+
3,
36313631
0,
36323632
16,
36333633
0,
36343634
true,
36353635
{{0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1}}},
36363636
{"Dup 1 and UnzipLeft",
36373637
kArm64S128UnzipLeft,
3638-
2,
3638+
3,
36393639
0,
36403640
16,
36413641
1,
36423642
true,
36433643
{{0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 2, 3, 2, 3, 2, 3}}},
36443644
{"Dup 0 and UnzipRight",
36453645
kArm64S128UnzipRight,
3646-
2,
3646+
3,
36473647
0,
36483648
16,
36493649
0,
@@ -3738,28 +3738,29 @@ TEST_P(TurboshaftInstructionSelectorDupAndShuffleTest, DupAndShuffle) {
37383738
Stream s = m.Build();
37393739
EXPECT_EQ(inst.expected_num_insts, s.size());
37403740

3741-
if (inst.expected_num_insts > 1) {
3741+
if (inst.expected_num_insts == 3) {
3742+
// The dup
37423743
EXPECT_EQ(kArm64S128Dup, s[0]->arch_opcode());
3744+
EXPECT_EQ(inst.lane_size, LaneSizeField::decode(s[0]->opcode()));
37433745
EXPECT_EQ(s.ToVreg(s[0]->InputAt(0)),
37443746
s.ToVreg(m.Parameter(inst.expected_param_index)));
37453747
EXPECT_EQ(s.ToInt32(s[0]->InputAt(1)), inst.index);
37463748

3747-
EXPECT_EQ(inst.lane_size, LaneSizeField::decode(s[0]->opcode()));
3749+
// The shuffle
37483750
EXPECT_EQ(inst.arch_opcode, s[1]->arch_opcode());
37493751
EXPECT_EQ(inst.lane_size, LaneSizeField::decode(s[1]->opcode()));
3750-
3751-
if (inst.expected_num_insts == 3) {
3752-
EXPECT_EQ(s.ToVreg(s[1]->InputAt(0)), s.ToVreg(m.Parameter(0)));
3753-
EXPECT_EQ(s.ToVreg(s[1]->InputAt(1)), s.ToVreg(m.Parameter(1)));
3754-
EXPECT_EQ(kArm64S128MoveLane, s[2]->arch_opcode());
3755-
EXPECT_EQ(1U, s[2]->OutputCount());
3752+
EXPECT_EQ(s.ToVreg(s[1]->InputAt(0)), s.ToVreg(m.Parameter(0)));
3753+
if (inst.is_swizzle) {
3754+
EXPECT_EQ(s.ToVreg(s[1]->InputAt(1)), s.ToVreg(m.Parameter(0)));
37563755
} else {
3757-
EXPECT_EQ(s.ToVreg(s[1]->InputAt(0)),
3758-
s.ToVreg(m.Parameter(inst.expected_param_index)));
3759-
EXPECT_EQ(s.ToVreg(s[1]->InputAt(1)), s.ToVreg(s[0]->Output()));
3760-
EXPECT_EQ(1U, s[1]->OutputCount());
3756+
EXPECT_EQ(s.ToVreg(s[1]->InputAt(1)), s.ToVreg(m.Parameter(1)));
37613757
}
3758+
3759+
// Copy the top half of the dup into the result register.
3760+
EXPECT_EQ(kArm64S128MoveLane, s[2]->arch_opcode());
3761+
EXPECT_EQ(1U, s[2]->OutputCount());
37623762
} else {
3763+
DCHECK_EQ(inst.expected_num_insts, 1);
37633764
EXPECT_EQ(inst.arch_opcode, s[0]->arch_opcode());
37643765
}
37653766
}

0 commit comments

Comments
 (0)