Skip to content

Commit ab23ff3

Browse files
ngzhianCommit Bot
authored andcommitted
[ia32][wasm-simd] Fix aligned moves in codegen
For SIMD instructions that use aligned moves (like movaps or movapd), we don't have correct memory alignment for SIMD moves yet. Switch to to movupd. Bug: v8:9198 Bug: v8:10831 Change-Id: Ic60fba5d08dda9676f6091ce505ac7be54957d00 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2380240 Commit-Queue: Zhi An Ng <[email protected]> Reviewed-by: Bill Budge <[email protected]> Cr-Commit-Position: refs/heads/master@{#69613}
1 parent c44efad commit ab23ff3

7 files changed

Lines changed: 74 additions & 4 deletions

File tree

src/codegen/ia32/assembler-ia32.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -963,6 +963,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
963963
void movapd(XMMRegister dst, Operand src) {
964964
sse2_instr(dst, src, 0x66, 0x0F, 0x28);
965965
}
966+
void movupd(XMMRegister dst, Operand src) {
967+
sse2_instr(dst, src, 0x66, 0x0F, 0x10);
968+
}
966969

967970
void movmskpd(Register dst, XMMRegister src);
968971
void movmskps(Register dst, XMMRegister src);
@@ -1338,6 +1341,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
13381341
void vmovapd(XMMRegister dst, Operand src) { vpd(0x28, dst, xmm0, src); }
13391342
void vmovups(XMMRegister dst, XMMRegister src) { vmovups(dst, Operand(src)); }
13401343
void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
1344+
void vmovupd(XMMRegister dst, Operand src) { vpd(0x10, dst, xmm0, src); }
13411345
void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
13421346
vshufps(dst, src1, Operand(src2), imm8);
13431347
}

src/codegen/ia32/macro-assembler-ia32.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
294294
AVX_OP2_WITH_TYPE(Movaps, movaps, XMMRegister, XMMRegister)
295295
AVX_OP2_WITH_TYPE(Movapd, movapd, XMMRegister, XMMRegister)
296296
AVX_OP2_WITH_TYPE(Movapd, movapd, XMMRegister, const Operand&)
297+
AVX_OP2_WITH_TYPE(Movupd, movupd, XMMRegister, const Operand&)
297298
AVX_OP2_WITH_TYPE(Pmovmskb, pmovmskb, Register, XMMRegister)
298299
AVX_OP2_WITH_TYPE(Movmskps, movmskps, Register, XMMRegister)
299300

src/compiler/backend/ia32/code-generator-ia32.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1968,7 +1968,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
19681968
tmp = i.TempSimd128Register(0);
19691969
// The minpd instruction doesn't propagate NaNs and +0's in its first
19701970
// operand. Perform minpd in both orders, merge the resuls, and adjust.
1971-
__ Movapd(tmp, src1);
1971+
__ Movupd(tmp, src1);
19721972
__ Minpd(tmp, tmp, src);
19731973
__ Minpd(dst, src, src1);
19741974
// propagate -0's and NaNs, which may be non-canonical.
@@ -1987,7 +1987,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
19871987
tmp = i.TempSimd128Register(0);
19881988
// The maxpd instruction doesn't propagate NaNs and +0's in its first
19891989
// operand. Perform maxpd in both orders, merge the resuls, and adjust.
1990-
__ Movapd(tmp, src1);
1990+
__ Movupd(tmp, src1);
19911991
__ Maxpd(tmp, tmp, src);
19921992
__ Maxpd(dst, src, src1);
19931993
// Find discrepancies.
@@ -2383,7 +2383,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
23832383
XMMRegister dst = i.OutputSimd128Register();
23842384
Operand src1 = i.InputOperand(1);
23852385
// See comment above for correction of maxps.
2386-
__ movaps(kScratchDoubleReg, src1);
2386+
__ vmovups(kScratchDoubleReg, src1);
23872387
__ vmaxps(kScratchDoubleReg, kScratchDoubleReg, dst);
23882388
__ vmaxps(dst, dst, src1);
23892389
__ vxorps(dst, dst, kScratchDoubleReg);

src/diagnostics/ia32/disasm-ia32.cc

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1173,6 +1173,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
11731173
int mod, regop, rm, vvvv = vex_vreg();
11741174
get_modrm(*current, &mod, &regop, &rm);
11751175
switch (opcode) {
1176+
case 0x10:
1177+
AppendToBuffer("vmovupd %s,", NameOfXMMRegister(regop));
1178+
current += PrintRightXMMOperand(current);
1179+
break;
11761180
case 0x28:
11771181
AppendToBuffer("vmovapd %s,", NameOfXMMRegister(regop));
11781182
current += PrintRightXMMOperand(current);
@@ -2108,7 +2112,13 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
21082112
data += 2;
21092113
} else if (*data == 0x0F) {
21102114
data++;
2111-
if (*data == 0x28) {
2115+
if (*data == 0x10) {
2116+
data++;
2117+
int mod, regop, rm;
2118+
get_modrm(*data, &mod, &regop, &rm);
2119+
AppendToBuffer("movupd %s,", NameOfXMMRegister(regop));
2120+
data += PrintRightXMMOperand(data);
2121+
} else if (*data == 0x28) {
21122122
data++;
21132123
int mod, regop, rm;
21142124
get_modrm(*data, &mod, &regop, &rm);

test/cctest/test-disasm-ia32.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,7 @@ TEST(DisasmIa320) {
473473

474474
__ movapd(xmm0, xmm1);
475475
__ movapd(xmm0, Operand(edx, 4));
476+
__ movupd(xmm0, Operand(edx, 4));
476477

477478
__ movd(xmm0, edi);
478479
__ movd(xmm0, Operand(ebx, ecx, times_4, 10000));
@@ -689,6 +690,7 @@ TEST(DisasmIa320) {
689690
__ vmovaps(xmm0, xmm1);
690691
__ vmovapd(xmm0, xmm1);
691692
__ vmovapd(xmm0, Operand(ebx, ecx, times_4, 10000));
693+
__ vmovupd(xmm0, Operand(ebx, ecx, times_4, 10000));
692694
__ vshufps(xmm0, xmm1, xmm2, 3);
693695
__ vshufps(xmm0, xmm1, Operand(edx, 4), 3);
694696
__ vhaddps(xmm0, xmm1, xmm2);

test/mjsunit/mjsunit.status

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,4 +1443,10 @@
14431443
'compiler/serializer-transition-propagation': [SKIP],
14441444
}], # variant == nci or variant == nci_as_midtier
14451445

1446+
['nosse41', {
1447+
# Requires scalar lowering for 64x2 SIMD instructions, which are not
1448+
# implemented yet.
1449+
'regress/wasm/regress-10831': [SKIP],
1450+
}], # nosse3
1451+
14461452
]
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// Copyright 2020 the V8 project authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style license that can be
3+
// found in the LICENSE file.
4+
5+
// Flags: --experimental-wasm-simd
6+
7+
load('test/mjsunit/wasm/wasm-module-builder.js');
8+
9+
// This test is shrunk from a test case provided at https://crbug.com/v8/10831.
10+
// This exercises a aligned-load bug in ia32. Some SIMD operations were using
11+
// instructions that required aligned operands (like movaps and movapd), but we
12+
// don't have the right memory alignment yet, see https://crbug.com/v8/9198,
13+
// resulting in a SIGSEGV when running the generated code.
14+
const builder = new WasmModuleBuilder();
15+
builder.addType(makeSig([], [kWasmI32]));
16+
// Generate function 1 (out of 1).
17+
builder.addFunction(undefined, 0 /* sig */)
18+
.addBodyWithEnd([
19+
// signature: i_v
20+
// body:
21+
kExprI32Const, 0xfc, 0xb6, 0xed, 0x02, // i32.const
22+
kSimdPrefix, kExprI8x16Splat, // i8x16.splat
23+
kExprI32Const, 0xfc, 0x00, // i32.const
24+
kSimdPrefix, kExprI8x16Splat, // i8x16.splat
25+
kSimdPrefix, kExprI64x2Sub, 0x01, // i64x2.sub
26+
kExprI32Const, 0x00, // i32.const
27+
kSimdPrefix, kExprI8x16Splat, // i8x16.splat
28+
kExprI32Const, 0x81, 0x96, 0xf0, 0xe3, 0x07, // i32.const
29+
kSimdPrefix, kExprI8x16Splat, // i8x16.splat
30+
kSimdPrefix, kExprF64x2Max, 0x01, // f64x2.max
31+
kSimdPrefix, kExprI64x2Sub, 0x01, // i64x2.sub
32+
kExprI32Const, 0x00, // i32.const
33+
kSimdPrefix, kExprI8x16Splat, // i8x16.splat
34+
kExprI32Const, 0x00, // i32.const
35+
kExprI32Const, 0x0b, // i32.const
36+
kExprI32LtU, // i32.lt_u
37+
kSimdPrefix, kExprI8x16ReplaceLane, 0x00, // i8x16.replace_lane
38+
kExprI32Const, 0xfc, 0xf8, 0x01, // i32.const
39+
kSimdPrefix, kExprI8x16Splat, // i8x16.splat
40+
kSimdPrefix, kExprF64x2Max, 0x01, // f64x2.max
41+
kSimdPrefix, kExprI16x8MaxS, 0x01, // i16x8.max_s
42+
kSimdPrefix, kExprV8x16AllTrue, // v8x16.all_true
43+
kExprEnd, // end @70
44+
]);
45+
builder.addExport('main', 0);
46+
const instance = builder.instantiate();
47+
print(instance.exports.main());

0 commit comments

Comments
 (0)