Skip to content

Commit 9e29f7d

Browse files
authored
[X86] Remove extra MOV after widening atomic store (#197619)
This change adds patterns to optimize out an extra MOV present after widening the atomic store. Covers `<2 x i8>` (SSE4.1+), `<2 x i16>`, `<4 x i8>`, `<2 x i32>`, `<2 x float>`, `<4 x i16>`, `<2 x ptr addrspace(270)>`. Store-side counterpart to #148898. Stacked on top of #197618 and below #197860.
1 parent 4b91251 commit 9e29f7d

6 files changed

Lines changed: 129 additions & 105 deletions

File tree

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2394,6 +2394,41 @@ def atomic_store_128 :
23942394
let MemoryVT = i128;
23952395
}
23962396

2397+
// Matches store or atomic_store, no alignment requirement.
2398+
def any_store : PatFrags<(ops node:$val, node:$ptr),
2399+
[(store node:$val, node:$ptr),
2400+
(atomic_store node:$val, node:$ptr)]>;
2401+
2402+
def any_store_8 : PatFrags<(ops node:$val, node:$ptr),
2403+
[(store node:$val, node:$ptr),
2404+
(atomic_store node:$val, node:$ptr)]> {
2405+
let MemoryVT = i8;
2406+
}
2407+
2408+
def any_store_16 : PatFrags<(ops node:$val, node:$ptr),
2409+
[(store node:$val, node:$ptr),
2410+
(atomic_store node:$val, node:$ptr)]> {
2411+
let MemoryVT = i16;
2412+
}
2413+
2414+
def any_store_32 : PatFrags<(ops node:$val, node:$ptr),
2415+
[(store node:$val, node:$ptr),
2416+
(atomic_store node:$val, node:$ptr)]> {
2417+
let MemoryVT = i32;
2418+
}
2419+
2420+
def any_store_64 : PatFrags<(ops node:$val, node:$ptr),
2421+
[(store node:$val, node:$ptr),
2422+
(atomic_store node:$val, node:$ptr)]> {
2423+
let MemoryVT = i64;
2424+
}
2425+
2426+
def any_store_128 : PatFrags<(ops node:$val, node:$ptr),
2427+
[(store node:$val, node:$ptr),
2428+
(atomic_store node:$val, node:$ptr)]> {
2429+
let MemoryVT = i128;
2430+
}
2431+
23972432
//===----------------------------------------------------------------------===//
23982433
// Selection DAG Pattern Support.
23992434
//

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2945,7 +2945,7 @@ bool X86::mayFoldIntoStore(SDValue Op) {
29452945
return false;
29462946
User = *User->user_begin();
29472947
}
2948-
return ISD::isNormalStore(User);
2948+
return ISD::isNormalStore(User) || User->getOpcode() == ISD::ATOMIC_STORE;
29492949
}
29502950

29512951
bool X86::mayFoldIntoZeroExtend(SDValue Op) {

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3869,8 +3869,8 @@ def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$s
38693869
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
38703870
(ins i32mem:$dst, VR128X:$src),
38713871
"vmovd\t{$src, $dst|$dst, $src}",
3872-
[(store (i32 (extractelt (v4i32 VR128X:$src),
3873-
(iPTR 0))), addr:$dst)]>,
3872+
[(any_store_32 (i32 (extractelt (v4i32 VR128X:$src),
3873+
(iPTR 0))), addr:$dst)]>,
38743874
EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
38753875
} // ExeDomain = SSEPackedInt
38763876

@@ -3893,8 +3893,8 @@ def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
38933893
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
38943894
(ins i64mem:$dst, VR128X:$src),
38953895
"vmovq\t{$src, $dst|$dst, $src}",
3896-
[(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3897-
addr:$dst)]>,
3896+
[(any_store_64 (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3897+
addr:$dst)]>,
38983898
EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>,
38993899
Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
39003900

@@ -11476,8 +11476,8 @@ multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
1147611476
def mri : AVX512Ii8<opc, MRMDestMem, (outs),
1147711477
(ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
1147811478
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11479-
[(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11480-
addr:$dst)]>,
11479+
[(any_store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11480+
addr:$dst)]>,
1148111481
EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
1148211482
}
1148311483

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4206,8 +4206,8 @@ def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
42064206
def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs),
42074207
(ins i32mem:$dst, VR128:$src),
42084208
"movd\t{$src, $dst|$dst, $src}",
4209-
[(store (i32 (extractelt (v4i32 VR128:$src),
4210-
(iPTR 0))), addr:$dst)]>,
4209+
[(any_store_32 (i32 (extractelt (v4i32 VR128:$src),
4210+
(iPTR 0))), addr:$dst)]>,
42114211
VEX, Sched<[WriteVecStore]>;
42124212
def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
42134213
"movd\t{$src, $dst|$dst, $src}",
@@ -4216,8 +4216,8 @@ def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
42164216
Sched<[WriteVecMoveToGpr]>;
42174217
def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
42184218
"movd\t{$src, $dst|$dst, $src}",
4219-
[(store (i32 (extractelt (v4i32 VR128:$src),
4220-
(iPTR 0))), addr:$dst)]>,
4219+
[(any_store_32 (i32 (extractelt (v4i32 VR128:$src),
4220+
(iPTR 0))), addr:$dst)]>,
42214221
Sched<[WriteVecStore]>;
42224222
} // ExeDomain = SSEPackedInt
42234223

@@ -4346,13 +4346,13 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
43464346
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
43474347
def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
43484348
"movq\t{$src, $dst|$dst, $src}",
4349-
[(store (i64 (extractelt (v2i64 VR128:$src),
4350-
(iPTR 0))), addr:$dst)]>,
4349+
[(any_store_64 (i64 (extractelt (v2i64 VR128:$src),
4350+
(iPTR 0))), addr:$dst)]>,
43514351
VEX, WIG;
43524352
def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
43534353
"movq\t{$src, $dst|$dst, $src}",
4354-
[(store (i64 (extractelt (v2i64 VR128:$src),
4355-
(iPTR 0))), addr:$dst)]>;
4354+
[(any_store_64 (i64 (extractelt (v2i64 VR128:$src),
4355+
(iPTR 0))), addr:$dst)]>;
43564356
} // ExeDomain, SchedRW
43574357

43584358
// For disassembler only
@@ -5280,8 +5280,8 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
52805280
(ins i16mem:$dst, VR128:$src1, u8imm:$src2),
52815281
!strconcat(OpcodeStr,
52825282
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5283-
[(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), timm:$src2))),
5284-
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
5283+
[(any_store_16 (i16 (trunc (X86pextrw (v8i16 VR128:$src1), timm:$src2))),
5284+
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
52855285
}
52865286

52875287
let Predicates = [HasAVX, NoBWI] in

llvm/test/CodeGen/X86/atomic-load-store.ll

Lines changed: 47 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -353,30 +353,37 @@ define void @store_atomic_vec1_double_align(ptr %x, <1 x double> %v) nounwind {
353353
}
354354

355355
define void @store_atomic_vec2_i8(ptr %x, <2 x i8> %v) {
356-
; CHECK-SSE-O3-LABEL: store_atomic_vec2_i8:
357-
; CHECK-SSE-O3: # %bb.0:
358-
; CHECK-SSE-O3-NEXT: movd %xmm0, %eax
359-
; CHECK-SSE-O3-NEXT: movw %ax, (%rdi)
360-
; CHECK-SSE-O3-NEXT: retq
356+
; CHECK-SSE2-O3-LABEL: store_atomic_vec2_i8:
357+
; CHECK-SSE2-O3: # %bb.0:
358+
; CHECK-SSE2-O3-NEXT: movd %xmm0, %eax
359+
; CHECK-SSE2-O3-NEXT: movw %ax, (%rdi)
360+
; CHECK-SSE2-O3-NEXT: retq
361+
;
362+
; CHECK-SSE4-O3-LABEL: store_atomic_vec2_i8:
363+
; CHECK-SSE4-O3: # %bb.0:
364+
; CHECK-SSE4-O3-NEXT: pextrw $0, %xmm0, (%rdi)
365+
; CHECK-SSE4-O3-NEXT: retq
361366
;
362367
; CHECK-AVX-O3-LABEL: store_atomic_vec2_i8:
363368
; CHECK-AVX-O3: # %bb.0:
364-
; CHECK-AVX-O3-NEXT: vmovd %xmm0, %eax
365-
; CHECK-AVX-O3-NEXT: movw %ax, (%rdi)
369+
; CHECK-AVX-O3-NEXT: vpextrw $0, %xmm0, (%rdi)
366370
; CHECK-AVX-O3-NEXT: retq
367371
;
368-
; CHECK-SSE-O0-LABEL: store_atomic_vec2_i8:
369-
; CHECK-SSE-O0: # %bb.0:
370-
; CHECK-SSE-O0-NEXT: movd %xmm0, %eax
371-
; CHECK-SSE-O0-NEXT: # kill: def $ax killed $ax killed $eax
372-
; CHECK-SSE-O0-NEXT: movw %ax, (%rdi)
373-
; CHECK-SSE-O0-NEXT: retq
372+
; CHECK-SSE2-O0-LABEL: store_atomic_vec2_i8:
373+
; CHECK-SSE2-O0: # %bb.0:
374+
; CHECK-SSE2-O0-NEXT: movd %xmm0, %eax
375+
; CHECK-SSE2-O0-NEXT: # kill: def $ax killed $ax killed $eax
376+
; CHECK-SSE2-O0-NEXT: movw %ax, (%rdi)
377+
; CHECK-SSE2-O0-NEXT: retq
378+
;
379+
; CHECK-SSE4-O0-LABEL: store_atomic_vec2_i8:
380+
; CHECK-SSE4-O0: # %bb.0:
381+
; CHECK-SSE4-O0-NEXT: pextrw $0, %xmm0, (%rdi)
382+
; CHECK-SSE4-O0-NEXT: retq
374383
;
375384
; CHECK-AVX-O0-LABEL: store_atomic_vec2_i8:
376385
; CHECK-AVX-O0: # %bb.0:
377-
; CHECK-AVX-O0-NEXT: vmovd %xmm0, %eax
378-
; CHECK-AVX-O0-NEXT: # kill: def $ax killed $ax killed $eax
379-
; CHECK-AVX-O0-NEXT: movw %ax, (%rdi)
386+
; CHECK-AVX-O0-NEXT: vpextrw $0, %xmm0, (%rdi)
380387
; CHECK-AVX-O0-NEXT: retq
381388
store atomic <2 x i8> %v, ptr %x release, align 4
382389
ret void
@@ -385,26 +392,22 @@ define void @store_atomic_vec2_i8(ptr %x, <2 x i8> %v) {
385392
define void @store_atomic_vec2_i16(ptr %x, <2 x i16> %v) {
386393
; CHECK-SSE-O3-LABEL: store_atomic_vec2_i16:
387394
; CHECK-SSE-O3: # %bb.0:
388-
; CHECK-SSE-O3-NEXT: movd %xmm0, %eax
389-
; CHECK-SSE-O3-NEXT: movl %eax, (%rdi)
395+
; CHECK-SSE-O3-NEXT: movss %xmm0, (%rdi)
390396
; CHECK-SSE-O3-NEXT: retq
391397
;
392398
; CHECK-AVX-O3-LABEL: store_atomic_vec2_i16:
393399
; CHECK-AVX-O3: # %bb.0:
394-
; CHECK-AVX-O3-NEXT: vmovd %xmm0, %eax
395-
; CHECK-AVX-O3-NEXT: movl %eax, (%rdi)
400+
; CHECK-AVX-O3-NEXT: vmovss %xmm0, (%rdi)
396401
; CHECK-AVX-O3-NEXT: retq
397402
;
398403
; CHECK-SSE-O0-LABEL: store_atomic_vec2_i16:
399404
; CHECK-SSE-O0: # %bb.0:
400-
; CHECK-SSE-O0-NEXT: movd %xmm0, %eax
401-
; CHECK-SSE-O0-NEXT: movl %eax, (%rdi)
405+
; CHECK-SSE-O0-NEXT: movd %xmm0, (%rdi)
402406
; CHECK-SSE-O0-NEXT: retq
403407
;
404408
; CHECK-AVX-O0-LABEL: store_atomic_vec2_i16:
405409
; CHECK-AVX-O0: # %bb.0:
406-
; CHECK-AVX-O0-NEXT: vmovd %xmm0, %eax
407-
; CHECK-AVX-O0-NEXT: movl %eax, (%rdi)
410+
; CHECK-AVX-O0-NEXT: vmovd %xmm0, (%rdi)
408411
; CHECK-AVX-O0-NEXT: retq
409412
store atomic <2 x i16> %v, ptr %x release, align 4
410413
ret void
@@ -413,26 +416,22 @@ define void @store_atomic_vec2_i16(ptr %x, <2 x i16> %v) {
413416
define void @store_atomic_vec2_ptr270(ptr %x, <2 x ptr addrspace(270)> %v) {
414417
; CHECK-SSE-O3-LABEL: store_atomic_vec2_ptr270:
415418
; CHECK-SSE-O3: # %bb.0:
416-
; CHECK-SSE-O3-NEXT: movq %xmm0, %rax
417-
; CHECK-SSE-O3-NEXT: movq %rax, (%rdi)
419+
; CHECK-SSE-O3-NEXT: movlps %xmm0, (%rdi)
418420
; CHECK-SSE-O3-NEXT: retq
419421
;
420422
; CHECK-AVX-O3-LABEL: store_atomic_vec2_ptr270:
421423
; CHECK-AVX-O3: # %bb.0:
422-
; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax
423-
; CHECK-AVX-O3-NEXT: movq %rax, (%rdi)
424+
; CHECK-AVX-O3-NEXT: vmovlps %xmm0, (%rdi)
424425
; CHECK-AVX-O3-NEXT: retq
425426
;
426427
; CHECK-SSE-O0-LABEL: store_atomic_vec2_ptr270:
427428
; CHECK-SSE-O0: # %bb.0:
428-
; CHECK-SSE-O0-NEXT: movq %xmm0, %rax
429-
; CHECK-SSE-O0-NEXT: movq %rax, (%rdi)
429+
; CHECK-SSE-O0-NEXT: movq %xmm0, (%rdi)
430430
; CHECK-SSE-O0-NEXT: retq
431431
;
432432
; CHECK-AVX-O0-LABEL: store_atomic_vec2_ptr270:
433433
; CHECK-AVX-O0: # %bb.0:
434-
; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax
435-
; CHECK-AVX-O0-NEXT: movq %rax, (%rdi)
434+
; CHECK-AVX-O0-NEXT: vmovq %xmm0, (%rdi)
436435
; CHECK-AVX-O0-NEXT: retq
437436
store atomic <2 x ptr addrspace(270)> %v, ptr %x release, align 8
438437
ret void
@@ -441,26 +440,22 @@ define void @store_atomic_vec2_ptr270(ptr %x, <2 x ptr addrspace(270)> %v) {
441440
define void @store_atomic_vec2_i32_align(ptr %x, <2 x i32> %v) {
442441
; CHECK-SSE-O3-LABEL: store_atomic_vec2_i32_align:
443442
; CHECK-SSE-O3: # %bb.0:
444-
; CHECK-SSE-O3-NEXT: movq %xmm0, %rax
445-
; CHECK-SSE-O3-NEXT: movq %rax, (%rdi)
443+
; CHECK-SSE-O3-NEXT: movlps %xmm0, (%rdi)
446444
; CHECK-SSE-O3-NEXT: retq
447445
;
448446
; CHECK-AVX-O3-LABEL: store_atomic_vec2_i32_align:
449447
; CHECK-AVX-O3: # %bb.0:
450-
; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax
451-
; CHECK-AVX-O3-NEXT: movq %rax, (%rdi)
448+
; CHECK-AVX-O3-NEXT: vmovlps %xmm0, (%rdi)
452449
; CHECK-AVX-O3-NEXT: retq
453450
;
454451
; CHECK-SSE-O0-LABEL: store_atomic_vec2_i32_align:
455452
; CHECK-SSE-O0: # %bb.0:
456-
; CHECK-SSE-O0-NEXT: movq %xmm0, %rax
457-
; CHECK-SSE-O0-NEXT: movq %rax, (%rdi)
453+
; CHECK-SSE-O0-NEXT: movq %xmm0, (%rdi)
458454
; CHECK-SSE-O0-NEXT: retq
459455
;
460456
; CHECK-AVX-O0-LABEL: store_atomic_vec2_i32_align:
461457
; CHECK-AVX-O0: # %bb.0:
462-
; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax
463-
; CHECK-AVX-O0-NEXT: movq %rax, (%rdi)
458+
; CHECK-AVX-O0-NEXT: vmovq %xmm0, (%rdi)
464459
; CHECK-AVX-O0-NEXT: retq
465460
store atomic <2 x i32> %v, ptr %x release, align 8
466461
ret void
@@ -469,26 +464,22 @@ define void @store_atomic_vec2_i32_align(ptr %x, <2 x i32> %v) {
469464
define void @store_atomic_vec2_float_align(ptr %x, <2 x float> %v) {
470465
; CHECK-SSE-O3-LABEL: store_atomic_vec2_float_align:
471466
; CHECK-SSE-O3: # %bb.0:
472-
; CHECK-SSE-O3-NEXT: movq %xmm0, %rax
473-
; CHECK-SSE-O3-NEXT: movq %rax, (%rdi)
467+
; CHECK-SSE-O3-NEXT: movlps %xmm0, (%rdi)
474468
; CHECK-SSE-O3-NEXT: retq
475469
;
476470
; CHECK-AVX-O3-LABEL: store_atomic_vec2_float_align:
477471
; CHECK-AVX-O3: # %bb.0:
478-
; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax
479-
; CHECK-AVX-O3-NEXT: movq %rax, (%rdi)
472+
; CHECK-AVX-O3-NEXT: vmovlps %xmm0, (%rdi)
480473
; CHECK-AVX-O3-NEXT: retq
481474
;
482475
; CHECK-SSE-O0-LABEL: store_atomic_vec2_float_align:
483476
; CHECK-SSE-O0: # %bb.0:
484-
; CHECK-SSE-O0-NEXT: movq %xmm0, %rax
485-
; CHECK-SSE-O0-NEXT: movq %rax, (%rdi)
477+
; CHECK-SSE-O0-NEXT: movq %xmm0, (%rdi)
486478
; CHECK-SSE-O0-NEXT: retq
487479
;
488480
; CHECK-AVX-O0-LABEL: store_atomic_vec2_float_align:
489481
; CHECK-AVX-O0: # %bb.0:
490-
; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax
491-
; CHECK-AVX-O0-NEXT: movq %rax, (%rdi)
482+
; CHECK-AVX-O0-NEXT: vmovq %xmm0, (%rdi)
492483
; CHECK-AVX-O0-NEXT: retq
493484
store atomic <2 x float> %v, ptr %x release, align 8
494485
ret void
@@ -497,26 +488,22 @@ define void @store_atomic_vec2_float_align(ptr %x, <2 x float> %v) {
497488
define void @store_atomic_vec4_i8(ptr %x, <4 x i8> %v) nounwind {
498489
; CHECK-SSE-O3-LABEL: store_atomic_vec4_i8:
499490
; CHECK-SSE-O3: # %bb.0:
500-
; CHECK-SSE-O3-NEXT: movd %xmm0, %eax
501-
; CHECK-SSE-O3-NEXT: movl %eax, (%rdi)
491+
; CHECK-SSE-O3-NEXT: movss %xmm0, (%rdi)
502492
; CHECK-SSE-O3-NEXT: retq
503493
;
504494
; CHECK-AVX-O3-LABEL: store_atomic_vec4_i8:
505495
; CHECK-AVX-O3: # %bb.0:
506-
; CHECK-AVX-O3-NEXT: vmovd %xmm0, %eax
507-
; CHECK-AVX-O3-NEXT: movl %eax, (%rdi)
496+
; CHECK-AVX-O3-NEXT: vmovss %xmm0, (%rdi)
508497
; CHECK-AVX-O3-NEXT: retq
509498
;
510499
; CHECK-SSE-O0-LABEL: store_atomic_vec4_i8:
511500
; CHECK-SSE-O0: # %bb.0:
512-
; CHECK-SSE-O0-NEXT: movd %xmm0, %eax
513-
; CHECK-SSE-O0-NEXT: movl %eax, (%rdi)
501+
; CHECK-SSE-O0-NEXT: movd %xmm0, (%rdi)
514502
; CHECK-SSE-O0-NEXT: retq
515503
;
516504
; CHECK-AVX-O0-LABEL: store_atomic_vec4_i8:
517505
; CHECK-AVX-O0: # %bb.0:
518-
; CHECK-AVX-O0-NEXT: vmovd %xmm0, %eax
519-
; CHECK-AVX-O0-NEXT: movl %eax, (%rdi)
506+
; CHECK-AVX-O0-NEXT: vmovd %xmm0, (%rdi)
520507
; CHECK-AVX-O0-NEXT: retq
521508
store atomic <4 x i8> %v, ptr %x release, align 4
522509
ret void
@@ -525,26 +512,22 @@ define void @store_atomic_vec4_i8(ptr %x, <4 x i8> %v) nounwind {
525512
define void @store_atomic_vec4_i16(ptr %x, <4 x i16> %v) nounwind {
526513
; CHECK-SSE-O3-LABEL: store_atomic_vec4_i16:
527514
; CHECK-SSE-O3: # %bb.0:
528-
; CHECK-SSE-O3-NEXT: movq %xmm0, %rax
529-
; CHECK-SSE-O3-NEXT: movq %rax, (%rdi)
515+
; CHECK-SSE-O3-NEXT: movlps %xmm0, (%rdi)
530516
; CHECK-SSE-O3-NEXT: retq
531517
;
532518
; CHECK-AVX-O3-LABEL: store_atomic_vec4_i16:
533519
; CHECK-AVX-O3: # %bb.0:
534-
; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax
535-
; CHECK-AVX-O3-NEXT: movq %rax, (%rdi)
520+
; CHECK-AVX-O3-NEXT: vmovlps %xmm0, (%rdi)
536521
; CHECK-AVX-O3-NEXT: retq
537522
;
538523
; CHECK-SSE-O0-LABEL: store_atomic_vec4_i16:
539524
; CHECK-SSE-O0: # %bb.0:
540-
; CHECK-SSE-O0-NEXT: movq %xmm0, %rax
541-
; CHECK-SSE-O0-NEXT: movq %rax, (%rdi)
525+
; CHECK-SSE-O0-NEXT: movq %xmm0, (%rdi)
542526
; CHECK-SSE-O0-NEXT: retq
543527
;
544528
; CHECK-AVX-O0-LABEL: store_atomic_vec4_i16:
545529
; CHECK-AVX-O0: # %bb.0:
546-
; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax
547-
; CHECK-AVX-O0-NEXT: movq %rax, (%rdi)
530+
; CHECK-AVX-O0-NEXT: vmovq %xmm0, (%rdi)
548531
; CHECK-AVX-O0-NEXT: retq
549532
store atomic <4 x i16> %v, ptr %x release, align 8
550533
ret void

0 commit comments

Comments
 (0)