Skip to content

Commit bbd69ee

Browse files
authored
[TargetLowering] In prepareUREMEqFold/prepareSREMEqFold, fix K=-1 for i64 elements. (#188600)
K is an unsigned, so it is zero-extended to uint64_t when passed to the APInt constructor. If ShSVT has more than 32 bits, K = -1 therefore does not produce an all-ones ConstantSDNode. To fix this, explicitly push an all-ones constant to KAmts for the tautological lane. This also removes the reliance on APInt's implicitTrunc, and it allows turnVectorIntoSplatVector to work for this case.
1 parent 797916b commit bbd69ee

File tree

3 files changed

+239
-10
lines changed

3 files changed

+239
-10
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7166,17 +7166,15 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
71667166
if (TautologicalLane) {
71677167
// Set the P and K amounts to bogus values so we can try to splat them.
71687168
P = 0;
7169-
K = -1;
7169+
KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
71707170
// And ensure that comparison constant is tautological,
71717171
// it will always compare true/false.
71727172
Q.setAllBits();
7173+
} else {
7174+
KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
71737175
}
71747176

71757177
PAmts.push_back(DAG.getConstant(P, DL, SVT));
7176-
KAmts.push_back(
7177-
DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7178-
/*implicitTrunc=*/true),
7179-
DL, ShSVT));
71807178
QAmts.push_back(DAG.getConstant(Q, DL, SVT));
71817179
return true;
71827180
};
@@ -7437,18 +7435,16 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
74377435
// Set P, A and K to bogus values so we can try to splat them.
74387436
P = 0;
74397437
A.setAllBits();
7440-
K = -1;
7438+
KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
74417439

74427440
// x ?% 1 == 0 <--> true <--> x u<= -1
74437441
Q.setAllBits();
7442+
} else {
7443+
KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
74447444
}
74457445

74467446
PAmts.push_back(DAG.getConstant(P, DL, SVT));
74477447
AAmts.push_back(DAG.getConstant(A, DL, SVT));
7448-
KAmts.push_back(
7449-
DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7450-
/*implicitTrunc=*/true),
7451-
DL, ShSVT));
74527448
QAmts.push_back(DAG.getConstant(Q, DL, SVT));
74537449
return true;
74547450
};

llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2489,3 +2489,122 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
24892489
%cmpres = and <32 x i1> %cmp4, %cmp
24902490
ret <32 x i1> %cmpres
24912491
}
2492+
2493+
; One divisor equal to one, the other divisor even (i64 elements)
2494+
define <2 x i64> @test_srem_even_one_i64(<2 x i64> %X) nounwind {
2495+
; CHECK-SSE2-LABEL: test_srem_even_one_i64:
2496+
; CHECK-SSE2: # %bb.0:
2497+
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
2498+
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
2499+
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
2500+
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
2501+
; CHECK-SSE2-NEXT: psrlq $32, %xmm3
2502+
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
2503+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1840700269,1840700269]
2504+
; CHECK-SSE2-NEXT: paddq %xmm3, %xmm0
2505+
; CHECK-SSE2-NEXT: psllq $32, %xmm0
2506+
; CHECK-SSE2-NEXT: paddq %xmm2, %xmm0
2507+
; CHECK-SSE2-NEXT: paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2508+
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
2509+
; CHECK-SSE2-NEXT: psllq $63, %xmm1
2510+
; CHECK-SSE2-NEXT: psrlq $1, %xmm0
2511+
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
2512+
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2513+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
2514+
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2515+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
2516+
; CHECK-SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2517+
; CHECK-SSE2-NEXT: pand %xmm2, %xmm1
2518+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2519+
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
2520+
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2521+
; CHECK-SSE2-NEXT: retq
2522+
;
2523+
; CHECK-SSE41-LABEL: test_srem_even_one_i64:
2524+
; CHECK-SSE41: # %bb.0:
2525+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
2526+
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm2
2527+
; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm2
2528+
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm3
2529+
; CHECK-SSE41-NEXT: psrlq $32, %xmm3
2530+
; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm3
2531+
; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1840700269,1840700269]
2532+
; CHECK-SSE41-NEXT: paddq %xmm3, %xmm0
2533+
; CHECK-SSE41-NEXT: psllq $32, %xmm0
2534+
; CHECK-SSE41-NEXT: paddq %xmm2, %xmm0
2535+
; CHECK-SSE41-NEXT: paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2536+
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
2537+
; CHECK-SSE41-NEXT: psllq $63, %xmm1
2538+
; CHECK-SSE41-NEXT: psrlq $1, %xmm0
2539+
; CHECK-SSE41-NEXT: por %xmm1, %xmm0
2540+
; CHECK-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2541+
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
2542+
; CHECK-SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2543+
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
2544+
; CHECK-SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2545+
; CHECK-SSE41-NEXT: pand %xmm2, %xmm1
2546+
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2547+
; CHECK-SSE41-NEXT: por %xmm1, %xmm0
2548+
; CHECK-SSE41-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2549+
; CHECK-SSE41-NEXT: retq
2550+
;
2551+
; CHECK-AVX1-LABEL: test_srem_even_one_i64:
2552+
; CHECK-AVX1: # %bb.0:
2553+
; CHECK-AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
2554+
; CHECK-AVX1-NEXT: # xmm1 = mem[0,0]
2555+
; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
2556+
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm3
2557+
; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
2558+
; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1840700269,1840700269]
2559+
; CHECK-AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
2560+
; CHECK-AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
2561+
; CHECK-AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
2562+
; CHECK-AVX1-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2563+
; CHECK-AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
2564+
; CHECK-AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0
2565+
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
2566+
; CHECK-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2567+
; CHECK-AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2568+
; CHECK-AVX1-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2569+
; CHECK-AVX1-NEXT: retq
2570+
;
2571+
; CHECK-AVX2-LABEL: test_srem_even_one_i64:
2572+
; CHECK-AVX2: # %bb.0:
2573+
; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
2574+
; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
2575+
; CHECK-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3
2576+
; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
2577+
; CHECK-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1840700269,1840700269]
2578+
; CHECK-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
2579+
; CHECK-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
2580+
; CHECK-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
2581+
; CHECK-AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2582+
; CHECK-AVX2-NEXT: vpsllq $63, %xmm0, %xmm1
2583+
; CHECK-AVX2-NEXT: vpsrlq $1, %xmm0, %xmm0
2584+
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
2585+
; CHECK-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2586+
; CHECK-AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2587+
; CHECK-AVX2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2588+
; CHECK-AVX2-NEXT: retq
2589+
;
2590+
; CHECK-AVX512VL-LABEL: test_srem_even_one_i64:
2591+
; CHECK-AVX512VL: # %bb.0:
2592+
; CHECK-AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
2593+
; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
2594+
; CHECK-AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm3
2595+
; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
2596+
; CHECK-AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
2597+
; CHECK-AVX512VL-NEXT: vpaddq %xmm1, %xmm0, %xmm0
2598+
; CHECK-AVX512VL-NEXT: vpsllq $32, %xmm0, %xmm0
2599+
; CHECK-AVX512VL-NEXT: vpaddq %xmm0, %xmm2, %xmm0
2600+
; CHECK-AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
2601+
; CHECK-AVX512VL-NEXT: vprorq $1, %xmm0, %xmm0
2602+
; CHECK-AVX512VL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2603+
; CHECK-AVX512VL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
2604+
; CHECK-AVX512VL-NEXT: vpsrlq $63, %xmm0, %xmm0
2605+
; CHECK-AVX512VL-NEXT: retq
2606+
%srem = srem <2 x i64> %X, <i64 1, i64 14>
2607+
%cmp = icmp eq <2 x i64> %srem, <i64 0, i64 0>
2608+
%ret = zext <2 x i1> %cmp to <2 x i64>
2609+
ret <2 x i64> %ret
2610+
}

llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1962,3 +1962,117 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
19621962
%ret = zext <4 x i1> %cmp to <4 x i32>
19631963
ret <4 x i32> %ret
19641964
}
1965+
1966+
; One divisor equal to one, the other divisor even (i64 elements)
1967+
define <2 x i64> @test_urem_even_one_i64(<2 x i64> %X) nounwind {
1968+
; CHECK-SSE2-LABEL: test_urem_even_one_i64:
1969+
; CHECK-SSE2: # %bb.0:
1970+
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
1971+
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
1972+
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
1973+
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
1974+
; CHECK-SSE2-NEXT: psrlq $32, %xmm3
1975+
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
1976+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1840700269,1840700269]
1977+
; CHECK-SSE2-NEXT: paddq %xmm3, %xmm0
1978+
; CHECK-SSE2-NEXT: psllq $32, %xmm0
1979+
; CHECK-SSE2-NEXT: paddq %xmm2, %xmm0
1980+
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
1981+
; CHECK-SSE2-NEXT: psllq $63, %xmm1
1982+
; CHECK-SSE2-NEXT: psrlq $1, %xmm0
1983+
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
1984+
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1985+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1986+
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1987+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
1988+
; CHECK-SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1989+
; CHECK-SSE2-NEXT: pand %xmm2, %xmm1
1990+
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1991+
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
1992+
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1993+
; CHECK-SSE2-NEXT: retq
1994+
;
1995+
; CHECK-SSE41-LABEL: test_urem_even_one_i64:
1996+
; CHECK-SSE41: # %bb.0:
1997+
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
1998+
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm2
1999+
; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm2
2000+
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm3
2001+
; CHECK-SSE41-NEXT: psrlq $32, %xmm3
2002+
; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm3
2003+
; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1840700269,1840700269]
2004+
; CHECK-SSE41-NEXT: paddq %xmm3, %xmm0
2005+
; CHECK-SSE41-NEXT: psllq $32, %xmm0
2006+
; CHECK-SSE41-NEXT: paddq %xmm2, %xmm0
2007+
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
2008+
; CHECK-SSE41-NEXT: psllq $63, %xmm1
2009+
; CHECK-SSE41-NEXT: psrlq $1, %xmm0
2010+
; CHECK-SSE41-NEXT: por %xmm1, %xmm0
2011+
; CHECK-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2012+
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
2013+
; CHECK-SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2014+
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
2015+
; CHECK-SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2016+
; CHECK-SSE41-NEXT: pand %xmm2, %xmm1
2017+
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2018+
; CHECK-SSE41-NEXT: por %xmm1, %xmm0
2019+
; CHECK-SSE41-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2020+
; CHECK-SSE41-NEXT: retq
2021+
;
2022+
; CHECK-AVX1-LABEL: test_urem_even_one_i64:
2023+
; CHECK-AVX1: # %bb.0:
2024+
; CHECK-AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
2025+
; CHECK-AVX1-NEXT: # xmm1 = mem[0,0]
2026+
; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
2027+
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm3
2028+
; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
2029+
; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1840700269,1840700269]
2030+
; CHECK-AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
2031+
; CHECK-AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
2032+
; CHECK-AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
2033+
; CHECK-AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
2034+
; CHECK-AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0
2035+
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
2036+
; CHECK-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2037+
; CHECK-AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2038+
; CHECK-AVX1-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2039+
; CHECK-AVX1-NEXT: retq
2040+
;
2041+
; CHECK-AVX2-LABEL: test_urem_even_one_i64:
2042+
; CHECK-AVX2: # %bb.0:
2043+
; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
2044+
; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
2045+
; CHECK-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3
2046+
; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
2047+
; CHECK-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1840700269,1840700269]
2048+
; CHECK-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
2049+
; CHECK-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
2050+
; CHECK-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
2051+
; CHECK-AVX2-NEXT: vpsllq $63, %xmm0, %xmm1
2052+
; CHECK-AVX2-NEXT: vpsrlq $1, %xmm0, %xmm0
2053+
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
2054+
; CHECK-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2055+
; CHECK-AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2056+
; CHECK-AVX2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2057+
; CHECK-AVX2-NEXT: retq
2058+
;
2059+
; CHECK-AVX512VL-LABEL: test_urem_even_one_i64:
2060+
; CHECK-AVX512VL: # %bb.0:
2061+
; CHECK-AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
2062+
; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
2063+
; CHECK-AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm3
2064+
; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
2065+
; CHECK-AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
2066+
; CHECK-AVX512VL-NEXT: vpaddq %xmm1, %xmm0, %xmm0
2067+
; CHECK-AVX512VL-NEXT: vpsllq $32, %xmm0, %xmm0
2068+
; CHECK-AVX512VL-NEXT: vpaddq %xmm0, %xmm2, %xmm0
2069+
; CHECK-AVX512VL-NEXT: vprorq $1, %xmm0, %xmm0
2070+
; CHECK-AVX512VL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2071+
; CHECK-AVX512VL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
2072+
; CHECK-AVX512VL-NEXT: vpsrlq $63, %xmm0, %xmm0
2073+
; CHECK-AVX512VL-NEXT: retq
2074+
%urem = urem <2 x i64> %X, <i64 14, i64 1>
2075+
%cmp = icmp eq <2 x i64> %urem, <i64 0, i64 0>
2076+
%ret = zext <2 x i1> %cmp to <2 x i64>
2077+
ret <2 x i64> %ret
2078+
}

0 commit comments

Comments
 (0)