@@ -2489,3 +2489,122 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
24892489 %cmpres = and <32 x i1 > %cmp4 , %cmp
24902490 ret <32 x i1 > %cmpres
24912491}
2492+
2493+ ; One divisor equal to one, alongside an even divisor
2494+ define <2 x i64 > @test_srem_even_one_i64 (<2 x i64 > %X ) nounwind {
2495+ ; CHECK-SSE2-LABEL: test_srem_even_one_i64:
2496+ ; CHECK-SSE2: # %bb.0:
2497+ ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
2498+ ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
2499+ ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
2500+ ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
2501+ ; CHECK-SSE2-NEXT: psrlq $32, %xmm3
2502+ ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
2503+ ; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1840700269,1840700269]
2504+ ; CHECK-SSE2-NEXT: paddq %xmm3, %xmm0
2505+ ; CHECK-SSE2-NEXT: psllq $32, %xmm0
2506+ ; CHECK-SSE2-NEXT: paddq %xmm2, %xmm0
2507+ ; CHECK-SSE2-NEXT: paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2508+ ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
2509+ ; CHECK-SSE2-NEXT: psllq $63, %xmm1
2510+ ; CHECK-SSE2-NEXT: psrlq $1, %xmm0
2511+ ; CHECK-SSE2-NEXT: por %xmm1, %xmm0
2512+ ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2513+ ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
2514+ ; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2515+ ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
2516+ ; CHECK-SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2517+ ; CHECK-SSE2-NEXT: pand %xmm2, %xmm1
2518+ ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2519+ ; CHECK-SSE2-NEXT: por %xmm1, %xmm0
2520+ ; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2521+ ; CHECK-SSE2-NEXT: retq
2522+ ;
2523+ ; CHECK-SSE41-LABEL: test_srem_even_one_i64:
2524+ ; CHECK-SSE41: # %bb.0:
2525+ ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
2526+ ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm2
2527+ ; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm2
2528+ ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm3
2529+ ; CHECK-SSE41-NEXT: psrlq $32, %xmm3
2530+ ; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm3
2531+ ; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1840700269,1840700269]
2532+ ; CHECK-SSE41-NEXT: paddq %xmm3, %xmm0
2533+ ; CHECK-SSE41-NEXT: psllq $32, %xmm0
2534+ ; CHECK-SSE41-NEXT: paddq %xmm2, %xmm0
2535+ ; CHECK-SSE41-NEXT: paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2536+ ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
2537+ ; CHECK-SSE41-NEXT: psllq $63, %xmm1
2538+ ; CHECK-SSE41-NEXT: psrlq $1, %xmm0
2539+ ; CHECK-SSE41-NEXT: por %xmm1, %xmm0
2540+ ; CHECK-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2541+ ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
2542+ ; CHECK-SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2543+ ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
2544+ ; CHECK-SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2545+ ; CHECK-SSE41-NEXT: pand %xmm2, %xmm1
2546+ ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2547+ ; CHECK-SSE41-NEXT: por %xmm1, %xmm0
2548+ ; CHECK-SSE41-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2549+ ; CHECK-SSE41-NEXT: retq
2550+ ;
2551+ ; CHECK-AVX1-LABEL: test_srem_even_one_i64:
2552+ ; CHECK-AVX1: # %bb.0:
2553+ ; CHECK-AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
2554+ ; CHECK-AVX1-NEXT: # xmm1 = mem[0,0]
2555+ ; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
2556+ ; CHECK-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm3
2557+ ; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
2558+ ; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1840700269,1840700269]
2559+ ; CHECK-AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
2560+ ; CHECK-AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
2561+ ; CHECK-AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
2562+ ; CHECK-AVX1-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2563+ ; CHECK-AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
2564+ ; CHECK-AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0
2565+ ; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
2566+ ; CHECK-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2567+ ; CHECK-AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2568+ ; CHECK-AVX1-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2569+ ; CHECK-AVX1-NEXT: retq
2570+ ;
2571+ ; CHECK-AVX2-LABEL: test_srem_even_one_i64:
2572+ ; CHECK-AVX2: # %bb.0:
2573+ ; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
2574+ ; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
2575+ ; CHECK-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3
2576+ ; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
2577+ ; CHECK-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1840700269,1840700269]
2578+ ; CHECK-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
2579+ ; CHECK-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
2580+ ; CHECK-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
2581+ ; CHECK-AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2582+ ; CHECK-AVX2-NEXT: vpsllq $63, %xmm0, %xmm1
2583+ ; CHECK-AVX2-NEXT: vpsrlq $1, %xmm0, %xmm0
2584+ ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
2585+ ; CHECK-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2586+ ; CHECK-AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2587+ ; CHECK-AVX2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2588+ ; CHECK-AVX2-NEXT: retq
2589+ ;
2590+ ; CHECK-AVX512VL-LABEL: test_srem_even_one_i64:
2591+ ; CHECK-AVX512VL: # %bb.0:
2592+ ; CHECK-AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7905747460161236407,7905747460161236407]
2593+ ; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
2594+ ; CHECK-AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm3
2595+ ; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
2596+ ; CHECK-AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
2597+ ; CHECK-AVX512VL-NEXT: vpaddq %xmm1, %xmm0, %xmm0
2598+ ; CHECK-AVX512VL-NEXT: vpsllq $32, %xmm0, %xmm0
2599+ ; CHECK-AVX512VL-NEXT: vpaddq %xmm0, %xmm2, %xmm0
2600+ ; CHECK-AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
2601+ ; CHECK-AVX512VL-NEXT: vprorq $1, %xmm0, %xmm0
2602+ ; CHECK-AVX512VL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
2603+ ; CHECK-AVX512VL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
2604+ ; CHECK-AVX512VL-NEXT: vpsrlq $63, %xmm0, %xmm0
2605+ ; CHECK-AVX512VL-NEXT: retq
2606+ %srem = srem <2 x i64 > %X , <i64 1 , i64 14 > ; signed remainder per lane: lane 0 divisor is 1, lane 1 divisor is 14
2607+ %cmp = icmp eq <2 x i64 > %srem , <i64 0 , i64 0 > ; per-lane test: is the remainder zero?
2608+ %ret = zext <2 x i1 > %cmp to <2 x i64 > ; widen each i1 result to i64, giving 1 where the remainder was zero and 0 otherwise
2609+ ret <2 x i64 > %ret
2610+ }
0 commit comments