Skip to content

Commit 682b850

Browse files
committed
Add a bunch more X86 AVX2 instructions and their corresponding intrinsics.
llvm-svn: 143529
1 parent 8b867b4 commit 682b850

File tree

4 files changed

+584
-2
lines changed

4 files changed

+584
-2
lines changed

llvm/include/llvm/IntrinsicsX86.td

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1525,6 +1525,110 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
15251525
llvm_v16i16_ty], [IntrNoMem]>;
15261526
}
15271527

1528+
// Absolute value ops
// Each def below ties a GCC builtin (__builtin_ia32_pabs{b,w,d}256) to an
// intrinsic with one vector operand and a result of the same vector type
// (v32i8, v16i16 or v8i32). All three carry the IntrNoMem property.
1529+
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1530+
def int_x86_avx2_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb256">,
1531+
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
1532+
def int_x86_avx2_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw256">,
1533+
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
1534+
def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">,
1535+
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
1536+
}
1537+
1538+
// Horizontal arithmetic ops
// phadd_* / phsub_* each take two operands of the result's own vector type
// (v16i16 for the w and sw forms, v8i32 for the d forms). pmadd_ub_sw is the
// odd one out: its two operands are v32i8 while its result is v16i16. Every
// def is IntrNoMem and is bound to its own __builtin_ia32_*256 GCC builtin.
1539+
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1540+
def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">,
1541+
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1542+
llvm_v16i16_ty], [IntrNoMem]>;
1543+
def int_x86_avx2_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd256">,
1544+
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
1545+
llvm_v8i32_ty], [IntrNoMem]>;
1546+
def int_x86_avx2_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw256">,
1547+
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1548+
llvm_v16i16_ty], [IntrNoMem]>;
1549+
def int_x86_avx2_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw256">,
1550+
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1551+
llvm_v16i16_ty], [IntrNoMem]>;
1552+
def int_x86_avx2_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd256">,
1553+
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
1554+
llvm_v8i32_ty], [IntrNoMem]>;
1555+
def int_x86_avx2_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw256">,
1556+
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1557+
llvm_v16i16_ty], [IntrNoMem]>;
1558+
def int_x86_avx2_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw256">,
1559+
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty,
1560+
llvm_v32i8_ty], [IntrNoMem]>;
1561+
}
1562+
1563+
// Sign ops
// psign_b / psign_w / psign_d: two operands and a result that all share one
// vector type (v32i8, v16i16 and v8i32 respectively). Each def is IntrNoMem
// and is bound to one __builtin_ia32_psign{b,w,d}256 GCC builtin.
1564+
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1565+
def int_x86_avx2_psign_b : GCCBuiltin<"__builtin_ia32_psignb256">,
1566+
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1567+
llvm_v32i8_ty], [IntrNoMem]>;
1568+
def int_x86_avx2_psign_w : GCCBuiltin<"__builtin_ia32_psignw256">,
1569+
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1570+
llvm_v16i16_ty], [IntrNoMem]>;
1571+
def int_x86_avx2_psign_d : GCCBuiltin<"__builtin_ia32_psignd256">,
1572+
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
1573+
llvm_v8i32_ty], [IntrNoMem]>;
1574+
}
1575+
1576+
// Packed multiply high with round and scale
// Single intrinsic bound to __builtin_ia32_pmulhrsw256: two v16i16 operands,
// v16i16 result. It is flagged Commutative in addition to IntrNoMem.
1577+
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1578+
def int_x86_avx2_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw256">,
1579+
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1580+
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
1581+
}
1582+
1583+
// Vector sign and zero extend
// pmovsx* / pmovzx* come in matching pairs, one per element-width
// combination (bd, bq, bw, dq, wd, wq). Operands are v16i8, v8i16 or v4i32
// and results are v16i16, v8i32 or v4i64, so the result vector is twice as
// wide in bits as the operand. Every def is IntrNoMem.
// NOTE(review): for bd/bq/wq the operand has more elements than the result,
// so presumably only the low operand elements are extended -- confirm
// against the instruction reference.
1584+
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1585+
def int_x86_avx2_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd256">,
1586+
Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
1587+
[IntrNoMem]>;
1588+
def int_x86_avx2_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq256">,
1589+
Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
1590+
[IntrNoMem]>;
1591+
def int_x86_avx2_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw256">,
1592+
Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
1593+
[IntrNoMem]>;
1594+
def int_x86_avx2_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq256">,
1595+
Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
1596+
[IntrNoMem]>;
1597+
def int_x86_avx2_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd256">,
1598+
Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
1599+
[IntrNoMem]>;
1600+
def int_x86_avx2_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq256">,
1601+
Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
1602+
[IntrNoMem]>;
1603+
def int_x86_avx2_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd256">,
1604+
Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
1605+
[IntrNoMem]>;
1606+
def int_x86_avx2_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq256">,
1607+
Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
1608+
[IntrNoMem]>;
1609+
def int_x86_avx2_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw256">,
1610+
Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
1611+
[IntrNoMem]>;
1612+
def int_x86_avx2_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq256">,
1613+
Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
1614+
[IntrNoMem]>;
1615+
def int_x86_avx2_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd256">,
1616+
Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
1617+
[IntrNoMem]>;
1618+
def int_x86_avx2_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq256">,
1619+
Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
1620+
[IntrNoMem]>;
1621+
}
1622+
1623+
// Misc.
// pmovmskb: one v32i8 operand, scalar i32 result
//           (bound to __builtin_ia32_pmovmskb256).
// pshuf_b:  two v32i8 operands, v32i8 result
//           (bound to __builtin_ia32_pshufb256).
// Both defs are IntrNoMem.
1624+
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1625+
def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
1626+
Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
1627+
def int_x86_avx2_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb256">,
1628+
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1629+
llvm_v32i8_ty], [IntrNoMem]>;
1630+
}
1631+
15281632
//===----------------------------------------------------------------------===//
15291633
// MMX
15301634

llvm/lib/Target/X86/X86InstrFragmentsSIMD.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,11 +276,12 @@ def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
276276
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
277277

278278
// 256-bit memop pattern fragments
// Each fragment takes a single $ptr operand, applies memop to it, and types
// the result as the vector type named in the fragment (v8f32, v4f64, v4i64,
// v8i32, v16i16, v32i8).
279-
def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
280279
def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
281280
def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
282281
def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
283282
def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
283+
def memopv16i16 : PatFrag<(ops node:$ptr), (v16i16 (memop node:$ptr))>;
284+
def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
284285

285286
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
286287
// 16-byte boundary.
@@ -326,6 +327,8 @@ def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
326327
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
327328

328329
// 256-bit bitconvert pattern fragments
// Each bc_<type> fragment takes a single $in operand and yields a bitconvert
// of it typed as <type> (v32i8, v16i16, v8i32, v4i64).
330+
def bc_v32i8 : PatFrag<(ops node:$in), (v32i8 (bitconvert node:$in))>;
331+
def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
329332
def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
330333
def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
331334

0 commit comments

Comments
 (0)