Skip to content

Commit 84e305d

Browse files
committed
Stop emitting non-power-of-two vectors in basic LLVM codegen
1 parent ac89e16 commit 84e305d

File tree

3 files changed

+43
-15
lines changed

3 files changed

+43
-15
lines changed

compiler/rustc_codegen_llvm/src/type_of.rs

+4
Original file line number | Diff line number | Diff line change
@@ -405,7 +405,11 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
405405

406406
// Vectors, even for non-power-of-two sizes, have the same layout as
407407
// arrays but don't count as aggregate types
408+
// While LLVM theoretically supports non-power-of-two sizes, and they
409+
// often work fine, sometimes x86-isel deals with them horribly
410+
// (see #115212) so for now only use power-of-two ones.
408411
if let FieldsShape::Array { count, .. } = self.layout.fields()
412+
&& count.is_power_of_two()
409413
&& let element = self.field(cx, 0)
410414
&& element.ty.is_integral()
411415
{

tests/codegen/mem-replace-simple-type.rs

+15 -6
Original file line number | Diff line number | Diff line change
@@ -33,12 +33,21 @@ pub fn replace_ref_str<'a>(r: &mut &'a str, v: &'a str) -> &'a str {
3333
}
3434

3535
#[no_mangle]
36-
// CHECK-LABEL: @replace_short_array(
37-
pub fn replace_short_array(r: &mut [u32; 3], v: [u32; 3]) -> [u32; 3] {
36+
// CHECK-LABEL: @replace_short_array_3(
37+
pub fn replace_short_array_3(r: &mut [u32; 3], v: [u32; 3]) -> [u32; 3] {
3838
// CHECK-NOT: alloca
39-
// CHECK: %[[R:.+]] = load <3 x i32>, ptr %r, align 4
40-
// CHECK: store <3 x i32> %[[R]], ptr %result
41-
// CHECK: %[[V:.+]] = load <3 x i32>, ptr %v, align 4
42-
// CHECK: store <3 x i32> %[[V]], ptr %r
39+
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %result, ptr align 4 %r, i64 12, i1 false)
40+
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %r, ptr align 4 %v, i64 12, i1 false)
41+
std::mem::replace(r, v)
42+
}
43+
44+
#[no_mangle]
45+
// CHECK-LABEL: @replace_short_array_4(
46+
pub fn replace_short_array_4(r: &mut [u32; 4], v: [u32; 4]) -> [u32; 4] {
47+
// CHECK-NOT: alloca
48+
// CHECK: %[[R:.+]] = load <4 x i32>, ptr %r, align 4
49+
// CHECK: store <4 x i32> %[[R]], ptr %result
50+
// CHECK: %[[V:.+]] = load <4 x i32>, ptr %v, align 4
51+
// CHECK: store <4 x i32> %[[V]], ptr %r
4352
std::mem::replace(r, v)
4453
}

tests/codegen/swap-small-types.rs

+24 -9
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,12 @@ type RGB48 = [u16; 3];
1111
// CHECK-LABEL: @swap_rgb48_manually(
1212
#[no_mangle]
1313
pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) {
14-
// CHECK-NOT: alloca
15-
// CHECK: %[[TEMP0:.+]] = load <3 x i16>, ptr %x, align 2
16-
// CHECK: %[[TEMP1:.+]] = load <3 x i16>, ptr %y, align 2
17-
// CHECK: store <3 x i16> %[[TEMP1]], ptr %x, align 2
18-
// CHECK: store <3 x i16> %[[TEMP0]], ptr %y, align 2
14+
// FIXME: See #115212 for why this has an alloca again
15+
16+
// CHECK: alloca [3 x i16], align 2
17+
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
18+
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
19+
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
1920

2021
let temp = *x;
2122
*x = *y;
@@ -25,11 +26,25 @@ pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) {
2526
// CHECK-LABEL: @swap_rgb48
2627
#[no_mangle]
2728
pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
29+
// FIXME: See #115212 for why this has an alloca again
30+
31+
// CHECK: alloca [3 x i16], align 2
32+
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
33+
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
34+
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
35+
swap(x, y)
36+
}
37+
38+
type RGBA64 = [u16; 4];
39+
40+
// CHECK-LABEL: @swap_rgba64
41+
#[no_mangle]
42+
pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) {
2843
// CHECK-NOT: alloca
29-
// CHECK: load <3 x i16>
30-
// CHECK: load <3 x i16>
31-
// CHECK: store <3 x i16>
32-
// CHECK: store <3 x i16>
44+
// CHECK-DAG: %[[XVAL:.+]] = load <4 x i16>, ptr %x, align 2
45+
// CHECK-DAG: %[[YVAL:.+]] = load <4 x i16>, ptr %y, align 2
46+
// CHECK-DAG: store <4 x i16> %[[YVAL]], ptr %x, align 2
47+
// CHECK-DAG: store <4 x i16> %[[XVAL]], ptr %y, align 2
3348
swap(x, y)
3449
}
3550

0 commit comments

Comments
 (0)