Skip to content

Commit 059f627

Browse files
kelnos, jothan, and plietar
authored and committed
Teach rust core about Xtensa VaListImpl and add a custom lowering of vaarg for xtensa.
LLVM does not include an implementation of the va_arg instruction for Xtensa. From what I understand, this is a conscious decision, and language frontends are instead encouraged to implement it themselves. The rationale seems to be that loading values correctly requires language- and ABI-specific knowledge that LLVM lacks. This is true of most architectures, and rustc already provides implementations for a number of them. This commit extends that support to include Xtensa. See https://lists.llvm.org/pipermail/llvm-dev/2017-August/116337.html for some discussion on the topic. Unfortunately, there does not seem to be a reference document for the semantics of va_list and va_arg on Xtensa. The most reliable source is the GCC implementation, which this commit tries to follow. Clang also provides its own compatible implementation. This was tested for all the types that rustc allows in variadics. Co-authored-by: Brian Tarricone <[email protected]> Co-authored-by: Jonathan Bastien-Filiatrault <[email protected]> Co-authored-by: Paul Lietar <[email protected]>
1 parent efdd9e8 commit 059f627

File tree

3 files changed

+131
-2
lines changed

3 files changed

+131
-2
lines changed

compiler/rustc_codegen_llvm/src/va_arg.rs

+111-2
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,23 @@ use crate::type_::Type;
1010
use crate::type_of::LayoutLlvmExt;
1111
use crate::value::Value;
1212

13+
fn round_up_to_alignment<'ll>(
14+
bx: &mut Builder<'_, 'll, '_>,
15+
mut value: &'ll Value,
16+
align: Align,
17+
) -> &'ll Value {
18+
value = bx.add(value, bx.cx().const_i32(align.bytes() as i32 - 1));
19+
return bx.and(value, bx.cx().const_i32(-(align.bytes() as i32)));
20+
}
21+
1322
fn round_pointer_up_to_alignment<'ll>(
1423
bx: &mut Builder<'_, 'll, '_>,
1524
addr: &'ll Value,
1625
align: Align,
1726
ptr_ty: &'ll Type,
1827
) -> &'ll Value {
1928
let mut ptr_as_int = bx.ptrtoint(addr, bx.cx().type_isize());
20-
ptr_as_int = bx.add(ptr_as_int, bx.cx().const_i32(align.bytes() as i32 - 1));
21-
ptr_as_int = bx.and(ptr_as_int, bx.cx().const_i32(-(align.bytes() as i32)));
29+
ptr_as_int = round_up_to_alignment(bx, ptr_as_int, align);
2230
bx.inttoptr(ptr_as_int, ptr_ty)
2331
}
2432

@@ -270,6 +278,106 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
270278
bx.load(val_type, val_addr, layout.align.abi)
271279
}
272280

281+
fn emit_xtensa_va_arg<'ll, 'tcx>(
282+
bx: &mut Builder<'_, 'll, 'tcx>,
283+
list: OperandRef<'tcx, &'ll Value>,
284+
target_ty: Ty<'tcx>,
285+
) -> &'ll Value {
286+
// Implementation of va_arg for Xtensa. There doesn't seem to be an authoritative source for
287+
// this, other than "what GCC does".
288+
//
289+
// The va_list type has three fields:
290+
// struct __va_list_tag {
291+
// int32_t *va_stk; // Arguments passed on the stack
292+
// int32_t *va_reg; // Arguments passed in registers, saved to memory by the prologue.
293+
// int32_t va_ndx; // Offset into the arguments, in bytes
294+
// };
295+
//
296+
// The first 24 bytes (equivalent to 6 registers) come from va_reg, the rest from va_stk.
297+
// Thus if va_ndx is less than 24, the next va_arg *may* read from va_reg,
298+
// otherwise it must come from va_stk.
299+
//
300+
// Primitive arguments are never split between registers and the stack. For example, if loading an 8 byte
301+
// primitive value and va_ndx = 20, we instead bump the offset and read everything from va_stk.
302+
let va_list_addr = list.immediate();
303+
// FIXME: handle multi-field structs that split across regsave/stack?
304+
let layout = bx.cx.layout_of(target_ty);
305+
let from_stack = bx.append_sibling_block("va_arg.from_stack");
306+
let from_regsave = bx.append_sibling_block("va_arg.from_regsave");
307+
let end = bx.append_sibling_block("va_arg.end");
308+
309+
// (*va).va_ndx
310+
let va_reg_offset = 4;
311+
let va_ndx_offset = va_reg_offset + 4;
312+
let offset_ptr =
313+
bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_ndx_offset)]);
314+
315+
let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
316+
let offset = round_up_to_alignment(bx, offset, layout.align.abi);
317+
318+
let slot_size = layout.size.align_to(Align::from_bytes(4).unwrap()).bytes() as i32;
319+
320+
// Update the offset in va_list, by adding the slot's size.
321+
let offset_next = bx.add(offset, bx.const_i32(slot_size));
322+
323+
// Figure out where to look for our value. We do that by checking the end of our slot (offset_next).
324+
// If that is within the regsave area, then load from there. Otherwise load from the stack area.
325+
let regsave_size = bx.const_i32(24);
326+
let use_regsave = bx.icmp(IntPredicate::IntULE, offset_next, regsave_size);
327+
bx.cond_br(use_regsave, from_regsave, from_stack);
328+
329+
bx.switch_to_block(from_regsave);
330+
// update va_ndx
331+
bx.store(offset_next, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
332+
333+
// (*va).va_reg
334+
let regsave_area_ptr =
335+
bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_reg_offset)]);
336+
let regsave_area =
337+
bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi);
338+
let regsave_value_ptr = bx.inbounds_gep(bx.type_i8(), regsave_area, &[offset]);
339+
bx.br(end);
340+
341+
bx.switch_to_block(from_stack);
342+
343+
// The first time we switch from regsave to stack we needs to adjust our offsets a bit.
344+
// va_stk is set up such that the first stack argument is always at va_stk + 32.
345+
// The corrected offset is written back into the va_list struct.
346+
347+
// let offset_corrected = cmp::max(offset, 32);
348+
let stack_offset_start = bx.const_i32(32);
349+
let needs_correction = bx.icmp(IntPredicate::IntULE, offset, stack_offset_start);
350+
let offset_corrected = bx.select(needs_correction, stack_offset_start, offset);
351+
352+
// let offset_next_corrected = offset_corrected + slot_size;
353+
// va_ndx = offset_next_corrected;
354+
let offset_next_corrected = bx.add(offset_next, bx.const_i32(slot_size));
355+
// update va_ndx
356+
bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
357+
358+
// let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
359+
let stack_area_ptr = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(0)]);
360+
let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi);
361+
let stack_value_ptr = bx.inbounds_gep(bx.type_i8(), stack_area, &[offset_corrected]);
362+
bx.br(end);
363+
364+
bx.switch_to_block(end);
365+
366+
// On big-endian, for values smaller than the slot size we'd have to align the read to the end
367+
// of the slot rather than the start. While the ISA and GCC support big-endian, all the Xtensa
368+
// targets supported by rustc are litte-endian so don't worry about it.
369+
370+
// if from_regsave {
371+
// unsafe { *regsave_value_ptr }
372+
// } else {
373+
// unsafe { *stack_value_ptr }
374+
// }
375+
assert!(bx.tcx().sess.target.endian == Endian::Little);
376+
let value_ptr =
377+
bx.phi(bx.type_ptr(), &[regsave_value_ptr, stack_value_ptr], &[from_regsave, from_stack]);
378+
return bx.load(layout.llvm_type(bx), value_ptr, layout.align.abi);
379+
}
380+
273381
pub(super) fn emit_va_arg<'ll, 'tcx>(
274382
bx: &mut Builder<'_, 'll, 'tcx>,
275383
addr: OperandRef<'tcx, &'ll Value>,
@@ -302,6 +410,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
302410
let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
303411
emit_ptr_va_arg(bx, addr, target_ty, indirect, Align::from_bytes(8).unwrap(), false)
304412
}
413+
"xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
305414
// For all other architecture/OS combinations fall back to using
306415
// the LLVM va_arg instruction.
307416
// https://llvm.org/docs/LangRef.html#va-arg-instruction

library/core/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ check-cfg = [
4343
'cfg(bootstrap)',
4444
'cfg(no_fp_fmt_parse)',
4545
'cfg(stdarch_intel_sde)',
46+
'cfg(target_arch, values("xtensa"))',
4647
# core use #[path] imports to portable-simd `core_simd` crate
4748
# and to stdarch `core_arch` crate which messes-up with Cargo list
4849
# of declared features, we therefor expect any feature cfg

library/core/src/ffi/va_list.rs

+19
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use crate::ops::{Deref, DerefMut};
1515
not(target_arch = "aarch64"),
1616
not(target_arch = "powerpc"),
1717
not(target_arch = "s390x"),
18+
not(target_arch = "xtensa"),
1819
not(target_arch = "x86_64")
1920
),
2021
all(target_arch = "aarch64", target_vendor = "apple"),
@@ -37,6 +38,7 @@ pub struct VaListImpl<'f> {
3738
not(target_arch = "aarch64"),
3839
not(target_arch = "powerpc"),
3940
not(target_arch = "s390x"),
41+
not(target_arch = "xtensa"),
4042
not(target_arch = "x86_64")
4143
),
4244
all(target_arch = "aarch64", target_vendor = "apple"),
@@ -113,6 +115,18 @@ pub struct VaListImpl<'f> {
113115
_marker: PhantomData<&'f mut &'f c_void>,
114116
}
115117

118+
/// Xtensa ABI implementation of a `va_list`.
///
/// The compiler's `va_arg` lowering for Xtensa addresses these fields by fixed byte offsets
/// (`stk` at 0, `reg` at 4, `ndx` at 8), so the field order and types must not change.
#[cfg(target_arch = "xtensa")]
#[lang = "va_list"]
#[derive(Debug)]
#[repr(C)]
pub struct VaListImpl<'f> {
    /// Arguments passed on the stack.
    stk: *mut i32,
    /// Arguments passed in registers, saved to memory by the prologue.
    reg: *mut i32,
    /// Offset into the arguments, in bytes.
    ndx: i32,
    /// Ties the list to the lifetime `'f` without storing any data.
    _marker: PhantomData<&'f mut &'f c_void>,
}
129+
116130
/// A wrapper for a `va_list`
117131
#[repr(transparent)]
118132
#[derive(Debug)]
@@ -124,6 +138,7 @@ pub struct VaList<'a, 'f: 'a> {
124138
not(target_arch = "s390x"),
125139
not(target_arch = "x86_64")
126140
),
141+
target_arch = "xtensa",
127142
all(target_arch = "aarch64", target_vendor = "apple"),
128143
target_family = "wasm",
129144
target_os = "uefi",
@@ -138,6 +153,7 @@ pub struct VaList<'a, 'f: 'a> {
138153
target_arch = "s390x",
139154
target_arch = "x86_64"
140155
),
156+
not(target_arch = "xtensa"),
141157
any(not(target_arch = "aarch64"), not(target_vendor = "apple")),
142158
not(target_family = "wasm"),
143159
not(target_os = "uefi"),
@@ -155,6 +171,7 @@ pub struct VaList<'a, 'f: 'a> {
155171
not(target_arch = "s390x"),
156172
not(target_arch = "x86_64")
157173
),
174+
target_arch = "xtensa",
158175
all(target_arch = "aarch64", target_vendor = "apple"),
159176
target_family = "wasm",
160177
target_os = "uefi",
@@ -173,8 +190,10 @@ impl<'f> VaListImpl<'f> {
173190
target_arch = "aarch64",
174191
target_arch = "powerpc",
175192
target_arch = "s390x",
193+
target_arch = "xtensa",
176194
target_arch = "x86_64"
177195
),
196+
not(target_arch = "xtensa"),
178197
any(not(target_arch = "aarch64"), not(target_vendor = "apple")),
179198
not(target_family = "wasm"),
180199
not(target_os = "uefi"),

0 commit comments

Comments
 (0)