[lldb] Use llvm::APInt for VariantMember Discriminants#188487
[lldb] Use llvm::APInt for VariantMember Discriminants#188487
llvm::APInt for VariantMember Discriminants#188487Conversation
|
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
|
@llvm/pr-subscribers-lldb Author: Walnut (Walnut356) Changesresolves #177812 It currently uses a This patch simply swaps the Sample program: #[repr(u128)]
enum BigDiscr {
Value(u64),
None = 0x16151413121110090807060504030201,
}
fn main() {
let big_discr_none = BigDiscr::None;
let big_disr_some = BigDiscr::Value(31);
let some_string = Some(String::from("asdf"));
let some_empty_string = Some(String::new());
let none_string: Option<String> = Option::None;
println!("Hello world!"); // break
}New output without visualizers: (sample::BigDiscr) big_discr_none = {
$variants$ = {
$variant$0 = {
$discr$ = 29352461300415899028694309177919734273
value = (__0 = 0)
}
$variant$29352461300415899028694309177919734273 = ($discr$ = 29352461300415899028694309177919734273, value = sample::BigDiscr::None:128 @ 0x000000710a7ff7d0)
}
}
(sample::BigDiscr) big_disr_some = {
$variants$ = {
$variant$0 = {
$discr$ = 0
value = (__0 = 31)
}
$variant$29352461300415899028694309177919734273 = ($discr$ = 0, value = sample::BigDiscr::None:128 @ 0x000000710a7ff7f0)
}
}
(core::option::Option<alloc::string::String>) some_string = {
$variants$ = {
$variant$9223372036854775808 = ($discr$ = 4, value = core::option::Option<alloc::string::String>::None<alloc::string::String>:64 @ 0x000000710a7ff750)
$variant$ = {
value = {
__0 = {
vec = {...}
}
}
}
}
}
(core::option::Option<alloc::string::String>) some_empty_string = {
$variants$ = {
$variant$9223372036854775808 = ($discr$ = 0, value = core::option::Option<alloc::string::String>::None<alloc::string::String>:64 @ 0x000000710a7ff780)
$variant$ = {
value = {
__0 = {
vec = {...}
}
}
}
}
}
(core::option::Option<alloc::string::String>) none_string = {
$variants$ = {
$variant$9223372036854775808 = ($discr$ = 9223372036854775808, value = core::option::Option<alloc::string::String>::None<alloc::string::String>:64 @ 0x000000710a7ff7b8)
$variant$ = {
value = {
__0 = {
vec = {...}
}
}
}
}
}New output with Rust's current visualizers: # this one is incorrect because the visualizer scripts are wrong, not because of LLDB.
# the scripts use `GetValue()`, which returns a 64 bit number. They can be modified
# on Rust's end to use `GetData()` instead. The rest work as expected
(sample::BigDiscr) big_discr_none = Value(0) {
0 = 0
}
(sample::BigDiscr) big_disr_some = Value(31) {
0 = 31
}
(core::option::Option<alloc::string::String>) some_string = Some("asdf") {
0 = "asdf" {
[0] = 'a'
[1] = 's'
[2] = 'd'
[3] = 'f'
}
}
(core::option::Option<alloc::string::String>) some_empty_string = Some("") {
0 = ""
}1 Files Affected:
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index cb33fc21bfba9..72b1fcaa65174 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -13,6 +13,7 @@
#include "DWARFDebugInfo.h"
#include "DWARFDeclContext.h"
#include "DWARFDefines.h"
+#include "DWARFFormValue.h"
#include "SymbolFileDWARF.h"
#include "SymbolFileDWARFDebugMap.h"
#include "SymbolFileDWARFDwo.h"
@@ -2568,7 +2569,7 @@ struct VariantMember {
explicit VariantMember(DWARFDIE &die, ModuleSP module_sp);
bool IsDefault() const;
- std::optional<uint32_t> discr_value;
+ std::optional<llvm::APInt> discr_value;
DWARFFormValue type_ref;
ConstString variant_name;
uint32_t byte_offset;
@@ -2596,8 +2597,31 @@ bool VariantMember::IsDefault() const { return !discr_value; }
VariantMember::VariantMember(DWARFDIE &die, lldb::ModuleSP module_sp) {
assert(die.Tag() == llvm::dwarf::DW_TAG_variant);
- this->discr_value =
- die.GetAttributeValueAsOptionalUnsigned(DW_AT_discr_value);
+
+ DWARFFormValue discr_form;
+ die.GetDIE()->GetAttributeValue(die.GetCU(), DW_AT_discr_value, discr_form);
+
+ // rust can output 128-bit discrs (e.g. NonNull<u128>) as `DW_FORM_block1`.
+ // there is a `data16`, but the DIE function treats it as a block anyway.
+ // Handling is included for it just in case rust's output changes to the
+ // `data16` version
+ dw_form_t form = discr_form.Form();
+ if ((form == DW_FORM_block1 && discr_form.Unsigned() == 16) ||
+ form == DW_FORM_data16) {
+ const uint8_t *block_data = discr_form.BlockData();
+ uint64_t len = discr_form.Unsigned();
+ llvm::ArrayRef<uint64_t> data =
+ llvm::ArrayRef(reinterpret_cast<const uint64_t *>(block_data), 2);
+ this->discr_value = llvm::APInt(sizeof(uint64_t) * 2 * 8, data);
+ } else {
+ auto result = die.GetAttributeValueAsOptionalUnsigned(DW_AT_discr_value);
+ if (result.has_value()) {
+ this->discr_value =
+ llvm::APInt(sizeof(uint64_t) * 8, result.value(), false);
+ } else {
+ this->discr_value = std::nullopt;
+ };
+ }
for (auto child_die : die.children()) {
switch (child_die.Tag()) {
@@ -3834,9 +3858,14 @@ void DWARFASTParserClang::ParseRustVariantPart(
m_ast.CompleteTagDeclarationDefinition(field_type);
- auto name = has_discriminant
- ? llvm::formatv("$variant${0}", member.discr_value.value())
- : std::string("$variant$");
+ auto name = std::string("$variant$");
+ if (has_discriminant) {
+ // u128::MAX = 340282366920938463463374607431768211455 which is 39 digits
+ // long + 1 for null term
+ llvm::SmallString<40> discr_str{};
+ member.discr_value.value().toStringUnsigned(discr_str);
+ name.append(discr_str.c_str());
+ }
auto variant_decl = m_ast.AddFieldToRecordType(
inner_holder, llvm::StringRef(name), field_type, 0);
|
|
|
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
|
@JDevlieghere ping |
| // rust can output 128-bit discrs (e.g. NonNull<u128>) as `DW_FORM_block1`. | ||
| // there is a `data16`, but the DIE function treats it as a block anyway. | ||
| // Handling is included for it just in case rust's output changes to the | ||
| // `data16` version |
There was a problem hiding this comment.
| // rust can output 128-bit discrs (e.g. NonNull<u128>) as `DW_FORM_block1`. | |
| // there is a `data16`, but the DIE function treats it as a block anyway. | |
| // Handling is included for it just in case rust's output changes to the | |
| // `data16` version | |
| // Rust can output 128-bit discrs (e.g. NonNull<u128>) as `DW_FORM_block1`. | |
| // There is a `data16`, but the DIE function treats it as a block anyway. | |
| // Handling is included for it just in case rust's output changes to the | |
| // `data16` version. |
| auto name = std::string("$variant$"); | ||
| if (has_discriminant) { | ||
| // u128::MAX = 340282366920938463463374607431768211455 which is 39 digits | ||
| // long + 1 for null term |
There was a problem hiding this comment.
| // long + 1 for null term | |
| // long + 1 for null terminator. |
|
|
||
| llvm::ArrayRef<uint64_t> data = | ||
| llvm::ArrayRef(reinterpret_cast<const uint64_t *>(block_data), 2); | ||
| this->discr_value = llvm::APInt(sizeof(uint64_t) * 2 * 8, data); |
There was a problem hiding this comment.
Nit: maybe use 16 here, to match the 16 on line 2609 and DW_FORM_data16.
| this->discr_value = llvm::APInt(sizeof(uint64_t) * 2 * 8, data); | |
| this->discr_value = llvm::APInt(sizeof(uint64_t) * 16, data); |
| auto result = die.GetAttributeValueAsOptionalUnsigned(DW_AT_discr_value); | ||
| if (result.has_value()) { |
There was a problem hiding this comment.
| auto result = die.GetAttributeValueAsOptionalUnsigned(DW_AT_discr_value); | |
| if (result.has_value()) { | |
| if (auto result = die.GetAttributeValueAsOptionalUnsigned(DW_AT_discr_value)) { |
| auto result = die.GetAttributeValueAsOptionalUnsigned(DW_AT_discr_value); | ||
| if (result.has_value()) { | ||
| this->discr_value = | ||
| llvm::APInt(sizeof(uint64_t) * 8, result.value(), false); |
There was a problem hiding this comment.
| llvm::APInt(sizeof(uint64_t) * 8, result.value(), false); | |
| llvm::APInt(sizeof(uint64_t) * 8, result.value(), /*implicitTrunc=*/false); |
| if (has_discriminant) { | ||
| // u128::MAX = 340282366920938463463374607431768211455 which is 39 digits | ||
| // long + 1 for null term | ||
| llvm::SmallString<40> discr_str{}; |
There was a problem hiding this comment.
| llvm::SmallString<40> discr_str{}; | |
| llvm::SmallString<40> discr_str; |
| const uint8_t *block_data = discr_form.BlockData(); | ||
|
|
||
| llvm::ArrayRef<uint64_t> data = | ||
| llvm::ArrayRef(reinterpret_cast<const uint64_t *>(block_data), 2); |
There was a problem hiding this comment.
This assumes the host and DWARF byte order match. I think you want something like:
DataExtractor extractor(block_data, 16, die.GetCU()->GetByteOrder(),
die.GetCU()->GetAddressByteSize());
lldb::offset_t offset = 0;
uint64_t lo = extractor.GetU64(&offset);
uint64_t hi = extractor.GetU64(&offset);
uint64_t words[] = {lo, hi};
this->discr_value = llvm::APInt(128, words);
|
Would it be possible to add a test for this? Potentially by checking in a obj2yaml'd binary created by Rust? |
|
I'm not super familiar with LLDB's testing infrastructure Coincidentally, i also figured out LLDB will straight up crash trying to handle 128-bit C-style enums e.g.: #[repr(u128]
enum LargeDiscr {
A = 0x15141312111009080706050403020100,
B = 1,
}I'll open an issue about it since it's a whole separate thing.
I saw the tests in FWIW it doesn't look like any other part of the Rust |
Tried hacking around to get a small binary: #![no_std]
#![no_main]
#[allow(unused)]
#[repr(u128)]
enum BigDiscr {
Value(u64),
None = 0x16151413121110090807060504030201,
}
fn use_it(_: &BigDiscr) {}
#[panic_handler]
fn panic(_info: &core::panic::PanicInfo) -> ! {
loop {}
}
#[no_mangle]
extern "C" fn main() {
let none = BigDiscr::None;
let some = BigDiscr::Value(31);
use_it(&none);
use_it(&some);
}Compiled with |
Ah yeah, the smallest I can get even with that is ~30k. I don't have ready access to Linux at the moment, and cross-compiling without In terms of testing logic, I straight up don't know enough about DWARF to test with the kind of elegance that the other tests have. It looks like they're running individual tags through specific functions? I can hack something together with That lack of DWARF knowledge also means I'm not sure what (if anything) i can manually rip out of the YAML and still have the test function. Would it be fine to just pop the whole file into the Also, something I noticed while fiddling with the test data, is there any reason why |
You don't have to write a unit test for this (I think). Either a shell test (https://github.com/llvm/llvm-project/tree/main/lldb/test/Shell/SymbolFile/DWARF) or API test (https://github.com/llvm/llvm-project/tree/main/lldb/test/API; no DWARF specific directory afaik) should suffice. Usually you can start with a rather large YAML file and remove things you don't need. There are already some shell tests that use yaml2obj. |
I might be dumb but i have even less idea of how i would implement that. I cannot currently generate a complete
That's sorta my point. I don't know what I do or don't need because i have an incredibly limited understanding of how ELF and DWARF work. |
llvm::APInt for VariantMember Discriminantsllvm::APInt for VariantMember Discriminants
resolves #177812
It currently uses a
uint32_teven though Rust can output 64- and 128-bit discriminants. This became a more obvious issue when Rust started niche-optimizing based on the capacity of strings (which areNoHighBitvalues, guaranteed to be less thanu64::MAX / 2), rather than theNonNullheap pointer. With this optimization, theNonevariant of strings is 9223372036854775808, which LLDB truncates to 0. This means whenever the capacity is 0 (e.g.Some(String::new()), LLDB will mistakenly read it as theNonevariant. Additionally, when trying to determine theNonevariant, lldb will read the discr from memory correctly (9223372036854775808), and the visualizer scripts will compare that to the value in the variant name ($variant$0), see that it doesn't match, and incorrectly decides that it must not be theNonevariant.This patch simply swaps the
uint32_twith anllvm::APInt.Sample program:
New output without visualizers:
(sample::BigDiscr) big_discr_none = { $variants$ = { $variant$0 = { $discr$ = 29352461300415899028694309177919734273 value = (__0 = 0) } $variant$29352461300415899028694309177919734273 = ($discr$ = 29352461300415899028694309177919734273, value = sample::BigDiscr::None:128 @ 0x000000710a7ff7d0) } } (sample::BigDiscr) big_disr_some = { $variants$ = { $variant$0 = { $discr$ = 0 value = (__0 = 31) } $variant$29352461300415899028694309177919734273 = ($discr$ = 0, value = sample::BigDiscr::None:128 @ 0x000000710a7ff7f0) } } (core::option::Option<alloc::string::String>) some_string = { $variants$ = { $variant$9223372036854775808 = ($discr$ = 4, value = core::option::Option<alloc::string::String>::None<alloc::string::String>:64 @ 0x000000710a7ff750) $variant$ = { value = { __0 = { vec = {...} } } } } } (core::option::Option<alloc::string::String>) some_empty_string = { $variants$ = { $variant$9223372036854775808 = ($discr$ = 0, value = core::option::Option<alloc::string::String>::None<alloc::string::String>:64 @ 0x000000710a7ff780) $variant$ = { value = { __0 = { vec = {...} } } } } } (core::option::Option<alloc::string::String>) none_string = { $variants$ = { $variant$9223372036854775808 = ($discr$ = 9223372036854775808, value = core::option::Option<alloc::string::String>::None<alloc::string::String>:64 @ 0x000000710a7ff7b8) $variant$ = { value = { __0 = { vec = {...} } } } } }New output with Rust's current visualizers: