-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[lldb] Fix block address resolution for functions in multiple sections #137955
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Continuing the theme from llvm#116777 and llvm#124931, this patch ensures we compute the correct address when a function is spread across multiple sections. Due to this, it's not sufficient to adjust the offset in the section+offset pair (Address::Slide). We must actually slide the file offset and then recompute the section using the result.
@llvm/pr-subscribers-lldb Author: Pavel Labath (labath) Changes: Continuing the theme from #116777 and #124931, this patch ensures we compute the correct address when a function is spread across multiple sections. Due to this, it's not sufficient to adjust the offset in the section+offset pair (Address::Slide). We must actually slide the file offset and then recompute the section using the result. Full diff: https://github.com/llvm/llvm-project/pull/137955.diff 2 Files Affected:
diff --git a/lldb/source/Symbol/Block.cpp b/lldb/source/Symbol/Block.cpp
index 9d01293ea64e0..b630b148a90c9 100644
--- a/lldb/source/Symbol/Block.cpp
+++ b/lldb/source/Symbol/Block.cpp
@@ -283,39 +283,42 @@ uint32_t Block::GetRangeIndexContainingAddress(const Address &addr) {
return m_ranges.FindEntryIndexThatContains(file_addr - func_file_addr);
}
+static AddressRange ToAddressRange(const Address &func_addr,
+ const Block::Range &range) {
+ assert(func_addr.GetModule());
+ return AddressRange(func_addr.GetFileAddress() + range.base, range.size,
+ func_addr.GetModule()->GetSectionList());
+}
+
bool Block::GetRangeAtIndex(uint32_t range_idx, AddressRange &range) {
if (range_idx >= m_ranges.GetSize())
return false;
- Function &function = GetFunction();
- const Range &vm_range = m_ranges.GetEntryRef(range_idx);
- range.GetBaseAddress() = function.GetAddress();
- range.GetBaseAddress().Slide(vm_range.GetRangeBase());
- range.SetByteSize(vm_range.GetByteSize());
+ Address addr = GetFunction().GetAddress();
+ if (!addr.GetModule())
+ return false;
+
+ range = ToAddressRange(addr, m_ranges.GetEntryRef(range_idx));
return true;
}
AddressRanges Block::GetRanges() {
+ Address addr = GetFunction().GetAddress();
+ if (!addr.GetModule())
+ return {};
+
AddressRanges ranges;
- Function &function = GetFunction();
- for (size_t i = 0, e = m_ranges.GetSize(); i < e; ++i) {
- ranges.emplace_back();
- auto &range = ranges.back();
- const Range &vm_range = m_ranges.GetEntryRef(i);
- range.GetBaseAddress() = function.GetAddress();
- range.GetBaseAddress().Slide(vm_range.GetRangeBase());
- range.SetByteSize(vm_range.GetByteSize());
- }
+ for (size_t i = 0, e = m_ranges.GetSize(); i < e; ++i)
+ ranges.push_back(ToAddressRange(addr, m_ranges.GetEntryRef(i)));
return ranges;
}
bool Block::GetStartAddress(Address &addr) {
- if (m_ranges.IsEmpty())
+ Address func_addr = GetFunction().GetAddress();
+ if (!func_addr.GetModule() || m_ranges.IsEmpty())
return false;
- Function &function = GetFunction();
- addr = function.GetAddress();
- addr.Slide(m_ranges.GetEntryRef(0).GetRangeBase());
+ addr = ToAddressRange(func_addr, m_ranges.GetEntryRef(0)).GetBaseAddress();
return true;
}
diff --git a/lldb/test/Shell/Commands/command-disassemble-sections.s b/lldb/test/Shell/Commands/command-disassemble-sections.s
new file mode 100644
index 0000000000000..d7ade39241b22
--- /dev/null
+++ b/lldb/test/Shell/Commands/command-disassemble-sections.s
@@ -0,0 +1,102 @@
+# REQUIRES: x86, lld
+
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux %t/file.s -o %t/file.o
+# RUN: ld.lld %t/file.o -o %t/file.out -T %t/file.lds
+# RUN: %lldb %t/file.out -o "disassemble --name func1" -o exit | FileCheck %s
+
+# CHECK: (lldb) disassemble --name func1
+# CHECK: file.out`func1:
+# CHECK-NEXT: file.out[0x0] <+0>: int $0x2a
+# CHECK: file.out`func1:
+# CHECK-NEXT: file.out[0x1000] <+4096>: int $0x2f
+
+
+#--- file.lds
+PHDRS {
+ text1 PT_LOAD;
+ text2 PT_LOAD;
+}
+SECTIONS {
+ . = 0;
+ .text.part1 : { *(.text.part1) } :text1
+ .text.part2 : { *(.text.part2) } :text2
+}
+
+#--- file.s
+ .section .text.part1,"ax",@progbits
+ .p2align 12
+func1:
+ int $42
+.Lfunc1_end:
+
+ .section .text.part2,"ax",@progbits
+ .p2align 12
+func1.__part.1:
+ int $47
+.Lfunc1.__part.1_end:
+
+
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 8 # DW_FORM_string
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 85 # DW_AT_ranges
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 85 # DW_AT_ranges
+ .byte 23 # DW_FORM_sec_offset
+ .byte 3 # DW_AT_name
+ .byte 8 # DW_FORM_string
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 1 # Abbrev DW_TAG_compile_unit
+ .asciz "Hand-written DWARF" # DW_AT_producer
+ .short 29 # DW_AT_language
+ .quad 0 # DW_AT_low_pc
+ .long .Ldebug_ranges0 # DW_AT_ranges
+ .byte 2 # Abbrev DW_TAG_subprogram
+ .long .Ldebug_ranges0 # DW_AT_ranges
+ .asciz "func1" # DW_AT_name
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+
+ .section .debug_rnglists,"",@progbits
+ .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length
+.Ldebug_list_header_start0:
+ .short 5 # Version
+ .byte 8 # Address size
+ .byte 0 # Segment selector size
+ .long 1 # Offset entry count
+.Lrnglists_table_base0:
+ .long .Ldebug_ranges0-.Lrnglists_table_base0
+.Ldebug_ranges0:
+ .byte 6 # DW_RLE_start_end
+ .quad func1
+ .quad .Lfunc1_end
+ .byte 6 # DW_RLE_start_end
+ .quad func1.__part.1
+ .quad .Lfunc1.__part.1_end
+ .byte 0 # DW_RLE_end_of_list
+.Ldebug_list_header_end0:
|
lldb/source/Symbol/Block.cpp
Outdated
static AddressRange ToAddressRange(const Address &func_addr, | ||
const Block::Range &range) { | ||
assert(func_addr.GetModule()); | ||
return AddressRange(func_addr.GetFileAddress() + range.base, range.size, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what exactly is func_addr.GetFileAddress()
going to return? The entry point of the function, as an offset to the address of the file it's contained in, or the lowest address contained within the function?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A "file address" is an established (if somewhat confusing) concept. It roughly means "address, as specified in the (object) file". In elf terms, it's the "virtual address". After the file is loaded to memory, it gets a "load address" (by adding a constant to the file address).
func_addr.GetFileAddress()
returns the (file) address of the function entry point, though that's not completely relevant in this case. The point is that addresses in the block are stored as offsets from some address (which happens to be the function entry point, but in theory we could have picked something else as well). This undoes the transformation, which confusingly doesn't happen inside this class, but (e.g.) in SymbolFileDWARF::ParseBlocksRecursive.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So a block range is [offset, offset + size)
, where offset is relative to the parent function's entry point.
You want to make an AddressRange, which is a proper virtual address (minus the generally applied load offset). So you make it [function entry point + offset, function entry point + offset + size)
by adding the function entry point to the base of the range.
If any of that is a correct interpretation, consider adding a comment to explain that in the code:
// The block's base address is a relative offset to its parent function's entry point. We want to produce a virtual address
// so we need to add the value of that entry point here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is. I've added it to the header to make it more visible.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
llvm#137955) Continuing the theme from llvm#116777 and llvm#124931, this patch ensures we compute the correct address when a function is spread across multiple sections. Due to this, it's not sufficient to adjust the offset in the section+offset pair (Address::Slide). We must actually slide the file offset and then recompute the section using the result. I found this out due to a failure to disassemble some parts of the function, so I'm testing with that, although it's likely there are other things that were broken due to this.
Continuing the theme from #116777 and #124931, this patch ensures we compute the correct address when a function is spread across multiple sections. Due to this, it's not sufficient to adjust the offset in the section+offset pair (Address::Slide). We must actually slide the file offset and then recompute the section using the result.
I found this out due to a failure to disassemble some parts of the function, so I'm testing with that, although it's likely there are other things that were broken due to this.