Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 393eac1

Browse files
clayborgtstellar
authored and committed
Add hashing of the .text section to ProcessMinidump.
Breakpad will always have a UUID for binaries when it creates minidump files. If an ELF file has a GNU build ID, it will use that. If it doesn't, it will create one by hashing up to the first 4096 bytes of the .text section. LLDB was not able to load these binaries even when we had the right binary because the UUID didn't match. LLDB will use the GNU build ID first as the main UUID for a binary and fall back onto an 8 byte CRC if a binary doesn't have one. With this fix, we will check for the Breakpad hash or the Facebook hash (a modified version of the Breakpad hash that collides a bit less) and accept binaries when these hashes match. Differential Revision: https://reviews.llvm.org/D86261 (cherry picked from commit 0e6c9a6)
1 parent 98fa273 commit 393eac1

File tree

6 files changed

+218
-0
lines changed

6 files changed

+218
-0
lines changed

lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp

+89
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,72 @@ class PlaceholderObjectFile : public ObjectFile {
121121
lldb::addr_t m_base;
122122
lldb::addr_t m_size;
123123
};
124+
125+
/// Duplicate the HashElfTextSection() from the breakpad sources.
126+
///
127+
/// Breakpad, a Google crash log reporting tool suite, creates minidump files
128+
/// for many different architectures. When using Breakpad to create ELF
129+
/// minidumps, it will check for a GNU build ID when creating a minidump file
130+
/// and if one doesn't exist in the file, it will say the UUID of the file is a
131+
/// checksum of up to the first 4096 bytes of the .text section. Facebook also
132+
/// uses breakpad and modified this hash to avoid collisions so we can
133+
/// calculate and check for this as well.
134+
///
135+
/// The breakpad code might end up hashing up to 15 bytes that immediately
136+
/// follow the .text section in the file, so this code must do exactly what it
137+
/// does so we can get an exact match for the UUID.
138+
///
139+
/// \param[in] module_sp The module to grab the .text section from.
140+
///
141+
/// \param[in/out] breakpad_uuid A vector that will receive the calculated
142+
/// breakpad .text hash.
143+
///
144+
/// \param[in/out] facebook_uuid A vector that will receive the calculated
145+
/// facebook .text hash.
146+
///
147+
void HashElfTextSection(ModuleSP module_sp, std::vector<uint8_t> &breakpad_uuid,
148+
std::vector<uint8_t> &facebook_uuid) {
149+
SectionList *sect_list = module_sp->GetSectionList();
150+
if (sect_list == nullptr)
151+
return;
152+
SectionSP sect_sp = sect_list->FindSectionByName(ConstString(".text"));
153+
if (!sect_sp)
154+
return;
155+
constexpr size_t kMDGUIDSize = 16;
156+
constexpr size_t kBreakpadPageSize = 4096;
157+
// The breakpad code has a bug where it might access beyond the end of a
158+
// .text section by up to 15 bytes, so we must ensure we round up to the
159+
// next kMDGUIDSize byte boundary.
160+
DataExtractor data;
161+
const size_t text_size = sect_sp->GetFileSize();
162+
const size_t read_size = std::min<size_t>(
163+
llvm::alignTo(text_size, kMDGUIDSize), kBreakpadPageSize);
164+
sect_sp->GetObjectFile()->GetData(sect_sp->GetFileOffset(), read_size, data);
165+
166+
breakpad_uuid.assign(kMDGUIDSize, 0);
167+
facebook_uuid.assign(kMDGUIDSize, 0);
168+
169+
// The only difference between the breakpad hash and the facebook hash is the
170+
// hashing of the text section size into the hash prior to hashing the .text
171+
// contents.
172+
for (size_t i = 0; i < kMDGUIDSize; i++)
173+
facebook_uuid[i] ^= text_size % 255;
174+
175+
// This code carefully duplicates how the hash was created in Breakpad
176+
// sources, including the error where it might has an extra 15 bytes past the
177+
// end of the .text section if the .text section is less than a page size in
178+
// length.
179+
const uint8_t *ptr = data.GetDataStart();
180+
const uint8_t *ptr_end = data.GetDataEnd();
181+
while (ptr < ptr_end) {
182+
for (unsigned i = 0; i < kMDGUIDSize; i++) {
183+
breakpad_uuid[i] ^= ptr[i];
184+
facebook_uuid[i] ^= ptr[i];
185+
}
186+
ptr += kMDGUIDSize;
187+
}
188+
}
189+
124190
} // namespace
125191

126192
ConstString ProcessMinidump::GetPluginNameStatic() {
@@ -494,10 +560,33 @@ void ProcessMinidump::ReadModuleList() {
494560
const bool match = dmp_bytes.empty() || mod_bytes.empty() ||
495561
mod_bytes.take_front(dmp_bytes.size()) == dmp_bytes;
496562
if (!match) {
563+
// Breakpad generates minidump files, and if there is no GNU build
564+
// ID in the binary, it will calculate a UUID by hashing first 4096
565+
// bytes of the .text section and using that as the UUID for a module
566+
// in the minidump. Facebook uses a modified breakpad client that
567+
// uses a slightly modified version of this hash to avoid collisions. Check for
568+
// UUIDs from the minidump that match these cases and accept the
569+
// module we find if they do match.
570+
std::vector<uint8_t> breakpad_uuid;
571+
std::vector<uint8_t> facebook_uuid;
572+
HashElfTextSection(module_sp, breakpad_uuid, facebook_uuid);
573+
if (dmp_bytes == llvm::ArrayRef<uint8_t>(breakpad_uuid)) {
574+
LLDB_LOG(log, "Breakpad .text hash match for {0}.", name);
575+
} else if (dmp_bytes == llvm::ArrayRef<uint8_t>(facebook_uuid)) {
576+
LLDB_LOG(log, "Facebook .text hash match for {0}.", name);
577+
} else {
578+
// The UUID wasn't a partial match and didn't match the .text hash
579+
// so remove the module from the target, we will need to create a
580+
// placeholder object file.
497581
GetTarget().GetImages().Remove(module_sp);
498582
module_sp.reset();
583+
}
584+
} else {
585+
LLDB_LOG(log, "Partial uuid match for {0}.", name);
499586
}
500587
}
588+
} else {
589+
LLDB_LOG(log, "Full uuid match for {0}.", name);
501590
}
502591
if (module_sp) {
503592
// Watch out for place holder modules that have different paths, but the

lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpUUID.py

+63
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,69 @@ def test_partial_uuid_mismatch(self):
179179
"/invalid/path/on/current/system/libuuidmismatch.so",
180180
"7295E17C-6668-9E05-CBB5-DEE5003865D5")
181181

182+
def test_breakpad_hash_match(self):
    """
    Match a module by the Breakpad hash of its .text section.

    When an executable has no ELF build ID, Breakpad stores a hash of the
    .text section as the UUID in the module's PDB70 CvRecord. Given a
    minidump carrying such a 16 byte UUID, the symbol file without a build
    ID must still be associated with the module by hashing its .text
    section.
    """
    lib_path = self.getBuildArtifact("libbreakpad.so")
    self.yaml2obj("libbreakpad.yaml", lib_path)
    # Let LLDB locate the freshly built library next to the build artifact.
    search_dir = os.path.dirname(lib_path)
    self.dbg.HandleCommand(
        'settings set target.exec-search-paths "%s"' % (search_dir))
    found = self.get_minidump_modules("linux-arm-breakpad-uuid-match.yaml")
    self.assertEqual(1, len(found))
    # Without a build ID LLDB makes up its own UUID for the binary, so that
    # is the value expected on the matched module.
    self.verify_module(found[0], lib_path, "D9C480E8")
202+
203+
def test_breakpad_overflow_hash_match(self):
    """
    Match a module whose .text section is not a multiple of 16 bytes.

    Similar to test_breakpad_hash_match, except that the .text section
    does not end on a 16 byte boundary, so the Breakpad hash overflows
    into the next section's data by up to 15 bytes. This checks that the
    same overflow is reproduced so the UUIDs still match.
    """
    lib_path = self.getBuildArtifact("libbreakpad.so")
    self.yaml2obj("libbreakpad-overflow.yaml", lib_path)
    # Let LLDB locate the freshly built library next to the build artifact.
    search_dir = os.path.dirname(lib_path)
    self.dbg.HandleCommand(
        'settings set target.exec-search-paths "%s"' % (search_dir))
    found = self.get_minidump_modules("linux-arm-breakpad-uuid-match.yaml")
    self.assertEqual(1, len(found))
    # Without a build ID LLDB makes up its own UUID for the binary, so that
    # is the value expected on the matched module.
    self.verify_module(found[0], lib_path, "48EB9FD7")
220+
221+
222+
def test_facebook_hash_match(self):
    """
    Match a module by the Facebook variant of the Breakpad .text hash.

    When an executable has no ELF build ID, Breakpad stores a hash of the
    .text section as the UUID in the module's PDB70 CvRecord; Facebook
    modified this hash to avoid collisions. Given a minidump carrying
    such a 16 byte UUID, the symbol file without a build ID must still be
    associated with the module by hashing its .text section the way
    Facebook does.
    """
    lib_path = self.getBuildArtifact("libbreakpad.so")
    self.yaml2obj("libbreakpad.yaml", lib_path)
    # Let LLDB locate the freshly built library next to the build artifact.
    search_dir = os.path.dirname(lib_path)
    self.dbg.HandleCommand(
        'settings set target.exec-search-paths "%s"' % (search_dir))
    found = self.get_minidump_modules("linux-arm-facebook-uuid-match.yaml")
    self.assertEqual(1, len(found))
    # Without a build ID LLDB makes up its own UUID for the binary, so that
    # is the value expected on the matched module.
    self.verify_module(found[0], lib_path, "D9C480E8")
243+
244+
182245
def test_relative_module_name(self):
183246
old_cwd = os.getcwd()
184247
self.addTearDownHook(lambda: os.chdir(old_cwd))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
--- !ELF
2+
FileHeader:
3+
Class: ELFCLASS32
4+
Data: ELFDATA2LSB
5+
Type: ET_DYN
6+
Machine: EM_ARM
7+
Flags: [ EF_ARM_SOFT_FLOAT, EF_ARM_EABI_VER5 ]
8+
Sections:
9+
Sections:
10+
- Name: .text
11+
Type: SHT_PROGBITS
12+
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
13+
Address: 0x0000000000010000
14+
AddressAlign: 0x0000000000000001
15+
Content: 04
16+
- Name: .data
17+
Type: SHT_PROGBITS
18+
Flags: [ SHF_ALLOC, SHF_WRITE ]
19+
Address: 0x0000000000010001
20+
AddressAlign: 0x0000000000000001
21+
Content: 0000001400000003000000474E5500
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
--- !ELF
2+
FileHeader:
3+
Class: ELFCLASS32
4+
Data: ELFDATA2LSB
5+
Type: ET_DYN
6+
Machine: EM_ARM
7+
Flags: [ EF_ARM_SOFT_FLOAT, EF_ARM_EABI_VER5 ]
8+
Sections:
9+
Sections:
10+
- Name: .text
11+
Type: SHT_PROGBITS
12+
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
13+
Address: 0x0000000000010000
14+
AddressAlign: 0x0000000000000004
15+
Content: 040000001400000003000000474E5500
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
--- !minidump
2+
Streams:
3+
- Type: SystemInfo
4+
Processor Arch: ARM
5+
Platform ID: Linux
6+
CSD Version: '15E216'
7+
CPU:
8+
CPUID: 0x00000000
9+
- Type: ModuleList
10+
Modules:
11+
- Base of Image: 0x0000000000001000
12+
Size of Image: 0x00001000
13+
Module Name: '/invalid/path/on/current/system/libbreakpad.so'
14+
CodeView Record: 52534453040000001400000003000000474e55000000000000
15+
...
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
--- !minidump
2+
Streams:
3+
- Type: SystemInfo
4+
Processor Arch: ARM
5+
Platform ID: Linux
6+
CSD Version: '15E216'
7+
CPU:
8+
CPUID: 0x00000000
9+
- Type: ModuleList
10+
Modules:
11+
- Base of Image: 0x0000000000001000
12+
Size of Image: 0x00001000
13+
Module Name: '/invalid/path/on/current/system/libbreakpad.so'
14+
CodeView Record: 52534453141010100410101013101010575e45100000000000
15+
...

0 commit comments

Comments
 (0)