@@ -840,6 +840,7 @@ void RewriteInstance::discoverFileObjects() {
840840 continue ;
841841
842842 if (cantFail (Symbol.getType ()) == SymbolRef::ST_File) {
843+ FileSymbols.emplace_back (Symbol);
843844 StringRef Name =
844845 cantFail (std::move (NameOrError), " cannot get symbol name for file" );
845846 // Ignore Clang LTO artificial FILE symbol as it is not always generated,
@@ -1340,6 +1341,7 @@ void RewriteInstance::discoverFileObjects() {
13401341 }
13411342
13421343 registerFragments ();
1344+ FileSymbols.clear ();
13431345}
13441346
13451347Error RewriteInstance::discoverRtFiniAddress () {
@@ -1417,50 +1419,116 @@ void RewriteInstance::registerFragments() {
14171419 if (!BC->HasSplitFunctions )
14181420 return ;
14191421
1422+ // Process fragments with ambiguous parents separately as they are typically a
1423+ // vanishing minority of cases and require expensive symbol table lookups.
1424+ std::vector<std::pair<StringRef, BinaryFunction *>> AmbiguousFragments;
14201425 for (auto &BFI : BC->getBinaryFunctions ()) {
14211426 BinaryFunction &Function = BFI.second ;
14221427 if (!Function.isFragment ())
14231428 continue ;
1424- unsigned ParentsFound = 0 ;
14251429 for (StringRef Name : Function.getNames ()) {
1426- StringRef BaseName, Suffix ;
1427- std::tie (BaseName, Suffix) = Name. split ( ' / ' ) ;
1430+ StringRef BaseName = NR. restore (Name) ;
1431+ const bool IsGlobal = BaseName == Name;
14281432 const size_t ColdSuffixPos = BaseName.find (" .cold" );
14291433 if (ColdSuffixPos == StringRef::npos)
14301434 continue ;
1431- // For cold function with local (foo.cold/1) symbol, prefer a parent with
1432- // local symbol as well (foo/1) over global symbol (foo).
1433- std::string ParentName = BaseName.substr (0 , ColdSuffixPos).str ();
1435+ StringRef ParentName = BaseName.substr (0 , ColdSuffixPos);
14341436 const BinaryData *BD = BC->getBinaryDataByName (ParentName);
1435- if (Suffix != " " ) {
1436- ParentName.append (Twine (" /" , Suffix).str ());
1437- const BinaryData *BDLocal = BC->getBinaryDataByName (ParentName);
1438- if (BDLocal || !BD)
1439- BD = BDLocal;
1440- }
1441- if (!BD) {
1442- if (opts::Verbosity >= 1 )
1443- BC->outs () << " BOLT-INFO: parent function not found for " << Name
1444- << " \n " ;
1437+ const uint64_t NumPossibleLocalParents =
1438+ NR.getUniquifiedNameCount (ParentName);
1439+ // The most common case: single local parent fragment.
1440+ if (!BD && NumPossibleLocalParents == 1 ) {
1441+ BD = BC->getBinaryDataByName (NR.getUniqueName (ParentName, 1 ));
1442+ } else if (BD && (!NumPossibleLocalParents || IsGlobal)) {
1443+ // Global parent and either no local candidates (second most common), or
1444+ // the fragment is global as well (uncommon).
1445+ } else {
1446+ // Any other case: need to disambiguate using FILE symbols.
1447+ AmbiguousFragments.emplace_back (ParentName, &Function);
14451448 continue ;
14461449 }
1447- const uint64_t Address = BD->getAddress ();
1448- BinaryFunction *BF = BC->getBinaryFunctionAtAddress (Address);
1449- if (!BF) {
1450- if (opts::Verbosity >= 1 )
1451- BC->outs () << formatv (
1452- " BOLT-INFO: parent function not found at {0:x}\n " , Address);
1453- continue ;
1450+ if (BD) {
1451+ BinaryFunction *BF = BC->getFunctionForSymbol (BD->getSymbol ());
1452+ if (BF) {
1453+ BC->registerFragment (Function, *BF);
1454+ continue ;
1455+ }
14541456 }
1455- BC->registerFragment (Function, *BF);
1456- ++ParentsFound;
1457- }
1458- if (!ParentsFound) {
14591457 BC->errs () << " BOLT-ERROR: parent function not found for " << Function
14601458 << ' \n ' ;
14611459 exit (1 );
14621460 }
14631461 }
1462+
1463+ if (AmbiguousFragments.empty ())
1464+ return ;
1465+
1466+ if (!BC->hasSymbolsWithFileName ()) {
1467+ BC->errs () << " BOLT-ERROR: input file has split functions but does not "
1468+ " have FILE symbols. If the binary was stripped, preserve "
1469+ " FILE symbols with --keep-file-symbols strip option" ;
1470+ exit (1 );
1471+ }
1472+
1473+ // The first global symbol is identified by the symbol table sh_info value.
1474+ // Used as local symbol search stopping point.
1475+ auto *ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
1476+ const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile ();
1477+ auto *SymTab = llvm::find_if (cantFail (Obj.sections ()), [](const auto &Sec) {
1478+ return Sec.sh_type == ELF::SHT_SYMTAB;
1479+ });
1480+ assert (SymTab);
1481+ // Symtab sh_info contains the value one greater than the symbol table index
1482+ // of the last local symbol.
1483+ ELFSymbolRef LocalSymEnd = ELF64LEFile->toSymbolRef (SymTab, SymTab->sh_info );
1484+
1485+ for (auto &[ParentName, BF] : AmbiguousFragments) {
1486+ const uint64_t Address = BF->getAddress ();
1487+
1488+ // Get fragment's own symbol
1489+ const auto SymIt = FileSymRefs.find (Address);
1490+ if (SymIt == FileSymRefs.end ()) {
1491+ BC->errs ()
1492+ << " BOLT-ERROR: symbol lookup failed for function at address 0x"
1493+ << Twine::utohexstr (Address) << ' \n ' ;
1494+ exit (1 );
1495+ }
1496+
1497+ // Find containing FILE symbol
1498+ ELFSymbolRef Symbol = SymIt->second ;
1499+ auto FSI = llvm::upper_bound (FileSymbols, Symbol);
1500+ if (FSI == FileSymbols.begin ()) {
1501+ BC->errs () << " BOLT-ERROR: owning FILE symbol not found for symbol "
1502+ << cantFail (Symbol.getName ()) << ' \n ' ;
1503+ exit (1 );
1504+ }
1505+
1506+ ELFSymbolRef StopSymbol = LocalSymEnd;
1507+ if (FSI != FileSymbols.end ())
1508+ StopSymbol = *FSI;
1509+
1510+ uint64_t ParentAddress{0 };
1511+ // Iterate over local file symbols and check symbol names to match parent.
1512+ for (ELFSymbolRef Symbol (FSI[-1 ]); Symbol < StopSymbol; Symbol.moveNext ()) {
1513+ if (cantFail (Symbol.getName ()) == ParentName) {
1514+ ParentAddress = cantFail (Symbol.getAddress ());
1515+ break ;
1516+ }
1517+ }
1518+
1519+ // No local parent is found, use global parent function.
1520+ if (!ParentAddress)
1521+ if (BinaryData *ParentBD = BC->getBinaryDataByName (ParentName))
1522+ ParentAddress = ParentBD->getAddress ();
1523+
1524+ if (BinaryFunction *ParentBF =
1525+ BC->getBinaryFunctionAtAddress (ParentAddress)) {
1526+ BC->registerFragment (*BF, *ParentBF);
1527+ continue ;
1528+ }
1529+ BC->errs () << " BOLT-ERROR: parent function not found for " << *BF << ' \n ' ;
1530+ exit (1 );
1531+ }
14641532}
14651533
14661534void RewriteInstance::createPLTBinaryFunction (uint64_t TargetAddress,
0 commit comments