Skip to content

Commit

Permalink
[llvm-gsymutil] Add option to load callsites from DWARF (#119913)
Browse files Browse the repository at this point in the history
This change adds support for loading GSYM callsite information from
DWARF. Previously, callsite information could only be loaded from
YAML.

For testing, we add a pass where `macho-gsym-merged-callsites-dsym`
loads callsite info from DWARF rather than YAML.
  • Loading branch information
alx32 authored Dec 17, 2024
1 parent 8bb1bdf commit 558de0e
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 3 deletions.
15 changes: 13 additions & 2 deletions llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,17 @@ class OutputAggregator;
/// allows this class to be unit tested.
class DwarfTransformer {
public:

/// Create a DWARF transformer.
///
/// \param D The DWARF to use when converting to GSYM.
///
/// \param G The GSYM creator to populate with the function information
/// from the debug info.
DwarfTransformer(DWARFContext &D, GsymCreator &G) : DICtx(D), Gsym(G) {}
///
/// \param LDCS Flag to indicate whether we should load the call site
/// information from DWARF `DW_TAG_call_site` entries
DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false)
: DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {}

/// Extract the DWARF from the supplied object file and convert it into the
/// Gsym format in the GsymCreator object that is passed in. Returns an
Expand Down Expand Up @@ -83,8 +86,16 @@ class DwarfTransformer {
/// \param Die The DWARF debug info entry to parse.
void handleDie(OutputAggregator &Strm, CUInfo &CUI, DWARFDie Die);

/// Parse call site information from the `DW_TAG_call_site` children of
/// \p Die and append it to \p FI.
///
/// \param CUI The compile unit info for the current CU.
/// \param Die The DWARFDie for the function.
/// \param FI The FunctionInfo for the function being populated.
void parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, FunctionInfo &FI);

DWARFContext &DICtx;
GsymCreator &Gsym;
bool LoadDwarfCallSites;

friend class DwarfTransformerTest;
};
Expand Down
62 changes: 62 additions & 0 deletions llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,11 @@ void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
FI.Inline = std::nullopt;
}
}

// If dwarf-callsites flag is set, parse DW_TAG_call_site DIEs.
if (LoadDwarfCallSites)
parseCallSiteInfoFromDwarf(CUI, Die, FI);

Gsym.addFunctionInfo(std::move(FI));
}
} break;
Expand All @@ -553,6 +558,63 @@ void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
handleDie(Out, CUI, ChildDie);
}

void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
                                                  FunctionInfo &FI) {
  // Build one CallSiteInfo for each DW_TAG_call_site DIE that is a direct
  // child of this subprogram DIE and append it to FI.CallSites:
  // - DW_AT_call_return_pc supplies the return address, which is stored as an
  //   offset from the start of the function.
  // - DW_AT_call_origin, when it references a DIE, supplies the callee name,
  //   which is stored as the single match regex of the call site.
  // Call sites already present in FI are kept; new ones are appended after
  // them. CUI is not consulted here.
  for (DWARFDie Child : Die.children()) {
    if (Child.getTag() != dwarf::DW_TAG_call_site)
      continue;

    // Skip call sites that have no usable return PC or whose return PC does
    // not fall inside this function's range.
    // NOTE(review): if FI.Range is half-open, a call that is the very last
    // instruction of the function has a return PC equal to the range end and
    // is dropped here -- confirm that this is intended.
    auto ReturnPC =
        dwarf::toAddress(Child.findRecursively(dwarf::DW_AT_call_return_pc));
    if (!ReturnPC || !FI.Range.contains(*ReturnPC))
      continue;

    CallSiteInfo CSI;
    // Store the return PC relative to the function's start address.
    CSI.ReturnOffset = *ReturnPC - FI.startAddress();

    // Attempt to get the callee name from DW_AT_call_origin. If present, it
    // is inserted into the string table and used as a match regex.
    if (DWARFDie OriginDie =
            Child.getAttributeValueAsReferencedDie(dwarf::DW_AT_call_origin)) {
      // Prefer the callee's linkage name; fall back to its short name when
      // there is none. If neither is available, MatchRegex stays empty.
      if (const char *LinkName = OriginDie.getLinkageName()) {
        uint32_t LinkNameOff = Gsym.insertString(LinkName, /*Copy=*/false);
        CSI.MatchRegex.push_back(LinkNameOff);
      } else if (const char *ShortName = OriginDie.getShortName()) {
        uint32_t ShortNameOff = Gsym.insertString(ShortName, /*Copy=*/false);
        CSI.MatchRegex.push_back(ShortNameOff);
      }
    }

    // For now, we won't attempt to deduce InternalCall/ExternalCall flags
    // from DWARF.
    CSI.Flags = CallSiteInfo::Flags::None;

    // Create the collection lazily so that a function without any usable call
    // site leaves FI.CallSites unset, then move the record in instead of
    // copying it (and its MatchRegex vector) through a temporary collection.
    if (!FI.CallSites)
      FI.CallSites = CallSiteInfoCollection();
    FI.CallSites->CallSites.push_back(std::move(CSI));
  }
}

Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
size_t NumBefore = Gsym.getNumFunctionInfos();
auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
# RUN: yaml2obj %t/merged_callsites.dSYM.yaml -o %t/merged_callsites.dSYM

# RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym
# RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --dwarf-callsites -o %t/dwarf_call_sites_dSYM.gsym

# Dump the GSYM file and check the output for callsite information
# RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s
# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s

# CHECK-MERGED-CALLSITES: FunctionInfo @ 0x[[#%x,FUNC4_1:]]: [0x[[#%x,FUNC4_1_START:]] - 0x[[#%x,FUNC4_1_END:]]) "function4_copy1"
# CHECK-MERGED-CALLSITES: ++ Merged FunctionInfos[0]:
Expand Down
1 change: 1 addition & 0 deletions llvm/tools/llvm-gsymutil/Opts.td
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ defm convert :
"Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">;
def merged_functions :
FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">;
def dwarf_callsites : FF<"dwarf-callsites", "Load call site info from DWARF, if available">;
defm callsites_yaml_file :
Eq<"callsites-yaml-file", "Load call site info from YAML file. Useful for testing.">, Flags<[HelpHidden]>;
defm arch :
Expand Down
5 changes: 4 additions & 1 deletion llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ static bool Quiet;
static std::vector<uint64_t> LookupAddresses;
static bool LookupAddressesFromStdin;
static bool StoreMergedFunctionInfo = false;
static bool LoadDwarfCallSites = false;
static std::string CallSiteYamlPath;

static void parseArgs(int argc, char **argv) {
Expand Down Expand Up @@ -191,6 +192,8 @@ static void parseArgs(int argc, char **argv) {
std::exit(1);
}
}

LoadDwarfCallSites = Args.hasArg(OPT_dwarf_callsites);
}

/// @}
Expand Down Expand Up @@ -365,7 +368,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,

// Make a DWARF transformer object and populate the ranges of the code
// so we don't end up adding invalid functions to GSYM data.
DwarfTransformer DT(*DICtx, Gsym);
DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites);
if (!TextRanges.empty())
Gsym.SetValidTextRanges(TextRanges);

Expand Down

0 comments on commit 558de0e

Please sign in to comment.