macho: sort subsection symbols by seniority

This commit is contained in:
Jakub Konka 2022-07-21 09:39:12 +02:00
parent 39df241df4
commit 7345976261
2 changed files with 133 additions and 90 deletions

View File

@ -2940,12 +2940,6 @@ fn createTentativeDefAtoms(self: *MachO) !void {
if (global.file) |file| {
const object = &self.objects.items[file];
try atom.contained.append(gpa, .{
.sym_index = global.sym_index,
.offset = 0,
});
try object.managed_atoms.append(gpa, atom);
try object.atom_by_index_table.putNoClobber(gpa, global.sym_index, atom);
} else {
@ -5594,6 +5588,7 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void {
const global = atom.getSymbolWithLoc();
const sym = atom.getSymbolPtr(self);
const match = self.getMatchingSectionFromOrdinal(sym.n_sect);
if (sym.n_desc == N_DESC_GCED) continue;
if (!sym.ext()) {
@ -5605,15 +5600,6 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void {
} else continue;
}
loop = true;
const match = self.getMatchingSectionFromOrdinal(sym.n_sect);
// TODO don't dedup eh_frame info yet until we actually implement parsing unwind records
if (match.eql(.{
.seg = self.text_segment_cmd_index,
.sect = self.eh_frame_section_index,
})) continue;
self.logAtom(atom);
sym.n_desc = N_DESC_GCED;
self.removeAtomFromSection(atom, match);
@ -5644,6 +5630,8 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void {
const tlv_ptr_sym = tlv_ptr_atom.getSymbolPtr(self);
tlv_ptr_sym.n_desc = N_DESC_GCED;
}
loop = true;
}
}
}
@ -6831,7 +6819,6 @@ pub fn generateSymbolStabs(
};
const tu_name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.name);
const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir);
const source_symtab = object.getSourceSymtab();
// Open scope
try locals.ensureUnusedCapacity(3);
@ -6857,71 +6844,27 @@ pub fn generateSymbolStabs(
.n_value = object.mtime,
});
var stabs_buf: [4]macho.nlist_64 = undefined;
for (object.managed_atoms.items) |atom| {
const stabs = try self.generateSymbolStabsForSymbol(
atom.getSymbolWithLoc(),
debug_info,
&stabs_buf,
);
try locals.appendSlice(stabs);
for (atom.contained.items) |sym_at_off| {
const sym_loc = SymbolWithLoc{
.sym_index = sym_at_off.sym_index,
.file = atom.file,
};
const sym = self.getSymbol(sym_loc);
const sym_name = self.getSymbolName(sym_loc);
if (sym.n_strx == 0) continue;
if (sym.n_desc == N_DESC_GCED) continue;
if (self.symbolIsTemp(sym_loc)) continue;
if (sym_at_off.sym_index >= source_symtab.len) continue; // synthetic, linker generated
const source_sym = source_symtab[sym_at_off.sym_index];
const size: ?u64 = size: {
if (source_sym.tentative()) break :size null;
for (debug_info.inner.func_list.items) |func| {
if (func.pc_range) |range| {
if (source_sym.n_value >= range.start and source_sym.n_value < range.end) {
break :size range.end - range.start;
}
}
}
break :size null;
};
if (size) |ss| {
try locals.ensureUnusedCapacity(4);
locals.appendAssumeCapacity(.{
.n_strx = 0,
.n_type = macho.N_BNSYM,
.n_sect = sym.n_sect,
.n_desc = 0,
.n_value = sym.n_value,
});
locals.appendAssumeCapacity(.{
.n_strx = try self.strtab.insert(gpa, sym_name),
.n_type = macho.N_FUN,
.n_sect = sym.n_sect,
.n_desc = 0,
.n_value = sym.n_value,
});
locals.appendAssumeCapacity(.{
.n_strx = 0,
.n_type = macho.N_FUN,
.n_sect = 0,
.n_desc = 0,
.n_value = ss,
});
locals.appendAssumeCapacity(.{
.n_strx = 0,
.n_type = macho.N_ENSYM,
.n_sect = sym.n_sect,
.n_desc = 0,
.n_value = ss,
});
} else {
try locals.append(.{
.n_strx = try self.strtab.insert(gpa, sym_name),
.n_type = macho.N_STSYM,
.n_sect = sym.n_sect,
.n_desc = 0,
.n_value = sym.n_value,
});
}
const contained_stabs = try self.generateSymbolStabsForSymbol(
sym_loc,
debug_info,
&stabs_buf,
);
try locals.appendSlice(contained_stabs);
}
}
@ -6935,6 +6878,78 @@ pub fn generateSymbolStabs(
});
}
fn generateSymbolStabsForSymbol(
self: *MachO,
sym_loc: SymbolWithLoc,
debug_info: DebugInfo,
buf: *[4]macho.nlist_64,
) ![]const macho.nlist_64 {
const gpa = self.base.allocator;
const object = self.objects.items[sym_loc.file.?];
const source_symtab = object.getSourceSymtab();
const sym = self.getSymbol(sym_loc);
const sym_name = self.getSymbolName(sym_loc);
if (sym.n_strx == 0) return buf[0..0];
if (sym.n_desc == N_DESC_GCED) return buf[0..0];
if (self.symbolIsTemp(sym_loc)) return buf[0..0];
if (sym_loc.sym_index >= source_symtab.len) return buf[0..0]; // synthetic, linker generated
const source_sym = source_symtab[sym_loc.sym_index];
const size: ?u64 = size: {
if (source_sym.tentative()) break :size null;
for (debug_info.inner.func_list.items) |func| {
if (func.pc_range) |range| {
if (source_sym.n_value >= range.start and source_sym.n_value < range.end) {
break :size range.end - range.start;
}
}
}
break :size null;
};
if (size) |ss| {
buf[0] = .{
.n_strx = 0,
.n_type = macho.N_BNSYM,
.n_sect = sym.n_sect,
.n_desc = 0,
.n_value = sym.n_value,
};
buf[1] = .{
.n_strx = try self.strtab.insert(gpa, sym_name),
.n_type = macho.N_FUN,
.n_sect = sym.n_sect,
.n_desc = 0,
.n_value = sym.n_value,
};
buf[2] = .{
.n_strx = 0,
.n_type = macho.N_FUN,
.n_sect = 0,
.n_desc = 0,
.n_value = ss,
};
buf[3] = .{
.n_strx = 0,
.n_type = macho.N_ENSYM,
.n_sect = sym.n_sect,
.n_desc = 0,
.n_value = ss,
};
return buf;
} else {
buf[0] = .{
.n_strx = try self.strtab.insert(gpa, sym_name),
.n_type = macho.N_STSYM,
.n_sect = sym.n_sect,
.n_desc = 0,
.n_value = sym.n_value,
};
return buf[0..1];
}
}
fn snapshotState(self: *MachO) !void {
const emit = self.base.options.emit orelse {
log.debug("no emit directory found; skipping snapshot...", .{});

View File

@ -197,11 +197,9 @@ const SymbolAtIndex = struct {
return mem.sliceTo(@ptrCast([*:0]const u8, ctx.strtab.ptr + sym.n_strx), 0);
}
/// Returns whether lhs is less than rhs by allocated address in object file.
/// Undefined symbols are pushed to the back (always evaluate to true).
fn lessThan(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool {
// We sort by type: defined < undefined, and
// afterwards by address in each group. Normally, dysymtab should
// be enough to guarantee the sort, but turns out not every compiler
// is kind enough to specify the symbols in the correct order.
const lhs = lhs_index.getSymbol(ctx);
const rhs = rhs_index.getSymbol(ctx);
if (lhs.sect()) {
@ -215,6 +213,29 @@ const SymbolAtIndex = struct {
return false;
}
}
/// Returns whether lhs is less senior than rhs. The rules are:
/// 1. ext
/// 2. weak
/// 3. local
/// 4. temp (local starting with `l` prefix).
fn lessThanBySeniority(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool {
const lhs = lhs_index.getSymbol(ctx);
const rhs = rhs_index.getSymbol(ctx);
if (!rhs.ext()) {
const lhs_name = lhs_index.getSymbolName(ctx);
return mem.startsWith(u8, lhs_name, "l") or mem.startsWith(u8, lhs_name, "L");
} else if (rhs.pext() or rhs.weakDef()) {
return !lhs.ext();
} else {
return false;
}
}
/// Like lessThanBySeniority but negated.
fn greaterThanBySeniority(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool {
return !lessThanBySeniority(ctx, lhs_index, rhs_index);
}
};
fn filterSymbolsByAddress(
@ -295,6 +316,10 @@ pub fn splitIntoAtomsOneShot(
sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) });
}
// We sort by type: defined < undefined, and
// afterwards by address in each group. Normally, dysymtab should
// be enough to guarantee the sort, but turns out not every compiler
// is kind enough to specify the symbols in the correct order.
sort.sort(SymbolAtIndex, sorted_all_syms.items, context, SymbolAtIndex.lessThan);
// Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we
@ -409,10 +434,18 @@ pub fn splitIntoAtomsOneShot(
);
next_sym_count += atom_syms.len;
// We want to bubble up the first externally defined symbol here.
assert(atom_syms.len > 0);
const sym_index = for (atom_syms) |atom_sym| {
if (atom_sym.getSymbol(context).ext()) break atom_sym.index;
} else atom_syms[0].index;
var sorted_atom_syms = std.ArrayList(SymbolAtIndex).init(gpa);
defer sorted_atom_syms.deinit();
try sorted_atom_syms.appendSlice(atom_syms);
sort.sort(
SymbolAtIndex,
sorted_atom_syms.items,
context,
SymbolAtIndex.greaterThanBySeniority,
);
const atom_size = blk: {
const end_addr = if (next_sym_count < filtered_syms.len)
filtered_syms[next_sym_count].getSymbol(context).n_value
@ -432,12 +465,12 @@ pub fn splitIntoAtomsOneShot(
const atom = try self.createAtomFromSubsection(
macho_file,
object_id,
sym_index,
sorted_atom_syms.items[0].index,
atom_size,
atom_align,
atom_code,
relocs,
atom_syms[1..],
sorted_atom_syms.items[1..],
match,
sect,
gc_roots,
@ -552,12 +585,7 @@ fn createAtomFromSubsection(
// the filtered symbols and note which symbol is contained within so that
// we can properly allocate addresses down the line.
// While we're at it, we need to update segment,section mapping of each symbol too.
try atom.contained.ensureTotalCapacity(gpa, indexes.len + 1);
atom.contained.appendAssumeCapacity(.{
.sym_index = sym_index,
.offset = 0,
});
try atom.contained.ensureTotalCapacity(gpa, indexes.len);
for (indexes) |inner_sym_index| {
const inner_sym = &self.symtab.items[inner_sym_index.index];
inner_sym.n_sect = macho_file.getSectionOrdinal(match);