macho: parse data-in-code when writing LINKEDIT segment

This commit is contained in:
Jakub Konka 2022-07-18 12:03:06 +02:00
parent 2c184f9a5f
commit a089a6dc4f
3 changed files with 56 additions and 85 deletions

View File

@ -187,7 +187,6 @@ error_flags: File.ErrorFlags = File.ErrorFlags{},
load_commands_dirty: bool = false,
sections_order_dirty: bool = false,
has_dices: bool = false,
/// A helper var to indicate if we are at the start of the incremental updates, or
/// already somewhere further along the update-and-run chain.
@ -6139,55 +6138,74 @@ fn writeFunctionStarts(self: *MachO) !void {
self.load_commands_dirty = true;
}
fn writeDices(self: *MachO) !void {
if (!self.has_dices) return;
/// Returns the contiguous sub-slice of `dices` delimited by two `MachO.findFirst`
/// searches: the first search starts at index 0 and looks for an entry whose
/// `offset` is `>= start_addr`; the second resumes from that index and looks for
/// an entry whose `offset` is `>= end_addr`. The returned slice aliases `dices`;
/// nothing is copied.
/// NOTE(review): this presumably relies on `dices` being sorted by ascending
/// `offset` and on `findFirst` returning `dices.len` when nothing matches —
/// confirm against `MachO.findFirst` and the callers.
fn filterDataInCode(
    dices: []const macho.data_in_code_entry,
    start_addr: u64,
    end_addr: u64,
) []const macho.data_in_code_entry {
    const AtOrPast = struct {
        addr: u64,

        /// True when `entry` is located at or beyond the bound address.
        pub fn predicate(bound: @This(), entry: macho.data_in_code_entry) bool {
            return bound.addr <= entry.offset;
        }
    };

    const lower_bound = AtOrPast{ .addr = start_addr };
    const upper_bound = AtOrPast{ .addr = end_addr };

    const first = MachO.findFirst(macho.data_in_code_entry, dices, 0, lower_bound);
    // Resume from `first` so that the resulting range can never be inverted.
    const last = MachO.findFirst(macho.data_in_code_entry, dices, first, upper_bound);

    return dices[first..last];
}
fn writeDataInCode(self: *MachO) !void {
const tracy = trace(@src());
defer tracy.end();
var buf = std.ArrayList(u8).init(self.base.allocator);
defer buf.deinit();
var atom: *Atom = self.atoms.get(.{
.seg = self.text_segment_cmd_index orelse return,
.sect = self.text_section_index orelse return,
}) orelse return;
while (atom.prev) |prev| {
atom = prev;
}
var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.base.allocator);
defer out_dice.deinit();
const text_sect = self.getSection(.{
.seg = self.text_segment_cmd_index.?,
.sect = self.text_section_index.?,
.seg = self.text_segment_cmd_index orelse return,
.sect = self.text_section_index orelse return,
});
while (true) {
if (atom.dices.items.len > 0) {
const sym = atom.getSymbol(self);
const base_off = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse return error.Overflow;
for (self.objects.items) |object| {
const dice = object.parseDataInCode() orelse continue;
const source_symtab = object.getSourceSymtab();
try out_dice.ensureUnusedCapacity(dice.len);
try buf.ensureUnusedCapacity(atom.dices.items.len * @sizeOf(macho.data_in_code_entry));
for (atom.dices.items) |dice| {
const rebased_dice = macho.data_in_code_entry{
.offset = base_off + dice.offset,
.length = dice.length,
.kind = dice.kind,
};
buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice));
// Walk every atom created from this object and re-emit the data-in-code entries
// that fall inside the atom's source address range, rebased onto the atom's
// position in the output file.
for (object.managed_atoms.items) |atom| {
    const sym = atom.getSymbol(self);
    if (sym.n_desc == N_DESC_GCED) continue;
    if (atom.sym_index >= source_symtab.len) continue; // synthetic, linker generated

    // Only atoms placed in the text section are of interest. An atom is outside of
    // it as soon as EITHER the segment OR the section index differs, hence `or`:
    // with `and`, atoms living in a different section of the text segment (or in
    // another segment whose section index happens to coincide) would slip through
    // and be rebased against `text_sect`.
    const match = self.getMatchingSectionFromOrdinal(sym.n_sect);
    if (match.seg != self.text_segment_cmd_index.? or match.sect != self.text_section_index.?) {
        continue;
    }

    // Address of the atom as recorded in the object file's own symbol table.
    const source_sym = source_symtab[atom.sym_index];
    const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow;
    const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size);

    // Atom address expressed relative to the text section's address, shifted by the
    // text section's `offset`.
    const base = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse
        return error.Overflow;

    for (filtered_dice) |single| {
        // Convert the entry from "relative to the source atom" to "relative to `base`".
        const offset = single.offset - source_addr + base;
        out_dice.appendAssumeCapacity(.{
            .offset = offset,
            .length = single.length,
            .kind = single.kind,
        });
    }
}
if (atom.next) |next| {
atom = next;
} else break;
}
const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment;
const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].linkedit_data;
const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64));
const datasize = buf.items.len;
const datasize = out_dice.items.len * @sizeOf(macho.data_in_code_entry);
dice_cmd.dataoff = @intCast(u32, dataoff);
dice_cmd.datasize = @intCast(u32, datasize);
seg.inner.filesize = dice_cmd.dataoff + dice_cmd.datasize - seg.inner.fileoff;
@ -6197,7 +6215,7 @@ fn writeDices(self: *MachO) !void {
dice_cmd.dataoff + dice_cmd.datasize,
});
try self.base.file.?.pwriteAll(buf.items, dice_cmd.dataoff);
try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), dice_cmd.dataoff);
self.load_commands_dirty = true;
}
@ -6392,7 +6410,7 @@ fn writeLinkeditSegment(self: *MachO) !void {
try self.writeDyldInfoData();
try self.writeFunctionStarts();
try self.writeDices();
try self.writeDataInCode();
try self.writeSymtab();
try self.writeStrtab();

View File

@ -59,9 +59,6 @@ bindings: std.ArrayListUnmanaged(Binding) = .{},
/// List of lazy bindings (cf bindings above).
lazy_bindings: std.ArrayListUnmanaged(Binding) = .{},
/// List of data-in-code entries. This is currently specific to x86_64 only.
dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
/// Points to the previous and next neighbours
next: ?*Atom,
prev: ?*Atom,
@ -147,7 +144,6 @@ pub const empty = Atom{
};
pub fn deinit(self: *Atom, allocator: Allocator) void {
self.dices.deinit(allocator);
self.lazy_bindings.deinit(allocator);
self.bindings.deinit(allocator);
self.rebases.deinit(allocator);
@ -157,7 +153,6 @@ pub fn deinit(self: *Atom, allocator: Allocator) void {
}
pub fn clearRetainingCapacity(self: *Atom) void {
self.dices.clearRetainingCapacity();
self.lazy_bindings.clearRetainingCapacity();
self.bindings.clearRetainingCapacity();
self.rebases.clearRetainingCapacity();

View File

@ -177,7 +177,6 @@ pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void {
}
try self.parseSymtab(allocator);
self.parseDataInCode();
}
const Context = struct {
@ -264,25 +263,6 @@ fn filterRelocs(
return relocs[start..end];
}
/// Selects a window of `dices` using two `MachO.findFirst` lookups. The window
/// opens at the index found when searching from 0 for an entry with
/// `offset >= start_addr`, and closes at the index found when searching from
/// the opening index for an entry with `offset >= end_addr`. The result is a
/// view into `dices`, not a copy.
/// NOTE(review): presumably `dices` is expected to be ordered by `offset` —
/// verify against the producer of the data-in-code table.
fn filterDice(
    dices: []const macho.data_in_code_entry,
    start_addr: u64,
    end_addr: u64,
) []const macho.data_in_code_entry {
    const ReachedAddr = struct {
        addr: u64,

        pub fn predicate(ctx: @This(), candidate: macho.data_in_code_entry) bool {
            if (candidate.offset >= ctx.addr) return true;
            return false;
        }
    };

    const window_begin = MachO.findFirst(
        macho.data_in_code_entry,
        dices,
        0,
        ReachedAddr{ .addr = start_addr },
    );
    const window_end = MachO.findFirst(
        macho.data_in_code_entry,
        dices,
        window_begin,
        ReachedAddr{ .addr = end_addr },
    );
    return dices[window_begin..window_end];
}
/// Splits object into atoms assuming one-shot linking mode.
pub fn splitIntoAtomsOneShot(
self: *Object,
@ -378,15 +358,6 @@ pub fn splitIntoAtomsOneShot(
context,
);
macho_file.has_dices = macho_file.has_dices or blk: {
if (self.text_section_index) |index| {
if (index != id) break :blk false;
if (self.data_in_code_entries.len == 0) break :blk false;
break :blk true;
}
break :blk false;
};
if (subsections_via_symbols and filtered_syms.len > 0) {
// If the first nlist does not match the start of the section,
// then we need to encapsulate the memory range [section start, first symbol)
@ -574,19 +545,6 @@ fn createAtomFromSubsection(
.base_offset = @intCast(i32, base_offset),
});
if (macho_file.has_dices) {
const dices = filterDice(self.data_in_code_entries, sym.n_value, sym.n_value + size);
try atom.dices.ensureTotalCapacity(gpa, dices.len);
for (dices) |dice| {
atom.dices.appendAssumeCapacity(.{
.offset = dice.offset - (math.cast(u32, sym.n_value) orelse return error.Overflow),
.length = dice.length,
.kind = dice.kind,
});
}
}
// Since this atom gets a helper local temporary symbol that didn't exist
// in the object file which encompasses the entire section, we need to traverse
// the filtered symbols and note which symbol is contained within so that
@ -651,11 +609,11 @@ pub fn getSourceSymtab(self: Object) []const macho.nlist_64 {
);
}
fn parseDataInCode(self: *Object) void {
const index = self.data_in_code_cmd_index orelse return;
pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry {
const index = self.data_in_code_cmd_index orelse return null;
const data_in_code = self.load_commands.items[index].linkedit_data;
const raw_dice = self.contents[data_in_code.dataoff..][0..data_in_code.datasize];
self.data_in_code_entries = mem.bytesAsSlice(
return mem.bytesAsSlice(
macho.data_in_code_entry,
@alignCast(@alignOf(macho.data_in_code_entry), raw_dice),
);