From a089a6dc4ff04a10360019185ecaacd0564eb84c Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 18 Jul 2022 12:03:06 +0200
Subject: [PATCH] macho: parse data-in-code when writing LINKEDIT segment

---
 src/link/MachO.zig        | 97 +++++++++++++++++++++++++--------------
 src/link/MachO/Atom.zig   |  5 --
 src/link/MachO/Object.zig | 50 ++------------------
 3 files changed, 67 insertions(+), 85 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 1fb0981dc..165ff0752 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -187,7 +187,6 @@ error_flags: File.ErrorFlags = File.ErrorFlags{},
 
 load_commands_dirty: bool = false,
 sections_order_dirty: bool = false,
-has_dices: bool = false,
 
 /// A helper var to indicate if we are at the start of the incremental updates, or
 /// already somewhere further along the update-and-run chain.
@@ -6139,55 +6138,83 @@ fn writeFunctionStarts(self: *MachO) !void {
     self.load_commands_dirty = true;
 }
 
-fn writeDices(self: *MachO) !void {
-    if (!self.has_dices) return;
+/// Returns `dices[start..end]`, with both bounds located via `MachO.findFirst` using the
+/// predicate `offset >= start_addr` and, from `start` onwards, `offset >= end_addr`.
+/// Presumably this selects `[start_addr, end_addr)`, assuming `dices` is sorted by offset.
+fn filterDataInCode(
+    dices: []const macho.data_in_code_entry,
+    start_addr: u64,
+    end_addr: u64,
+) []const macho.data_in_code_entry {
+    const Predicate = struct {
+        addr: u64,
+        pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool {
+            return dice.offset >= self.addr;
+        }
+    };
+
+    const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr });
+    const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr });
+
+    return dices[start..end];
+}
+
+/// Collects the data-in-code entries of every input object, rebases those covered by text
+/// section atoms (skipping atoms marked `N_DESC_GCED` and linker-synthesized ones) onto
+/// output file offsets, and writes them past the current end of the LINKEDIT segment,
+/// updating the data-in-code load command and the segment's file size to match.
+fn writeDataInCode(self: *MachO) !void {
 
     const tracy = trace(@src());
     defer tracy.end();
 
-    var buf = std.ArrayList(u8).init(self.base.allocator);
-    defer buf.deinit();
-
-    var atom: *Atom = self.atoms.get(.{
-        .seg = self.text_segment_cmd_index orelse return,
-        .sect = self.text_section_index orelse return,
-    }) orelse return;
-
-    while (atom.prev) |prev| {
-        atom = prev;
-    }
+    var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.base.allocator);
+    defer out_dice.deinit();
 
     const text_sect = self.getSection(.{
-        .seg = self.text_segment_cmd_index.?,
-        .sect = self.text_section_index.?,
+        .seg = self.text_segment_cmd_index orelse return,
+        .sect = self.text_section_index orelse return,
     });
 
-    while (true) {
-        if (atom.dices.items.len > 0) {
-            const sym = atom.getSymbol(self);
-            const base_off = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse return error.Overflow;
+    for (self.objects.items) |object| {
+        const dice = object.parseDataInCode() orelse continue;
+        const source_symtab = object.getSourceSymtab();
+        try out_dice.ensureUnusedCapacity(dice.len);
 
-            try buf.ensureUnusedCapacity(atom.dices.items.len * @sizeOf(macho.data_in_code_entry));
-            for (atom.dices.items) |dice| {
-                const rebased_dice = macho.data_in_code_entry{
-                    .offset = base_off + dice.offset,
-                    .length = dice.length,
-                    .kind = dice.kind,
-                };
-                buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice));
+        for (object.managed_atoms.items) |atom| {
+            const sym = atom.getSymbol(self);
+            if (sym.n_desc == N_DESC_GCED) continue;
+            if (atom.sym_index >= source_symtab.len) continue; // synthetic, linker generated
+
+            // Entries are rebased against the text section below, so skip the atom as soon
+            // as either its segment or its section index differs from the text section's.
+            const match = self.getMatchingSectionFromOrdinal(sym.n_sect);
+            if (match.seg != self.text_segment_cmd_index.? or match.sect != self.text_section_index.?) {
+                continue;
+            }
+
+            const source_sym = source_symtab[atom.sym_index];
+            const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow;
+            const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size);
+            const base = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse
+                return error.Overflow;
+
+            for (filtered_dice) |single| {
+                const offset = single.offset - source_addr + base;
+                out_dice.appendAssumeCapacity(.{
+                    .offset = offset,
+                    .length = single.length,
+                    .kind = single.kind,
+                });
             }
         }
-
-        if (atom.next) |next| {
-            atom = next;
-        } else break;
     }
 
     const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment;
     const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].linkedit_data;
 
     const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64));
-    const datasize = buf.items.len;
+    const datasize = out_dice.items.len * @sizeOf(macho.data_in_code_entry);
     dice_cmd.dataoff = @intCast(u32, dataoff);
     dice_cmd.datasize = @intCast(u32, datasize);
     seg.inner.filesize = dice_cmd.dataoff + dice_cmd.datasize - seg.inner.fileoff;
@@ -6197,7 +6224,7 @@ fn writeDices(self: *MachO) !void {
         dice_cmd.dataoff + dice_cmd.datasize,
     });
 
-    try self.base.file.?.pwriteAll(buf.items, dice_cmd.dataoff);
+    try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), dice_cmd.dataoff);
     self.load_commands_dirty = true;
 }
 
@@ -6392,7 +6419,7 @@ fn writeLinkeditSegment(self: *MachO) !void {
 
     try self.writeDyldInfoData();
     try self.writeFunctionStarts();
-    try self.writeDices();
+    try self.writeDataInCode();
     try self.writeSymtab();
     try self.writeStrtab();
 
diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig
index 7aa4e1093..2db680889 100644
--- a/src/link/MachO/Atom.zig
+++ b/src/link/MachO/Atom.zig
@@ -59,9 +59,6 @@ bindings: std.ArrayListUnmanaged(Binding) = .{},
 /// List of lazy bindings (cf bindings above).
 lazy_bindings: std.ArrayListUnmanaged(Binding) = .{},
 
-/// List of data-in-code entries. This is currently specific to x86_64 only.
-dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
-
 /// Points to the previous and next neighbours
 next: ?*Atom,
 prev: ?*Atom,
@@ -147,7 +144,6 @@ pub const empty = Atom{
 };
 
 pub fn deinit(self: *Atom, allocator: Allocator) void {
-    self.dices.deinit(allocator);
     self.lazy_bindings.deinit(allocator);
     self.bindings.deinit(allocator);
     self.rebases.deinit(allocator);
@@ -157,7 +153,6 @@ pub fn deinit(self: *Atom, allocator: Allocator) void {
 }
 
 pub fn clearRetainingCapacity(self: *Atom) void {
-    self.dices.clearRetainingCapacity();
     self.lazy_bindings.clearRetainingCapacity();
     self.bindings.clearRetainingCapacity();
     self.rebases.clearRetainingCapacity();
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index 2901b5408..07237d31a 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -177,7 +177,6 @@ pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void {
     }
 
     try self.parseSymtab(allocator);
-    self.parseDataInCode();
 }
 
 const Context = struct {
@@ -264,25 +263,6 @@ fn filterRelocs(
     return relocs[start..end];
 }
 
-fn filterDice(
-    dices: []const macho.data_in_code_entry,
-    start_addr: u64,
-    end_addr: u64,
-) []const macho.data_in_code_entry {
-    const Predicate = struct {
-        addr: u64,
-
-        pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool {
-            return dice.offset >= self.addr;
-        }
-    };
-
-    const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr });
-    const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr });
-
-    return dices[start..end];
-}
-
 /// Splits object into atoms assuming one-shot linking mode.
 pub fn splitIntoAtomsOneShot(
     self: *Object,
@@ -378,15 +358,6 @@ pub fn splitIntoAtomsOneShot(
             context,
         );
 
-        macho_file.has_dices = macho_file.has_dices or blk: {
-            if (self.text_section_index) |index| {
-                if (index != id) break :blk false;
-                if (self.data_in_code_entries.len == 0) break :blk false;
-                break :blk true;
-            }
-            break :blk false;
-        };
-
         if (subsections_via_symbols and filtered_syms.len > 0) {
             // If the first nlist does not match the start of the section,
             // then we need to encapsulate the memory range [section start, first symbol)
@@ -574,19 +545,6 @@ fn createAtomFromSubsection(
         .base_offset = @intCast(i32, base_offset),
     });
 
-    if (macho_file.has_dices) {
-        const dices = filterDice(self.data_in_code_entries, sym.n_value, sym.n_value + size);
-        try atom.dices.ensureTotalCapacity(gpa, dices.len);
-
-        for (dices) |dice| {
-            atom.dices.appendAssumeCapacity(.{
-                .offset = dice.offset - (math.cast(u32, sym.n_value) orelse return error.Overflow),
-                .length = dice.length,
-                .kind = dice.kind,
-            });
-        }
-    }
-
     // Since this is atom gets a helper local temporary symbol that didn't exist
     // in the object file which encompasses the entire section, we need traverse
     // the filtered symbols and note which symbol is contained within so that
@@ -651,11 +609,13 @@ pub fn getSourceSymtab(self: Object) []const macho.nlist_64 {
     );
 }
 
-fn parseDataInCode(self: *Object) void {
-    const index = self.data_in_code_cmd_index orelse return;
+/// Returns the object's data-in-code entries as a view into `contents`, or null when
+/// the object has no data-in-code load command.
+pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry {
+    const index = self.data_in_code_cmd_index orelse return null;
     const data_in_code = self.load_commands.items[index].linkedit_data;
     const raw_dice = self.contents[data_in_code.dataoff..][0..data_in_code.datasize];
-    self.data_in_code_entries = mem.bytesAsSlice(
+    return mem.bytesAsSlice(
         macho.data_in_code_entry,
         @alignCast(@alignOf(macho.data_in_code_entry), raw_dice),
     );