macho: refactor undef symbol handling

Now, we don't erroneously write to the string table on every
write of global and undef symbols.
This commit is contained in:
Jakub Konka 2021-01-17 17:20:55 +01:00
parent 3562edf137
commit 8118336585
3 changed files with 80 additions and 108 deletions

View File

@ -1920,21 +1920,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
} else if (func_value.castTag(.extern_fn)) |func_payload| {
const decl = func_payload.data;
// We don't free the decl_name immediately unless it already exists.
// If it doesn't, it will get autofreed when we clean up the extern symbol table.
const decl_name = try std.fmt.allocPrint(self.bin_file.allocator, "_{s}", .{decl.name});
defer self.bin_file.allocator.free(decl_name);
const already_defined = macho_file.extern_lazy_symbols.contains(decl_name);
const symbol: u32 = if (macho_file.extern_lazy_symbols.getIndex(decl_name)) |index| blk: {
self.bin_file.allocator.free(decl_name);
break :blk @intCast(u32, index);
} else blk: {
const index = @intCast(u32, macho_file.extern_lazy_symbols.items().len);
try macho_file.extern_lazy_symbols.putNoClobber(self.bin_file.allocator, decl_name, .{
.name = decl_name,
.dylib_ordinal = 1, // TODO this is now hardcoded, since we only support libSystem.
});
break :blk index;
};
const symbol: u32 = if (macho_file.extern_lazy_symbols.getIndex(decl_name)) |index|
@intCast(u32, index)
else
try macho_file.addExternSymbol(decl_name);
const start = self.code.items.len;
const len: usize = blk: {
switch (arch) {

View File

@ -1011,11 +1011,11 @@ pub fn deinit(self: *MachO) void {
ds.deinit(self.base.allocator);
}
for (self.extern_lazy_symbols.items()) |*entry| {
entry.value.deinit(self.base.allocator);
self.base.allocator.free(entry.key);
}
self.extern_lazy_symbols.deinit(self.base.allocator);
for (self.extern_nonlazy_symbols.items()) |*entry| {
entry.value.deinit(self.base.allocator);
self.base.allocator.free(entry.key);
}
self.extern_nonlazy_symbols.deinit(self.base.allocator);
self.pie_fixups.deinit(self.base.allocator);
@ -2042,9 +2042,16 @@ pub fn populateMissingMetadata(self: *MachO) !void {
}
if (!self.extern_nonlazy_symbols.contains("dyld_stub_binder")) {
const index = @intCast(u32, self.extern_nonlazy_symbols.items().len);
const name = try std.fmt.allocPrint(self.base.allocator, "dyld_stub_binder", .{});
const name = try self.base.allocator.dupe(u8, "dyld_stub_binder");
const offset = try self.makeString("dyld_stub_binder");
try self.extern_nonlazy_symbols.putNoClobber(self.base.allocator, name, .{
.name = name,
.inner = .{
.n_strx = offset,
.n_type = std.macho.N_UNDF | std.macho.N_EXT,
.n_sect = 0,
.n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER,
.n_value = 0,
},
.dylib_ordinal = 1, // TODO this is currently hardcoded.
.segment = self.data_const_segment_cmd_index.?,
.offset = index * @sizeOf(u64),
@ -2222,15 +2229,15 @@ pub fn makeStaticString(comptime bytes: []const u8) [16]u8 {
return buf;
}
pub fn makeString(self: *MachO, bytes: []const u8) !u32 {
fn makeString(self: *MachO, bytes: []const u8) !u32 {
try self.string_table.ensureCapacity(self.base.allocator, self.string_table.items.len + bytes.len + 1);
const result = @intCast(u32, self.string_table.items.len);
const offset = @intCast(u32, self.string_table.items.len);
self.string_table.appendSliceAssumeCapacity(bytes);
self.string_table.appendAssumeCapacity(0);
self.string_table_dirty = true;
if (self.d_sym) |*ds|
ds.string_table_dirty = true;
return result;
return offset;
}
fn getString(self: *MachO, str_off: u32) []const u8 {
@ -2246,6 +2253,23 @@ fn updateString(self: *MachO, old_str_off: u32, new_name: []const u8) !u32 {
return self.makeString(new_name);
}
pub fn addExternSymbol(self: *MachO, name: []const u8) !u32 {
const index = @intCast(u32, self.extern_lazy_symbols.items().len);
const offset = try self.makeString(name);
const sym_name = try self.base.allocator.dupe(u8, name);
try self.extern_lazy_symbols.putNoClobber(self.base.allocator, sym_name, .{
.inner = .{
.n_strx = offset,
.n_type = macho.N_UNDF | macho.N_EXT,
.n_sect = 0,
.n_desc = macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | macho.N_SYMBOL_RESOLVER,
.n_value = 0,
},
.dylib_ordinal = 1, // TODO this is now hardcoded, since we only support libSystem.
});
return index;
}
const NextSegmentAddressAndOffset = struct {
address: u64,
offset: u64,
@ -2585,24 +2609,10 @@ fn writeAllGlobalAndUndefSymbols(self: *MachO) !void {
defer undefs.deinit();
try undefs.ensureCapacity(nundefs);
for (self.extern_lazy_symbols.items()) |entry| {
const name = try self.makeString(entry.key);
undefs.appendAssumeCapacity(.{
.n_strx = name,
.n_type = std.macho.N_UNDF | std.macho.N_EXT,
.n_sect = 0,
.n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER,
.n_value = 0,
});
undefs.appendAssumeCapacity(entry.value.inner);
}
for (self.extern_nonlazy_symbols.items()) |entry| {
const name = try self.makeString(entry.key);
undefs.appendAssumeCapacity(.{
.n_strx = name,
.n_type = std.macho.N_UNDF | std.macho.N_EXT,
.n_sect = 0,
.n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER,
.n_value = 0,
});
undefs.appendAssumeCapacity(entry.value.inner);
}
const locals_off = symtab.symoff;
@ -2781,19 +2791,12 @@ fn writeRebaseInfoTable(self: *MachO) !void {
const tracy = trace(@src());
defer tracy.end();
var symbols = try self.base.allocator.alloc(*const ExternSymbol, self.extern_lazy_symbols.items().len);
defer self.base.allocator.free(symbols);
for (self.extern_lazy_symbols.items()) |*entry, i| {
symbols[i] = &entry.value;
}
const size = try rebaseInfoSize(symbols);
const size = try rebaseInfoSize(self.extern_lazy_symbols.items());
var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
defer self.base.allocator.free(buffer);
var stream = std.io.fixedBufferStream(buffer);
try writeRebaseInfo(symbols, stream.writer());
try writeRebaseInfo(self.extern_lazy_symbols.items(), stream.writer());
const linkedit_segment = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
@ -2820,19 +2823,12 @@ fn writeBindingInfoTable(self: *MachO) !void {
const tracy = trace(@src());
defer tracy.end();
var symbols = try self.base.allocator.alloc(*const ExternSymbol, self.extern_nonlazy_symbols.items().len);
defer self.base.allocator.free(symbols);
for (self.extern_nonlazy_symbols.items()) |*entry, i| {
symbols[i] = &entry.value;
}
const size = try bindInfoSize(symbols);
const size = try bindInfoSize(self.extern_nonlazy_symbols.items());
var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
defer self.base.allocator.free(buffer);
var stream = std.io.fixedBufferStream(buffer);
try writeBindInfo(symbols, stream.writer());
try writeBindInfo(self.extern_nonlazy_symbols.items(), stream.writer());
const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
@ -2856,19 +2852,12 @@ fn writeBindingInfoTable(self: *MachO) !void {
fn writeLazyBindingInfoTable(self: *MachO) !void {
if (!self.lazy_binding_info_dirty) return;
var symbols = try self.base.allocator.alloc(*const ExternSymbol, self.extern_lazy_symbols.items().len);
defer self.base.allocator.free(symbols);
for (self.extern_lazy_symbols.items()) |*entry, i| {
symbols[i] = &entry.value;
}
const size = try lazyBindInfoSize(symbols);
const size = try lazyBindInfoSize(self.extern_lazy_symbols.items());
var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
defer self.base.allocator.free(buffer);
var stream = std.io.fixedBufferStream(buffer);
try writeLazyBindInfo(symbols, stream.writer());
try writeLazyBindInfo(self.extern_lazy_symbols.items(), stream.writer());
const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;

View File

@ -7,12 +7,8 @@ const assert = std.debug.assert;
const Allocator = mem.Allocator;
pub const ExternSymbol = struct {
/// Symbol name.
/// We own the memory, therefore we'll need to free it by calling `deinit`.
/// In self-hosted, we don't expect it to be null ever.
/// However, this is for backwards compatibility with LLD when
/// we'll be patching things up post mortem.
name: ?[]u8 = null,
/// MachO symbol table entry.
inner: macho.nlist_64,
/// Id of the dynamic library where the specified entries can be found.
/// Id of 0 means self.
@ -26,22 +22,16 @@ pub const ExternSymbol = struct {
/// Offset relative to the start address of the `segment`.
offset: u32 = 0,
pub fn deinit(self: *ExternSymbol, allocator: *Allocator) void {
if (self.name) |name| {
allocator.free(name);
}
}
};
pub fn rebaseInfoSize(symbols: []*const ExternSymbol) !u64 {
pub fn rebaseInfoSize(symbols: anytype) !u64 {
var stream = std.io.countingWriter(std.io.null_writer);
var writer = stream.writer();
var size: u64 = 0;
for (symbols) |symbol| {
for (symbols) |entry| {
size += 2;
try leb.writeILEB128(writer, symbol.offset);
try leb.writeILEB128(writer, entry.value.offset);
size += 1;
}
@ -49,8 +39,9 @@ pub fn rebaseInfoSize(symbols: []*const ExternSymbol) !u64 {
return size;
}
pub fn writeRebaseInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
for (symbols) |symbol| {
pub fn writeRebaseInfo(symbols: anytype, writer: anytype) !void {
for (symbols) |entry| {
const symbol = entry.value;
try writer.writeByte(macho.REBASE_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.REBASE_TYPE_POINTER));
try writer.writeByte(macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
try leb.writeILEB128(writer, symbol.offset);
@ -59,23 +50,23 @@ pub fn writeRebaseInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
try writer.writeByte(macho.REBASE_OPCODE_DONE);
}
pub fn bindInfoSize(symbols: []*const ExternSymbol) !u64 {
pub fn bindInfoSize(symbols: anytype) !u64 {
var stream = std.io.countingWriter(std.io.null_writer);
var writer = stream.writer();
var size: u64 = 0;
for (symbols) |symbol| {
for (symbols) |entry| {
const symbol = entry.value;
size += 1;
if (symbol.dylib_ordinal > 15) {
try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
}
size += 1;
if (symbol.name) |name| {
size += 1;
size += name.len;
size += 1;
}
size += 1;
size += entry.key.len;
size += 1;
size += 1;
try leb.writeILEB128(writer, symbol.offset);
@ -86,8 +77,10 @@ pub fn bindInfoSize(symbols: []*const ExternSymbol) !u64 {
return size;
}
pub fn writeBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
for (symbols) |symbol| {
pub fn writeBindInfo(symbols: anytype, writer: anytype) !void {
for (symbols) |entry| {
const symbol = entry.value;
if (symbol.dylib_ordinal > 15) {
try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
@ -98,11 +91,9 @@ pub fn writeBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
}
try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.BIND_TYPE_POINTER));
if (symbol.name) |name| {
try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
try writer.writeAll(name);
try writer.writeByte(0);
}
try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
try writer.writeAll(entry.key);
try writer.writeByte(0);
try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
try leb.writeILEB128(writer, symbol.offset);
@ -111,23 +102,24 @@ pub fn writeBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
}
}
pub fn lazyBindInfoSize(symbols: []*const ExternSymbol) !u64 {
pub fn lazyBindInfoSize(symbols: anytype) !u64 {
var stream = std.io.countingWriter(std.io.null_writer);
var writer = stream.writer();
var size: u64 = 0;
for (symbols) |symbol| {
for (symbols) |entry| {
const symbol = entry.value;
size += 1;
try leb.writeILEB128(writer, symbol.offset);
size += 1;
if (symbol.dylib_ordinal > 15) {
try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
}
if (symbol.name) |name| {
size += 1;
size += name.len;
size += 1;
}
size += 1;
size += entry.key.len;
size += 1;
size += 2;
}
@ -135,8 +127,9 @@ pub fn lazyBindInfoSize(symbols: []*const ExternSymbol) !u64 {
return size;
}
pub fn writeLazyBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
for (symbols) |symbol| {
pub fn writeLazyBindInfo(symbols: anytype, writer: anytype) !void {
for (symbols) |entry| {
const symbol = entry.value;
try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
try leb.writeILEB128(writer, symbol.offset);
@ -149,11 +142,9 @@ pub fn writeLazyBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void
try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
}
if (symbol.name) |name| {
try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
try writer.writeAll(name);
try writer.writeByte(0);
}
try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
try writer.writeAll(entry.key);
try writer.writeByte(0);
try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
try writer.writeByte(macho.BIND_OPCODE_DONE);