zig/deps/aro/build/GenerateDef.zig

const std = @import("std");
const Step = std.Build.Step;
const Allocator = std.mem.Allocator;
const GeneratedFile = std.Build.GeneratedFile;
const GenerateDef = @This();
step: Step,
path: []const u8,
name: []const u8,
kind: Options.Kind,
generated_file: GeneratedFile,
pub const base_id: Step.Id = .custom;
pub const Options = struct {
name: []const u8,
src_prefix: []const u8 = "src/aro",
kind: Kind = .dafsa,
pub const Kind = enum { dafsa, named };
};
pub fn create(owner: *std.Build, options: Options) std.Build.Module.Import {
    const self = owner.allocator.create(GenerateDef) catch @panic("OOM");
    const path = owner.pathJoin(&.{ options.src_prefix, options.name });
    const name = owner.fmt("GenerateDef {s}", .{options.name});
    self.* = .{
        .step = Step.init(.{
            .id = base_id,
            .name = name,
            .owner = owner,
            .makeFn = make,
        }),
        .path = path,
        .name = options.name,
        .kind = options.kind,
        .generated_file = .{ .step = &self.step },
    };

    const module = self.step.owner.createModule(.{
        .root_source_file = .{ .generated = &self.generated_file },
    });
    return .{
        .module = module,
        .name = self.name,
    };
}
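
// A minimal sketch of how a build script might wire this step up (hypothetical
// wiring and def-file name; the actual build.zig may differ):
//
//   const gen = GenerateDef.create(b, .{ .name = "Builtins/Builtin.def" });
//   aro_module.addImport(gen.name, gen.module);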
fn make(step: *Step, prog_node: *std.Progress.Node) !void {
    _ = prog_node;
    const b = step.owner;
    const self = @fieldParentPtr(GenerateDef, "step", step);
    const arena = b.allocator;

    var man = b.graph.cache.obtain();
    defer man.deinit();

    // Random bytes to make GenerateDef unique. Refresh this with new
    // random bytes when GenerateDef implementation is modified in a
    // non-backwards-compatible way.
    man.hash.add(@as(u32, 0xDCC14144));

    const contents = try b.build_root.handle.readFileAlloc(arena, self.path, std.math.maxInt(u32));
    man.hash.addBytes(contents);

    const out_name = b.fmt("{s}.zig", .{std.fs.path.stem(self.path)});
    if (try step.cacheHit(&man)) {
        const digest = man.final();
        self.generated_file.path = try b.cache_root.join(arena, &.{
            "o", &digest, out_name,
        });
        return;
    }

    const digest = man.final();
    const sub_path = try std.fs.path.join(arena, &.{ "o", &digest, out_name });
    const sub_path_dirname = std.fs.path.dirname(sub_path).?;

    b.cache_root.handle.makePath(sub_path_dirname) catch |err| {
        return step.fail("unable to make path '{}{s}': {s}", .{
            b.cache_root, sub_path_dirname, @errorName(err),
        });
    };

    const output = try self.generate(contents);
    b.cache_root.handle.writeFile(sub_path, output) catch |err| {
        return step.fail("unable to write file '{}{s}': {s}", .{
            b.cache_root, sub_path, @errorName(err),
        });
    };

    self.generated_file.path = try b.cache_root.join(arena, &.{sub_path});
    try man.writeManifest();
}
const Value = struct {
    name: []const u8,
    properties: []const []const u8,
};
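
// The def file format parsed by `generate` below, shown with a made-up entry
// (the real def files under src/aro are the authoritative reference):
//
//   # Comment lines and blank lines are skipped.
//   pub const TypeDescription = @import("TypeDescription.zig"); // "const"/"pub const" lines pass through as headers
//   __some_builtin                  // any other line starts a new value
//   .language = .all_languages      // '.' lines become Properties fields of the value above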
fn generate(self: *GenerateDef, input: []const u8) ![]const u8 {
    const arena = self.step.owner.allocator;

    var values = std.StringArrayHashMap([]const []const u8).init(arena);
    defer values.deinit();
    var properties = std.ArrayList([]const u8).init(arena);
    defer properties.deinit();
    var headers = std.ArrayList([]const u8).init(arena);
    defer headers.deinit();

    var value_name: ?[]const u8 = null;
    var it = std.mem.tokenizeAny(u8, input, "\r\n");
    while (it.next()) |line_untrimmed| {
        const line = std.mem.trim(u8, line_untrimmed, " \t");
        if (line.len == 0 or line[0] == '#') continue;
        if (std.mem.startsWith(u8, line, "const ") or std.mem.startsWith(u8, line, "pub const ")) {
            try headers.append(line);
            continue;
        }
        if (line[0] == '.') {
            if (value_name == null) {
                return self.step.fail("property not attached to a value:\n\"{s}\"", .{line});
            }
            try properties.append(line);
            continue;
        }

        if (value_name) |name| {
            const old = try values.fetchPut(name, try properties.toOwnedSlice());
            if (old != null) return self.step.fail("duplicate value \"{s}\"", .{name});
        }
        value_name = line;
    }
    if (value_name) |name| {
        const old = try values.fetchPut(name, try properties.toOwnedSlice());
        if (old != null) return self.step.fail("duplicate value \"{s}\"", .{name});
    }

    {
        const sorted_list = try arena.dupe([]const u8, values.keys());
        defer arena.free(sorted_list);
        std.mem.sort([]const u8, sorted_list, {}, struct {
            pub fn lessThan(_: void, a: []const u8, b: []const u8) bool {
                return std.mem.lessThan(u8, a, b);
            }
        }.lessThan);

        var longest_name: usize = 0;
        var shortest_name: usize = std.math.maxInt(usize);

        var builder = try DafsaBuilder.init(arena);
        defer builder.deinit();
        for (sorted_list) |name| {
            try builder.insert(name);
            longest_name = @max(name.len, longest_name);
            shortest_name = @min(name.len, shortest_name);
        }
        try builder.finish();
        builder.calcNumbers();

        // As a sanity check, confirm that the minimal perfect hashing doesn't
        // have any collisions
        {
            var index_set = std.AutoHashMap(usize, void).init(arena);
            defer index_set.deinit();

            for (values.keys()) |name| {
                const index = builder.getUniqueIndex(name).?;
                const result = try index_set.getOrPut(index);
                if (result.found_existing) {
                    return self.step.fail("clobbered {}, name={s}\n", .{ index, name });
                }
            }
        }

        var out_buf = std.ArrayList(u8).init(arena);
        defer out_buf.deinit();
        const writer = out_buf.writer();

        try writer.print(
            \\//! Autogenerated by GenerateDef from {s}, do not edit
            \\
            \\const std = @import("std");
            \\
            \\pub fn with(comptime Properties: type) type {{
            \\return struct {{
            \\
        , .{self.path});

        for (headers.items) |line| {
            try writer.print("{s}\n", .{line});
        }

        if (self.kind == .named) {
            try writer.writeAll("pub const Tag = enum {\n");
            for (values.keys()) |property| {
                try writer.print("    {s},\n", .{std.zig.fmtId(property)});
            }
            try writer.writeAll(
                \\
                \\    pub fn property(tag: Tag) Properties {
                \\        return named_data[@intFromEnum(tag)];
                \\    }
                \\
                \\    const named_data = [_]Properties{
                \\
            );
            for (values.values()) |val_props| {
                try writer.writeAll("        .{");
                for (val_props, 0..) |val_prop, j| {
                    if (j != 0) try writer.writeByte(',');
                    try writer.writeByte(' ');
                    try writer.writeAll(val_prop);
                }
                try writer.writeAll(" },\n");
            }
            try writer.writeAll(
                \\    };
                \\};
                \\};
                \\}
                \\
            );
            return out_buf.toOwnedSlice();
        }

        var values_array = try arena.alloc(Value, values.count());
        defer arena.free(values_array);
        for (values.keys(), values.values()) |name, props| {
            const unique_index = builder.getUniqueIndex(name).?;
            const data_index = unique_index - 1;
            values_array[data_index] = .{ .name = name, .properties = props };
        }

        try writer.writeAll(
            \\
            \\tag: Tag,
            \\properties: Properties,
            \\
            \\/// Integer starting at 0 derived from the unique index,
            \\/// corresponds with the data array index.
            \\pub const Tag = enum(u16) { _ };
            \\
            \\const Self = @This();
            \\
            \\pub fn fromName(name: []const u8) ?@This() {
            \\    const data_index = tagFromName(name) orelse return null;
            \\    return data[@intFromEnum(data_index)];
            \\}
            \\
            \\pub fn tagFromName(name: []const u8) ?Tag {
            \\    const unique_index = uniqueIndex(name) orelse return null;
            \\    return @enumFromInt(unique_index - 1);
            \\}
            \\
            \\pub fn fromTag(tag: Tag) @This() {
            \\    return data[@intFromEnum(tag)];
            \\}
            \\
            \\pub fn nameFromTagIntoBuf(tag: Tag, name_buf: []u8) []u8 {
            \\    std.debug.assert(name_buf.len >= longest_name);
            \\    const unique_index = @intFromEnum(tag) + 1;
            \\    return nameFromUniqueIndex(unique_index, name_buf);
            \\}
            \\
            \\pub fn nameFromTag(tag: Tag) NameBuf {
            \\    var name_buf: NameBuf = undefined;
            \\    const unique_index = @intFromEnum(tag) + 1;
            \\    const name = nameFromUniqueIndex(unique_index, &name_buf.buf);
            \\    name_buf.len = @intCast(name.len);
            \\    return name_buf;
            \\}
            \\
            \\pub const NameBuf = struct {
            \\    buf: [longest_name]u8 = undefined,
            \\    len: std.math.IntFittingRange(0, longest_name),
            \\
            \\    pub fn span(self: *const NameBuf) []const u8 {
            \\        return self.buf[0..self.len];
            \\    }
            \\};
            \\
            \\pub fn exists(name: []const u8) bool {
            \\    if (name.len < shortest_name or name.len > longest_name) return false;
            \\
            \\    var index: u16 = 0;
            \\    for (name) |c| {
            \\        index = findInList(dafsa[index].child_index, c) orelse return false;
            \\    }
            \\    return dafsa[index].end_of_word;
            \\}
            \\
            \\
        );
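
        // Sketch of how importing code might consume the API emitted above
        // (illustrative; the module and Properties names are placeholders):
        //
        //   const Builtin = @import("builtin_defs").with(Properties);
        //   if (Builtin.tagFromName("__some_builtin")) |tag| {
        //       const props = Builtin.fromTag(tag).properties;
        //       _ = props;
        //   }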
try writer.print("pub const shortest_name = {};\n", .{shortest_name});
try writer.print("pub const longest_name = {};\n\n", .{longest_name});
try writer.writeAll(
\\/// Search siblings of `first_child_index` for the `char`
\\/// If found, returns the index of the node within the `dafsa` array.
\\/// Otherwise, returns `null`.
\\pub fn findInList(first_child_index: u16, char: u8) ?u16 {
\\ var index = first_child_index;
\\ while (true) {
\\ if (dafsa[index].char == char) return index;
\\ if (dafsa[index].end_of_list) return null;
\\ index += 1;
\\ }
\\ unreachable;
\\}
\\
\\/// Returns a unique (minimal perfect hash) index (starting at 1) for the `name`,
\\/// or null if the name was not found.
\\pub fn uniqueIndex(name: []const u8) ?u16 {
\\ if (name.len < shortest_name or name.len > longest_name) return null;
\\
\\ var index: u16 = 0;
\\ var node_index: u16 = 0;
\\
\\ for (name) |c| {
\\ const child_index = findInList(dafsa[node_index].child_index, c) orelse return null;
\\ var sibling_index = dafsa[node_index].child_index;
\\ while (true) {
\\ const sibling_c = dafsa[sibling_index].char;
\\ std.debug.assert(sibling_c != 0);
\\ if (sibling_c < c) {
\\ index += dafsa[sibling_index].number;
\\ }
\\ if (dafsa[sibling_index].end_of_list) break;
\\ sibling_index += 1;
\\ }
\\ node_index = child_index;
\\ if (dafsa[node_index].end_of_word) index += 1;
\\ }
\\
\\ if (!dafsa[node_index].end_of_word) return null;
\\
\\ return index;
\\}
\\
\\/// Returns a slice of `buf` with the name associated with the given `index`.
\\/// This function should only be called with an `index` that
\\/// is already known to exist within the `dafsa`, e.g. an index
\\/// returned from `uniqueIndex`.
\\pub fn nameFromUniqueIndex(index: u16, buf: []u8) []u8 {
\\ std.debug.assert(index >= 1 and index <= data.len);
\\
\\ var node_index: u16 = 0;
\\ var count: u16 = index;
\\ var fbs = std.io.fixedBufferStream(buf);
\\ const w = fbs.writer();
\\
\\ while (true) {
\\ var sibling_index = dafsa[node_index].child_index;
\\ while (true) {
\\ if (dafsa[sibling_index].number > 0 and dafsa[sibling_index].number < count) {
\\ count -= dafsa[sibling_index].number;
\\ } else {
\\ w.writeByte(dafsa[sibling_index].char) catch unreachable;
\\ node_index = sibling_index;
\\ if (dafsa[node_index].end_of_word) {
\\ count -= 1;
\\ }
\\ break;
\\ }
\\
\\ if (dafsa[sibling_index].end_of_list) break;
\\ sibling_index += 1;
\\ }
\\ if (count == 0) break;
\\ }
\\
\\ return fbs.getWritten();
\\}
\\
\\
);
        try writer.writeAll(
            \\/// We're 1 bit shy of being able to fit this in a u32:
            \\/// - char only contains 0-9, a-z, A-Z, and _, so it could use an enum(u6) with a way to convert <-> u8
            \\///   (note: this would have a performance cost that may make the u32 not worth it)
            \\/// - number has a max value of > 2047 and < 4095 (the first _ node has the largest number),
            \\///   so it could fit into a u12
            \\/// - child_index currently has a max of > 4095 and < 8191, so it could fit into a u13
            \\///
            \\/// With the two end_of_word/end_of_list bools, that makes 33 bits total.
            \\const Node = packed struct(u64) {
            \\    char: u8,
            \\    /// Nodes are numbered with "an integer which gives the number of words that
            \\    /// would be accepted by the automaton starting from that state." This numbering
            \\    /// allows calculating "a one-to-one correspondence between the integers 1 to L
            \\    /// (L is the number of words accepted by the automaton) and the words themselves."
            \\    ///
            \\    /// Essentially, this allows us to have a minimal perfect hashing scheme such that
            \\    /// it's possible to store & lookup the properties of each builtin using a separate array.
            \\    number: u16,
            \\    /// If true, this node is the end of a valid builtin.
            \\    /// Note: This does not necessarily mean that this node does not have child nodes.
            \\    end_of_word: bool,
            \\    /// If true, this node is the end of a sibling list.
            \\    /// If false, then (index + 1) will contain the next sibling.
            \\    end_of_list: bool,
            \\    /// Padding bits to get to u64, unsure if there's some way to use these to improve something.
            \\    _extra: u22 = 0,
            \\    /// Index of the first child of this node.
            \\    child_index: u16,
            \\};
            \\
            \\
        );
        try builder.writeDafsa(writer);
        try writeData(writer, values_array);

        try writer.writeAll(
            \\};
            \\}
            \\
        );

        return out_buf.toOwnedSlice();
    }
}
fn writeData(writer: anytype, values: []const Value) !void {
    try writer.writeAll("pub const data = blk: {\n");
    try writer.print("    @setEvalBranchQuota({});\n", .{values.len});
    try writer.writeAll("    break :blk [_]@This(){\n");
    for (values, 0..) |value, i| {
        try writer.print("        // {s}\n", .{value.name});
        try writer.print("        .{{ .tag = @enumFromInt({}), .properties = .{{", .{i});
        for (value.properties, 0..) |property, j| {
            if (j != 0) try writer.writeByte(',');
            try writer.writeByte(' ');
            try writer.writeAll(property);
        }
        if (value.properties.len != 0) try writer.writeByte(' ');
        try writer.writeAll("} },\n");
    }
    try writer.writeAll("    };\n");
    try writer.writeAll("};\n");
}
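
// For a hypothetical def entry:
//
//   __some_builtin
//   .language = .all_languages
//
// writeData emits roughly (entries are ordered by unique index, not by
// input order):
//
//   pub const data = blk: {
//       @setEvalBranchQuota(1);
//       break :blk [_]@This(){
//           // __some_builtin
//           .{ .tag = @enumFromInt(0), .properties = .{ .language = .all_languages } },
//       };
//   };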
const DafsaBuilder = struct {
    root: *Node,
    arena: std.heap.ArenaAllocator.State,
    allocator: Allocator,
    unchecked_nodes: std.ArrayListUnmanaged(UncheckedNode),
    minimized_nodes: std.HashMapUnmanaged(*Node, *Node, Node.DuplicateContext, std.hash_map.default_max_load_percentage),
    previous_word_buf: [128]u8 = undefined,
    previous_word: []u8 = &[_]u8{},

    const UncheckedNode = struct {
        parent: *Node,
        char: u8,
        child: *Node,
    };

    pub fn init(allocator: Allocator) !DafsaBuilder {
        var arena = std.heap.ArenaAllocator.init(allocator);
        errdefer arena.deinit();

        const root = try arena.allocator().create(Node);
        root.* = .{};

        return DafsaBuilder{
            .root = root,
            .allocator = allocator,
            .arena = arena.state,
            .unchecked_nodes = .{},
            .minimized_nodes = .{},
        };
    }

    pub fn deinit(self: *DafsaBuilder) void {
        self.arena.promote(self.allocator).deinit();
        self.unchecked_nodes.deinit(self.allocator);
        self.minimized_nodes.deinit(self.allocator);
        self.* = undefined;
    }

    const Node = struct {
        children: [256]?*Node = [_]?*Node{null} ** 256,
        is_terminal: bool = false,
        number: usize = 0,

        const DuplicateContext = struct {
            pub fn hash(ctx: @This(), key: *Node) u64 {
                _ = ctx;
                var hasher = std.hash.Wyhash.init(0);
                std.hash.autoHash(&hasher, key.children);
                std.hash.autoHash(&hasher, key.is_terminal);
                return hasher.final();
            }

            pub fn eql(ctx: @This(), a: *Node, b: *Node) bool {
                _ = ctx;
                return a.is_terminal == b.is_terminal and std.mem.eql(?*Node, &a.children, &b.children);
            }
        };

        pub fn calcNumbers(self: *Node) void {
            self.number = @intFromBool(self.is_terminal);
            for (self.children) |maybe_child| {
                const child = maybe_child orelse continue;
                // A node's number is the sum of the
                // numbers of its immediate child nodes.
                child.calcNumbers();
                self.number += child.number;
            }
        }

        pub fn numDirectChildren(self: *const Node) u8 {
            var num: u8 = 0;
            for (self.children) |child| {
                if (child != null) num += 1;
            }
            return num;
        }
    };

    pub fn insert(self: *DafsaBuilder, str: []const u8) !void {
        if (std.mem.order(u8, str, self.previous_word) == .lt) {
            @panic("insertion order must be sorted");
        }

        var common_prefix_len: usize = 0;
        for (0..@min(str.len, self.previous_word.len)) |i| {
            if (str[i] != self.previous_word[i]) break;
            common_prefix_len += 1;
        }

        try self.minimize(common_prefix_len);

        var node = if (self.unchecked_nodes.items.len == 0)
            self.root
        else
            self.unchecked_nodes.getLast().child;

        for (str[common_prefix_len..]) |c| {
            std.debug.assert(node.children[c] == null);

            var arena = self.arena.promote(self.allocator);
            const child = try arena.allocator().create(Node);
            self.arena = arena.state;

            child.* = .{};
            node.children[c] = child;
            try self.unchecked_nodes.append(self.allocator, .{
                .parent = node,
                .char = c,
                .child = child,
            });
            node = node.children[c].?;
        }
        node.is_terminal = true;

        self.previous_word = self.previous_word_buf[0..str.len];
        @memcpy(self.previous_word, str);
    }

    pub fn minimize(self: *DafsaBuilder, down_to: usize) !void {
        if (self.unchecked_nodes.items.len == 0) return;
        while (self.unchecked_nodes.items.len > down_to) {
            const unchecked_node = self.unchecked_nodes.pop();
            if (self.minimized_nodes.getPtr(unchecked_node.child)) |child| {
                unchecked_node.parent.children[unchecked_node.char] = child.*;
            } else {
                try self.minimized_nodes.put(self.allocator, unchecked_node.child, unchecked_node.child);
            }
        }
    }
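
    // For sorted input, only the unchecked suffix of the previous word can
    // still change, so `insert` minimizes down to the common prefix length.
    // For example, after inserting "tap" and then "top", the two terminal 'p'
    // nodes hash equal in `minimized_nodes`, so both branches end up sharing
    // a single leaf node.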
    pub fn finish(self: *DafsaBuilder) !void {
        try self.minimize(0);
    }

    fn nodeCount(self: *const DafsaBuilder) usize {
        return self.minimized_nodes.count();
    }

    fn edgeCount(self: *const DafsaBuilder) usize {
        var count: usize = 0;
        var it = self.minimized_nodes.iterator();
        while (it.next()) |entry| {
            for (entry.key_ptr.*.children) |child| {
                if (child != null) count += 1;
            }
        }
        return count;
    }

    fn contains(self: *const DafsaBuilder, str: []const u8) bool {
        var node = self.root;
        for (str) |c| {
            node = node.children[c] orelse return false;
        }
        return node.is_terminal;
    }

    fn calcNumbers(self: *const DafsaBuilder) void {
        self.root.calcNumbers();
    }

    fn getUniqueIndex(self: *const DafsaBuilder, str: []const u8) ?usize {
        var index: usize = 0;
        var node = self.root;

        for (str) |c| {
            const child = node.children[c] orelse return null;
            for (node.children, 0..) |sibling, sibling_c| {
                if (sibling == null) continue;
                if (sibling_c < c) {
                    index += sibling.?.number;
                }
            }
            node = child;
            if (node.is_terminal) index += 1;
        }

        return index;
    }
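
    // Worked example of the numbering scheme (illustrative): for the sorted
    // input { "bar", "baz", "foo" }, the root's 'b' child has number 2 and
    // its 'f' child has number 1. "bar" passes no smaller siblings and ends
    // on a terminal node, giving index 1; "baz" skips past "bar"'s terminal
    // 'r' (number 1), giving index 2; "foo" skips the whole 'b' subtree
    // (number 2) and ends on a terminal, giving index 3.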
    fn writeDafsa(self: *const DafsaBuilder, writer: anytype) !void {
        try writer.writeAll("const dafsa = [_]Node{\n");

        // write root
        try writer.writeAll("    .{ .char = 0, .end_of_word = false, .end_of_list = true, .number = 0, .child_index = 1 },\n");

        var queue = std.ArrayList(*Node).init(self.allocator);
        defer queue.deinit();

        var child_indexes = std.AutoHashMap(*Node, usize).init(self.allocator);
        defer child_indexes.deinit();

        try child_indexes.ensureTotalCapacity(@intCast(self.edgeCount()));

        var first_available_index: usize = self.root.numDirectChildren() + 1;
        first_available_index = try writeDafsaChildren(self.root, writer, &queue, &child_indexes, first_available_index);

        while (queue.items.len > 0) {
            // TODO: something with better time complexity
            const node = queue.orderedRemove(0);

            first_available_index = try writeDafsaChildren(node, writer, &queue, &child_indexes, first_available_index);
        }

        try writer.writeAll("};\n");
    }

    fn writeDafsaChildren(
        node: *Node,
        writer: anytype,
        queue: *std.ArrayList(*Node),
        child_indexes: *std.AutoHashMap(*Node, usize),
        first_available_index: usize,
    ) !usize {
        var cur_available_index = first_available_index;
        const num_children = node.numDirectChildren();
        var child_i: usize = 0;
        for (node.children, 0..) |maybe_child, c_usize| {
            const child = maybe_child orelse continue;
            const c: u8 = @intCast(c_usize);
            const is_last_child = child_i == num_children - 1;

            if (!child_indexes.contains(child)) {
                const child_num_children = child.numDirectChildren();
                if (child_num_children > 0) {
                    child_indexes.putAssumeCapacityNoClobber(child, cur_available_index);
                    cur_available_index += child_num_children;
                }
                try queue.append(child);
            }

            try writer.print(
                "    .{{ .char = '{c}', .end_of_word = {}, .end_of_list = {}, .number = {}, .child_index = {} }},\n",
                .{ c, child.is_terminal, is_last_child, child.number, child_indexes.get(child) orelse 0 },
            );

            child_i += 1;
        }
        return cur_available_index;
    }
};
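
// Illustrative sanity test for the builder (a minimal sketch; the build
// itself does not rely on it): exercises a tiny word set and checks that the
// unique indices form a dense, 1-based minimal perfect hash over the sorted
// input, matching the worked example above.
test "DafsaBuilder: contains and unique indices" {
    var builder = try DafsaBuilder.init(std.testing.allocator);
    defer builder.deinit();

    // Insertion order must be sorted; see the panic in `insert`.
    try builder.insert("bar");
    try builder.insert("baz");
    try builder.insert("foo");
    try builder.finish();
    builder.calcNumbers();

    try std.testing.expect(builder.contains("bar"));
    try std.testing.expect(!builder.contains("ba"));

    try std.testing.expectEqual(@as(?usize, 1), builder.getUniqueIndex("bar"));
    try std.testing.expectEqual(@as(?usize, 2), builder.getUniqueIndex("baz"));
    try std.testing.expectEqual(@as(?usize, 3), builder.getUniqueIndex("foo"));
    try std.testing.expectEqual(@as(?usize, null), builder.getUniqueIndex("qux"));
}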