zig/tools/gen_outline_atomics.zig
Andrew Kelley 58430ae6d1 outline atomics: ret instead of jump to ret
After this, the machine code generated by zig is identical to gcc's,
apart from the differences in loading the have_lse flag.
2022-11-21 17:17:02 -07:00

339 lines
9.3 KiB
Zig

const std = @import("std");
const Allocator = std.mem.Allocator;
/// Atomic operations for which `main` emits one generated function each.
/// The tag name becomes part of the generated symbol name via `@tagName`.
const AtomicOp = enum {
    /// Body produced by `generateCas`.
    cas,
    /// Body produced by `generateSwp`.
    swp,
    /// Body produced by `generateLd` with `.ldadd`.
    ldadd,
    /// Body produced by `generateLd` with `.ldclr`.
    ldclr,
    /// Body produced by `generateLd` with `.ldeor`.
    ldeor,
    /// Body produced by `generateLd` with `.ldset`.
    ldset,
};
/// Entry point: prints the generated outline-atomics Zig source to stdout.
///
/// The output is a fixed preamble, then one function per emitted
/// (size, ordering, operation) combination, then a `comptime` block that
/// `@export`s every generated function under its own name.
pub fn main() !void {
    // Every intermediate string lives in one arena that is torn down on exit.
    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    var buffered = std.io.bufferedWriter(std.io.getStdOut().writer());
    const out = buffered.writer();

    try out.writeAll(
        \\//! This file is generated by tools/gen_outline_atomics.zig.
        \\const builtin = @import("builtin");
        \\const std = @import("std");
        \\const linkage = @import("./common.zig").linkage;
        \\const always_has_lse = std.Target.aarch64.featureSetHas(builtin.cpu.features, .lse);
        \\
        \\/// This default is overridden at runtime after inspecting CPU properties.
        \\/// It is intentionally not exported in order to make the machine code that
        \\/// uses it a statically predicted direct branch rather than using the PLT,
        \\/// which ARM is concerned would have too much overhead.
        \\var __aarch64_have_lse_atomics: u8 = @boolToInt(always_has_lse);
        \\
        \\
    );

    // The `@export` lines are collected while the functions are written and
    // appended to the output once all functions have been emitted.
    var exports = std.ArrayList(u8).init(arena);
    const exports_writer = exports.writer();
    try exports.appendSlice("\ncomptime {\n");

    const sizes = [_]N{ .one, .two, .four, .eight, .sixteen };
    const orderings = [_]Ordering{ .relax, .acq, .rel, .acq_rel };
    const operations = [_]AtomicOp{ .cas, .swp, .ldadd, .ldclr, .ldeor, .ldset };

    for (sizes) |n| {
        for (orderings) |order| {
            for (operations) |op| {
                // For the 16-byte size only `cas` is generated.
                if (n != .sixteen or op == .cas) {
                    const symbol = try std.fmt.allocPrint(arena, "__aarch64_{s}{d}_{s}", .{
                        @tagName(op), n.toBytes(), @tagName(order),
                    });
                    try writeFunction(arena, out, symbol, op, n, order);
                    try exports_writer.print(" @export({s}, .{{ .name = \"{s}\", .linkage = linkage }});\n", .{
                        symbol, symbol,
                    });
                }
            }
        }
    }

    try out.writeAll(exports.items);
    try out.writeAll("}\n");
    try buffered.flush();
}
/// Writes one complete generated function called `name` to `w`.
///
/// The assembly text is selected by `op` and wrapped in a function declared
/// `align(16) callconv(.Naked)` whose body is a single `asm volatile`
/// expression. Each line of the assembly text is emitted as one line of a Zig
/// multiline string literal. The asm expression passes
/// `__aarch64_have_lse_atomics` in w16 and lists w15, w16, w17 and memory as
/// clobbers.
///
/// Strings returned by the `generate*` helpers are allocated from `arena` and
/// are not freed here.
fn writeFunction(
    arena: Allocator,
    w: anytype,
    name: []const u8,
    op: AtomicOp,
    n: N,
    order: Ordering,
) !void {
    const asm_text = switch (op) {
        .cas => try generateCas(arena, n, order),
        .swp => try generateSwp(arena, n, order),
        .ldadd => try generateLd(arena, n, order, .ldadd),
        .ldclr => try generateLd(arena, n, order, .ldclr),
        .ldeor => try generateLd(arena, n, order, .ldeor),
        .ldset => try generateLd(arena, n, order, .ldset),
    };

    // Function header up to the opening of the asm expression.
    try w.print("fn {s}() align(16) callconv(.Naked) void {{", .{name});
    try w.writeAll(
        \\
        \\ @setRuntimeSafety(false);
        \\ asm volatile (
        \\
    );

    // Prefix every assembly line with `\\` so that the generated file holds
    // it inside a multiline string literal.
    var asm_lines = std.mem.split(u8, asm_text, "\n");
    while (asm_lines.next()) |asm_line| {
        try w.print(" \\\\{s}\n", .{asm_line});
    }

    // Outputs (none), inputs, clobbers, and the end of the function.
    try w.writeAll(
        \\ :
        \\ : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
        \\ : "w15", "w16", "w17", "memory"
        \\ );
        \\ unreachable;
        \\}
        \\
    );
}
/// Operand size of a generated function; each tag's integer value is the
/// size in bytes.
const N = enum(u8) {
    one = 1,
    two = 2,
    four = 4,
    eight = 8,
    sixteen = 16,

    /// Size-dependent text fragments spliced into the assembly templates.
    const Defines = struct {
        /// Suffix appended to the load/store-exclusive mnemonics.
        s: []const u8,
        /// Mnemonic `generateCas` uses to copy register 0 into register 16.
        uxt: []const u8,
        /// Hex constant added into the raw `.inst` word.
        b: []const u8,
    };

    /// Returns the text fragments belonging to this size.
    fn defines(n: N) Defines {
        return switch (n) {
            .one => Defines{ .s = "b", .uxt = "uxtb", .b = "0x00000000" },
            .two => Defines{ .s = "h", .uxt = "uxth", .b = "0x40000000" },
            .four => Defines{ .s = "", .uxt = "mov", .b = "0x80000000" },
            .eight => Defines{ .s = "", .uxt = "mov", .b = "0xc0000000" },
            .sixteen => Defines{ .s = "", .uxt = "mov", .b = "0x00000000" },
        };
    }

    /// Register name prefix: "w" for sizes below 8 bytes, otherwise "x".
    fn register(n: N) []const u8 {
        return switch (n) {
            .one, .two, .four => "w",
            .eight, .sixteen => "x",
        };
    }

    /// Size in bytes.
    fn toBytes(n: N) u8 {
        return @enumToInt(n);
    }

    /// Size in bits (bytes times eight).
    fn toBits(n: N) u8 {
        return toBytes(n) * 8;
    }
};
/// Memory-ordering variant of a generated function. The tag name is used as
/// the last component of the generated symbol name.
const Ordering = enum {
    relax,
    acq,
    rel,
    acq_rel,

    /// Ordering-dependent text fragments spliced into the assembly templates.
    const Defines = struct {
        /// Tag name with a leading underscore.
        suff: []const u8,
        /// Inserted after "ld" in the load-exclusive mnemonic.
        a: []const u8,
        /// Inserted after "st" in the store-exclusive mnemonic.
        l: []const u8,
        /// Hex constant added into the `.inst` word by `generateCas`.
        m: []const u8,
        /// Hex constant added into the `.inst` word by `generateSwp` and
        /// `generateLd`.
        n: []const u8,
    };

    /// Returns the text fragments belonging to this ordering.
    fn defines(self: @This()) Defines {
        return switch (self) {
            .relax => Defines{
                .suff = "_relax",
                .a = "",
                .l = "",
                .m = "0x000000",
                .n = "0x000000",
            },
            .acq => Defines{
                .suff = "_acq",
                .a = "a",
                .l = "",
                .m = "0x400000",
                .n = "0x800000",
            },
            .rel => Defines{
                .suff = "_rel",
                .a = "",
                .l = "l",
                .m = "0x008000",
                .n = "0x400000",
            },
            .acq_rel => Defines{
                .suff = "_acq_rel",
                .a = "a",
                .l = "l",
                .m = "0x408000",
                .n = "0xc00000",
            },
        };
    }
};
/// Selects which of the four `ld*` operations `generateLd` emits.
const LdName = enum {
    ldadd,
    ldclr,
    ldeor,
    ldset,
};
/// Builds the assembly text of a `cas` function for size `n` and ordering
/// `order`. The returned slice is allocated from `arena`.
///
/// The text starts with `cbz w16, 8f`: if w16 is nonzero, one raw `.inst`
/// word followed by `ret` runs; otherwise control continues at label `8`
/// with a load-exclusive / compare / store-exclusive retry loop.
///
/// * Sizes below 16 bytes: the loop loads from `[x2]` into register 0,
///   compares against the initial contents of register 0 (saved in register
///   16) and, on equality, stores register 1.
/// * 16 bytes: the loop loads the pair x0, x1 from `[x4]`, compares against
///   the initial x0, x1 (saved in x16, x17) and, on equality, stores x2, x3.
fn generateCas(arena: Allocator, n: N, order: Ordering) ![]const u8 {
    const ord = order.defines();

    // The 16-byte variant works on register pairs and uses its own template.
    if (n == .sixteen) {
        const pair_template =
            \\ cbz w16, 8f
            \\ .inst 0x48207c82 + {[m]s}
            \\ ret
            \\8:
            \\ mov x16, x0
            \\ mov x17, x1
            \\0:
            \\ ld{[a]s}xp x0, x1, [x4]
            \\ cmp x0, x16
            \\ ccmp x1, x17, #0, eq
            \\ bne 1f
            \\ st{[l]s}xp w15, x2, x3, [x4]
            \\ cbnz w15, 0b
            \\1:
            \\ ret
        ;
        return try std.fmt.allocPrint(arena, pair_template, .{
            .m = ord.m,
            .a = ord.a,
            .l = ord.l,
        });
    }

    const size = n.defines();
    const single_template =
        \\ cbz w16, 8f
        \\ .inst 0x08a07c41 + {[b]s} + {[m]s}
        \\ ret
        \\8:
        \\ {[uxt]s} {[reg]s}16, {[reg]s}0
        \\0:
        \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x2]
        \\ cmp {[reg]s}0, {[reg]s}16
        \\ bne 1f
        \\ st{[l]s}xr{[s]s} w17, {[reg]s}1, [x2]
        \\ cbnz w17, 0b
        \\1:
        \\ ret
    ;
    return try std.fmt.allocPrint(arena, single_template, .{
        .b = size.b,
        .m = ord.m,
        .uxt = size.uxt,
        .reg = n.register(),
        .a = ord.a,
        .s = size.s,
        .l = ord.l,
    });
}
/// Builds the assembly text of a `swp` function for size `n` and ordering
/// `order`. The returned slice is allocated from `arena`.
///
/// The text starts with `cbz w16, 8f`: if w16 is nonzero, one raw `.inst`
/// word followed by `ret` runs; otherwise control continues at label `8`,
/// which saves register 0 in register 16 and then loops: load-exclusive from
/// `[x1]` into register 0, store-exclusive of register 16 to `[x1]`, retry
/// while the status in w17 is nonzero.
fn generateSwp(arena: Allocator, n: N, order: Ordering) ![]const u8 {
    const size = n.defines();
    const ord = order.defines();

    const template =
        \\ cbz w16, 8f
        \\ .inst 0x38208020 + {[b]s} + {[n]s}
        \\ ret
        \\8:
        \\ mov {[reg]s}16, {[reg]s}0
        \\0:
        \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x1]
        \\ st{[l]s}xr{[s]s} w17, {[reg]s}16, [x1]
        \\ cbnz w17, 0b
        \\1:
        \\ ret
    ;
    return try std.fmt.allocPrint(arena, template, .{
        .reg = n.register(),
        .b = size.b,
        .s = size.s,
        .n = ord.n,
        .a = ord.a,
        .l = ord.l,
    });
}
/// Builds the assembly text of an `ldadd`/`ldclr`/`ldeor`/`ldset` function for
/// size `n` and ordering `order`. The returned slice is allocated from
/// `arena`.
///
/// The text starts with `cbz w16, 8f`: if w16 is nonzero, one raw `.inst`
/// word followed by `ret` runs; otherwise control continues at label `8`,
/// which saves register 0 in register 16 and then loops: load-exclusive from
/// `[x1]` into register 0, combine register 0 with register 16 into register
/// 17 using `add`/`bic`/`eor`/`orr`, store-exclusive of register 17 to
/// `[x1]`, retry while the status in w15 is nonzero.
fn generateLd(arena: Allocator, n: N, order: Ordering, ld: LdName) ![]const u8 {
    // Per-operation pieces: the instruction used in the retry loop and the
    // hex constant added into the raw `.inst` word.
    const Variant = struct {
        loop_op: []const u8,
        inst_bits: []const u8,
    };
    const variant = switch (ld) {
        .ldadd => Variant{ .loop_op = "add", .inst_bits = "0x0000" },
        .ldclr => Variant{ .loop_op = "bic", .inst_bits = "0x1000" },
        .ldeor => Variant{ .loop_op = "eor", .inst_bits = "0x2000" },
        .ldset => Variant{ .loop_op = "orr", .inst_bits = "0x3000" },
    };

    const size = n.defines();
    const ord = order.defines();

    const template =
        \\ cbz w16, 8f
        \\ .inst 0x38200020 + {[op_n]s} + {[b]s} + {[n]s}
        \\ ret
        \\8:
        \\ mov {[reg]s}16, {[reg]s}0
        \\0:
        \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x1]
        \\ {[op]s} {[reg]s}17, {[reg]s}0, {[reg]s}16
        \\ st{[l]s}xr{[s]s} w15, {[reg]s}17, [x1]
        \\ cbnz w15, 0b
        \\1:
        \\ ret
    ;
    return try std.fmt.allocPrint(arena, template, .{
        .op = variant.loop_op,
        .op_n = variant.inst_bits,
        .reg = n.register(),
        .b = size.b,
        .s = size.s,
        .n = ord.n,
        .a = ord.a,
        .l = ord.l,
    });
}