zig/src-self-hosted/stage1.zig

525 lines
16 KiB
Zig
Raw Normal View History

stage1 is now a hybrid of C++ and Zig This modifies the build process of Zig to put all of the source files into libcompiler.a, except main.cpp and userland.cpp. Next, the build process links main.cpp, userland.cpp, and libcompiler.a into zig1. userland.cpp is a shim for functions that will later be replaced with self-hosted implementations. Next, the build process uses zig1 to build src-self-hosted/stage1.zig into libuserland.a, which does not depend on any of the things that are shimmed in userland.cpp, such as translate-c. Finally, the build process re-links main.cpp and libcompiler.a, except with libuserland.a instead of userland.cpp. Now the shims are replaced with .zig code. This provides all of the Zig standard library to the stage1 C++ compiler, and enables us to move certain things to userland, such as translate-c. As a proof of concept I have made the `zig zen` command use text defined in userland. I added `zig translate-c-2` which is a work-in-progress reimplementation of translate-c in userland, which currently calls `std.debug.panic("unimplemented")` and you can see the stack trace makes it all the way back into the C++ main() function (Thanks LemonBoy for improving that!). This could potentially let us move other things into userland, such as hashing algorithms, the entire cache system, .d file parsing, pretty much anything that libuserland.a itself doesn't need to depend on. This can also let us have `zig fmt` in stage1 without the overhead of child process execution, and without the initial compilation delay before it gets cached. See #1964
2019-04-17 04:47:47 +08:00
// This is Zig code that is used by both stage1 and stage2.
// The prototypes in src/userland.h must match these definitions.
const builtin = @import("builtin");
2019-05-27 01:17:34 +08:00
const std = @import("std");
const io = std.io;
const mem = std.mem;
const fs = std.fs;
const process = std.process;
const Allocator = mem.Allocator;
const ArrayList = std.ArrayList;
const Buffer = std.Buffer;
const arg = @import("arg.zig");
const self_hosted_main = @import("main.zig");
const Args = arg.Args;
const Flag = arg.Flag;
const errmsg = @import("errmsg.zig");
const DepTokenizer = @import("dep_tokenizer.zig").Tokenizer;
2019-05-27 01:17:34 +08:00
var stderr_file: fs.File = undefined;
var stderr: *io.OutStream(fs.File.WriteError) = undefined;
var stdout: *io.OutStream(fs.File.WriteError) = undefined;
stage1 is now a hybrid of C++ and Zig This modifies the build process of Zig to put all of the source files into libcompiler.a, except main.cpp and userland.cpp. Next, the build process links main.cpp, userland.cpp, and libcompiler.a into zig1. userland.cpp is a shim for functions that will later be replaced with self-hosted implementations. Next, the build process uses zig1 to build src-self-hosted/stage1.zig into libuserland.a, which does not depend on any of the things that are shimmed in userland.cpp, such as translate-c. Finally, the build process re-links main.cpp and libcompiler.a, except with libuserland.a instead of userland.cpp. Now the shims are replaced with .zig code. This provides all of the Zig standard library to the stage1 C++ compiler, and enables us to move certain things to userland, such as translate-c. As a proof of concept I have made the `zig zen` command use text defined in userland. I added `zig translate-c-2` which is a work-in-progress reimplementation of translate-c in userland, which currently calls `std.debug.panic("unimplemented")` and you can see the stack trace makes it all the way back into the C++ main() function (Thanks LemonBoy for improving that!). This could potentially let us move other things into userland, such as hashing algorithms, the entire cache system, .d file parsing, pretty much anything that libuserland.a itself doesn't need to depend on. This can also let us have `zig fmt` in stage1 without the overhead of child process execution, and without the initial compilation delay before it gets cached. See #1964
2019-04-17 04:47:47 +08:00
comptime {
_ = @import("dep_tokenizer.zig");
}
// ABI warning
stage1 is now a hybrid of C++ and Zig This modifies the build process of Zig to put all of the source files into libcompiler.a, except main.cpp and userland.cpp. Next, the build process links main.cpp, userland.cpp, and libcompiler.a into zig1. userland.cpp is a shim for functions that will later be replaced with self-hosted implementations. Next, the build process uses zig1 to build src-self-hosted/stage1.zig into libuserland.a, which does not depend on any of the things that are shimmed in userland.cpp, such as translate-c. Finally, the build process re-links main.cpp and libcompiler.a, except with libuserland.a instead of userland.cpp. Now the shims are replaced with .zig code. This provides all of the Zig standard library to the stage1 C++ compiler, and enables us to move certain things to userland, such as translate-c. As a proof of concept I have made the `zig zen` command use text defined in userland. I added `zig translate-c-2` which is a work-in-progress reimplementation of translate-c in userland, which currently calls `std.debug.panic("unimplemented")` and you can see the stack trace makes it all the way back into the C++ main() function (Thanks LemonBoy for improving that!). This could potentially let us move other things into userland, such as hashing algorithms, the entire cache system, .d file parsing, pretty much anything that libuserland.a itself doesn't need to depend on. This can also let us have `zig fmt` in stage1 without the overhead of child process execution, and without the initial compilation delay before it gets cached. See #1964
2019-04-17 04:47:47 +08:00
export fn stage2_zen(ptr: *[*]const u8, len: *usize) void {
const info_zen = @import("main.zig").info_zen;
stage1 is now a hybrid of C++ and Zig This modifies the build process of Zig to put all of the source files into libcompiler.a, except main.cpp and userland.cpp. Next, the build process links main.cpp, userland.cpp, and libcompiler.a into zig1. userland.cpp is a shim for functions that will later be replaced with self-hosted implementations. Next, the build process uses zig1 to build src-self-hosted/stage1.zig into libuserland.a, which does not depend on any of the things that are shimmed in userland.cpp, such as translate-c. Finally, the build process re-links main.cpp and libcompiler.a, except with libuserland.a instead of userland.cpp. Now the shims are replaced with .zig code. This provides all of the Zig standard library to the stage1 C++ compiler, and enables us to move certain things to userland, such as translate-c. As a proof of concept I have made the `zig zen` command use text defined in userland. I added `zig translate-c-2` which is a work-in-progress reimplementation of translate-c in userland, which currently calls `std.debug.panic("unimplemented")` and you can see the stack trace makes it all the way back into the C++ main() function (Thanks LemonBoy for improving that!). This could potentially let us move other things into userland, such as hashing algorithms, the entire cache system, .d file parsing, pretty much anything that libuserland.a itself doesn't need to depend on. This can also let us have `zig fmt` in stage1 without the overhead of child process execution, and without the initial compilation delay before it gets cached. See #1964
2019-04-17 04:47:47 +08:00
ptr.* = &info_zen;
len.* = info_zen.len;
}
// ABI warning
export fn stage2_panic(ptr: [*]const u8, len: usize) void {
@panic(ptr[0..len]);
}
// ABI warning
const TranslateMode = extern enum {
import,
translate,
};
// ABI warning
const Error = extern enum {
None,
OutOfMemory,
InvalidFormat,
SemanticAnalyzeFail,
AccessDenied,
Interrupted,
SystemResources,
FileNotFound,
FileSystem,
FileTooBig,
DivByZero,
Overflow,
PathAlreadyExists,
Unexpected,
ExactDivRemainder,
NegativeDenominator,
ShiftedOutOneBits,
CCompileErrors,
EndOfFile,
IsDir,
NotDir,
UnsupportedOperatingSystem,
SharingViolation,
PipeBusy,
PrimitiveTypeNotFound,
CacheUnavailable,
PathTooLong,
CCompilerCannotFindFile,
ReadingDepFile,
InvalidDepFile,
MissingArchitecture,
MissingOperatingSystem,
UnknownArchitecture,
UnknownOperatingSystem,
UnknownABI,
InvalidFilename,
DiskQuota,
DiskSpace,
UnexpectedWriteFailure,
UnexpectedSeekFailure,
UnexpectedFileTruncationFailure,
Unimplemented,
OperationAborted,
BrokenPipe,
NoSpaceLeft,
};
const FILE = std.c.FILE;
const ast = std.zig.ast;
const translate_c = @import("translate_c.zig");
/// Args should have a null terminating last arg.
export fn stage2_translate_c(
out_ast: **ast.Tree,
out_errors_ptr: *[*]translate_c.ClangErrMsg,
out_errors_len: *usize,
args_begin: [*]?[*]const u8,
args_end: [*]?[*]const u8,
mode: TranslateMode,
resources_path: [*]const u8,
) Error {
var errors: []translate_c.ClangErrMsg = undefined;
out_ast.* = translate_c.translate(std.heap.c_allocator, args_begin, args_end, switch (mode) {
.import => translate_c.Mode.import,
.translate => translate_c.Mode.translate,
}, &errors, resources_path) catch |err| switch (err) {
error.SemanticAnalyzeFail => {
out_errors_ptr.* = errors.ptr;
out_errors_len.* = errors.len;
return Error.CCompileErrors;
},
error.OutOfMemory => return Error.OutOfMemory,
};
return Error.None;
}
export fn stage2_free_clang_errors(errors_ptr: [*]translate_c.ClangErrMsg, errors_len: usize) void {
translate_c.freeErrors(errors_ptr[0..errors_len]);
}
export fn stage2_render_ast(tree: *ast.Tree, output_file: *FILE) Error {
const c_out_stream = &std.io.COutStream.init(output_file).stream;
_ = std.zig.render(std.heap.c_allocator, c_out_stream, tree) catch |e| switch (e) {
std lib networking improvements, especially non-blocking I/O * delete the std/event/net directory * `std.event.Loop.waitUntilFdReadable` and related functions no longer have possibility of failure. On Linux, they fall back to poll() and then fall back to sleep(). * add some missing `noasync` decorations in `std.event.Loop` * redo the `std.net.Server` API. it's quite nice now, but shutdown does not work cleanly. There is a race condition with close() that I am actively working on. * move `std.io.OutStream` to its own file to match `std.io.InStream`. I started working on making `write` integrated with evented I/O, but it got tricky so I backed off and filed #3557. However I did integrate `std.os.writev` and `std.os.pwritev` with evented I/O. * add `std.Target.stack_align` * move networking tests to `lib/std/net/test.zig` * add `std.net.tcpConnectToHost` and `std.net.tcpConnectToAddress`. * rename `error.UnknownName` to `error.UnknownHostName` within the context of DNS resolution. * add `std.os.readv`, which is integrated with evented I/O. * `std.os.preadv`, is now integrated with evented I/O. * `std.os.accept4` now asserts that ENOTSOCK and EOPNOTSUPP never occur (misuse of API), instead of returning errors. * `std.os.connect` is now integrated with evented I/O. `std.os.connect_async` is gone. Just use `std.os.connect`. * fix false positive dependency loop regarding async function frames * add more compile notes to help when dependency loops occur in determining whether a function is async. * ir: change an assert to ir_assert to make it easier to find workarounds for when such an assert is triggered. In this case it was trying to parse an IPv4 address at comptime.
2019-10-30 10:59:30 +08:00
error.WouldBlock => unreachable, // stage1 opens stuff in exclusively blocking mode
error.SystemResources => return Error.SystemResources,
error.OperationAborted => return Error.OperationAborted,
error.BrokenPipe => return Error.BrokenPipe,
error.DiskQuota => return Error.DiskQuota,
error.FileTooBig => return Error.FileTooBig,
error.NoSpaceLeft => return Error.NoSpaceLeft,
error.AccessDenied => return Error.AccessDenied,
error.OutOfMemory => return Error.OutOfMemory,
error.Unexpected => return Error.Unexpected,
error.InputOutput => return Error.FileSystem,
};
return Error.None;
}
// TODO: just use the actual self-hosted zig fmt. Until https://github.com/ziglang/zig/issues/2377,
// we use a blocking implementation.
export fn stage2_fmt(argc: c_int, argv: [*]const [*]const u8) c_int {
if (std.debug.runtime_safety) {
fmtMain(argc, argv) catch unreachable;
} else {
fmtMain(argc, argv) catch |e| {
std.debug.warn("{}\n", @errorName(e));
return -1;
2019-04-27 08:58:43 +08:00
};
}
return 0;
}
fn fmtMain(argc: c_int, argv: [*]const [*]const u8) !void {
const allocator = std.heap.c_allocator;
var args_list = std.ArrayList([]const u8).init(allocator);
const argc_usize = @intCast(usize, argc);
var arg_i: usize = 0;
while (arg_i < argc_usize) : (arg_i += 1) {
try args_list.append(std.mem.toSliceConst(u8, argv[arg_i]));
}
stdout = &std.io.getStdOut().outStream().stream;
stderr_file = std.io.getStdErr();
stderr = &stderr_file.outStream().stream;
const args = args_list.toSliceConst();
var flags = try Args.parse(allocator, self_hosted_main.args_fmt_spec, args[2..]);
defer flags.deinit();
if (flags.present("help")) {
try stdout.write(self_hosted_main.usage_fmt);
2019-05-27 01:17:34 +08:00
process.exit(0);
}
const color = blk: {
if (flags.single("color")) |color_flag| {
if (mem.eql(u8, color_flag, "auto")) {
break :blk errmsg.Color.Auto;
} else if (mem.eql(u8, color_flag, "on")) {
break :blk errmsg.Color.On;
} else if (mem.eql(u8, color_flag, "off")) {
break :blk errmsg.Color.Off;
} else unreachable;
} else {
break :blk errmsg.Color.Auto;
}
};
if (flags.present("stdin")) {
if (flags.positionals.len != 0) {
try stderr.write("cannot use --stdin with positional arguments\n");
2019-05-27 01:17:34 +08:00
process.exit(1);
}
const stdin_file = io.getStdIn();
var stdin = stdin_file.inStream();
const source_code = try stdin.stream.readAllAlloc(allocator, self_hosted_main.max_src_size);
defer allocator.free(source_code);
const tree = std.zig.parse(allocator, source_code) catch |err| {
try stderr.print("error parsing stdin: {}\n", err);
2019-05-27 01:17:34 +08:00
process.exit(1);
};
defer tree.deinit();
var error_it = tree.errors.iterator(0);
while (error_it.next()) |parse_error| {
try printErrMsgToFile(allocator, parse_error, tree, "<stdin>", stderr_file, color);
}
if (tree.errors.len != 0) {
2019-05-27 01:17:34 +08:00
process.exit(1);
}
if (flags.present("check")) {
const anything_changed = try std.zig.render(allocator, io.null_out_stream, tree);
const code = if (anything_changed) @as(u8, 1) else @as(u8, 0);
2019-05-27 01:17:34 +08:00
process.exit(code);
}
_ = try std.zig.render(allocator, stdout, tree);
return;
}
if (flags.positionals.len == 0) {
try stderr.write("expected at least one source file argument\n");
2019-05-27 01:17:34 +08:00
process.exit(1);
}
var fmt = Fmt{
.seen = Fmt.SeenMap.init(allocator),
.any_error = false,
.color = color,
.allocator = allocator,
};
const check_mode = flags.present("check");
for (flags.positionals.toSliceConst()) |file_path| {
try fmtPath(&fmt, file_path, check_mode);
}
if (fmt.any_error) {
2019-05-27 01:17:34 +08:00
process.exit(1);
}
}
const FmtError = error{
SystemResources,
OperationAborted,
IoPending,
BrokenPipe,
Unexpected,
WouldBlock,
FileClosed,
DestinationAddressRequired,
DiskQuota,
FileTooBig,
InputOutput,
NoSpaceLeft,
AccessDenied,
OutOfMemory,
RenameAcrossMountPoints,
ReadOnlyFileSystem,
LinkQuotaExceeded,
FileBusy,
2019-05-27 01:17:34 +08:00
} || fs.File.OpenError;
fn fmtPath(fmt: *Fmt, file_path_ref: []const u8, check_mode: bool) FmtError!void {
const file_path = try std.mem.dupe(fmt.allocator, u8, file_path_ref);
defer fmt.allocator.free(file_path);
if (try fmt.seen.put(file_path, {})) |_| return;
const source_code = io.readFileAlloc(fmt.allocator, file_path) catch |err| switch (err) {
error.IsDir, error.AccessDenied => {
// TODO make event based (and dir.next())
var dir = try fs.Dir.open(file_path);
defer dir.close();
breaking: improve std.fs directory handling API * Added `std.c.unlinkat` and `std.os.unlinkat`. * Removed `std.fs.MAX_BUF_BYTES` (this declaration never made it to master branch) * Added `std.fs.Dir.deleteTree` to be used on an open directory handle. * `std.fs.deleteTree` has better behavior for both relative and absolute paths. For absolute paths, it opens the base directory and uses that handle for subsequent operations. For relative paths, it does a similar strategy, using the cwd handle. * The error set of `std.fs.deleteTree` is improved to no longer have these possible errors: - OutOfMemory - FileTooBig - IsDir - DirNotEmpty - PathAlreadyExists - NoSpaceLeft * Added `std.fs.Dir.posix_cwd` which is a statically initialized directory representing the current working directory. * The error set of `std.Dir.open` is improved to no longer have these possible errors: - FileTooBig - IsDir - NoSpaceLeft - PathAlreadyExists - OutOfMemory * Added more alternative functions to `std.fs` for when the path parameter is a null terminated string. This can sometimes be more effecient on systems which have an ABI based on null terminated strings. * Added `std.fs.Dir.openDir`, `std.fs.Dir.deleteFile`, and `std.fs.Dir.deleteDir` which all operate on an open directory handle. * `std.fs.Walker.Entry` now has a `dir` field, which can be used to do operations directly on `std.fs.Walker.Entry.basename`, avoiding `error.NameTooLong` for deeply nested paths. * Added more docs to `std.os.OpenError` This commit does the POSIX components for these changes. I plan to follow up shortly with a commit for Windows.
2019-10-21 09:48:23 +08:00
var dir_it = dir.iterate();
while (try dir_it.next()) |entry| {
if (entry.kind == .Directory or mem.endsWith(u8, entry.name, ".zig")) {
const full_path = try fs.path.join(fmt.allocator, [_][]const u8{ file_path, entry.name });
try fmtPath(fmt, full_path, check_mode);
}
}
return;
},
else => {
// TODO lock stderr printing
try stderr.print("unable to open '{}': {}\n", file_path, err);
fmt.any_error = true;
return;
},
};
defer fmt.allocator.free(source_code);
const tree = std.zig.parse(fmt.allocator, source_code) catch |err| {
try stderr.print("error parsing file '{}': {}\n", file_path, err);
fmt.any_error = true;
return;
};
defer tree.deinit();
var error_it = tree.errors.iterator(0);
while (error_it.next()) |parse_error| {
try printErrMsgToFile(fmt.allocator, parse_error, tree, file_path, stderr_file, fmt.color);
}
if (tree.errors.len != 0) {
fmt.any_error = true;
return;
}
if (check_mode) {
const anything_changed = try std.zig.render(fmt.allocator, io.null_out_stream, tree);
if (anything_changed) {
try stderr.print("{}\n", file_path);
fmt.any_error = true;
}
} else {
const baf = try io.BufferedAtomicFile.create(fmt.allocator, file_path);
defer baf.destroy();
const anything_changed = try std.zig.render(fmt.allocator, baf.stream(), tree);
if (anything_changed) {
try stderr.print("{}\n", file_path);
try baf.finish();
}
}
}
const Fmt = struct {
seen: SeenMap,
any_error: bool,
color: errmsg.Color,
allocator: *mem.Allocator,
const SeenMap = std.StringHashMap(void);
};
fn printErrMsgToFile(
allocator: *mem.Allocator,
parse_error: *const ast.Error,
tree: *ast.Tree,
path: []const u8,
2019-05-27 01:17:34 +08:00
file: fs.File,
color: errmsg.Color,
) !void {
const color_on = switch (color) {
errmsg.Color.Auto => file.isTty(),
errmsg.Color.On => true,
errmsg.Color.Off => false,
};
const lok_token = parse_error.loc();
const span = errmsg.Span{
.first = lok_token,
.last = lok_token,
};
const first_token = tree.tokens.at(span.first);
const last_token = tree.tokens.at(span.last);
const start_loc = tree.tokenLocationPtr(0, first_token);
const end_loc = tree.tokenLocationPtr(first_token.end, last_token);
var text_buf = try std.Buffer.initSize(allocator, 0);
var out_stream = &std.io.BufferOutStream.init(&text_buf).stream;
try parse_error.render(&tree.tokens, out_stream);
const text = text_buf.toOwnedSlice();
const stream = &file.outStream().stream;
try stream.print("{}:{}:{}: error: {}\n", path, start_loc.line + 1, start_loc.column + 1, text);
if (!color_on) return;
// Print \r and \t as one space each so that column counts line up
for (tree.source[start_loc.line_start..start_loc.line_end]) |byte| {
try stream.writeByte(switch (byte) {
'\r', '\t' => ' ',
else => byte,
});
}
try stream.writeByte('\n');
try stream.writeByteNTimes(' ', start_loc.column);
try stream.writeByteNTimes('~', last_token.end - first_token.start);
try stream.writeByte('\n');
}
export fn stage2_DepTokenizer_init(input: [*]const u8, len: usize) stage2_DepTokenizer {
const t = std.heap.c_allocator.create(DepTokenizer) catch @panic("failed to create .d tokenizer");
t.* = DepTokenizer.init(std.heap.c_allocator, input[0..len]);
return stage2_DepTokenizer{
.handle = t,
};
}
export fn stage2_DepTokenizer_deinit(self: *stage2_DepTokenizer) void {
self.handle.deinit();
}
export fn stage2_DepTokenizer_next(self: *stage2_DepTokenizer) stage2_DepNextResult {
const otoken = self.handle.next() catch {
const textz = std.Buffer.init(&self.handle.arena.allocator, self.handle.error_text) catch @panic("failed to create .d tokenizer error text");
return stage2_DepNextResult{
.type_id = .error_,
.textz = textz.toSlice().ptr,
};
};
const token = otoken orelse {
return stage2_DepNextResult{
.type_id = .null_,
.textz = undefined,
};
};
const textz = std.Buffer.init(&self.handle.arena.allocator, token.bytes) catch @panic("failed to create .d tokenizer token text");
return stage2_DepNextResult{
.type_id = switch (token.id) {
.target => stage2_DepNextResult.TypeId.target,
.prereq => stage2_DepNextResult.TypeId.prereq,
},
.textz = textz.toSlice().ptr,
};
}
export const stage2_DepTokenizer = extern struct {
handle: *DepTokenizer,
};
export const stage2_DepNextResult = extern struct {
type_id: TypeId,
// when type_id == error --> error text
// when type_id == null --> undefined
// when type_id == target --> target pathname
// when type_id == prereq --> prereq pathname
textz: [*]const u8,
export const TypeId = extern enum {
error_,
null_,
target,
prereq,
};
};
// ABI warning
export fn stage2_attach_segfault_handler() void {
if (std.debug.runtime_safety and std.debug.have_segfault_handling_support) {
std.debug.attachSegfaultHandler();
}
}
// ABI warning
export fn stage2_progress_create() *std.Progress {
const ptr = std.heap.c_allocator.create(std.Progress) catch @panic("out of memory");
ptr.* = std.Progress{};
return ptr;
}
// ABI warning
export fn stage2_progress_destroy(progress: *std.Progress) void {
std.heap.c_allocator.destroy(progress);
}
// ABI warning
export fn stage2_progress_start_root(
progress: *std.Progress,
name_ptr: [*]const u8,
name_len: usize,
estimated_total_items: usize,
) *std.Progress.Node {
return progress.start(
name_ptr[0..name_len],
if (estimated_total_items == 0) null else estimated_total_items,
) catch @panic("timer unsupported");
}
// ABI warning
export fn stage2_progress_disable_tty(progress: *std.Progress) void {
progress.terminal = null;
}
// ABI warning
export fn stage2_progress_start(
node: *std.Progress.Node,
name_ptr: [*]const u8,
name_len: usize,
estimated_total_items: usize,
) *std.Progress.Node {
const child_node = std.heap.c_allocator.create(std.Progress.Node) catch @panic("out of memory");
child_node.* = node.start(
name_ptr[0..name_len],
if (estimated_total_items == 0) null else estimated_total_items,
);
child_node.activate();
return child_node;
}
// ABI warning
export fn stage2_progress_end(node: *std.Progress.Node) void {
node.end();
if (&node.context.root != node) {
std.heap.c_allocator.destroy(node);
}
}
// ABI warning
export fn stage2_progress_complete_one(node: *std.Progress.Node) void {
node.completeOne();
}
// ABI warning
export fn stage2_progress_update_node(node: *std.Progress.Node, done_count: usize, total_count: usize) void {
node.completed_items = done_count;
node.estimated_total_items = total_count;
node.activate();
node.context.maybeRefresh();
}