// Userland implementation of translate-c, intended to be shared by stage1
// and stage2. For now the only way to reach it is `zig translate-c-2`.

const std = @import("std");
const builtin = @import("builtin");
const assert = std.debug.assert;
const ast = std.zig.ast;
const Token = std.zig.Token;
usingnamespace @import("clang.zig");

/// How the translation was requested.
pub const Mode = enum {
    import,
    translate,
};

// TODO merge with Type.Fn.CallingConvention
const CallingConvention = builtin.TypeInfo.CallingConvention;

pub const ClangErrMsg = Stage2ErrorMsg;

/// Errors that can escape the translator as a whole.
pub const Error = error{OutOfMemory};
/// `Error` plus the failure raised when a C type has no translation.
const TypeError = Error || error{UnsupportedType};
/// `Error` plus the failure raised when a C statement has no translation.
const TransError = Error || error{UnsupportedTranslation};

/// Set of addresses (stored as `usize`) used to remember visited declarations.
const DeclTable = std.HashMap(usize, void, addrHash, addrEql);

/// Hash an address down to 32 bits for `DeclTable`.
fn addrHash(x: usize) u32 {
    const addr_bits = @typeInfo(usize).Int.bits;
    switch (addr_bits) {
        // A 32-bit address already fits the hash width.
        32 => return x,
        // pointers are usually aligned so we ignore the bits that are probably all 0 anyway
        // usually the larger bits of addr space are unused so we just chop em off
        64 => return @truncate(u32, x >> 4),
        else => @compileError("unreachable"),
    }
}

/// Key equality for `DeclTable`: plain address comparison.
fn addrEql(a: usize, b: usize) bool {
    return a == b;
}

/// Node in the chain of scopes; `id` tells which of the wrapper structs
/// below embeds this value as its `base` field.
const Scope = struct {
    id: Id,
    parent: ?*Scope,

    const Id = enum {
        Switch,
        Var,
        Block,
        Root,
        While,
    };

    const Switch = struct {
        base: Scope,
    };

    const Var = struct {
        base: Scope,
        c_name: []const u8,
        zig_name: []const u8,
    };

    const Block = struct {
        base: Scope,
        block_node: *ast.Node.Block,

        /// Allocate a block scope together with its (still empty) AST block node.
        /// Don't forget to set rbrace token later
        fn create(c: *Context, parent: *Scope, lbrace_tok: ast.TokenIndex) !*Block {
            const allocator = c.a();

            // Same allocation order as before: the scope first, then the AST node.
            const scope_block = try allocator.create(Block);
            scope_block.* = Block{
                .base = Scope{
                    .id = .Block,
                    .parent = parent,
                },
                .block_node = try allocator.create(ast.Node.Block),
            };

            const node = scope_block.block_node;
            node.* = ast.Node.Block{
                .base = ast.Node{ .id = .Block },
                .label = null,
                .lbrace = lbrace_tok,
                .statements = ast.Node.Block.StatementList.init(allocator),
                .rbrace = undefined,
            };
            return scope_block;
        }
    };

    const Root = struct {
        base: Scope,
    };

    const While = struct {
        base: Scope,
    };
};
/// What translating a single statement yields.
const TransResult = struct {
    /// The AST node that was produced.
    node: *ast.Node,
    node_scope: *Scope,
    /// Scope that following sibling statements are translated in
    /// (see `transCompoundStmtInline`).
    child_scope: *Scope,
};

/// State shared by the whole translation of one translation unit.
const Context = struct {
    tree: *ast.Tree,
    /// Text of the generated Zig source; tokens index into this buffer.
    source_buffer: *std.Buffer,
    /// Error recorded by `declVisitorC` so `translate` can return it.
    err: Error,
    source_manager: *ZigClangSourceManager,
    /// Addresses of declarations that were already visited.
    decl_table: DeclTable,
    global_scope: *Scope.Root,
    mode: Mode,

    /// The arena allocator owned by the tree being built.
    fn a(c: *Context) *std.mem.Allocator {
        return &c.tree.arena_allocator.allocator;
    }

    /// Convert a null-terminated C string to a slice allocated in the arena
    fn str(c: *Context, s: [*]const u8) ![]u8 {
        return std.mem.dupe(c.a(), u8, std.mem.toSliceConst(u8, s));
    }

    /// Convert a clang source location to a file:line:column string
    fn locStr(c: *Context, loc: ZigClangSourceLocation) ![]u8 {
        const spelling_loc = ZigClangSourceManager_getSpellingLoc(c.source_manager, loc);
        const filename_c = ZigClangSourceManager_getFilename(c.source_manager, spelling_loc);
        const filename = if (filename_c) |s| try c.str(s) else ([]const u8)("(no file)");

        const line = ZigClangSourceManager_getSpellingLineNumber(c.source_manager, spelling_loc);
        const column = ZigClangSourceManager_getSpellingColumnNumber(c.source_manager, spelling_loc);
        return std.fmt.allocPrint(c.a(), "{}:{}:{}", filename, line, column);
    }
};

/// Load C source through clang and build a Zig AST for it.
///
/// `args_begin`/`args_end` delimit the clang command line and `resources_path`
/// is forwarded to `ZigClangLoadFromCommandLine`. When loading fails, `errors`
/// is filled in by that call; `error.OutOfMemory` is returned if it reported
/// no errors, `error.SemanticAnalyzeFail` otherwise.
///
/// On success the returned tree owns an arena (backed by `backing_allocator`)
/// that holds every node, token and the generated source text.
pub fn translate(
    backing_allocator: *std.mem.Allocator,
    args_begin: [*]?[*]const u8,
    args_end: [*]?[*]const u8,
    mode: Mode,
    errors: *[]ClangErrMsg,
    resources_path: [*]const u8,
) !*ast.Tree {
    const ast_unit = ZigClangLoadFromCommandLine(
        args_begin,
        args_end,
        &errors.ptr,
        &errors.len,
        resources_path,
    ) orelse {
        if (errors.len == 0) return error.OutOfMemory;
        return error.SemanticAnalyzeFail;
    };
    defer ZigClangASTUnit_delete(ast_unit);

    const tree = blk: {
        var tree_arena = std.heap.ArenaAllocator.init(backing_allocator);
        // Only valid until the arena is copied into the tree below; after
        // that the stack copy is stale, so this errdefer is deliberately
        // limited to this block.
        errdefer tree_arena.deinit();

        const new_tree = try tree_arena.allocator.create(ast.Tree);
        new_tree.* = ast.Tree{
            .source = undefined, // need to use Buffer.toOwnedSlice later
            .root_node = undefined,
            .arena_allocator = tree_arena,
            .tokens = undefined, // can't reference the allocator yet
            .errors = undefined, // can't reference the allocator yet
        };
        break :blk new_tree;
    };
    // From here on the live arena state is the one stored inside the tree.
    // The tree itself is allocated from that arena, so copy the state to the
    // stack before tearing it down.
    errdefer {
        var arena_copy = tree.arena_allocator;
        arena_copy.deinit();
    }

    const arena = &tree.arena_allocator.allocator; // now we can reference the allocator
    tree.tokens = ast.Tree.TokenList.init(arena);
    tree.errors = ast.Tree.ErrorList.init(arena);

    tree.root_node = try arena.create(ast.Node.Root);
    tree.root_node.* = ast.Node.Root{
        .base = ast.Node{ .id = ast.Node.Id.Root },
        .decls = ast.Node.Root.DeclList.init(arena),
        .doc_comments = null,
        // initialized with the eof token at the end
        .eof_token = undefined,
    };

    var source_buffer = try std.Buffer.initSize(arena, 0);

    var context = Context{
        .tree = tree,
        .source_buffer = &source_buffer,
        .source_manager = ZigClangASTUnit_getSourceManager(ast_unit),
        .err = undefined,
        .decl_table = DeclTable.init(arena),
        .global_scope = try arena.create(Scope.Root),
        .mode = mode,
    };
    context.global_scope.* = Scope.Root{
        .base = Scope{
            .id = Scope.Id.Root,
            .parent = null,
        },
    };

    // The visitor stores its failure in `context.err` before returning false.
    if (!ZigClangASTUnit_visitLocalTopLevelDecls(ast_unit, &context, declVisitorC)) {
        return context.err;
    }

    _ = try appendToken(&context, .Eof, "");
    tree.source = source_buffer.toOwnedSlice();
    if (false) {
        std.debug.warn("debug source:\n{}\n==EOF==\ntokens:\n", tree.source);
        var i: usize = 0;
        while (i < tree.tokens.len) : (i += 1) {
            const token = tree.tokens.at(i);
            std.debug.warn("{}\n", token);
        }
    }

    return tree;
}

/// C-ABI callback handed to `ZigClangASTUnit_visitLocalTopLevelDecls`.
/// Returns false after stashing the error in the context when `declVisitor` fails.
extern fn declVisitorC(context: ?*c_void, decl: *const ZigClangDecl) bool {
    const c = @ptrCast(*Context, @alignCast(@alignOf(Context), context));
    declVisitor(c, decl) catch |err| {
        c.err = err;
        return false;
    };
    return true;
}

/// Dispatch on the declaration kind. Only functions are translated; every
/// other kind currently produces a warning comment in the output.
fn declVisitor(c: *Context, decl: *const ZigClangDecl) Error!void {
    switch (ZigClangDecl_getKind(decl)) {
        .Function => {
            return visitFnDecl(c, @ptrCast(*const ZigClangFunctionDecl, decl));
        },
        .Typedef => {
            try emitWarning(c, ZigClangDecl_getLocation(decl), "TODO implement translate-c for typedefs");
        },
        .Enum => {
            try emitWarning(c, ZigClangDecl_getLocation(decl), "TODO implement translate-c for enums");
        },
        .Record => {
            try emitWarning(c, ZigClangDecl_getLocation(decl), "TODO implement translate-c for structs");
        },
        .Var => {
            try emitWarning(c, ZigClangDecl_getLocation(decl), "TODO implement translate-c for variables");
        },
        else => {
            const decl_name = try c.str(ZigClangDecl_getDeclKindName(decl));
            try emitWarning(c, ZigClangDecl_getLocation(decl), "ignoring {} declaration", decl_name);
        },
    }
}

/// Translate one C function declaration (prototype, plus body when present)
/// and append it to the root declarations. Unsupported constructs are turned
/// into a `@compileError` declaration via `failDecl`.
fn visitFnDecl(c: *Context, fn_decl: *const ZigClangFunctionDecl) Error!void {
    if (try c.decl_table.put(@ptrToInt(fn_decl), {})) |_| return; // Avoid processing this decl twice
    const rp = makeRestorePoint(c);
    const fn_name = try c.str(ZigClangDecl_getName_bytes_begin(@ptrCast(*const ZigClangDecl, fn_decl)));
    const fn_decl_loc = ZigClangFunctionDecl_getLocation(fn_decl);
    const fn_qt = ZigClangFunctionDecl_getType(fn_decl);
    const fn_type = ZigClangQualType_getTypePtr(fn_qt);
    var scope = &c.global_scope.base;
    const has_body = ZigClangFunctionDecl_hasBody(fn_decl);
    const storage_class = ZigClangFunctionDecl_getStorageClass(fn_decl);
    const decl_ctx = FnDeclContext{
        .fn_name = fn_name,
        .has_body = has_body,
        .storage_class = storage_class,
        .scope = &scope,
        .is_export = switch (storage_class) {
            .None => has_body,
            .Extern, .Static => false,
            .PrivateExtern => return failDecl(c, fn_decl_loc, fn_name, "unsupported storage class: private extern"),
            .Auto => unreachable, // Not legal on functions
            .Register => unreachable, // Not legal on functions
        },
    };
    const proto_node = switch (ZigClangType_getTypeClass(fn_type)) {
        .FunctionProto => blk: {
            const fn_proto_type = @ptrCast(*const ZigClangFunctionProtoType, fn_type);
            break :blk transFnProto(rp, fn_proto_type, fn_decl_loc, decl_ctx) catch |err| switch (err) {
                error.UnsupportedType => {
                    return failDecl(c, fn_decl_loc, fn_name, "unable to resolve prototype of function");
                },
                error.OutOfMemory => |e| return e,
            };
        },
        .FunctionNoProto => blk: {
            const fn_no_proto_type = @ptrCast(*const ZigClangFunctionType, fn_type);
            break :blk transFnNoProto(rp, fn_no_proto_type, fn_decl_loc, decl_ctx) catch |err| switch (err) {
                error.UnsupportedType => {
                    return failDecl(c, fn_decl_loc, fn_name, "unable to resolve prototype of function");
                },
                error.OutOfMemory => |e| return e,
            };
        },
        else => unreachable,
    };

    if (!decl_ctx.has_body) {
        // The prototype node has no field for the terminating token; it only
        // needs to exist in the token stream.
        _ = try appendToken(c, .Semicolon, ";");
        return addTopLevelDecl(c, fn_name, &proto_node.base);
    }

    // actual function definition with body
    const body_stmt = ZigClangFunctionDecl_getBody(fn_decl);
    const result = transStmt(rp, scope, body_stmt, .unused, .r_value) catch |err| switch (err) {
        error.OutOfMemory => |e| return e,
        error.UnsupportedTranslation => return failDecl(c, fn_decl_loc, fn_name, "unable to translate function"),
    };
    assert(result.node.id == ast.Node.Id.Block);
    proto_node.body_node = result.node;

    return addTopLevelDecl(c, fn_name, &proto_node.base);
}

const ResultUsed = enum {
    used,
    unused,
};

const LRValue = enum {
    l_value,
    r_value,
};

/// Translate a single C statement. Only compound statements are handled so
/// far; anything else reverts to the restore point, emits a warning and
/// fails with `error.UnsupportedTranslation`.
fn transStmt(
    rp: RestorePoint,
    scope: *Scope,
    stmt: *const ZigClangStmt,
    result_used: ResultUsed,
    lrvalue: LRValue,
) !TransResult {
    const sc = ZigClangStmt_getStmtClass(stmt);
    switch (sc) {
        .CompoundStmtClass => return transCompoundStmt(rp, scope, @ptrCast(*const ZigClangCompoundStmt, stmt)),
        else => {
            return revertAndWarn(
                rp,
                error.UnsupportedTranslation,
                ZigClangStmt_getBeginLoc(stmt),
                "TODO implement translation of stmt class {}",
                @tagName(sc),
            );
        },
    }
}

/// Translate each child of `stmt` into `block_node`, threading the scope
/// returned by one statement into the translation of the next.
fn transCompoundStmtInline(
    rp: RestorePoint,
    parent_scope: *Scope,
    stmt: *const ZigClangCompoundStmt,
    block_node: *ast.Node.Block,
) TransError!TransResult {
    var it = ZigClangCompoundStmt_body_begin(stmt);
    const end_it = ZigClangCompoundStmt_body_end(stmt);
    var scope = parent_scope;
    while (it != end_it) : (it += 1) {
        const result = try transStmt(rp, scope, it.*, .unused, .r_value);
        scope = result.child_scope;
        try block_node.statements.push(result.node);
    }
    return TransResult{
        .node = &block_node.base,
        .child_scope = scope,
        .node_scope = scope,
    };
}

/// Translate a C compound statement into a braced Zig block with its own scope.
fn transCompoundStmt(rp: RestorePoint, scope: *Scope, stmt: *const ZigClangCompoundStmt) !TransResult {
    const lbrace_tok = try appendToken(rp.c, .LBrace, "{");
    const block_scope = try Scope.Block.create(rp.c, scope, lbrace_tok);
    const inline_result = try transCompoundStmtInline(rp, &block_scope.base, stmt, block_scope.block_node);
    block_scope.block_node.rbrace = try appendToken(rp.c, .RBrace, "}");
    return TransResult{
        .node = &block_scope.block_node.base,
        .node_scope = inline_result.node_scope,
        .child_scope = inline_result.child_scope,
    };
}

/// Append `decl_node` to the root node's declaration list.
/// `name` is currently not used.
fn addTopLevelDecl(c: *Context, name: []const u8, decl_node: *ast.Node) !void {
    try c.tree.root_node.decls.push(decl_node);
}

/// Translate the type underlying a qualified type.
fn transQualType(rp: RestorePoint, qt: ZigClangQualType, source_loc: ZigClangSourceLocation) TypeError!*ast.Node {
    return transType(rp, ZigClangQualType_getTypePtr(qt), source_loc);
}

/// Type pointer of the canonical form of `qt`.
fn qualTypeCanon(qt: ZigClangQualType) *const ZigClangType {
    const canon = ZigClangQualType_getCanonicalType(qt);
    return ZigClangQualType_getTypePtr(canon);
}

/// Snapshot of the token list and source buffer lengths, so that partially
/// emitted output can be discarded.
const RestorePoint = struct {
    c: *Context,
    token_index: ast.TokenIndex,
    src_buf_index: usize,

    /// Shrink the token list and source buffer back to the recorded lengths.
    fn activate(self: RestorePoint) void {
        self.c.tree.tokens.shrink(self.token_index);
        self.c.source_buffer.shrink(self.src_buf_index);
    }
};

/// Record the current token count and source buffer length.
fn makeRestorePoint(c: *Context) RestorePoint {
    return RestorePoint{
        .c = c,
        .token_index = c.tree.tokens.len,
        .src_buf_index = c.source_buffer.len(),
    };
}

/// Translate a clang type into a Zig type expression node. Unsupported types
/// revert to the restore point, emit a warning and fail with
/// `error.UnsupportedType`.
fn transType(rp: RestorePoint, ty: *const ZigClangType, source_loc: ZigClangSourceLocation) TypeError!*ast.Node {
    switch (ZigClangType_getTypeClass(ty)) {
        .Builtin => {
            const builtin_ty = @ptrCast(*const ZigClangBuiltinType, ty);
            switch (ZigClangBuiltinType_getKind(builtin_ty)) {
                .Void => return appendIdentifier(rp.c, "c_void"),
                .Bool => return appendIdentifier(rp.c, "bool"),
                .Char_U, .UChar, .Char_S, .Char8 => return appendIdentifier(rp.c, "u8"),
                .SChar => return appendIdentifier(rp.c, "i8"),
                .UShort => return appendIdentifier(rp.c, "c_ushort"),
                .UInt => return appendIdentifier(rp.c, "c_uint"),
                .ULong => return appendIdentifier(rp.c, "c_ulong"),
                .ULongLong => return appendIdentifier(rp.c, "c_ulonglong"),
                .Short => return appendIdentifier(rp.c, "c_short"),
                .Int => return appendIdentifier(rp.c, "c_int"),
                .Long => return appendIdentifier(rp.c, "c_long"),
                .LongLong => return appendIdentifier(rp.c, "c_longlong"),
                .UInt128 => return appendIdentifier(rp.c, "u128"),
                .Int128 => return appendIdentifier(rp.c, "i128"),
                .Float => return appendIdentifier(rp.c, "f32"),
                .Double => return appendIdentifier(rp.c, "f64"),
                .Float128 => return appendIdentifier(rp.c, "f128"),
                .Float16 => return appendIdentifier(rp.c, "f16"),
                .LongDouble => return appendIdentifier(rp.c, "c_longdouble"),
                else => return revertAndWarn(rp, error.UnsupportedType, source_loc, "unsupported builtin type"),
            }
        },
        .FunctionProto => {
            const fn_proto_ty = @ptrCast(*const ZigClangFunctionProtoType, ty);
            const fn_proto = try transFnProto(rp, fn_proto_ty, source_loc, null);
            return &fn_proto.base;
        },
        else => {
            // `str` allocates; propagate its failure rather than formatting
            // the error union into the warning text.
            const type_name = try rp.c.str(ZigClangType_getTypeClassName(ty));
            return revertAndWarn(rp, error.UnsupportedType, source_loc, "unsupported type: '{}'", type_name);
        },
    }
}

/// Extra information available when a function type belongs to an actual
/// function declaration rather than appearing as a bare type.
const FnDeclContext = struct {
    fn_name: []const u8,
    has_body: bool,
    storage_class: ZigClangStorageClass,
    scope: **Scope,
    is_export: bool,
};

/// Map clang's calling convention to Zig's; only C and x86 stdcall are supported.
fn transCC(
    rp: RestorePoint,
    fn_ty: *const ZigClangFunctionType,
    source_loc: ZigClangSourceLocation,
) !CallingConvention {
    const clang_cc = ZigClangFunctionType_getCallConv(fn_ty);
    switch (clang_cc) {
        .C => return CallingConvention.C,
        .X86StdCall => return CallingConvention.Stdcall,
        else => return revertAndWarn(rp, error.UnsupportedType, source_loc, "unsupported calling convention: {}", @tagName(clang_cc)),
    }
}

/// Translate a prototyped C function type. Parameters are not implemented
/// yet, so any function with at least one parameter is rejected.
fn transFnProto(
    rp: RestorePoint,
    fn_proto_ty: *const ZigClangFunctionProtoType,
    source_loc: ZigClangSourceLocation,
    fn_decl_context: ?FnDeclContext,
) !*ast.Node.FnProto {
    const fn_ty = @ptrCast(*const ZigClangFunctionType, fn_proto_ty);
    const cc = try transCC(rp, fn_ty, source_loc);
    const is_var_args = ZigClangFunctionProtoType_isVariadic(fn_proto_ty);
    const param_count: usize = ZigClangFunctionProtoType_getNumParams(fn_proto_ty);
    var i: usize = 0;
    // Placeholder loop: bails out on the first parameter until parameter
    // translation exists.
    while (i < param_count) : (i += 1) {
        return revertAndWarn(rp, error.UnsupportedType, source_loc, "TODO: implement parameters for FunctionProto in transType");
    }

    return finishTransFnProto(rp, fn_ty, source_loc, fn_decl_context, is_var_args, cc);
}

/// Translate an unprototyped C function type. It is emitted as var-args
/// unless it belongs to a declaration that is being exported.
fn transFnNoProto(
    rp: RestorePoint,
    fn_ty: *const ZigClangFunctionType,
    source_loc: ZigClangSourceLocation,
    fn_decl_context: ?FnDeclContext,
) !*ast.Node.FnProto {
    const cc = try transCC(rp, fn_ty, source_loc);
    const is_var_args = if (fn_decl_context) |ctx| !ctx.is_export else true;
    return finishTransFnProto(rp, fn_ty, source_loc, fn_decl_context, is_var_args, cc);
}

/// Emit the tokens of a function prototype and build its `FnProto` node.
/// Tokens are appended in source order, so the order of the `appendToken`
/// calls below is significant.
fn finishTransFnProto(
    rp: RestorePoint,
    fn_ty: *const ZigClangFunctionType,
    source_loc: ZigClangSourceLocation,
    fn_decl_context: ?FnDeclContext,
    is_var_args: bool,
    cc: CallingConvention,
) !*ast.Node.FnProto {
    const is_export = if (fn_decl_context) |ctx| ctx.is_export else false;

    // TODO check for always_inline attribute
    // TODO check for align attribute

    // pub extern fn name(...) T
    const pub_tok = try appendToken(rp.c, .Keyword_pub, "pub");
    const cc_tok = if (cc == .Stdcall) try appendToken(rp.c, .Keyword_stdcallcc, "stdcallcc") else null;
    const extern_export_inline_tok = if (is_export)
        try appendToken(rp.c, .Keyword_export, "export")
    else if (cc == .C)
        try appendToken(rp.c, .Keyword_extern, "extern")
    else
        null;
    const fn_tok = try appendToken(rp.c, .Keyword_fn, "fn");
    const name_tok = if (fn_decl_context) |ctx| try appendToken(rp.c, .Identifier, ctx.fn_name) else null;
    // The parentheses are not referenced by the node; they only need to be
    // present in the token stream.
    _ = try appendToken(rp.c, .LParen, "(");
    const var_args_tok = if (is_var_args) try appendToken(rp.c, .Ellipsis3, "...") else null;
    _ = try appendToken(rp.c, .RParen, ")");

    const return_type_node = blk: {
        if (ZigClangFunctionType_getNoReturnAttr(fn_ty)) {
            break :blk try appendIdentifier(rp.c, "noreturn");
        } else {
            const return_qt = ZigClangFunctionType_getReturnType(fn_ty);
            if (ZigClangType_isVoidType(qualTypeCanon(return_qt))) {
                break :blk try appendIdentifier(rp.c, "void");
            } else {
                break :blk transQualType(rp, return_qt, source_loc) catch |err| switch (err) {
                    error.UnsupportedType => {
                        try emitWarning(rp.c, source_loc, "unsupported function proto return type");
                        return err;
                    },
                    error.OutOfMemory => |e| return e,
                };
            }
        }
    };

    const fn_proto = try rp.c.a().create(ast.Node.FnProto);
    fn_proto.* = ast.Node.FnProto{
        .base = ast.Node{ .id = ast.Node.Id.FnProto },
        .doc_comments = null,
        .visib_token = pub_tok,
        .fn_token = fn_tok,
        .name_token = name_tok,
        .params = ast.Node.FnProto.ParamList.init(rp.c.a()),
        .return_type = ast.Node.FnProto.ReturnType{ .Explicit = return_type_node },
        .var_args_token = null, // TODO this field is broken in the AST data model
        .extern_export_inline_token = extern_export_inline_tok,
        .cc_token = cc_tok,
        .async_attr = null,
        .body_node = null,
        .lib_name = null,
        .align_expr = null,
        .section_expr = null,
    };
    if (is_var_args) {
        // The `...` is represented as a parameter node carrying only the
        // var-args token.
        const var_arg_node = try rp.c.a().create(ast.Node.ParamDecl);
        var_arg_node.* = ast.Node.ParamDecl{
            .base = ast.Node{ .id = ast.Node.Id.ParamDecl },
            .doc_comments = null,
            .comptime_token = null,
            .noalias_token = null,
            .name_token = null,
            .type_node = undefined,
            .var_args_token = var_args_tok,
        };
        try fn_proto.params.push(&var_arg_node.base);
    }
    return fn_proto;
}

/// Roll the output back to `rp`, emit a warning comment, and hand back `err`
/// for the caller to return.
fn revertAndWarn(
    rp: RestorePoint,
    err: var,
    source_loc: ZigClangSourceLocation,
    comptime format: []const u8,
    args: ...,
) (@typeOf(err) || error{OutOfMemory}) {
    rp.activate();
    try emitWarning(rp.c, source_loc, format, args);
    return err;
}

/// Append a `// <file:line:column>: warning: ...` line comment token.
fn emitWarning(c: *Context, loc: ZigClangSourceLocation, comptime format: []const u8, args: ...) !void {
    // `locStr` allocates; propagate its failure rather than formatting the
    // error union into the comment.
    _ = try appendTokenFmt(c, .LineComment, "// {}: warning: " ++ format, try c.locStr(loc), args);
}

/// Emit `const <name> = @compileError("<message>");` as a top-level
/// declaration, standing in for a declaration that could not be translated.
fn failDecl(c: *Context, loc: ZigClangSourceLocation, name: []const u8, comptime format: []const u8, args: ...) !void {
    // const name = @compileError(msg);
    const const_tok = try appendToken(c, .Keyword_const, "const");
    const name_tok = try appendToken(c, .Identifier, name);
    const eq_tok = try appendToken(c, .Equal, "=");
    const builtin_tok = try appendToken(c, .Builtin, "@compileError");
    // The opening parenthesis is not referenced by the node below.
    _ = try appendToken(c, .LParen, "(");
    const msg_tok = try appendTokenFmt(c, .StringLiteral, "\"" ++ format ++ "\"", args);
    const rparen_tok = try appendToken(c, .RParen, ")");
    const semi_tok = try appendToken(c, .Semicolon, ";");

    const msg_node = try c.a().create(ast.Node.StringLiteral);
    msg_node.* = ast.Node.StringLiteral{
        .base = ast.Node{ .id = ast.Node.Id.StringLiteral },
        .token = msg_tok,
    };

    const call_node = try c.a().create(ast.Node.BuiltinCall);
    call_node.* = ast.Node.BuiltinCall{
        .base = ast.Node{ .id = ast.Node.Id.BuiltinCall },
        .builtin_token = builtin_tok,
        .params = ast.Node.BuiltinCall.ParamList.init(c.a()),
        .rparen_token = rparen_tok,
    };
    try call_node.params.push(&msg_node.base);

    const var_decl_node = try c.a().create(ast.Node.VarDecl);
    var_decl_node.* = ast.Node.VarDecl{
        .base = ast.Node{ .id = ast.Node.Id.VarDecl },
        .doc_comments = null,
        .visib_token = null,
        .thread_local_token = null,
        .name_token = name_tok,
        .eq_token = eq_tok,
        .mut_token = const_tok,
        .comptime_token = null,
        .extern_export_token = null,
        .lib_name = null,
        .type_node = null,
        .align_node = null,
        .section_node = null,
        .init_node = &call_node.base,
        .semicolon_token = semi_tok,
    };
    try c.tree.root_node.decls.push(&var_decl_node.base);
}

/// Append a token whose text is exactly `bytes`.
fn appendToken(c: *Context, token_id: Token.Id, bytes: []const u8) !ast.TokenIndex {
    return appendTokenFmt(c, token_id, "{}", bytes);
}

/// Format text into the source buffer, register it as a token of kind
/// `token_id`, and follow it with a newline. Returns the new token's index.
/// On failure the buffer and token list are shrunk back to their prior lengths.
fn appendTokenFmt(c: *Context, token_id: Token.Id, comptime format: []const u8, args: ...) !ast.TokenIndex {
    const S = struct {
        fn callback(context: *Context, bytes: []const u8) error{OutOfMemory}!void {
            return context.source_buffer.append(bytes);
        }
    };
    const start_index = c.source_buffer.len();
    errdefer c.source_buffer.shrink(start_index);

    try std.fmt.format(c, error{OutOfMemory}, S.callback, format, args);
    const end_index = c.source_buffer.len();
    const token_index = c.tree.tokens.len;
    const new_token = try c.tree.tokens.addOne();
    errdefer c.tree.tokens.shrink(token_index);

    new_token.* = Token{
        .id = token_id,
        .start = start_index,
        .end = end_index,
    };
    // The separating newline is not part of the token's range.
    try c.source_buffer.appendByte('\n');

    return token_index;
}

/// Append an identifier token and wrap it in an `Identifier` node.
fn appendIdentifier(c: *Context, name: []const u8) !*ast.Node {
    const token_index = try appendToken(c, .Identifier, name);
    const identifier = try c.a().create(ast.Node.Identifier);
    identifier.* = ast.Node.Identifier{
        .base = ast.Node{ .id = ast.Node.Id.Identifier },
        .token = token_index,
    };
    return &identifier.base;
}

/// Release the error messages that `translate` reported through `errors`.
pub fn freeErrors(errors: []ClangErrMsg) void {
    ZigClangErrorMsg_delete(errors.ptr, errors.len);
}