From 9b2ed1fac53319cbddb2752409e166334bb339bf Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 29 Jan 2016 00:28:52 -0700 Subject: [PATCH] parseh understands simple character literal macro --- src/ast_render.cpp | 16 +++++++- src/parseh.cpp | 99 ++++++++++++++++++++++++++++++++++++++++++---- src/tokenizer.cpp | 36 ----------------- src/tokenizer.hpp | 1 - test/run_tests.cpp | 5 +++ 5 files changed, 111 insertions(+), 46 deletions(-) diff --git a/src/ast_render.cpp b/src/ast_render.cpp index 01aed4dfb..efe144ebc 100644 --- a/src/ast_render.cpp +++ b/src/ast_render.cpp @@ -497,6 +497,12 @@ static bool is_node_void(AstNode *node) { return node->type == NodeTypeSymbol && buf_eql_str(&node->data.symbol_expr.symbol, "void"); } +static bool is_printable(uint8_t c) { /* true only for ASCII letters and digits */ + return (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9'); +} + static void render_node(AstRender *ar, AstNode *node) { assert(node->type == NodeTypeRoot || *node->parent_field == node); @@ -601,7 +607,15 @@ static void render_node(AstRender *ar, AstNode *node) { case NodeTypeStringLiteral: zig_panic("TODO"); case NodeTypeCharLiteral: - zig_panic("TODO"); + { + uint8_t c = node->data.char_literal.value; + if (is_printable(c)) { + fprintf(ar->f, "'%c'", c); + } else { + fprintf(ar->f, "'\\x%x'", (int)c); + } + break; + } case NodeTypeSymbol: fprintf(ar->f, "%s", buf_ptr(&node->data.symbol_expr.symbol)); break; diff --git a/src/parseh.cpp b/src/parseh.cpp index e87daa5fc..e61d2e380 100644 --- a/src/parseh.cpp +++ b/src/parseh.cpp @@ -31,6 +31,7 @@ struct Context { HashMap struct_type_table; HashMap enum_type_table; HashMap fn_table; + HashMap macro_table; SourceManager *source_manager; ZigList aliases; }; @@ -132,12 +133,17 @@ static AstNode *create_param_decl_node(Context *c, const char *name, AstNode *ty return node; } +static AstNode *create_char_lit_node(Context *c, uint8_t value) { + AstNode *node = create_node(c, NodeTypeCharLiteral); + node->data.char_literal.value = 
value; + return node; +} + static AstNode *create_num_lit_unsigned(Context *c, uint64_t x) { AstNode *node = create_node(c, NodeTypeNumberLiteral); node->data.number_literal.kind = NumLitUInt; node->data.number_literal.data.x_uint = x; - normalize_parent_ptrs(node); return node; } @@ -713,6 +719,69 @@ static void render_aliases(Context *c) { } } +static int parse_c_char_lit(Buf *value, uint8_t *out_c) { /* accepts exactly 'x' with x a single byte other than backslash or quote; returns 0 and stores x in *out_c, otherwise returns -1 */ + enum State { + StateExpectStartQuot, + StateExpectChar, + StateExpectEndQuot, + StateExpectEnd, + }; + State state = StateExpectStartQuot; + for (int i = 0; i < buf_len(value); i += 1) { + uint8_t c = buf_ptr(value)[i]; + switch (state) { + case StateExpectStartQuot: + switch (c) { + case '\'': + state = StateExpectChar; + break; + default: + return -1; + } + break; + case StateExpectChar: + switch (c) { + case '\\': + case '\'': + return -1; + default: + *out_c = c; + state = StateExpectEndQuot; + } + break; + case StateExpectEndQuot: + switch (c) { + case '\'': + state = StateExpectEnd; + break; + default: + return -1; + } + break; + case StateExpectEnd: + return -1; + } + } + return (state == StateExpectEnd) ? 
0 : -1; +} + +static void process_macro(Context *c, Buf *name, Buf *value) { + // maybe it's a character literal + uint8_t ch; + if (!parse_c_char_lit(value, &ch)) { + c->macro_table.put(name, true); + AstNode *var_node = create_var_decl_node(c, buf_ptr(name), create_char_lit_node(c, ch)); + c->root->data.root.top_level_decls.append(var_node); + return; + } + // maybe it's a string literal + // TODO + // maybe it's a number literal + // TODO + // maybe it's a symbol + // TODO +} + static void process_preprocessor_entities(Context *c, ASTUnit &unit) { for (PreprocessedEntity *entity : unit.getLocalPreprocessingEntities()) { switch (entity->getKind()) { @@ -724,14 +793,27 @@ static void process_preprocessor_entities(Context *c, ASTUnit &unit) { { MacroDefinitionRecord *macro = static_cast<MacroDefinitionRecord *>(entity); const char *name = macro->getName()->getNameStart(); - fprintf(stderr, "definition macro: %s\n", name); SourceRange range = macro->getSourceRange(); SourceLocation begin_loc = range.getBegin(); SourceLocation end_loc = range.getEnd(); - const char *start_c = c->source_manager->getCharacterData(begin_loc); + if (begin_loc == end_loc) { + // this means it is a macro without a value + // we don't care about such things + continue; + } + const char *end_c = c->source_manager->getCharacterData(end_loc); - fprintf(stderr, "source: '%.*s'\n", (int)(end_c - start_c), start_c); + Buf *value = buf_alloc(); + while (*end_c && *end_c != '\n') { + if (end_c[0] == '\\' && end_c[1] == '\n') { /* line continuation: skip both bytes without copying them into value */ + end_c += 2; + } else { + buf_append_char(value, *end_c); + end_c += 1; + } + } + process_macro(c, buf_create_from_str(name), value); } } } @@ -771,10 +853,11 @@ int parse_h_file(ImportTableEntry *import, ZigList *errors, c->import = import; c->errors = errors; c->visib_mod = VisibModPub; - c->root_type_table.init(16); - c->enum_type_table.init(16); - c->struct_type_table.init(16); - c->fn_table.init(16); + c->root_type_table.init(8); + c->enum_type_table.init(8); + c->struct_type_table.init(8); + 
c->fn_table.init(8); + c->macro_table.init(8); char *ZIG_PARSEH_CFLAGS = getenv("ZIG_PARSEH_CFLAGS"); if (ZIG_PARSEH_CFLAGS) { diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 13728835d..f4d909906 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -1140,42 +1140,6 @@ void print_tokens(Buf *buf, ZigList *tokens) { } } -bool is_printable(uint8_t c) { - switch (c) { - default: - return false; - case DIGIT: - case ALPHA: - case '!': - case '#': - case '$': - case '%': - case '&': - case '\'': - case '(': - case ')': - case '*': - case '+': - case ',': - case '-': - case '.': - case '/': - case ':': - case ';': - case '<': - case '=': - case '>': - case '?': - case '@': - case '^': - case '_': - case '`': - case '~': - case ' ': - return true; - } -} - bool valid_symbol_starter(uint8_t c) { switch (c) { case SYMBOL_START: diff --git a/src/tokenizer.hpp b/src/tokenizer.hpp index 7f421d2ed..ba5feac19 100644 --- a/src/tokenizer.hpp +++ b/src/tokenizer.hpp @@ -126,7 +126,6 @@ void tokenize(Buf *buf, Tokenization *out_tokenization); void print_tokens(Buf *buf, ZigList *tokens); -bool is_printable(uint8_t c); int get_digit_value(uint8_t c); const char * token_name(TokenId id); diff --git a/test/run_tests.cpp b/test/run_tests.cpp index bb205031c..ea262a95d 100644 --- a/test/run_tests.cpp +++ b/test/run_tests.cpp @@ -1979,6 +1979,11 @@ struct Foo *some_func(struct Foo *foo, int x); )SOURCE", R"OUTPUT(pub const struct_Foo = u8; pub extern fn some_func(foo: ?&struct_Foo, x: c_int) -> ?&struct_Foo; pub const Foo = struct_Foo;)OUTPUT"); + + + add_parseh_case("#define a char literal", R"SOURCE( +#define A_CHAR 'a' + )SOURCE", R"OUTPUT(pub const A_CHAR = 'a';)OUTPUT"); } static void print_compiler_invocation(TestCase *test_case) {