zig/src/c_tokenizer.cpp

/*
 * Copyright (c) 2016 Andrew Kelley
 *
 * This file is part of zig, which is MIT licensed.
 * See http://opensource.org/licenses/MIT
 */

#include "c_tokenizer.hpp"
#include <inttypes.h>

// Case-label list matching space, tab, vertical tab and form feed.
// The first entry has no `case` keyword and the last has no colon, so the
// macro is meant to be written as `case WHITESPACE_EXCEPT_N:` in a switch.
// Line-ending characters are intentionally not part of this list.
#define WHITESPACE_EXCEPT_N \
         ' ': \
    case '\t': \
    case '\v': \
    case '\f'

// Case-label list matching the decimal digits '1' through '9'.
// Written as `case DIGIT_NON_ZERO:`; '0' is excluded so that a leading zero
// can be given its own switch case.
#define DIGIT_NON_ZERO \
         '1': \
    case '2': \
    case '3': \
    case '4': \
    case '5': \
    case '6': \
    case '7': \
    case '8': \
    case '9'

// Case-label list matching every decimal digit '0' through '9'.
// Written as `case DIGIT:`.
#define DIGIT \
         '0': \
    case DIGIT_NON_ZERO

// Case-label list matching the ASCII letters 'a'-'z' and 'A'-'Z'.
// Written as `case ALPHA:`.
#define ALPHA \
         'a': \
    case 'b': \
    case 'c': \
    case 'd': \
    case 'e': \
    case 'f': \
    case 'g': \
    case 'h': \
    case 'i': \
    case 'j': \
    case 'k': \
    case 'l': \
    case 'm': \
    case 'n': \
    case 'o': \
    case 'p': \
    case 'q': \
    case 'r': \
    case 's': \
    case 't': \
    case 'u': \
    case 'v': \
    case 'w': \
    case 'x': \
    case 'y': \
    case 'z': \
    case 'A': \
    case 'B': \
    case 'C': \
    case 'D': \
    case 'E': \
    case 'F': \
    case 'G': \
    case 'H': \
    case 'I': \
    case 'J': \
    case 'K': \
    case 'L': \
    case 'M': \
    case 'N': \
    case 'O': \
    case 'P': \
    case 'Q': \
    case 'R': \
    case 'S': \
    case 'T': \
    case 'U': \
    case 'V': \
    case 'W': \
    case 'X': \
    case 'Y': \
    case 'Z'

// Case-label list matching characters that may begin an identifier:
// an ASCII letter or an underscore. Written as `case IDENT_START:`.
#define IDENT_START \
    ALPHA: \
    case '_'

// Case-label list matching characters that may continue an identifier:
// an ASCII letter, an underscore or a decimal digit. Written as `case IDENT:`.
#define IDENT \
    IDENT_START: \
    case DIGIT

// Case-label list matching carriage return and line feed, so that both Unix
// and Windows line endings are recognized. Written as `case LINE_ENDING:`.
#define LINE_ENDING \
         '\r': \
    case '\n'

// Appends a fresh token of kind `id` to ctok->tokens and makes it the token
// under construction (ctok->cur_tok). No other token may be open when this is
// called. The payload is prepared according to the kind: string literals and
// symbols get an empty Buf, integer literals start at zero with no suffix,
// and every other kind is left without payload initialization.
static void begin_token(CTokenize *ctok, CTokId id) {
    assert(ctok->cur_tok == nullptr);

    ctok->tokens.add_one();
    auto *tok = &ctok->tokens.last();
    tok->id = id;
    ctok->cur_tok = tok;

    // Set only for token kinds whose payload is a text buffer.
    Buf *text = nullptr;

    switch (id) {
        case CTokIdStrLit:
            text = &tok->data.str_lit;
            break;
        case CTokIdSymbol:
            text = &tok->data.symbol;
            break;
        case CTokIdNumLitInt:
            tok->data.num_lit_int.x = 0;
            tok->data.num_lit_int.suffix = CNumLitSuffixNone;
            break;
        case CTokIdCharLit:
        case CTokIdNumLitFloat:
        case CTokIdMinus:
        case CTokIdLParen:
        case CTokIdRParen:
        case CTokIdEOF:
            break;
    }

    if (text != nullptr) {
        // Zero the raw storage first, then size the buffer to length 0.
        memset(text, 0, sizeof(Buf));
        buf_resize(text, 0);
    }
}

// Finishes the token under construction by clearing ctok->cur_tok.
// The token itself remains in ctok->tokens.
static void end_token(CTokenize *ctok) {
    ctok->cur_tok = nullptr;
}

// Records that tokenization failed by setting ctok->error.
static void mark_error(CTokenize *ctok) {
    ctok->error = true;
}

// Delivers one decoded character `c` to the open token and selects the state
// that follows it. A character literal stores `c` as its value and then
// expects the closing quote; a string literal appends `c` and keeps scanning
// the string. Any other open token kind is a programming error.
static void add_char(CTokenize *ctok, uint8_t c) {
    assert(ctok->cur_tok);

    switch (ctok->cur_tok->id) {
        case CTokIdCharLit:
            ctok->cur_tok->data.char_lit = c;
            ctok->state = CTokStateExpectEndQuot;
            return;
        case CTokIdStrLit:
            buf_append_char(&ctok->cur_tok->data.str_lit, c);
            ctok->state = CTokStateString;
            return;
        default:
            zig_unreachable();
    }
}

// Folds one hexadecimal digit into the integer literal being built and also
// appends it, as a lowercase character, to the scratch buffer ctok->buf.
// `value` indexes a 16-entry digit table, so it is expected to be in 0..15.
static void hex_digit(CTokenize *ctok, uint8_t value) {
    static const uint8_t lowercase_digits[] = "0123456789abcdef";

    auto &literal = ctok->cur_tok->data.num_lit_int;
    // TODO @mul_with_overflow
    literal.x *= 16;
    // TODO @add_with_overflow
    literal.x += value;

    buf_append_char(&ctok->buf, lowercase_digits[value]);
}

// Completes a floating-point literal: converts the text accumulated in
// ctok->buf with strtod, stores the result in the open token, closes the
// token and returns the tokenizer to the start state.
static void end_float(CTokenize *ctok) {
    // TODO detect errors, overflow, and underflow
    ctok->cur_tok->data.num_lit_float = strtod(buf_ptr(&ctok->buf), nullptr);

    ctok->state = CTokStateStart;
    end_token(ctok);
}

// Tokenizes the text of a C macro body.
//
// `c` points at NUL-terminated text. The token list, error flag, open-token
// pointer and scratch buffer of `ctok` are reset first; the text is then
// scanned one byte at a time by a state machine keyed on ctok->state.
// Scanning stops at the NUL terminator, at a '\r' or '\n' seen in the start
// state, or at the '\n' that closes a `//` comment.
//
// On success ctok->tokens holds the recognized tokens followed by one
// CTokIdEOF token and ctok->error is false. On input the tokenizer does not
// accept, ctok->error is set and the function returns immediately, leaving
// the token list incomplete. Unicode escapes (\u, \U) and escape values that
// do not fit in one byte currently abort through zig_panic.
//
// Several states use the idiom `c -= 1; ...; continue;`: the loop increment
// undoes the decrement, so the same byte is examined again in the new state.
void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) {
    ctok->tokens.resize(0);
    ctok->state = CTokStateStart;
    ctok->error = false;
    ctok->cur_tok = nullptr;

    buf_resize(&ctok->buf, 0);

    for (; *c; c += 1) {
        switch (ctok->state) {
            // Between tokens: decide what the next token is.
            case CTokStateStart:
                switch (*c) {
                    case WHITESPACE_EXCEPT_N:
                        break;
                    case '\'':
                        ctok->state = CTokStateExpectChar;
                        begin_token(ctok, CTokIdCharLit);
                        break;
                    case '\"':
                        ctok->state = CTokStateString;
                        begin_token(ctok, CTokIdStrLit);
                        break;
                    case '/':
                        ctok->state = CTokStateOpenComment;
                        break;
                    case '\\':
                        ctok->state = CTokStateBackslash;
                        break;
                    case LINE_ENDING:
                        goto found_end_of_macro;
                    case IDENT_START:
                        ctok->state = CTokStateIdentifier;
                        begin_token(ctok, CTokIdSymbol);
                        buf_append_char(&ctok->cur_tok->data.symbol, *c);
                        break;
                    case DIGIT_NON_ZERO:
                        ctok->state = CTokStateDecimal;
                        begin_token(ctok, CTokIdNumLitInt);
                        ctok->cur_tok->data.num_lit_int.x = *c - '0';
                        // ctok->buf collects the literal's text in case it
                        // turns out to be a float and must go through strtod.
                        buf_resize(&ctok->buf, 0);
                        buf_append_char(&ctok->buf, *c);
                        break;
                    case '0':
                        ctok->state = CTokStateGotZero;
                        begin_token(ctok, CTokIdNumLitInt);
                        ctok->cur_tok->data.num_lit_int.x = 0;
                        buf_resize(&ctok->buf, 0);
                        buf_append_char(&ctok->buf, '0');
                        break;
                    case '.':
                        // A float written without an integer part, e.g. ".5".
                        begin_token(ctok, CTokIdNumLitFloat);
                        ctok->state = CTokStateFloat;
                        buf_init_from_str(&ctok->buf, "0.");
                        break;
                    case '(':
                        begin_token(ctok, CTokIdLParen);
                        end_token(ctok);
                        break;
                    case ')':
                        begin_token(ctok, CTokIdRParen);
                        end_token(ctok);
                        break;
                    case '-':
                        begin_token(ctok, CTokIdMinus);
                        end_token(ctok);
                        break;
                    default:
                        return mark_error(ctok);
                }
                break;
            // Fraction digits of a float, after the '.'.
            case CTokStateFloat:
                switch (*c) {
                    case 'e':
                    case 'E':
                        buf_append_char(&ctok->buf, 'e');
                        ctok->state = CTokStateExpSign;
                        break;
                    case 'f':
                    case 'F':
                    case 'l':
                    case 'L':
                        // The type suffix is consumed and otherwise ignored.
                        end_float(ctok);
                        break;
                    case DIGIT:
                        buf_append_char(&ctok->buf, *c);
                        break;
                    default:
                        c -= 1;
                        end_float(ctok);
                        continue;
                }
                break;
            // Directly after the exponent marker: optional sign or first digit.
            case CTokStateExpSign:
                switch (*c) {
                    case '+':
                    case '-':
                        ctok->state = CTokStateFloatExpFirst;
                        buf_append_char(&ctok->buf, *c);
                        break;
                    case DIGIT:
                        ctok->state = CTokStateFloatExp;
                        buf_append_char(&ctok->buf, *c);
                        break;
                    default:
                        return mark_error(ctok);
                }
                break;
            // After an exponent sign: at least one digit is required.
            case CTokStateFloatExpFirst:
                switch (*c) {
                    case DIGIT:
                        buf_append_char(&ctok->buf, *c);
                        ctok->state = CTokStateFloatExp;
                        break;
                    default:
                        return mark_error(ctok);
                }
                break;
            // Remaining exponent digits.
            case CTokStateFloatExp:
                switch (*c) {
                    case DIGIT:
                        buf_append_char(&ctok->buf, *c);
                        break;
                    case 'f':
                    case 'F':
                    case 'l':
                    case 'L':
                        end_float(ctok);
                        break;
                    default:
                        c -= 1;
                        end_float(ctok);
                        continue;
                }
                break;
            // Decimal integer literal (may still turn into a float on '.').
            case CTokStateDecimal:
                switch (*c) {
                    case DIGIT:
                        buf_append_char(&ctok->buf, *c);

                        // TODO @mul_with_overflow
                        ctok->cur_tok->data.num_lit_int.x *= 10;
                        // TODO @add_with_overflow
                        ctok->cur_tok->data.num_lit_int.x += *c - '0';
                        break;
                    case '\'':
                        // Digit separator: skipped.
                        break;
                    case 'u':
                    case 'U':
                        ctok->state = CTokStateNumLitIntSuffixU;
                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixU;
                        break;
                    case 'l':
                    case 'L':
                        ctok->state = CTokStateNumLitIntSuffixL;
                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixL;
                        break;
                    case '.':
                        buf_append_char(&ctok->buf, '.');
                        ctok->cur_tok->id = CTokIdNumLitFloat;
                        ctok->state = CTokStateFloat;
                        break;
                    default:
                        c -= 1;
                        end_token(ctok);
                        ctok->state = CTokStateStart;
                        continue;
                }
                break;
            // A literal that started with '0': hex, float, or octal.
            case CTokStateGotZero:
                switch (*c) {
                    case 'x':
                    case 'X':
                        // Keep the prefix in the scratch buffer so that a hex
                        // float such as 0x1p3 reaches strtod in hex notation.
                        buf_append_char(&ctok->buf, 'x');
                        ctok->state = CTokStateHex;
                        break;
                    case '.':
                        ctok->state = CTokStateFloat;
                        ctok->cur_tok->id = CTokIdNumLitFloat;
                        buf_append_char(&ctok->buf, '.');
                        break;
                    default:
                        c -= 1;
                        ctok->state = CTokStateOctal;
                        continue;
                }
                break;
            // Octal integer literal; also reached for a plain "0".
            case CTokStateOctal:
                switch (*c) {
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                        // TODO @mul_with_overflow
                        ctok->cur_tok->data.num_lit_int.x *= 8;
                        // TODO @add_with_overflow
                        ctok->cur_tok->data.num_lit_int.x += *c - '0';
                        break;
                    case '8':
                    case '9':
                        return mark_error(ctok);
                    case '\'':
                        break;
                    case 'u':
                    case 'U':
                        // Integer suffixes apply to octal literals (and to a
                        // bare 0) exactly as they do to decimal and hex ones.
                        ctok->state = CTokStateNumLitIntSuffixU;
                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixU;
                        break;
                    case 'l':
                    case 'L':
                        ctok->state = CTokStateNumLitIntSuffixL;
                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixL;
                        break;
                    default:
                        c -= 1;
                        end_token(ctok);
                        ctok->state = CTokStateStart;
                        continue;
                }
                break;
            // Hexadecimal literal, after the "0x" prefix.
            case CTokStateHex:
                switch (*c) {
                    case '0':
                        hex_digit(ctok, 0);
                        break;
                    case '1':
                        hex_digit(ctok, 1);
                        break;
                    case '2':
                        hex_digit(ctok, 2);
                        break;
                    case '3':
                        hex_digit(ctok, 3);
                        break;
                    case '4':
                        hex_digit(ctok, 4);
                        break;
                    case '5':
                        hex_digit(ctok, 5);
                        break;
                    case '6':
                        hex_digit(ctok, 6);
                        break;
                    case '7':
                        hex_digit(ctok, 7);
                        break;
                    case '8':
                        hex_digit(ctok, 8);
                        break;
                    case '9':
                        hex_digit(ctok, 9);
                        break;
                    case 'a':
                    case 'A':
                        hex_digit(ctok, 10);
                        break;
                    case 'b':
                    case 'B':
                        hex_digit(ctok, 11);
                        break;
                    case 'c':
                    case 'C':
                        hex_digit(ctok, 12);
                        break;
                    case 'd':
                    case 'D':
                        hex_digit(ctok, 13);
                        break;
                    case 'e':
                    case 'E':
                        hex_digit(ctok, 14);
                        break;
                    case 'f':
                    case 'F':
                        hex_digit(ctok, 15);
                        break;
                    case 'p':
                    case 'P':
                        // Binary exponent of a hex float. The marker must be
                        // in the scratch buffer, otherwise strtod would read
                        // mantissa and exponent digits as one decimal number.
                        buf_append_char(&ctok->buf, 'p');
                        ctok->cur_tok->id = CTokIdNumLitFloat;
                        ctok->state = CTokStateExpSign;
                        break;
                    case 'u':
                    case 'U':
                        // marks the number literal as unsigned
                        ctok->state = CTokStateNumLitIntSuffixU;
                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixU;
                        break;
                    case 'l':
                    case 'L':
                        // marks the number literal as long
                        ctok->state = CTokStateNumLitIntSuffixL;
                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixL;
                        break;
                    default:
                        c -= 1;
                        end_token(ctok);
                        ctok->state = CTokStateStart;
                        continue;
                }
                break;
            // Seen "u": an optional "l" may follow.
            case CTokStateNumLitIntSuffixU:
                switch (*c) {
                    case 'l':
                    case 'L':
                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLU;
                        ctok->state = CTokStateNumLitIntSuffixUL;
                        break;
                    default:
                        c -= 1;
                        end_token(ctok);
                        ctok->state = CTokStateStart;
                        continue;
                }
                break;
            // Seen "l": a second "l" or a "u" may follow.
            case CTokStateNumLitIntSuffixL:
                switch (*c) {
                    case 'l':
                    case 'L':
                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLL;
                        ctok->state = CTokStateNumLitIntSuffixLL;
                        break;
                    case 'u':
                    case 'U':
                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLU;
                        end_token(ctok);
                        ctok->state = CTokStateStart;
                        break;
                    default:
                        c -= 1;
                        end_token(ctok);
                        ctok->state = CTokStateStart;
                        continue;
                }
                break;
            // Seen "ll": an optional "u" may follow.
            case CTokStateNumLitIntSuffixLL:
                switch (*c) {
                    case 'u':
                    case 'U':
                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLLU;
                        end_token(ctok);
                        ctok->state = CTokStateStart;
                        break;
                    default:
                        c -= 1;
                        end_token(ctok);
                        ctok->state = CTokStateStart;
                        continue;
                }
                break;
            // Seen "ul": an optional second "l" may follow.
            case CTokStateNumLitIntSuffixUL:
                switch (*c) {
                    case 'l':
                    case 'L':
                        ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLLU;
                        end_token(ctok);
                        ctok->state = CTokStateStart;
                        break;
                    default:
                        c -= 1;
                        end_token(ctok);
                        ctok->state = CTokStateStart;
                        continue;
                }
                break;
            // Inside an identifier.
            case CTokStateIdentifier:
                switch (*c) {
                    case IDENT:
                        buf_append_char(&ctok->cur_tok->data.symbol, *c);
                        break;
                    default:
                        c -= 1;
                        end_token(ctok);
                        ctok->state = CTokStateStart;
                        continue;
                }
                break;
            // Inside a string literal.
            case CTokStateString:
                switch (*c) {
                    case '\\':
                        ctok->state = CTokStateCharEscape;
                        break;
                    case '\"':
                        end_token(ctok);
                        ctok->state = CTokStateStart;
                        break;
                    default:
                        buf_append_char(&ctok->cur_tok->data.str_lit, *c);
                }
                break;
            // Directly after the opening quote of a character literal.
            case CTokStateExpectChar:
                switch (*c) {
                    case '\\':
                        ctok->state = CTokStateCharEscape;
                        break;
                    case '\'':
                        // Empty character literal.
                        return mark_error(ctok);
                    default:
                        ctok->cur_tok->data.char_lit = *c;
                        ctok->state = CTokStateExpectEndQuot;
                }
                break;
            // After a backslash inside a string or character literal.
            // add_char picks the follow-up state from the open token's kind.
            case CTokStateCharEscape:
                switch (*c) {
                    case '\'':
                    case '"':
                    case '?':
                    case '\\':
                        add_char(ctok, *c);
                        break;
                    case 'a':
                        add_char(ctok, '\a');
                        break;
                    case 'b':
                        add_char(ctok, '\b');
                        break;
                    case 'f':
                        add_char(ctok, '\f');
                        break;
                    case 'n':
                        add_char(ctok, '\n');
                        break;
                    case 'r':
                        add_char(ctok, '\r');
                        break;
                    case 't':
                        add_char(ctok, '\t');
                        break;
                    case 'v':
                        add_char(ctok, '\v');
                        break;
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                        ctok->state = CTokStateStrOctal;
                        ctok->cur_char = (uint8_t)(*c - '0');
                        ctok->octal_index = 1;
                        break;
                    case 'x':
                        ctok->state = CTokStateStrHex;
                        ctok->cur_char = 0;
                        break;
                    case 'u':
                        zig_panic("TODO unicode");
                        break;
                    case 'U':
                        zig_panic("TODO Unicode");
                        break;
                    default:
                        return mark_error(ctok);
                }
                break;
            // Digits of a \x escape, accumulated in ctok->cur_char.
            case CTokStateStrHex: {
                uint8_t value = 0;
                switch (*c) {
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    case '8':
                    case '9':
                        value = *c - '0';
                        break;
                    case 'a':
                    case 'b':
                    case 'c':
                    case 'd':
                    case 'e':
                    case 'f':
                        value = (*c - 'a') + 10;
                        break;
                    case 'A':
                    case 'B':
                    case 'C':
                    case 'D':
                    case 'E':
                    case 'F':
                        value = (*c - 'A') + 10;
                        break;
                    default:
                        // First non-hex byte ends the escape.
                        c -= 1;
                        add_char(ctok, ctok->cur_char);
                        continue;
                }
                // TODO @mul_with_overflow
                if (((long)ctok->cur_char) * 16 >= 256) {
                    zig_panic("TODO str hex mul overflow");
                }
                ctok->cur_char = (uint8_t)(ctok->cur_char * (uint8_t)16);
                // TODO @add_with_overflow
                if (((long)ctok->cur_char) + (long)(value) >= 256) {
                    zig_panic("TODO str hex add overflow");
                }
                ctok->cur_char = (uint8_t)(ctok->cur_char + value);
                break;
            }
            // Digits of an octal escape; at most three digits are consumed.
            case CTokStateStrOctal:
                switch (*c) {
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                        // TODO @mul_with_overflow
                        if (((long)ctok->cur_char) * 8 >= 256) {
                            zig_panic("TODO");
                        }
                        ctok->cur_char = (uint8_t)(ctok->cur_char * (uint8_t)8);
                        // TODO @add_with_overflow
                        if (((long)ctok->cur_char) + (long)(*c - '0') >= 256) {
                            zig_panic("TODO");
                        }
                        ctok->cur_char = (uint8_t)(ctok->cur_char + (uint8_t)(*c - '0'));
                        ctok->octal_index += 1;
                        if (ctok->octal_index == 3) {
                            add_char(ctok, ctok->cur_char);
                        }
                        break;
                    default:
                        c -= 1;
                        add_char(ctok, ctok->cur_char);
                        continue;
                }
                break;
            // A character literal has its value; only the closing quote may follow.
            case CTokStateExpectEndQuot:
                switch (*c) {
                    case '\'':
                        end_token(ctok);
                        ctok->state = CTokStateStart;
                        break;
                    default:
                        return mark_error(ctok);
                }
                break;
            // Seen '/': it must open a comment.
            case CTokStateOpenComment:
                switch (*c) {
                    case '/':
                        ctok->state = CTokStateLineComment;
                        break;
                    case '*':
                        ctok->state = CTokStateComment;
                        break;
                    default:
                        return mark_error(ctok);
                }
                break;
            // Inside a // comment: it runs to the newline, which also ends the macro.
            case CTokStateLineComment:
                if (*c == '\n') {
                    ctok->state = CTokStateStart;
                    goto found_end_of_macro;
                }
                break;
            // Inside a block comment.
            case CTokStateComment:
                switch (*c) {
                    case '*':
                        ctok->state = CTokStateCommentStar;
                        break;
                    default:
                        break;
                }
                break;
            // Inside a block comment, directly after a '*'.
            case CTokStateCommentStar:
                switch (*c) {
                    case '/':
                        ctok->state = CTokStateStart;
                        break;
                    case '*':
                        break;
                    default:
                        ctok->state = CTokStateComment;
                        break;
                }
                break;
            // Backslash outside a literal: only accepted directly before '\n'.
            case CTokStateBackslash:
                switch (*c) {
                    case '\n':
                        ctok->state = CTokStateStart;
                        break;
                    default:
                        return mark_error(ctok);
                }
                break;
        }
    }
found_end_of_macro:

    // Input ended: close a token that may legally end here, or report an
    // error if the text stopped in the middle of a construct.
    switch (ctok->state) {
        case CTokStateStart:
            break;
        case CTokStateIdentifier:
        case CTokStateDecimal:
        case CTokStateHex:
        case CTokStateOctal:
        case CTokStateGotZero:
        case CTokStateNumLitIntSuffixU:
        case CTokStateNumLitIntSuffixL:
        case CTokStateNumLitIntSuffixUL:
        case CTokStateNumLitIntSuffixLL:
            end_token(ctok);
            break;
        case CTokStateFloat:
        case CTokStateFloatExp:
            end_float(ctok);
            break;
        case CTokStateExpectChar:
        case CTokStateExpectEndQuot:
        case CTokStateOpenComment:
        case CTokStateLineComment:
        case CTokStateComment:
        case CTokStateCommentStar:
        case CTokStateCharEscape:
        case CTokStateBackslash:
        case CTokStateString:
        case CTokStateExpSign:
        case CTokStateFloatExpFirst:
        case CTokStateStrHex:
        case CTokStateStrOctal:
            return mark_error(ctok);
    }

    assert(ctok->cur_tok == nullptr);

    begin_token(ctok, CTokIdEOF);
    end_token(ctok);
}
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`/*`
			`* Copyright (c) 2016 Andrew Kelley`
			`*`
			`* This file is part of zig, which is MIT licensed.`
			`* See http://opensource.org/licenses/MIT`
			`*/`

			`#include "c_tokenizer.hpp"`
			`#include <inttypes.h>`

			`#define WHITESPACE_EXCEPT_N \`
			`' ': \`
			`case '\t': \`
			`case '\v': \`
			`case '\f'`

			`#define DIGIT_NON_ZERO \`
			`'1': \`
			`case '2': \`
			`case '3': \`
			`case '4': \`
			`case '5': \`
			`case '6': \`
			`case '7': \`
			`case '8': \`
			`case '9'`

			`#define DIGIT \`
			`'0': \`
			`case DIGIT_NON_ZERO`

			`#define ALPHA \`
			`'a': \`
			`case 'b': \`
			`case 'c': \`
			`case 'd': \`
			`case 'e': \`
			`case 'f': \`
			`case 'g': \`
			`case 'h': \`
			`case 'i': \`
			`case 'j': \`
			`case 'k': \`
			`case 'l': \`
			`case 'm': \`
			`case 'n': \`
			`case 'o': \`
			`case 'p': \`
			`case 'q': \`
			`case 'r': \`
			`case 's': \`
			`case 't': \`
			`case 'u': \`
			`case 'v': \`
			`case 'w': \`
			`case 'x': \`
			`case 'y': \`
			`case 'z': \`
			`case 'A': \`
			`case 'B': \`
			`case 'C': \`
			`case 'D': \`
			`case 'E': \`
			`case 'F': \`
			`case 'G': \`
			`case 'H': \`
			`case 'I': \`
			`case 'J': \`
			`case 'K': \`
			`case 'L': \`
			`case 'M': \`
			`case 'N': \`
			`case 'O': \`
			`case 'P': \`
			`case 'Q': \`
			`case 'R': \`
			`case 'S': \`
			`case 'T': \`
			`case 'U': \`
			`case 'V': \`
			`case 'W': \`
			`case 'X': \`
			`case 'Y': \`
			`case 'Z'`

			`#define IDENT_START \`
			`ALPHA: \`
			`case '_'`

			`#define IDENT \`
			`IDENT_START: \`
			`case DIGIT`

Add support for windows line endings with c macros 2017-11-04 05:25:00 +08:00			`#define LINE_ENDING \`
			`'\r': \`
			`case '\n'`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00
			`static void begin_token(CTokenize *ctok, CTokId id) {`
			`assert(ctok->cur_tok == nullptr);`
			`ctok->tokens.add_one();`
			`ctok->cur_tok = &ctok->tokens.last();`
			`ctok->cur_tok->id = id;`

			`switch (id) {`
			`case CTokIdStrLit:`
			`memset(&ctok->cur_tok->data.str_lit, 0, sizeof(Buf));`
			`buf_resize(&ctok->cur_tok->data.str_lit, 0);`
			`break;`
			`case CTokIdSymbol:`
			`memset(&ctok->cur_tok->data.symbol, 0, sizeof(Buf));`
			`buf_resize(&ctok->cur_tok->data.symbol, 0);`
			`break;`
			`case CTokIdNumLitInt:`
parseh: fix not recognizing integer suffixes on hex numbers 2017-06-17 02:34:38 +08:00			`ctok->cur_tok->data.num_lit_int.x = 0;`
			`ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixNone;`
			`break;`
			`case CTokIdCharLit:`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`case CTokIdNumLitFloat:`
			`case CTokIdMinus:`
support parens in C macros closes #454 2017-09-10 12:20:09 +08:00			`case CTokIdLParen:`
			`case CTokIdRParen:`
			`case CTokIdEOF:`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`break;`
			`}`
			`}`

			`static void end_token(CTokenize *ctok) {`
			`ctok->cur_tok = nullptr;`
			`}`

			`static void mark_error(CTokenize *ctok) {`
			`ctok->error = true;`
			`}`

			`static void add_char(CTokenize *ctok, uint8_t c) {`
			`assert(ctok->cur_tok);`
			`if (ctok->cur_tok->id == CTokIdCharLit) {`
			`ctok->cur_tok->data.char_lit = c;`
			`ctok->state = CTokStateExpectEndQuot;`
			`} else if (ctok->cur_tok->id == CTokIdStrLit) {`
			`buf_append_char(&ctok->cur_tok->data.str_lit, c);`
			`ctok->state = CTokStateString;`
			`} else {`
			`zig_unreachable();`
			`}`
			`}`

			`static void hex_digit(CTokenize *ctok, uint8_t value) {`
			`// TODO @mul_with_overflow`
parseh: fix not recognizing integer suffixes on hex numbers 2017-06-17 02:34:38 +08:00			`ctok->cur_tok->data.num_lit_int.x *= 16;`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`// TODO @add_with_overflow`
parseh: fix not recognizing integer suffixes on hex numbers 2017-06-17 02:34:38 +08:00			`ctok->cur_tok->data.num_lit_int.x += value;`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00
			`static const uint8_t hex_digit[] = "0123456789abcdef";`
			`buf_append_char(&ctok->buf, hex_digit[value]);`
			`}`

			`static void end_float(CTokenize *ctok) {`
			`// TODO detect errors, overflow, and underflow`
			`double value = strtod(buf_ptr(&ctok->buf), nullptr);`

			`ctok->cur_tok->data.num_lit_float = value;`

			`end_token(ctok);`
			`ctok->state = CTokStateStart;`

			`}`

			`void tokenize_c_macro(CTokenize ctok, const uint8_t c) {`
			`ctok->tokens.resize(0);`
			`ctok->state = CTokStateStart;`
			`ctok->error = false;`
			`ctok->cur_tok = nullptr;`

			`buf_resize(&ctok->buf, 0);`

			`for (; *c; c += 1) {`
			`switch (ctok->state) {`
			`case CTokStateStart:`
			`switch (*c) {`
			`case WHITESPACE_EXCEPT_N:`
			`break;`
			`case '\'':`
			`ctok->state = CTokStateExpectChar;`
			`begin_token(ctok, CTokIdCharLit);`
			`break;`
			`case '\"':`
			`ctok->state = CTokStateString;`
			`begin_token(ctok, CTokIdStrLit);`
			`break;`
			`case '/':`
			`ctok->state = CTokStateOpenComment;`
			`break;`
			`case '\\':`
			`ctok->state = CTokStateBackslash;`
			`break;`
Add support for windows line endings with c macros 2017-11-04 05:25:00 +08:00			`case LINE_ENDING:`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`goto found_end_of_macro;`
			`case IDENT_START:`
			`ctok->state = CTokStateIdentifier;`
			`begin_token(ctok, CTokIdSymbol);`
			`buf_append_char(&ctok->cur_tok->data.symbol, *c);`
			`break;`
			`case DIGIT_NON_ZERO:`
			`ctok->state = CTokStateDecimal;`
			`begin_token(ctok, CTokIdNumLitInt);`
parseh: fix not recognizing integer suffixes on hex numbers 2017-06-17 02:34:38 +08:00			`ctok->cur_tok->data.num_lit_int.x = *c - '0';`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`buf_resize(&ctok->buf, 0);`
			`buf_append_char(&ctok->buf, *c);`
			`break;`
			`case '0':`
			`ctok->state = CTokStateGotZero;`
			`begin_token(ctok, CTokIdNumLitInt);`
parseh: fix not recognizing integer suffixes on hex numbers 2017-06-17 02:34:38 +08:00			`ctok->cur_tok->data.num_lit_int.x = 0;`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`buf_resize(&ctok->buf, 0);`
			`buf_append_char(&ctok->buf, '0');`
			`break;`
			`case '.':`
			`begin_token(ctok, CTokIdNumLitFloat);`
			`ctok->state = CTokStateFloat;`
			`buf_init_from_str(&ctok->buf, "0.");`
			`break;`
support parens in C macros closes #454 2017-09-10 12:20:09 +08:00			`case '(':`
			`begin_token(ctok, CTokIdLParen);`
			`end_token(ctok);`
			`break;`
			`case ')':`
			`begin_token(ctok, CTokIdRParen);`
			`end_token(ctok);`
			`break;`
			`case '-':`
			`begin_token(ctok, CTokIdMinus);`
			`end_token(ctok);`
			`break;`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`default:`
			`return mark_error(ctok);`
			`}`
			`break;`
			`case CTokStateFloat:`
			`switch (*c) {`
			`case 'e':`
			`case 'E':`
			`buf_append_char(&ctok->buf, 'e');`
			`ctok->state = CTokStateExpSign;`
			`break;`
			`case 'f':`
			`case 'F':`
			`case 'l':`
			`case 'L':`
			`end_float(ctok);`
			`break;`
			`case DIGIT:`
			`buf_append_char(&ctok->buf, *c);`
			`break;`
			`default:`
			`c -= 1;`
			`end_float(ctok);`
			`continue;`
			`}`
			`break;`
			`case CTokStateExpSign:`
			`switch (*c) {`
			`case '+':`
			`case '-':`
			`ctok->state = CTokStateFloatExpFirst;`
			`buf_append_char(&ctok->buf, *c);`
			`break;`
			`case DIGIT:`
			`ctok->state = CTokStateFloatExp;`
			`buf_append_char(&ctok->buf, *c);`
			`break;`
			`default:`
			`return mark_error(ctok);`
			`}`
			`break;`
			`case CTokStateFloatExpFirst:`
			`switch (*c) {`
			`case DIGIT:`
			`buf_append_char(&ctok->buf, *c);`
			`ctok->state = CTokStateFloatExp;`
			`break;`
			`default:`
			`return mark_error(ctok);`
			`}`
			`break;`
			`case CTokStateFloatExp:`
			`switch (*c) {`
			`case DIGIT:`
			`buf_append_char(&ctok->buf, *c);`
			`break;`
			`case 'f':`
			`case 'F':`
			`case 'l':`
			`case 'L':`
			`end_float(ctok);`
			`break;`
			`default:`
			`c -= 1;`
			`end_float(ctok);`
			`continue;`
			`}`
			`break;`
			`case CTokStateDecimal:`
			`switch (*c) {`
			`case DIGIT:`
			`buf_append_char(&ctok->buf, *c);`

			`// TODO @mul_with_overflow`
parseh: fix not recognizing integer suffixes on hex numbers 2017-06-17 02:34:38 +08:00			`ctok->cur_tok->data.num_lit_int.x *= 10;`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`// TODO @add_with_overflow`
parseh: fix not recognizing integer suffixes on hex numbers 2017-06-17 02:34:38 +08:00			`ctok->cur_tok->data.num_lit_int.x += *c - '0';`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`break;`
			`case '\'':`
			`break;`
			`case 'u':`
			`case 'U':`
parseh: fix not recognizing integer suffixes on hex numbers 2017-06-17 02:34:38 +08:00			`ctok->state = CTokStateNumLitIntSuffixU;`
			`ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixU;`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`break;`
			`case 'l':`
			`case 'L':`
parseh: fix not recognizing integer suffixes on hex numbers 2017-06-17 02:34:38 +08:00			`ctok->state = CTokStateNumLitIntSuffixL;`
			`ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixL;`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`break;`
			`case '.':`
			`buf_append_char(&ctok->buf, '.');`
			`ctok->cur_tok->id = CTokIdNumLitFloat;`
			`ctok->state = CTokStateFloat;`
			`break;`
			`default:`
			`c -= 1;`
			`end_token(ctok);`
			`ctok->state = CTokStateStart;`
			`continue;`
			`}`
			`break;`
			`case CTokStateGotZero:`
			`switch (*c) {`
			`case 'x':`
			`case 'X':`
			`ctok->state = CTokStateHex;`
			`break;`
			`case '.':`
			`ctok->state = CTokStateFloat;`
			`ctok->cur_tok->id = CTokIdNumLitFloat;`
			`buf_append_char(&ctok->buf, '.');`
			`break;`
			`default:`
			`c -= 1;`
			`ctok->state = CTokStateOctal;`
			`continue;`
			`}`
			`break;`
			`case CTokStateOctal:`
			`switch (*c) {`
			`case '0':`
			`case '1':`
			`case '2':`
			`case '3':`
			`case '4':`
			`case '5':`
			`case '6':`
			`case '7':`
			`// TODO @mul_with_overflow`
parseh: fix not recognizing integer suffixes on hex numbers 2017-06-17 02:34:38 +08:00			`ctok->cur_tok->data.num_lit_int.x *= 8;`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`// TODO @add_with_overflow`
parseh: fix not recognizing integer suffixes on hex numbers 2017-06-17 02:34:38 +08:00			`ctok->cur_tok->data.num_lit_int.x += *c - '0';`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`break;`
			`case '8':`
			`case '9':`
			`return mark_error(ctok);`
			`case '\'':`
			`break;`
			`default:`
			`c -= 1;`
			`end_token(ctok);`
			`ctok->state = CTokStateStart;`
			`continue;`
			`}`
			`break;`
			`case CTokStateHex:`
			`switch (*c) {`
			`case '0':`
			`hex_digit(ctok, 0);`
			`break;`
			`case '1':`
			`hex_digit(ctok, 1);`
			`break;`
			`case '2':`
			`hex_digit(ctok, 2);`
			`break;`
			`case '3':`
			`hex_digit(ctok, 3);`
			`break;`
			`case '4':`
			`hex_digit(ctok, 4);`
			`break;`
			`case '5':`
			`hex_digit(ctok, 5);`
			`break;`
			`case '6':`
			`hex_digit(ctok, 6);`
			`break;`
			`case '7':`
			`hex_digit(ctok, 7);`
			`break;`
			`case '8':`
			`hex_digit(ctok, 8);`
			`break;`
			`case '9':`
			`hex_digit(ctok, 9);`
			`break;`
			`case 'a':`
			`case 'A':`
			`hex_digit(ctok, 10);`
			`break;`
			`case 'b':`
			`case 'B':`
			`hex_digit(ctok, 11);`
			`break;`
			`case 'c':`
			`case 'C':`
			`hex_digit(ctok, 12);`
			`break;`
			`case 'd':`
			`case 'D':`
			`hex_digit(ctok, 13);`
			`break;`
			`case 'e':`
			`case 'E':`
			`hex_digit(ctok, 14);`
			`break;`
			`case 'f':`
			`case 'F':`
			`hex_digit(ctok, 15);`
			`break;`
			`case 'p':`
			`case 'P':`
			`ctok->cur_tok->id = CTokIdNumLitFloat;`
			`ctok->state = CTokStateExpSign;`
			`break;`
parseh: fix not recognizing integer suffixes on hex numbers 2017-06-17 02:34:38 +08:00			`case 'u':`
			`case 'U':`
			`// marks the number literal as unsigned`
			`ctok->state = CTokStateNumLitIntSuffixU;`
			`ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixU;`
			`break;`
			`case 'l':`
			`case 'L':`
			`// marks the number literal as long`
			`ctok->state = CTokStateNumLitIntSuffixL;`
			`ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixL;`
			`break;`
			`default:`
			`c -= 1;`
			`end_token(ctok);`
			`ctok->state = CTokStateStart;`
			`continue;`
			`}`
			`break;`
			`case CTokStateNumLitIntSuffixU:`
			`switch (*c) {`
			`case 'l':`
			`case 'L':`
			`ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLU;`
			`ctok->state = CTokStateNumLitIntSuffixUL;`
			`break;`
			`default:`
			`c -= 1;`
			`end_token(ctok);`
			`ctok->state = CTokStateStart;`
			`continue;`
			`}`
			`break;`
			`case CTokStateNumLitIntSuffixL:`
			`switch (*c) {`
			`case 'l':`
			`case 'L':`
			`ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLL;`
			`ctok->state = CTokStateNumLitIntSuffixLL;`
			`break;`
			`case 'u':`
			`case 'U':`
			`ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLU;`
			`end_token(ctok);`
			`ctok->state = CTokStateStart;`
			`break;`
			`default:`
			`c -= 1;`
			`end_token(ctok);`
			`ctok->state = CTokStateStart;`
			`continue;`
			`}`
			`break;`
			`case CTokStateNumLitIntSuffixLL:`
			`switch (*c) {`
			`case 'u':`
			`case 'U':`
			`ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLLU;`
			`end_token(ctok);`
			`ctok->state = CTokStateStart;`
			`break;`
			`default:`
			`c -= 1;`
			`end_token(ctok);`
			`ctok->state = CTokStateStart;`
			`continue;`
			`}`
			`break;`
			`case CTokStateNumLitIntSuffixUL:`
			`switch (*c) {`
			`case 'l':`
			`case 'L':`
			`ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLLU;`
			`end_token(ctok);`
			`ctok->state = CTokStateStart;`
			`break;`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`default:`
			`c -= 1;`
			`end_token(ctok);`
			`ctok->state = CTokStateStart;`
			`continue;`
			`}`
			`break;`
			`case CTokStateIdentifier:`
			`switch (*c) {`
			`case IDENT:`
			`buf_append_char(&ctok->cur_tok->data.symbol, *c);`
			`break;`
			`default:`
			`c -= 1;`
			`end_token(ctok);`
			`ctok->state = CTokStateStart;`
			`continue;`
			`}`
			`break;`
			`case CTokStateString:`
			`switch (*c) {`
			`case '\\':`
			`ctok->state = CTokStateCharEscape;`
			`break;`
			`case '\"':`
			`end_token(ctok);`
			`ctok->state = CTokStateStart;`
			`break;`
			`default:`
			`buf_append_char(&ctok->cur_tok->data.str_lit, *c);`
			`}`
			`break;`
			`case CTokStateExpectChar:`
			`switch (*c) {`
			`case '\\':`
			`ctok->state = CTokStateCharEscape;`
			`break;`
			`case '\'':`
			`return mark_error(ctok);`
			`default:`
			`ctok->cur_tok->data.char_lit = *c;`
			`ctok->state = CTokStateExpectEndQuot;`
			`}`
			`break;`
			`case CTokStateCharEscape:`
			`switch (*c) {`
			`case '\'':`
			`case '"':`
			`case '?':`
			`case '\\':`
			`add_char(ctok, *c);`
			`break;`
			`case 'a':`
			`add_char(ctok, '\a');`
			`break;`
			`case 'b':`
			`add_char(ctok, '\b');`
			`break;`
			`case 'f':`
			`add_char(ctok, '\f');`
			`break;`
			`case 'n':`
			`add_char(ctok, '\n');`
			`break;`
			`case 'r':`
			`add_char(ctok, '\r');`
			`break;`
			`case 't':`
			`add_char(ctok, '\t');`
			`break;`
			`case 'v':`
			`add_char(ctok, '\v');`
			`break;`
parseh: support octal in C macro string literal 2016-07-10 03:17:31 +08:00			`case '0':`
			`case '1':`
			`case '2':`
			`case '3':`
			`case '4':`
			`case '5':`
			`case '6':`
			`case '7':`
			`ctok->state = CTokStateStrOctal;`
fix some -Wconversion errors 2017-04-11 08:02:39 +08:00			`ctok->cur_char = (uint8_t)(*c - '0');`
parseh: support octal in C macro string literal 2016-07-10 03:17:31 +08:00			`ctok->octal_index = 1;`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`break;`
			`case 'x':`
c macros: support hex chars (#459) * c macros: remove add_char redundancies * c macros: support hex chars * c macros: add test for hex chars 2017-09-11 04:35:56 +08:00			`ctok->state = CTokStateStrHex;`
			`ctok->cur_char = 0;`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`break;`
			`case 'u':`
			`zig_panic("TODO unicode");`
			`break;`
			`case 'U':`
			`zig_panic("TODO Unicode");`
			`break;`
			`default:`
			`return mark_error(ctok);`
			`}`
			`break;`
c macros: support hex chars (#459) * c macros: remove add_char redundancies * c macros: support hex chars * c macros: add test for hex chars 2017-09-11 04:35:56 +08:00			`case CTokStateStrHex: {`
			`uint8_t value = 0;`
			`switch (*c) {`
			`case '0':`
			`case '1':`
			`case '2':`
			`case '3':`
			`case '4':`
			`case '5':`
			`case '6':`
			`case '7':`
			`case '8':`
			`case '9':`
			`value = *c - '0';`
			`break;`
			`case 'a':`
			`case 'b':`
			`case 'c':`
			`case 'd':`
			`case 'e':`
			`case 'f':`
			`value = (*c - 'a') + 10;`
			`break;`
			`case 'A':`
			`case 'B':`
			`case 'C':`
			`case 'D':`
			`case 'E':`
			`case 'F':`
			`value = (*c - 'A') + 10;`
			`break;`
			`default:`
			`c -= 1;`
			`add_char(ctok, ctok->cur_char);`
			`continue;`
			`}`
			`// TODO @mul_with_overflow`
			`if (((long)ctok->cur_char) * 16 >= 256) {`
			`zig_panic("TODO str hex mul overflow");`
			`}`
			`ctok->cur_char = (uint8_t)(ctok->cur_char * (uint8_t)16);`
			`// TODO @add_with_overflow`
			`if (((long)ctok->cur_char) + (long)(value) >= 256) {`
			`zig_panic("TODO str hex add overflow");`
			`}`
			`ctok->cur_char = (uint8_t)(ctok->cur_char + value);`
			`break;`
			`}`
parseh: support octal in C macro string literal 2016-07-10 03:17:31 +08:00			`case CTokStateStrOctal:`
			`switch (*c) {`
			`case '0':`
			`case '1':`
			`case '2':`
			`case '3':`
			`case '4':`
			`case '5':`
			`case '6':`
			`case '7':`
			`// TODO @mul_with_overflow`
			`if (((long)ctok->cur_char) * 8 >= 256) {`
			`zig_panic("TODO");`
			`}`
fix some -Wconversion errors 2017-04-11 08:02:39 +08:00			`ctok->cur_char = (uint8_t)(ctok->cur_char * (uint8_t)8);`
parseh: support octal in C macro string literal 2016-07-10 03:17:31 +08:00			`// TODO @add_with_overflow`
			`if (((long)ctok->cur_char) + (long)(*c - '0') >= 256) {`
			`zig_panic("TODO");`
			`}`
fix some -Wconversion errors 2017-04-11 08:02:39 +08:00			`ctok->cur_char = (uint8_t)(ctok->cur_char + (uint8_t)(*c - '0'));`
parseh: support octal in C macro string literal 2016-07-10 03:17:31 +08:00			`ctok->octal_index += 1;`
			`if (ctok->octal_index == 3) {`
c macros: support hex chars (#459) * c macros: remove add_char redundancies * c macros: support hex chars * c macros: add test for hex chars 2017-09-11 04:35:56 +08:00			`add_char(ctok, ctok->cur_char);`
parseh: support octal in C macro string literal 2016-07-10 03:17:31 +08:00			`}`
			`break;`
			`default:`
			`c -= 1;`
c macros: support hex chars (#459) * c macros: remove add_char redundancies * c macros: support hex chars * c macros: add test for hex chars 2017-09-11 04:35:56 +08:00			`add_char(ctok, ctok->cur_char);`
parseh: support octal in C macro string literal 2016-07-10 03:17:31 +08:00			`continue;`
			`}`
			`break;`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`case CTokStateExpectEndQuot:`
			`switch (*c) {`
			`case '\'':`
			`end_token(ctok);`
			`ctok->state = CTokStateStart;`
			`break;`
			`default:`
			`return mark_error(ctok);`
			`}`
			`break;`
			`case CTokStateOpenComment:`
			`switch (*c) {`
			`case '/':`
			`ctok->state = CTokStateLineComment;`
			`break;`
			`case '*':`
			`ctok->state = CTokStateComment;`
			`break;`
			`default:`
			`return mark_error(ctok);`
			`}`
			`break;`
			`case CTokStateLineComment:`
			`if (*c == '\n') {`
			`ctok->state = CTokStateStart;`
			`goto found_end_of_macro;`
			`}`
			`break;`
			`case CTokStateComment:`
			`switch (*c) {`
			`case '*':`
			`ctok->state = CTokStateCommentStar;`
			`break;`
			`default:`
			`break;`
			`}`
			`break;`
			`case CTokStateCommentStar:`
			`switch (*c) {`
			`case '/':`
			`ctok->state = CTokStateStart;`
			`break;`
			`case '*':`
			`break;`
			`default:`
			`ctok->state = CTokStateComment;`
			`break;`
			`}`
			`break;`
			`case CTokStateBackslash:`
			`switch (*c) {`
			`case '\n':`
			`ctok->state = CTokStateStart;`
			`break;`
			`default:`
			`return mark_error(ctok);`
			`}`
			`break;`
			`}`
			`}`
			`found_end_of_macro:`

			`switch (ctok->state) {`
			`case CTokStateStart:`
			`break;`
			`case CTokStateIdentifier:`
			`case CTokStateDecimal:`
			`case CTokStateHex:`
			`case CTokStateOctal:`
			`case CTokStateGotZero:`
parseh: fix not recognizing integer suffixes on hex numbers 2017-06-17 02:34:38 +08:00			`case CTokStateNumLitIntSuffixU:`
			`case CTokStateNumLitIntSuffixL:`
			`case CTokStateNumLitIntSuffixUL:`
			`case CTokStateNumLitIntSuffixLL:`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`end_token(ctok);`
			`break;`
			`case CTokStateFloat:`
			`case CTokStateFloatExp:`
			`end_float(ctok);`
			`break;`
			`case CTokStateExpectChar:`
			`case CTokStateExpectEndQuot:`
			`case CTokStateOpenComment:`
			`case CTokStateLineComment:`
			`case CTokStateComment:`
			`case CTokStateCommentStar:`
			`case CTokStateCharEscape:`
			`case CTokStateBackslash:`
			`case CTokStateString:`
			`case CTokStateExpSign:`
			`case CTokStateFloatExpFirst:`
c macros: support hex chars (#459) * c macros: remove add_char redundancies * c macros: support hex chars * c macros: add test for hex chars 2017-09-11 04:35:56 +08:00			`case CTokStateStrHex:`
parseh: support octal in C macro string literal 2016-07-10 03:17:31 +08:00			`case CTokStateStrOctal:`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`return mark_error(ctok);`
			`}`

			`assert(ctok->cur_tok == nullptr);`
support parens in C macros closes #454 2017-09-10 12:20:09 +08:00
			`begin_token(ctok, CTokIdEOF);`
			`end_token(ctok);`
better parsing of C macros See #88 2016-04-22 06:48:13 +08:00			`}`