2015-11-05 08:15:46 +08:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2015 Andrew Kelley
|
|
|
|
*
|
|
|
|
* This file is part of zig, which is MIT licensed.
|
|
|
|
* See http://opensource.org/licenses/MIT
|
|
|
|
*/
|
|
|
|
|
2015-11-04 13:31:27 +08:00
|
|
|
#include "tokenizer.hpp"
|
|
|
|
#include "util.hpp"
|
|
|
|
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
// Characters skipped between tokens. Written without the leading "case" so it
// can be spliced into a switch as "case WHITESPACE:".
// NOTE(review): only ' ' and '\n' are listed — '\t' and '\r' would hit the
// "invalid character" branch in tokenize(); confirm that is intentional.
#define WHITESPACE \
    ' ': \
    case '\n'
|
2015-11-04 13:31:27 +08:00
|
|
|
|
2015-12-15 14:13:53 +08:00
|
|
|
// Decimal digits excluding '0' (which needs its own state to detect the
// "0x"/"0o"/"0b" radix prefixes). No leading "case" on the first label, so it
// splices into a switch as "case DIGIT_NON_ZERO:".
#define DIGIT_NON_ZERO \
    '1': \
    case '2': \
    case '3': \
    case '4': \
    case '5': \
    case '6': \
    case '7': \
    case '8': \
    case '9'
|
2015-12-15 14:13:53 +08:00
|
|
|
// All decimal digits. Splices into a switch as "case DIGIT:".
#define DIGIT \
    '0': \
    case DIGIT_NON_ZERO
|
2015-11-04 13:31:27 +08:00
|
|
|
|
2015-12-12 15:10:37 +08:00
|
|
|
// Every ASCII letter except lowercase 'c'. The tokenizer special-cases 'c'
// (TokenizeStateSymbolFirst) because a symbol starting with 'c' immediately
// followed by '"' is re-tagged as a string literal. Splices into a switch as
// "case ALPHA_EXCEPT_C:".
#define ALPHA_EXCEPT_C \
    'a': \
    case 'b': \
    /*case 'c':*/ \
    case 'd': \
    case 'e': \
    case 'f': \
    case 'g': \
    case 'h': \
    case 'i': \
    case 'j': \
    case 'k': \
    case 'l': \
    case 'm': \
    case 'n': \
    case 'o': \
    case 'p': \
    case 'q': \
    case 'r': \
    case 's': \
    case 't': \
    case 'u': \
    case 'v': \
    case 'w': \
    case 'x': \
    case 'y': \
    case 'z': \
    case 'A': \
    case 'B': \
    case 'C': \
    case 'D': \
    case 'E': \
    case 'F': \
    case 'G': \
    case 'H': \
    case 'I': \
    case 'J': \
    case 'K': \
    case 'L': \
    case 'M': \
    case 'N': \
    case 'O': \
    case 'P': \
    case 'Q': \
    case 'R': \
    case 'S': \
    case 'T': \
    case 'U': \
    case 'V': \
    case 'W': \
    case 'X': \
    case 'Y': \
    case 'Z'
|
|
|
|
|
2015-12-12 15:10:37 +08:00
|
|
|
// All ASCII letters. Splices into a switch as "case ALPHA:".
#define ALPHA \
    ALPHA_EXCEPT_C: \
    case 'c'
|
|
|
|
|
2015-11-04 13:31:27 +08:00
|
|
|
// Characters allowed inside a symbol (identifier): letters, digits and
// underscore. Splices into a switch as "case SYMBOL_CHAR:".
#define SYMBOL_CHAR \
    ALPHA: \
    case DIGIT: \
    case '_'
|
|
|
|
|
2015-11-27 15:40:26 +08:00
|
|
|
// States of the character-at-a-time tokenizer state machine. The "Saw*"
// states mean the named character(s) have already been consumed and the next
// character decides which operator token they form; branches that decide the
// character belongs to the next token rewind one position and re-scan it from
// TokenizeStateStart.
enum TokenizeState {
    TokenizeStateStart,
    TokenizeStateSymbol,
    TokenizeStateSymbolFirst, // saw 'c'; 'c' + '"' becomes a string literal
    TokenizeStateZero, // "0", which might lead to "0x"
    TokenizeStateNumber, // "123", "0x123"
    TokenizeStateFloatFraction, // "123.456", "0x123.456"
    TokenizeStateFloatExponentUnsigned, // "123.456e", "123e", "0x123p"
    TokenizeStateFloatExponentNumber, // "123.456e-", "123.456e5", "123.456e5e-5"
    TokenizeStateString,
    TokenizeStateCharLiteral,
    TokenizeStateSawStar,
    TokenizeStateSawSlash,
    TokenizeStateSawPercent,
    TokenizeStateSawPlus,
    TokenizeStateSawDash,
    TokenizeStateSawAmpersand,
    TokenizeStateSawAmpersandAmpersand,
    TokenizeStateSawCaret,
    TokenizeStateSawPipe,
    TokenizeStateSawPipePipe,
    TokenizeStateLineComment,
    TokenizeStateMultiLineComment,
    TokenizeStateMultiLineCommentSlash,
    TokenizeStateMultiLineCommentStar,
    TokenizeStateSawEq,
    TokenizeStateSawBang,
    TokenizeStateSawLessThan,
    TokenizeStateSawLessThanLessThan,
    TokenizeStateSawGreaterThan,
    TokenizeStateSawGreaterThanGreaterThan,
    TokenizeStateSawDot,
    TokenizeStateSawDotDot,
    TokenizeStateSawQuestionMark,
    TokenizeStateError, // a tokenize_error() was reported; remaining input is skipped
};
|
|
|
|
|
2015-11-04 13:31:27 +08:00
|
|
|
|
|
|
|
// Transient working state for one tokenize() run.
struct Tokenize {
    Buf *buf;                     // source text being scanned
    int pos;                      // byte index of the character under examination
    TokenizeState state;          // current state-machine state
    ZigList<Token> *tokens;       // output token list (same object as out->tokens)
    int line;                     // 0-based line of pos, for diagnostics
    int column;                   // 0-based column of pos, for diagnostics
    Token *cur_tok;               // token under construction, or nullptr
    int multi_line_comment_count; // nesting depth of /* */ comments
    Tokenization *out;            // result struct; also receives error location/message
};
|
|
|
|
|
|
|
|
__attribute__ ((format (printf, 2, 3)))
|
|
|
|
static void tokenize_error(Tokenize *t, const char *format, ...) {
|
2015-12-01 15:50:11 +08:00
|
|
|
t->state = TokenizeStateError;
|
|
|
|
|
2015-11-04 13:31:27 +08:00
|
|
|
if (t->cur_tok) {
|
2015-12-01 15:50:11 +08:00
|
|
|
t->out->err_line = t->cur_tok->start_line;
|
|
|
|
t->out->err_column = t->cur_tok->start_column;
|
2015-11-04 13:31:27 +08:00
|
|
|
} else {
|
2015-12-01 15:50:11 +08:00
|
|
|
t->out->err_line = t->line;
|
|
|
|
t->out->err_column = t->column;
|
2015-11-04 13:31:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
va_list ap;
|
|
|
|
va_start(ap, format);
|
2015-12-01 15:50:11 +08:00
|
|
|
t->out->err = buf_vprintf(format, ap);
|
2015-11-04 13:31:27 +08:00
|
|
|
va_end(ap);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Start building a new token of the given id at the current scan position.
// At most one token may be under construction at a time.
static void begin_token(Tokenize *t, TokenId id) {
    assert(!t->cur_tok);

    t->tokens->add_one();
    Token *tok = &t->tokens->last();

    tok->id = id;
    tok->start_pos = t->pos;
    tok->start_line = t->line;
    tok->start_column = t->column;

    // Number-literal bookkeeping starts cleared; end_token() normalizes these
    // for TokenIdNumberLiteral tokens.
    tok->radix = 0;
    tok->decimal_point_pos = 0;
    tok->exponent_marker_pos = 0;

    t->cur_tok = tok;
}
|
|
|
|
|
2015-11-28 15:40:54 +08:00
|
|
|
// Discard the token currently being built — used when what looked like an
// operator turns out to start a comment instead.
static void cancel_token(Tokenize *t) {
    t->cur_tok = nullptr;
    t->tokens->pop();
}
|
|
|
|
|
2015-11-04 13:31:27 +08:00
|
|
|
// Finish the token under construction: record its end position (one past the
// current character), normalize the number-literal bookkeeping, and promote
// symbols whose text spells a keyword.
static void end_token(Tokenize *t) {
    assert(t->cur_tok);
    t->cur_tok->end_pos = t->pos + 1;

    // Normalize number literal parsing positions so downstream code can rely
    // on them: a missing exponent marker is treated as sitting at end_pos, and
    // a missing decimal point as sitting at the exponent marker.
    if (t->cur_tok->id == TokenIdNumberLiteral) {
        if (t->cur_tok->exponent_marker_pos == 0) {
            t->cur_tok->exponent_marker_pos = t->cur_tok->end_pos;
        }
        if (t->cur_tok->decimal_point_pos == 0) {
            t->cur_tok->decimal_point_pos = t->cur_tok->exponent_marker_pos;
        }
    }

    char *token_mem = buf_ptr(t->buf) + t->cur_tok->start_pos;
    int token_len = t->cur_tok->end_pos - t->cur_tok->start_pos;

    // Keyword lookup table — replaces a 23-branch if/else chain with data.
    // The check runs on every token, not just symbols, exactly like the old
    // chain did; only symbol-shaped text can ever match a keyword.
    static const struct {
        const char *name;
        TokenId id;
    } keywords[] = {
        {"fn", TokenIdKeywordFn},
        {"return", TokenIdKeywordReturn},
        {"var", TokenIdKeywordVar},
        {"const", TokenIdKeywordConst},
        {"extern", TokenIdKeywordExtern},
        {"unreachable", TokenIdKeywordUnreachable},
        {"pub", TokenIdKeywordPub},
        {"export", TokenIdKeywordExport},
        {"as", TokenIdKeywordAs},
        {"use", TokenIdKeywordUse},
        {"void", TokenIdKeywordVoid},
        {"true", TokenIdKeywordTrue},
        {"false", TokenIdKeywordFalse},
        {"if", TokenIdKeywordIf},
        {"else", TokenIdKeywordElse},
        {"goto", TokenIdKeywordGoto},
        {"volatile", TokenIdKeywordVolatile},
        {"asm", TokenIdKeywordAsm},
        {"struct", TokenIdKeywordStruct},
        {"while", TokenIdKeywordWhile},
        {"continue", TokenIdKeywordContinue},
        {"break", TokenIdKeywordBreak},
        {"null", TokenIdKeywordNull},
    };
    for (size_t i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i += 1) {
        if (mem_eql_str(token_mem, token_len, keywords[i].name)) {
            t->cur_tok->id = keywords[i].id;
            break;
        }
    }

    t->cur_tok = nullptr;
}
|
|
|
|
|
2015-12-15 14:13:53 +08:00
|
|
|
// Report whether c marks the start of a float exponent for the given radix.
// Hex literals use 'p'/'P' (since 'e' is a valid hex digit); all other
// radixes use 'e'/'E'.
static bool is_exponent_signifier(uint8_t c, int radix) {
    if (radix == 16) {
        return (c == 'p') || (c == 'P');
    }
    return (c == 'e') || (c == 'E');
}
|
2015-12-15 19:05:43 +08:00
|
|
|
|
|
|
|
// Map an ASCII character to its digit value: '0'-'9' -> 0-9, and letters
// (either case) -> 10-35. Returns -1 for anything that is not a digit in any
// radix up to 36; the caller compares the result against the token's radix.
int get_digit_value(uint8_t c) {
    if (c >= '0' && c <= '9') {
        return c - '0';
    } else if (c >= 'A' && c <= 'Z') {
        return 10 + (c - 'A');
    } else if (c >= 'a' && c <= 'z') {
        return 10 + (c - 'a');
    } else {
        return -1;
    }
}
|
|
|
|
|
2015-12-01 15:50:11 +08:00
|
|
|
void tokenize(Buf *buf, Tokenization *out) {
|
2015-11-04 13:31:27 +08:00
|
|
|
Tokenize t = {0};
|
2015-12-01 15:50:11 +08:00
|
|
|
t.out = out;
|
|
|
|
t.tokens = out->tokens = allocate<ZigList<Token>>(1);
|
2015-11-04 13:31:27 +08:00
|
|
|
t.buf = buf;
|
2015-12-01 15:50:11 +08:00
|
|
|
|
|
|
|
out->line_offsets = allocate<ZigList<int>>(1);
|
|
|
|
|
|
|
|
out->line_offsets->append(0);
|
2015-11-04 13:31:27 +08:00
|
|
|
for (t.pos = 0; t.pos < buf_len(t.buf); t.pos += 1) {
|
|
|
|
uint8_t c = buf_ptr(t.buf)[t.pos];
|
|
|
|
switch (t.state) {
|
2015-12-01 15:50:11 +08:00
|
|
|
case TokenizeStateError:
|
|
|
|
break;
|
2015-11-04 13:31:27 +08:00
|
|
|
case TokenizeStateStart:
|
|
|
|
switch (c) {
|
|
|
|
case WHITESPACE:
|
|
|
|
break;
|
2015-12-12 15:10:37 +08:00
|
|
|
case 'c':
|
|
|
|
t.state = TokenizeStateSymbolFirst;
|
|
|
|
begin_token(&t, TokenIdSymbol);
|
|
|
|
break;
|
|
|
|
case ALPHA_EXCEPT_C:
|
2015-11-25 04:00:38 +08:00
|
|
|
case '_':
|
2015-11-04 13:31:27 +08:00
|
|
|
t.state = TokenizeStateSymbol;
|
|
|
|
begin_token(&t, TokenIdSymbol);
|
|
|
|
break;
|
2015-12-15 14:13:53 +08:00
|
|
|
case '0':
|
|
|
|
t.state = TokenizeStateZero;
|
|
|
|
begin_token(&t, TokenIdNumberLiteral);
|
|
|
|
t.cur_tok->radix = 10;
|
|
|
|
break;
|
|
|
|
case DIGIT_NON_ZERO:
|
2015-11-04 13:31:27 +08:00
|
|
|
t.state = TokenizeStateNumber;
|
|
|
|
begin_token(&t, TokenIdNumberLiteral);
|
2015-12-15 14:13:53 +08:00
|
|
|
t.cur_tok->radix = 10;
|
2015-11-04 13:31:27 +08:00
|
|
|
break;
|
|
|
|
case '"':
|
|
|
|
begin_token(&t, TokenIdStringLiteral);
|
|
|
|
t.state = TokenizeStateString;
|
|
|
|
break;
|
2016-01-02 18:38:45 +08:00
|
|
|
case '\'':
|
|
|
|
begin_token(&t, TokenIdCharLiteral);
|
|
|
|
t.state = TokenizeStateCharLiteral;
|
|
|
|
break;
|
2015-11-04 13:31:27 +08:00
|
|
|
case '(':
|
|
|
|
begin_token(&t, TokenIdLParen);
|
|
|
|
end_token(&t);
|
|
|
|
break;
|
|
|
|
case ')':
|
|
|
|
begin_token(&t, TokenIdRParen);
|
|
|
|
end_token(&t);
|
|
|
|
break;
|
|
|
|
case ',':
|
|
|
|
begin_token(&t, TokenIdComma);
|
|
|
|
end_token(&t);
|
|
|
|
break;
|
|
|
|
case '{':
|
|
|
|
begin_token(&t, TokenIdLBrace);
|
|
|
|
end_token(&t);
|
|
|
|
break;
|
|
|
|
case '}':
|
|
|
|
begin_token(&t, TokenIdRBrace);
|
|
|
|
end_token(&t);
|
|
|
|
break;
|
2015-12-07 23:29:19 +08:00
|
|
|
case '[':
|
|
|
|
begin_token(&t, TokenIdLBracket);
|
|
|
|
end_token(&t);
|
|
|
|
break;
|
|
|
|
case ']':
|
|
|
|
begin_token(&t, TokenIdRBracket);
|
|
|
|
end_token(&t);
|
|
|
|
break;
|
2015-11-04 13:31:27 +08:00
|
|
|
case ';':
|
|
|
|
begin_token(&t, TokenIdSemicolon);
|
|
|
|
end_token(&t);
|
|
|
|
break;
|
|
|
|
case ':':
|
|
|
|
begin_token(&t, TokenIdColon);
|
|
|
|
end_token(&t);
|
|
|
|
break;
|
2015-12-13 09:17:27 +08:00
|
|
|
case '#':
|
|
|
|
begin_token(&t, TokenIdNumberSign);
|
|
|
|
end_token(&t);
|
|
|
|
break;
|
|
|
|
case '*':
|
|
|
|
begin_token(&t, TokenIdStar);
|
|
|
|
t.state = TokenizeStateSawStar;
|
|
|
|
break;
|
|
|
|
case '/':
|
|
|
|
begin_token(&t, TokenIdSlash);
|
|
|
|
t.state = TokenizeStateSawSlash;
|
|
|
|
break;
|
|
|
|
case '%':
|
|
|
|
begin_token(&t, TokenIdPercent);
|
|
|
|
t.state = TokenizeStateSawPercent;
|
|
|
|
break;
|
2015-11-04 13:31:27 +08:00
|
|
|
case '+':
|
|
|
|
begin_token(&t, TokenIdPlus);
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawPlus;
|
2015-11-04 13:31:27 +08:00
|
|
|
break;
|
2015-11-30 04:37:55 +08:00
|
|
|
case '~':
|
|
|
|
begin_token(&t, TokenIdTilde);
|
|
|
|
end_token(&t);
|
|
|
|
break;
|
2015-11-04 13:31:27 +08:00
|
|
|
case '-':
|
|
|
|
begin_token(&t, TokenIdDash);
|
|
|
|
t.state = TokenizeStateSawDash;
|
|
|
|
break;
|
2015-12-13 09:17:27 +08:00
|
|
|
case '&':
|
2015-12-15 09:10:25 +08:00
|
|
|
begin_token(&t, TokenIdAmpersand);
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawAmpersand;
|
2015-11-25 14:44:41 +08:00
|
|
|
break;
|
2015-11-28 15:40:54 +08:00
|
|
|
case '^':
|
|
|
|
begin_token(&t, TokenIdBinXor);
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawCaret;
|
2015-11-27 15:40:26 +08:00
|
|
|
break;
|
2015-11-28 15:40:54 +08:00
|
|
|
case '|':
|
|
|
|
begin_token(&t, TokenIdBinOr);
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawPipe;
|
2015-11-28 15:40:54 +08:00
|
|
|
break;
|
|
|
|
case '=':
|
|
|
|
begin_token(&t, TokenIdEq);
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawEq;
|
2015-11-28 15:40:54 +08:00
|
|
|
break;
|
|
|
|
case '!':
|
2015-11-30 04:37:55 +08:00
|
|
|
begin_token(&t, TokenIdBang);
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawBang;
|
2015-11-28 15:40:54 +08:00
|
|
|
break;
|
|
|
|
case '<':
|
|
|
|
begin_token(&t, TokenIdCmpLessThan);
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawLessThan;
|
2015-11-28 15:40:54 +08:00
|
|
|
break;
|
|
|
|
case '>':
|
|
|
|
begin_token(&t, TokenIdCmpGreaterThan);
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawGreaterThan;
|
2015-11-28 15:40:54 +08:00
|
|
|
break;
|
2015-12-09 16:03:04 +08:00
|
|
|
case '.':
|
|
|
|
begin_token(&t, TokenIdDot);
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawDot;
|
2015-12-09 16:03:04 +08:00
|
|
|
break;
|
2015-12-27 06:05:27 +08:00
|
|
|
case '?':
|
|
|
|
begin_token(&t, TokenIdMaybe);
|
|
|
|
t.state = TokenizeStateSawQuestionMark;
|
|
|
|
break;
|
2015-11-27 15:40:26 +08:00
|
|
|
default:
|
|
|
|
tokenize_error(&t, "invalid character: '%c'", c);
|
|
|
|
}
|
|
|
|
break;
|
2015-12-27 06:05:27 +08:00
|
|
|
case TokenizeStateSawQuestionMark:
|
|
|
|
switch (c) {
|
2016-01-07 18:23:38 +08:00
|
|
|
case '?':
|
|
|
|
t.cur_tok->id = TokenIdDoubleQuestion;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
2015-12-27 06:05:27 +08:00
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdMaybeAssign;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
2015-12-13 09:17:27 +08:00
|
|
|
case TokenizeStateSawDot:
|
2015-12-09 16:03:04 +08:00
|
|
|
switch (c) {
|
|
|
|
case '.':
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawDotDot;
|
2015-12-09 16:07:27 +08:00
|
|
|
t.cur_tok->id = TokenIdEllipsis;
|
2015-12-09 16:03:04 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
2015-12-13 09:17:27 +08:00
|
|
|
case TokenizeStateSawDotDot:
|
2015-12-09 16:03:04 +08:00
|
|
|
switch (c) {
|
|
|
|
case '.':
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
end_token(&t);
|
|
|
|
break;
|
|
|
|
default:
|
2015-12-13 09:17:27 +08:00
|
|
|
tokenize_error(&t, "invalid character: '%c'", c);
|
2015-12-09 16:03:04 +08:00
|
|
|
}
|
|
|
|
break;
|
2015-12-13 09:17:27 +08:00
|
|
|
case TokenizeStateSawGreaterThan:
|
2015-11-28 15:40:54 +08:00
|
|
|
switch (c) {
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdCmpGreaterOrEq;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
case '>':
|
|
|
|
t.cur_tok->id = TokenIdBitShiftRight;
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawGreaterThanGreaterThan;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateSawGreaterThanGreaterThan:
|
|
|
|
switch (c) {
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdBitShiftRightEq;
|
2015-11-28 15:40:54 +08:00
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
2015-12-13 10:47:37 +08:00
|
|
|
break;
|
2015-11-28 15:40:54 +08:00
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
2015-12-13 09:17:27 +08:00
|
|
|
case TokenizeStateSawLessThan:
|
2015-11-28 15:40:54 +08:00
|
|
|
switch (c) {
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdCmpLessOrEq;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
2015-12-13 10:47:37 +08:00
|
|
|
break;
|
2015-11-28 15:40:54 +08:00
|
|
|
case '<':
|
|
|
|
t.cur_tok->id = TokenIdBitShiftLeft;
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawLessThanLessThan;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateSawLessThanLessThan:
|
|
|
|
switch (c) {
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdBitShiftLeftEq;
|
2015-11-28 15:40:54 +08:00
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
2015-12-13 10:47:37 +08:00
|
|
|
break;
|
2015-11-28 15:40:54 +08:00
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
2015-12-13 09:17:27 +08:00
|
|
|
case TokenizeStateSawBang:
|
2015-11-28 15:40:54 +08:00
|
|
|
switch (c) {
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdCmpNotEq;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
2015-12-13 09:17:27 +08:00
|
|
|
case TokenizeStateSawEq:
|
2015-11-28 15:40:54 +08:00
|
|
|
switch (c) {
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdCmpEq;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
2015-12-13 09:17:27 +08:00
|
|
|
case TokenizeStateSawStar:
|
|
|
|
switch (c) {
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdTimesEq;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateSawPercent:
|
|
|
|
switch (c) {
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdModEq;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateSawPlus:
|
|
|
|
switch (c) {
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdPlusEq;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateSawAmpersand:
|
2015-11-28 15:40:54 +08:00
|
|
|
switch (c) {
|
|
|
|
case '&':
|
|
|
|
t.cur_tok->id = TokenIdBoolAnd;
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawAmpersandAmpersand;
|
|
|
|
break;
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdBitAndEq;
|
2015-11-28 15:40:54 +08:00
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
2015-12-13 09:17:27 +08:00
|
|
|
case TokenizeStateSawAmpersandAmpersand:
|
|
|
|
switch (c) {
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdBoolAndEq;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateSawCaret:
|
|
|
|
switch (c) {
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdBitXorEq;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateSawPipe:
|
2015-11-28 15:40:54 +08:00
|
|
|
switch (c) {
|
|
|
|
case '|':
|
|
|
|
t.cur_tok->id = TokenIdBoolOr;
|
2015-12-13 09:17:27 +08:00
|
|
|
t.state = TokenizeStateSawPipePipe;
|
|
|
|
break;
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdBitOrEq;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateSawPipePipe:
|
|
|
|
switch (c) {
|
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdBoolOrEq;
|
2015-11-28 15:40:54 +08:00
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
2015-11-27 15:40:26 +08:00
|
|
|
case TokenizeStateSawSlash:
|
|
|
|
switch (c) {
|
|
|
|
case '/':
|
2015-11-28 15:40:54 +08:00
|
|
|
cancel_token(&t);
|
2015-11-27 15:40:26 +08:00
|
|
|
t.state = TokenizeStateLineComment;
|
|
|
|
break;
|
|
|
|
case '*':
|
2015-11-28 15:40:54 +08:00
|
|
|
cancel_token(&t);
|
2015-11-27 15:40:26 +08:00
|
|
|
t.state = TokenizeStateMultiLineComment;
|
|
|
|
t.multi_line_comment_count = 1;
|
|
|
|
break;
|
2015-12-13 09:17:27 +08:00
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdDivEq;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
2015-11-04 13:31:27 +08:00
|
|
|
default:
|
2015-12-13 08:03:44 +08:00
|
|
|
t.pos -= 1;
|
2015-11-28 15:40:54 +08:00
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
2015-12-13 08:03:44 +08:00
|
|
|
continue;
|
2015-11-27 15:40:26 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateLineComment:
|
|
|
|
switch (c) {
|
|
|
|
case '\n':
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
// do nothing
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateMultiLineComment:
|
|
|
|
switch (c) {
|
|
|
|
case '*':
|
|
|
|
t.state = TokenizeStateMultiLineCommentStar;
|
|
|
|
break;
|
|
|
|
case '/':
|
|
|
|
t.state = TokenizeStateMultiLineCommentSlash;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
// do nothing
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateMultiLineCommentSlash:
|
|
|
|
switch (c) {
|
|
|
|
case '*':
|
|
|
|
t.state = TokenizeStateMultiLineComment;
|
|
|
|
t.multi_line_comment_count += 1;
|
|
|
|
break;
|
|
|
|
case '/':
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.state = TokenizeStateMultiLineComment;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateMultiLineCommentStar:
|
|
|
|
switch (c) {
|
|
|
|
case '/':
|
|
|
|
t.multi_line_comment_count -= 1;
|
|
|
|
if (t.multi_line_comment_count == 0) {
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
} else {
|
|
|
|
t.state = TokenizeStateMultiLineComment;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case '*':
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.state = TokenizeStateMultiLineComment;
|
|
|
|
break;
|
2015-11-04 13:31:27 +08:00
|
|
|
}
|
|
|
|
break;
|
2015-12-12 15:10:37 +08:00
|
|
|
case TokenizeStateSymbolFirst:
|
|
|
|
switch (c) {
|
|
|
|
case '"':
|
|
|
|
t.cur_tok->id = TokenIdStringLiteral;
|
|
|
|
t.state = TokenizeStateString;
|
|
|
|
break;
|
|
|
|
case SYMBOL_CHAR:
|
|
|
|
t.state = TokenizeStateSymbol;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
2015-11-04 13:31:27 +08:00
|
|
|
case TokenizeStateSymbol:
|
|
|
|
switch (c) {
|
|
|
|
case SYMBOL_CHAR:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateString:
|
|
|
|
switch (c) {
|
|
|
|
case '"':
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2016-01-02 18:38:45 +08:00
|
|
|
case TokenizeStateCharLiteral:
|
|
|
|
switch (c) {
|
|
|
|
case '\'':
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2015-12-15 14:13:53 +08:00
|
|
|
case TokenizeStateZero:
|
|
|
|
switch (c) {
|
|
|
|
case 'b':
|
|
|
|
t.cur_tok->radix = 2;
|
|
|
|
break;
|
|
|
|
case 'o':
|
|
|
|
t.cur_tok->radix = 8;
|
|
|
|
break;
|
|
|
|
case 'x':
|
|
|
|
t.cur_tok->radix = 16;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
// reinterpret as normal number
|
|
|
|
t.pos -= 1;
|
|
|
|
t.state = TokenizeStateNumber;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
2015-11-04 13:31:27 +08:00
|
|
|
case TokenizeStateNumber:
|
2015-12-15 14:13:53 +08:00
|
|
|
{
|
|
|
|
if (c == '.') {
|
|
|
|
t.cur_tok->decimal_point_pos = t.pos;
|
|
|
|
t.state = TokenizeStateFloatFraction;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (is_exponent_signifier(c, t.cur_tok->radix)) {
|
|
|
|
t.cur_tok->exponent_marker_pos = t.pos;
|
|
|
|
t.state = TokenizeStateFloatExponentUnsigned;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (c == '_') {
|
|
|
|
tokenize_error(&t, "invalid character: '%c'", c);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
int digit_value = get_digit_value(c);
|
|
|
|
if (digit_value >= 0) {
|
|
|
|
if (digit_value >= t.cur_tok->radix) {
|
|
|
|
tokenize_error(&t, "invalid character: '%c'", c);
|
|
|
|
}
|
|
|
|
// normal digit
|
|
|
|
} else {
|
|
|
|
// not my char
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case TokenizeStateFloatFraction:
|
|
|
|
{
|
|
|
|
if (is_exponent_signifier(c, t.cur_tok->radix)) {
|
|
|
|
t.cur_tok->exponent_marker_pos = t.pos;
|
|
|
|
t.state = TokenizeStateFloatExponentUnsigned;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (c == '_') {
|
|
|
|
tokenize_error(&t, "invalid character: '%c'", c);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
int digit_value = get_digit_value(c);
|
|
|
|
if (digit_value >= 0) {
|
|
|
|
if (digit_value >= t.cur_tok->radix) {
|
|
|
|
tokenize_error(&t, "invalid character: '%c'", c);
|
|
|
|
}
|
|
|
|
// normal digit
|
|
|
|
} else {
|
|
|
|
// not my char
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case TokenizeStateFloatExponentUnsigned:
|
|
|
|
switch (c) {
|
|
|
|
case '+':
|
|
|
|
case '-':
|
|
|
|
t.state = TokenizeStateFloatExponentNumber;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
// reinterpret as normal exponent number
|
|
|
|
t.pos -= 1;
|
|
|
|
t.state = TokenizeStateFloatExponentNumber;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateFloatExponentNumber:
|
2015-11-04 13:31:27 +08:00
|
|
|
switch (c) {
|
|
|
|
case DIGIT:
|
|
|
|
break;
|
2015-12-15 14:13:53 +08:00
|
|
|
case ALPHA:
|
|
|
|
case '_':
|
|
|
|
tokenize_error(&t, "invalid character: '%c'", c);
|
|
|
|
break;
|
2015-11-04 13:31:27 +08:00
|
|
|
default:
|
|
|
|
t.pos -= 1;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenizeStateSawDash:
|
|
|
|
switch (c) {
|
|
|
|
case '>':
|
|
|
|
t.cur_tok->id = TokenIdArrow;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
2015-12-13 09:17:27 +08:00
|
|
|
case '=':
|
|
|
|
t.cur_tok->id = TokenIdMinusEq;
|
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
|
|
|
break;
|
2015-11-04 13:31:27 +08:00
|
|
|
default:
|
2015-12-13 08:03:44 +08:00
|
|
|
t.pos -= 1;
|
2015-11-04 13:31:27 +08:00
|
|
|
end_token(&t);
|
|
|
|
t.state = TokenizeStateStart;
|
2015-12-13 08:03:44 +08:00
|
|
|
continue;
|
2015-11-04 13:31:27 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (c == '\n') {
|
2015-12-01 15:50:11 +08:00
|
|
|
out->line_offsets->append(t.pos + 1);
|
2015-11-04 13:31:27 +08:00
|
|
|
t.line += 1;
|
|
|
|
t.column = 0;
|
|
|
|
} else {
|
|
|
|
t.column += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// EOF
|
|
|
|
switch (t.state) {
|
|
|
|
case TokenizeStateStart:
|
2015-12-01 15:50:11 +08:00
|
|
|
case TokenizeStateError:
|
2015-11-04 13:31:27 +08:00
|
|
|
break;
|
|
|
|
case TokenizeStateString:
|
|
|
|
tokenize_error(&t, "unterminated string");
|
|
|
|
break;
|
2016-01-02 18:38:45 +08:00
|
|
|
case TokenizeStateCharLiteral:
|
|
|
|
tokenize_error(&t, "unterminated character literal");
|
|
|
|
break;
|
2015-11-28 15:40:54 +08:00
|
|
|
case TokenizeStateSymbol:
|
2015-12-12 15:10:37 +08:00
|
|
|
case TokenizeStateSymbolFirst:
|
2015-12-15 14:13:53 +08:00
|
|
|
case TokenizeStateZero:
|
2015-11-04 13:31:27 +08:00
|
|
|
case TokenizeStateNumber:
|
2015-12-15 14:13:53 +08:00
|
|
|
case TokenizeStateFloatFraction:
|
|
|
|
case TokenizeStateFloatExponentUnsigned:
|
|
|
|
case TokenizeStateFloatExponentNumber:
|
2015-12-13 09:17:27 +08:00
|
|
|
case TokenizeStateSawStar:
|
|
|
|
case TokenizeStateSawSlash:
|
|
|
|
case TokenizeStateSawPercent:
|
|
|
|
case TokenizeStateSawPlus:
|
2015-11-04 13:31:27 +08:00
|
|
|
case TokenizeStateSawDash:
|
2015-12-13 09:17:27 +08:00
|
|
|
case TokenizeStateSawAmpersand:
|
|
|
|
case TokenizeStateSawAmpersandAmpersand:
|
|
|
|
case TokenizeStateSawCaret:
|
|
|
|
case TokenizeStateSawPipe:
|
|
|
|
case TokenizeStateSawPipePipe:
|
|
|
|
case TokenizeStateSawEq:
|
|
|
|
case TokenizeStateSawBang:
|
|
|
|
case TokenizeStateSawLessThan:
|
|
|
|
case TokenizeStateSawLessThanLessThan:
|
|
|
|
case TokenizeStateSawGreaterThan:
|
|
|
|
case TokenizeStateSawGreaterThanGreaterThan:
|
|
|
|
case TokenizeStateSawDot:
|
2015-12-27 06:05:27 +08:00
|
|
|
case TokenizeStateSawQuestionMark:
|
2015-11-04 13:31:27 +08:00
|
|
|
end_token(&t);
|
|
|
|
break;
|
2015-12-13 09:17:27 +08:00
|
|
|
case TokenizeStateSawDotDot:
|
2015-11-27 15:40:26 +08:00
|
|
|
tokenize_error(&t, "unexpected EOF");
|
|
|
|
break;
|
|
|
|
case TokenizeStateLineComment:
|
|
|
|
break;
|
|
|
|
case TokenizeStateMultiLineComment:
|
|
|
|
case TokenizeStateMultiLineCommentSlash:
|
|
|
|
case TokenizeStateMultiLineCommentStar:
|
|
|
|
tokenize_error(&t, "unterminated multi-line comment");
|
|
|
|
break;
|
2015-11-04 13:31:27 +08:00
|
|
|
}
|
2015-12-01 15:50:11 +08:00
|
|
|
if (t.state != TokenizeStateError) {
|
|
|
|
t.pos = -1;
|
|
|
|
begin_token(&t, TokenIdEof);
|
|
|
|
end_token(&t);
|
|
|
|
assert(!t.cur_tok);
|
|
|
|
}
|
2015-11-04 13:31:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Maps a token id to a short human-readable name for debug output
// (used by print_tokens below). Returns a pointer to a static string
// literal; the caller must not free or modify it.
static const char * token_name(Token *token) {
    switch (token->id) {
        case TokenIdEof: return "EOF";
        case TokenIdSymbol: return "Symbol";
        case TokenIdKeywordFn: return "Fn";
        case TokenIdKeywordConst: return "Const";
        case TokenIdKeywordVar: return "Var";
        case TokenIdKeywordReturn: return "Return";
        case TokenIdKeywordExtern: return "Extern";
        case TokenIdKeywordUnreachable: return "Unreachable";
        case TokenIdKeywordPub: return "Pub";
        case TokenIdKeywordExport: return "Export";
        case TokenIdKeywordAs: return "As";
        case TokenIdKeywordUse: return "Use";
        case TokenIdKeywordVoid: return "Void";
        case TokenIdKeywordTrue: return "True";
        case TokenIdKeywordFalse: return "False";
        case TokenIdKeywordIf: return "If";
        case TokenIdKeywordElse: return "Else";
        case TokenIdKeywordGoto: return "Goto";
        case TokenIdKeywordVolatile: return "Volatile";
        case TokenIdKeywordAsm: return "Asm";
        case TokenIdKeywordStruct: return "Struct";
        case TokenIdKeywordWhile: return "While";
        case TokenIdKeywordContinue: return "Continue";
        case TokenIdKeywordBreak: return "Break";
        case TokenIdKeywordNull: return "Null";
        case TokenIdLParen: return "LParen";
        case TokenIdRParen: return "RParen";
        case TokenIdComma: return "Comma";
        case TokenIdStar: return "Star";
        case TokenIdLBrace: return "LBrace";
        case TokenIdRBrace: return "RBrace";
        case TokenIdLBracket: return "LBracket";
        case TokenIdRBracket: return "RBracket";
        case TokenIdStringLiteral: return "StringLiteral";
        case TokenIdCharLiteral: return "CharLiteral";
        case TokenIdSemicolon: return "Semicolon";
        case TokenIdNumberLiteral: return "NumberLiteral";
        case TokenIdPlus: return "Plus";
        case TokenIdColon: return "Colon";
        case TokenIdArrow: return "Arrow";
        case TokenIdDash: return "Dash";
        case TokenIdNumberSign: return "NumberSign";
        case TokenIdBinOr: return "BinOr";
        case TokenIdAmpersand: return "Ampersand";
        case TokenIdBinXor: return "BinXor";
        case TokenIdBoolOr: return "BoolOr";
        case TokenIdBoolAnd: return "BoolAnd";
        case TokenIdEq: return "Eq";
        case TokenIdTimesEq: return "TimesEq";
        case TokenIdDivEq: return "DivEq";
        case TokenIdModEq: return "ModEq";
        case TokenIdPlusEq: return "PlusEq";
        case TokenIdMinusEq: return "MinusEq";
        case TokenIdBitShiftLeftEq: return "BitShiftLeftEq";
        case TokenIdBitShiftRightEq: return "BitShiftRightEq";
        case TokenIdBitAndEq: return "BitAndEq";
        case TokenIdBitXorEq: return "BitXorEq";
        case TokenIdBitOrEq: return "BitOrEq";
        case TokenIdBoolAndEq: return "BoolAndEq";
        case TokenIdBoolOrEq: return "BoolOrEq";
        case TokenIdBang: return "Bang";
        case TokenIdTilde: return "Tilde";
        case TokenIdCmpEq: return "CmpEq";
        case TokenIdCmpNotEq: return "CmpNotEq";
        case TokenIdCmpLessThan: return "CmpLessThan";
        case TokenIdCmpGreaterThan: return "CmpGreaterThan";
        case TokenIdCmpLessOrEq: return "CmpLessOrEq";
        case TokenIdCmpGreaterOrEq: return "CmpGreaterOrEq";
        case TokenIdBitShiftLeft: return "BitShiftLeft";
        case TokenIdBitShiftRight: return "BitShiftRight";
        case TokenIdSlash: return "Slash";
        case TokenIdPercent: return "Percent";
        case TokenIdDot: return "Dot";
        case TokenIdEllipsis: return "Ellipsis";
        case TokenIdMaybe: return "Maybe";
        case TokenIdDoubleQuestion: return "DoubleQuestion";
        case TokenIdMaybeAssign: return "MaybeAssign";
    }
    // Reached only if token->id holds a value outside the handled enum
    // range; kept outside the switch so every path returns a string.
    return "(invalid token)";
}
|
|
|
|
|
|
|
|
// Debug dump: writes each token's name followed by its raw source text
// (a slice of `buf` from start_pos to end_pos) to stderr, one per line.
void print_tokens(Buf *buf, ZigList<Token> *tokens) {
    for (int idx = 0; idx < tokens->length; idx += 1) {
        Token *tok = &tokens->at(idx);
        fprintf(stderr, "%s ", token_name(tok));
        // The EOF token is created with pos == -1 (see the end of the
        // tokenize loop), so it has no source text to echo.
        if (tok->start_pos < 0) {
            fprintf(stderr, "\n");
            continue;
        }
        fwrite(buf_ptr(buf) + tok->start_pos, 1, tok->end_pos - tok->start_pos, stderr);
        fprintf(stderr, "\n");
    }
}
|
2015-12-11 06:34:38 +08:00
|
|
|
|
|
|
|
bool is_printable(uint8_t c) {
|
|
|
|
switch (c) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case DIGIT:
|
|
|
|
case ALPHA:
|
|
|
|
case '!':
|
|
|
|
case '#':
|
|
|
|
case '$':
|
|
|
|
case '%':
|
|
|
|
case '&':
|
|
|
|
case '\'':
|
|
|
|
case '(':
|
|
|
|
case ')':
|
|
|
|
case '*':
|
|
|
|
case '+':
|
|
|
|
case ',':
|
|
|
|
case '-':
|
|
|
|
case '.':
|
|
|
|
case '/':
|
|
|
|
case ':':
|
|
|
|
case ';':
|
|
|
|
case '<':
|
|
|
|
case '=':
|
|
|
|
case '>':
|
|
|
|
case '?':
|
|
|
|
case '@':
|
|
|
|
case '^':
|
|
|
|
case '_':
|
|
|
|
case '`':
|
|
|
|
case '~':
|
|
|
|
case ' ':
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|