2015-11-05 08:15:46 +08:00
|
|
|
/*
 * Copyright (c) 2015 Andrew Kelley
 *
 * This file is part of zig, which is MIT licensed.
 * See http://opensource.org/licenses/MIT
 */
|
|
|
|
|
2015-11-04 13:31:27 +08:00
|
|
|
#ifndef ZIG_TOKENIZER_HPP
|
|
|
|
#define ZIG_TOKENIZER_HPP
|
|
|
|
|
|
|
|
#include "buffer.hpp"
|
|
|
|
|
|
|
|
// Identifies the kind of a Token (stored in Token::id).
//
// Enumerators carry no explicit values, so each one's numeric value is its
// position in this list, starting at 0. Reordering or inserting entries
// changes those values.
enum TokenId {
    TokenIdEof,
    TokenIdSymbol,

    // Keyword tokens.
    TokenIdKeywordFn,
    TokenIdKeywordReturn,
    TokenIdKeywordVar,
    TokenIdKeywordConst,
    TokenIdKeywordExtern,
    TokenIdKeywordPub,
    TokenIdKeywordUse,
    TokenIdKeywordExport,
    TokenIdKeywordTrue,
    TokenIdKeywordFalse,
    TokenIdKeywordIf,
    TokenIdKeywordElse,
    TokenIdKeywordGoto,
    TokenIdKeywordAsm,
    TokenIdKeywordVolatile,
    TokenIdKeywordStruct,
    TokenIdKeywordEnum,
    TokenIdKeywordWhile,
    TokenIdKeywordFor,
    TokenIdKeywordContinue,
    TokenIdKeywordBreak,
    TokenIdKeywordNull,
    TokenIdKeywordNoAlias,
    TokenIdKeywordSwitch,
    TokenIdKeywordUndefined,
    TokenIdKeywordError,
    TokenIdKeywordType,
    TokenIdKeywordInline,
    TokenIdKeywordDefer,

    // Non-keyword tokens: delimiters, literals and operators.
    // NOTE(review): the exact source spelling of each token is defined by the
    // tokenizer implementation, which is not visible in this header.
    TokenIdLParen,
    TokenIdRParen,
    TokenIdComma,
    TokenIdStar,
    TokenIdLBrace,
    TokenIdRBrace,
    TokenIdLBracket,
    TokenIdRBracket,
    TokenIdStringLiteral,
    TokenIdCharLiteral,
    TokenIdSemicolon,
    TokenIdNumberLiteral,
    TokenIdPlus,
    TokenIdPlusPlus,
    TokenIdColon,
    TokenIdArrow,
    TokenIdFatArrow,
    TokenIdDash,
    TokenIdNumberSign,
    TokenIdBoolOr,
    TokenIdBoolAnd,
    TokenIdBinOr,
    TokenIdAmpersand,
    TokenIdBinXor,
    TokenIdEq,
    TokenIdTimesEq,
    TokenIdDivEq,
    TokenIdModEq,
    TokenIdPlusEq,
    TokenIdMinusEq,
    TokenIdBitShiftLeftEq,
    TokenIdBitShiftRightEq,
    TokenIdBitAndEq,
    TokenIdBitXorEq,
    TokenIdBitOrEq,
    TokenIdBoolAndEq,
    TokenIdBoolOrEq,
    TokenIdCmpEq,
    TokenIdBang,
    TokenIdTilde,
    TokenIdCmpNotEq,
    TokenIdCmpLessThan,
    TokenIdCmpGreaterThan,
    TokenIdCmpLessOrEq,
    TokenIdCmpGreaterOrEq,
    TokenIdBitShiftLeft,
    TokenIdBitShiftRight,
    TokenIdSlash,
    TokenIdPercent,
    TokenIdPercentPercent,
    TokenIdDot,
    TokenIdEllipsis,
    TokenIdMaybe,
    TokenIdDoubleQuestion,
    TokenIdMaybeAssign,
    TokenIdAtSign,
    TokenIdPercentDot,
};
|
|
|
|
|
|
|
|
struct Token {
|
|
|
|
TokenId id;
|
|
|
|
int start_pos;
|
|
|
|
int end_pos;
|
|
|
|
int start_line;
|
|
|
|
int start_column;
|
2015-12-15 14:13:53 +08:00
|
|
|
|
|
|
|
// for id == TokenIdNumberLiteral
|
|
|
|
int radix; // if != 10, then skip the first 2 characters
|
|
|
|
int decimal_point_pos; // either exponent_marker_pos or the position of the '.'
|
|
|
|
int exponent_marker_pos; // either end_pos or the position of the 'e'/'p'
|
2016-04-04 09:44:17 +08:00
|
|
|
|
|
|
|
// for id == TokenIdStringLiteral
|
|
|
|
int raw_string_start;
|
|
|
|
int raw_string_end;
|
2015-11-04 13:31:27 +08:00
|
|
|
};
|
|
|
|
|
2015-12-01 15:50:11 +08:00
|
|
|
struct Tokenization {
|
|
|
|
ZigList<Token> *tokens;
|
|
|
|
ZigList<int> *line_offsets;
|
|
|
|
|
|
|
|
// if an error occurred
|
|
|
|
Buf *err;
|
|
|
|
int err_line;
|
|
|
|
int err_column;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Declaration only; the definition is not in this header.
// Takes an input buffer and an output Tokenization record (token list, line
// offsets, and error fields).
// NOTE(review): presumably tokenizes the contents of buf and reports failure
// via out_tokenization->err -- confirm against the implementation.
void tokenize(Buf *buf, Tokenization *out_tokenization);
|
2015-11-04 13:31:27 +08:00
|
|
|
|
|
|
|
// Declaration only; the definition is not in this header.
// NOTE(review): presumably prints each token in `tokens`, using `buf` as the
// text the token positions refer to -- output format and destination are not
// visible here; confirm against the implementation.
void print_tokens(Buf *buf, ZigList<Token> *tokens);
|
|
|
|
|
2015-12-15 19:05:43 +08:00
|
|
|
// Declaration only; the definition is not in this header.
// NOTE(review): presumably maps a digit character to its numeric value; the
// accepted character set and the result for non-digit input are not visible
// here -- confirm against the implementation.
int get_digit_value(uint8_t c);
|
2015-12-11 06:34:38 +08:00
|
|
|
|
2016-01-14 13:18:10 +08:00
|
|
|
// Declaration only; the definition is not in this header.
// Returns a C string for the given token id.
// NOTE(review): presumably a human-readable name backed by static storage
// (caller must not free) -- confirm against the implementation.
const char * token_name(TokenId id);
|
|
|
|
|
2016-01-29 02:03:44 +08:00
|
|
|
// Declaration only; the definition is not in this header.
// NOTE(review): presumably reports whether `c` may begin a symbol token; the
// accepted character set is not visible here -- confirm against the
// implementation.
bool valid_symbol_starter(uint8_t c);
|
2016-01-29 17:17:51 +08:00
|
|
|
// Declaration only; the definition is not in this header.
// NOTE(review): presumably reports whether the contents of `buf` exactly
// match one of the language keywords -- confirm against the implementation.
bool is_zig_keyword(Buf *buf);
|
2016-01-29 02:03:44 +08:00
|
|
|
|
2015-11-04 13:31:27 +08:00
|
|
|
#endif
|