/*
 * Copyright (c) 2015 Andrew Kelley
 *
 * This file is part of zig, which is MIT licensed.
 * See http://opensource.org/licenses/MIT
 */

#ifndef ZIG_TOKENIZER_HPP
#define ZIG_TOKENIZER_HPP
#include "buffer.hpp"
#include "bignum.hpp"
// Identifies the kind of a lexed token.
//
// Enumerators carry no explicit values, so they are numbered implicitly from
// 0 in declaration order. The declaration order below is therefore part of
// the enum's observable behavior and is preserved exactly.
enum TokenId {
    // End of input and identifiers.
    TokenIdEof,
    TokenIdSymbol,

    // Keywords.
    TokenIdKeywordFn,
    TokenIdKeywordReturn,
    TokenIdKeywordVar,
    TokenIdKeywordConst,
    TokenIdKeywordExtern,
    TokenIdKeywordPub,
    TokenIdKeywordUse,
    TokenIdKeywordExport,
    TokenIdKeywordTrue,
    TokenIdKeywordFalse,
    TokenIdKeywordIf,
    TokenIdKeywordElse,
    TokenIdKeywordGoto,
    TokenIdKeywordAsm,
    TokenIdKeywordVolatile,
    TokenIdKeywordStruct,
    TokenIdKeywordEnum,
    TokenIdKeywordUnion,
    TokenIdKeywordWhile,
    TokenIdKeywordFor,
    TokenIdKeywordContinue,
    TokenIdKeywordBreak,
    TokenIdKeywordNull,
    TokenIdKeywordNoAlias,
    TokenIdKeywordSwitch,
    TokenIdKeywordUndefined,
    TokenIdKeywordZeroes,
    TokenIdKeywordError,
    TokenIdKeywordType,
    TokenIdKeywordInline,
    TokenIdKeywordDefer,

    // Punctuation, literals and operators.
    TokenIdLParen,
    TokenIdRParen,
    TokenIdComma,
    TokenIdStar,
    TokenIdStarStar,
    TokenIdLBrace,
    TokenIdRBrace,
    TokenIdLBracket,
    TokenIdRBracket,
    TokenIdStringLiteral,
    TokenIdCharLiteral,
    TokenIdSemicolon,
    TokenIdNumberLiteral,
    TokenIdPlus,
    TokenIdPlusPlus,
    TokenIdColon,
    TokenIdArrow,
    TokenIdFatArrow,
    TokenIdDash,
    TokenIdNumberSign,
    TokenIdBoolOr,
    TokenIdBoolAnd,
    TokenIdBinOr,
    TokenIdAmpersand,
    TokenIdBinXor,
    TokenIdEq,
    TokenIdTimesEq,
    TokenIdTimesPercent,
    TokenIdTimesPercentEq,
    TokenIdDivEq,
    TokenIdModEq,
    TokenIdPlusEq,
    TokenIdPlusPercent,
    TokenIdPlusPercentEq,
    TokenIdMinusEq,
    TokenIdMinusPercent,
    TokenIdMinusPercentEq,
    TokenIdBitShiftLeftEq,
    TokenIdBitShiftLeftPercent,
    TokenIdBitShiftLeftPercentEq,
    TokenIdBitShiftRightEq,
    TokenIdBitAndEq,
    TokenIdBitXorEq,
    TokenIdBitOrEq,
    TokenIdBoolAndEq,
    TokenIdBoolOrEq,
    TokenIdCmpEq,
    TokenIdBang,
    TokenIdTilde,
    TokenIdCmpNotEq,
    TokenIdCmpLessThan,
    TokenIdCmpGreaterThan,
    TokenIdCmpLessOrEq,
    TokenIdCmpGreaterOrEq,
    TokenIdBitShiftLeft,
    TokenIdBitShiftRight,
    TokenIdSlash,
    TokenIdPercent,
    TokenIdPercentPercent,
    TokenIdDot,
    TokenIdEllipsis,
    TokenIdMaybe,
    TokenIdDoubleQuestion,
    TokenIdMaybeAssign,
    TokenIdAtSign,
    TokenIdPercentDot,
};
// Payload carried by a number-literal token (used as Token::data.num_lit for
// TokenIdNumberLiteral).
struct TokenNumLit {
// The literal's value as a BigNum.
BigNum bignum;
// overflow is true if, while parsing the number, we discovered it would not
// fit without losing data in a uint64_t or double
bool overflow;
};
// Payload carried by string-literal and symbol tokens (used as
// Token::data.str_lit for TokenIdStringLiteral or TokenIdSymbol).
struct TokenStrLit {
// The token's text as a Buf.
Buf str;
// NOTE(review): presumably marks a C-style string literal -- confirm against
// the tokenizer implementation.
bool is_c_str;
};
// Payload carried by a character-literal token (used as Token::data.char_lit
// for TokenIdCharLiteral).
struct TokenCharLit {
// The literal's value, stored as a single byte.
uint8_t c;
};
// A single lexed token: its kind, its location in the source, and an optional
// literal payload.
struct Token {
    // Which kind of token this is; also selects the active member of `data`.
    TokenId id;
    // Start and end positions of the token.
    // NOTE(review): presumably byte offsets into the tokenized buffer, and
    // whether end_pos is inclusive is not visible here -- confirm against the
    // tokenizer implementation.
    size_t start_pos;
    size_t end_pos;
    // Line and column at which the token starts.
    // NOTE(review): zero- vs one-based numbering is not visible here.
    size_t start_line;
    size_t start_column;

    // Literal payload; which member is meaningful depends on `id`.
    union {
        // TokenIdNumberLiteral
        TokenNumLit num_lit;

        // TokenIdStringLiteral or TokenIdSymbol
        TokenStrLit str_lit;

        // TokenIdCharLiteral
        TokenCharLit char_lit;
    } data;
};
// Output of tokenize(): the token list, line offsets, and error information.
struct Tokenization {
// The list of tokens.
ZigList<Token> *tokens;
// NOTE(review): presumably the offset at which each source line starts --
// confirm against the tokenizer implementation.
ZigList<size_t> *line_offsets;

// if an error occurred
// NOTE(review): presumably err is null when tokenizing succeeded, and
// err_line / err_column are only meaningful when it is set -- confirm.
Buf *err;
size_t err_line;
size_t err_column;
};
void tokenize(Buf *buf, Tokenization *out_tokenization);
2015-11-04 13:31:27 +08:00
void print_tokens(Buf *buf, ZigList<Token> *tokens);
const char * token_name(TokenId id);
2016-01-29 02:03:44 +08:00
bool valid_symbol_starter(uint8_t c);
bool is_zig_keyword(Buf *buf);
2016-01-29 02:03:44 +08:00
2015-11-04 13:31:27 +08:00
#endif