451 lines
14 KiB
C++
451 lines
14 KiB
C++
#include "parser.hpp"
|
|
|
|
#include <stdarg.h>
|
|
#include <stdio.h>
|
|
|
|
/*
 * Report a fatal parse error located at `token`, then terminate the process.
 *
 * Writes "Error: Line <l>, column <c>: <message>\n" to stderr, where <l> and
 * <c> are the token's stored start_line / start_column plus one, and
 * <message> is `format` expanded printf-style with the variadic arguments.
 *
 * Does not return: the process exits with EXIT_FAILURE.
 */
void ast_error(Token *token, const char *format, ...) {
    // The stored positions are shifted by one for display.
    int display_line = token->start_line + 1;
    int display_column = token->start_column + 1;

    fprintf(stderr, "Error: Line %d, column %d: ", display_line, display_column);

    va_list args;
    va_start(args, format);
    vfprintf(stderr, format, args);
    va_end(args);

    fprintf(stderr, "\n");
    exit(EXIT_FAILURE);
}
|
|
|
|
/*
 * Return the printable name of an AST node type.
 *
 * Each enumerator listed below maps to a string literal. A value that matches
 * none of the cases ends in zig_unreachable().
 */
const char *node_type_str(NodeType node_type) {
    const char *name = nullptr;

    switch (node_type) {
        case NodeTypeRoot:
            name = "Root";
            break;
        case NodeTypeFnDecl:
            name = "FnDecl";
            break;
        case NodeTypeParamDecl:
            name = "ParamDecl";
            break;
        case NodeTypeType:
            name = "Type";
            break;
        case NodeTypeBlock:
            name = "Block";
            break;
        case NodeTypeStatement:
            name = "Statement";
            break;
        case NodeTypeExpression:
            name = "Expression";
            break;
        case NodeTypeFnCall:
            name = "FnCall";
            break;
    }

    if (name == nullptr) {
        // No case matched: the value is not a known NodeType.
        zig_unreachable();
    }
    return name;
}
|
|
|
|
/*
 * Dump the subtree rooted at `node` to stderr, one node per line.
 *
 * Each line is prefixed with `indent` spaces; children are printed
 * recursively with indent + 2.
 *
 *   Root       prints its type name, then every function declaration.
 *   FnDecl     prints its type name and function name, then its parameters,
 *              its return type (only when one is present) and its body.
 *   Block      prints its type name, then every statement.
 *   ParamDecl  prints its type name and parameter name, then its type.
 *   Type       a primitive prints the type name and primitive name; a
 *              pointer prints 'const' or 'mut' followed by "PointerType",
 *              then the pointed-to type.
 *   Statement  prints "ReturnStatement" or "ExpressionStatement", then the
 *              contained expression.
 *   Any other node type prints only its type name; no children are visited.
 *
 * `node` must not be null.
 */
void ast_print(AstNode *node, int indent) {
    for (int i = 0; i < indent; i += 1) {
        fprintf(stderr, " ");
    }

    switch (node->type) {
        case NodeTypeRoot:
            fprintf(stderr, "%s\n", node_type_str(node->type));
            for (int i = 0; i < node->data.root.fn_decls.length; i += 1) {
                AstNode *child = node->data.root.fn_decls.at(i);
                ast_print(child, indent + 2);
            }
            break;
        case NodeTypeFnDecl:
            {
                Buf *name_buf = &node->data.fn_decl.name;
                fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf));

                for (int i = 0; i < node->data.fn_decl.params.length; i += 1) {
                    AstNode *child = node->data.fn_decl.params.at(i);
                    ast_print(child, indent + 2);
                }

                // The parser stores nullptr here when the declaration has no
                // "-> Type" clause; recursing on it would dereference null.
                if (node->data.fn_decl.return_type != nullptr) {
                    ast_print(node->data.fn_decl.return_type, indent + 2);
                }

                ast_print(node->data.fn_decl.body, indent + 2);

                break;
            }
        case NodeTypeBlock:
            {
                fprintf(stderr, "%s\n", node_type_str(node->type));
                for (int i = 0; i < node->data.block.statements.length; i += 1) {
                    AstNode *child = node->data.block.statements.at(i);
                    ast_print(child, indent + 2);
                }
                break;
            }
        case NodeTypeParamDecl:
            {
                Buf *name_buf = &node->data.param_decl.name;
                fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf));

                ast_print(node->data.param_decl.type, indent + 2);

                break;
            }
        case NodeTypeType:
            switch (node->data.type.type) {
                case AstNodeTypeTypePrimitive:
                    {
                        Buf *name_buf = &node->data.type.primitive_name;
                        fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf));
                        break;
                    }
                case AstNodeTypeTypePointer:
                    {
                        const char *const_or_mut_str = node->data.type.is_const ? "const" : "mut";
                        fprintf(stderr, "'%s' PointerType\n", const_or_mut_str);

                        ast_print(node->data.type.child_type, indent + 2);
                        break;
                    }
            }
            break;
        case NodeTypeStatement:
            switch (node->data.statement.type) {
                case AstNodeStatementTypeReturn:
                    fprintf(stderr, "ReturnStatement\n");
                    ast_print(node->data.statement.data.retrn.expression, indent + 2);
                    break;
                case AstNodeStatementTypeExpression:
                    fprintf(stderr, "ExpressionStatement\n");
                    ast_print(node->data.statement.data.expr.expression, indent + 2);
                    break;
            }
            break;
        default:
            // Remaining node types: only the type name is printed.
            fprintf(stderr, "%s\n", node_type_str(node->type));
            break;
    }
}
|
|
|
|
struct ParseContext {
|
|
Buf *buf;
|
|
AstNode *root;
|
|
ZigList<Token> *tokens;
|
|
};
|
|
|
|
/*
 * Allocate a new AstNode of the given type.
 *
 * The node's line and column are copied from `first_token`'s start_line and
 * start_column. All other fields are left exactly as allocate<AstNode>()
 * returned them.
 */
static AstNode *ast_create_node(NodeType type, Token *first_token) {
    AstNode *created = allocate<AstNode>(1);

    created->column = first_token->start_column;
    created->line = first_token->start_line;
    created->type = type;

    return created;
}
|
|
|
|
/*
 * Initialize `buf` with the text of `token`.
 *
 * The text is the byte range [start_pos, end_pos) of the parse context's
 * buffer, handed to buf_init_from_mem() as a pointer and a length.
 */
static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) {
    auto token_text = buf_ptr(pc->buf) + token->start_pos;
    auto token_length = token->end_pos - token->start_pos;

    buf_init_from_mem(buf, token_text, token_length);
}
|
|
|
|
/*
 * Abort parsing with an "invalid token" error that quotes the token's text.
 *
 * Does not return: ast_error() terminates the process.
 */
static void ast_invalid_token_error(ParseContext *pc, Token *token) {
    Buf offending_text = {0};
    ast_buf_from_token(pc, token, &offending_text);

    ast_error(token, "invalid token: '%s'", buf_ptr(&offending_text));
}
|
|
|
|
static AstNode *ast_parse_expression(ParseContext *pc, int token_index, int *new_token_index);
|
|
|
|
|
|
/*
 * Require that `token` has id `token_id`.
 *
 * Returns normally on a match; otherwise reports the token as invalid, which
 * terminates the process.
 */
static void ast_expect_token(ParseContext *pc, Token *token, TokenId token_id) {
    if (token->id == token_id) {
        return;
    }

    ast_invalid_token_error(pc, token);
}
|
|
|
|
/*
|
|
Type : token(Symbol) | PointerType;
|
|
PointerType : token(Star) token(Const) Type | token(Star) token(Mut) Type;
|
|
*/
|
|
static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token_index) {
|
|
Token *token = &pc->tokens->at(token_index);
|
|
token_index += 1;
|
|
|
|
AstNode *node = ast_create_node(NodeTypeType, token);
|
|
|
|
if (token->id == TokenIdSymbol) {
|
|
node->data.type.type = AstNodeTypeTypePrimitive;
|
|
ast_buf_from_token(pc, token, &node->data.type.primitive_name);
|
|
} else if (token->id == TokenIdStar) {
|
|
node->data.type.type = AstNodeTypeTypePointer;
|
|
Token *const_or_mut = &pc->tokens->at(token_index);
|
|
token_index += 1;
|
|
if (const_or_mut->id == TokenIdKeywordMut) {
|
|
node->data.type.is_const = false;
|
|
} else if (const_or_mut->id == TokenIdKeywordConst) {
|
|
node->data.type.is_const = true;
|
|
} else {
|
|
ast_invalid_token_error(pc, const_or_mut);
|
|
}
|
|
|
|
node->data.type.child_type = ast_parse_type(pc, token_index, &token_index);
|
|
} else {
|
|
ast_invalid_token_error(pc, token);
|
|
}
|
|
|
|
*new_token_index = token_index;
|
|
return node;
|
|
}
|
|
|
|
/*
|
|
ParamDecl<node> : token(Symbol) token(Colon) Type {
|
|
};
|
|
*/
|
|
static AstNode *ast_parse_param_decl(ParseContext *pc, int token_index, int *new_token_index) {
|
|
Token *param_name = &pc->tokens->at(token_index);
|
|
token_index += 1;
|
|
ast_expect_token(pc, param_name, TokenIdSymbol);
|
|
|
|
AstNode *node = ast_create_node(NodeTypeParamDecl, param_name);
|
|
|
|
|
|
ast_buf_from_token(pc, param_name, &node->data.param_decl.name);
|
|
|
|
Token *colon = &pc->tokens->at(token_index);
|
|
token_index += 1;
|
|
ast_expect_token(pc, colon, TokenIdColon);
|
|
|
|
node->data.param_decl.type = ast_parse_type(pc, token_index, &token_index);
|
|
|
|
*new_token_index = token_index;
|
|
return node;
|
|
}
|
|
|
|
|
|
/*
 * Parse a parenthesized, comma-separated list of parameter declarations:
 * token(LParen), zero or more ParamDecl separated by token(Comma), then
 * token(RParen).
 *
 * Each parsed declaration is appended to `params`. On return,
 * *new_token_index is the index of the first token after the closing paren.
 */
static void ast_parse_param_decl_list(ParseContext *pc, int token_index, int *new_token_index,
        ZigList<AstNode *> *params)
{
    Token *open_paren = &pc->tokens->at(token_index);
    ast_expect_token(pc, open_paren, TokenIdLParen);

    int cursor = token_index + 1;

    // Empty list: "()".
    if (pc->tokens->at(cursor).id == TokenIdRParen) {
        *new_token_index = cursor + 1;
        return;
    }

    for (;;) {
        AstNode *param = ast_parse_param_decl(pc, cursor, &cursor);
        params->append(param);

        // After each declaration comes either ')' (done) or ',' (another one).
        Token *separator = &pc->tokens->at(cursor);
        cursor += 1;
        if (separator->id == TokenIdRParen) {
            break;
        }
        ast_expect_token(pc, separator, TokenIdComma);
    }

    *new_token_index = cursor;
}
|
|
|
|
/*
 * Parse the parenthesized argument list of a function call:
 * token(LParen), zero or more Expression separated by token(Comma), then
 * token(RParen).
 *
 * Each parsed expression is appended to `params`. On return,
 * *new_token_index is the index of the first token after the closing paren.
 */
static void ast_parse_fn_call_param_list(ParseContext *pc, int token_index, int *new_token_index,
        ZigList<AstNode*> *params)
{
    Token *open_paren = &pc->tokens->at(token_index);
    ast_expect_token(pc, open_paren, TokenIdLParen);

    int cursor = token_index + 1;

    // Empty list: "()".
    if (pc->tokens->at(cursor).id == TokenIdRParen) {
        *new_token_index = cursor + 1;
        return;
    }

    for (;;) {
        AstNode *argument = ast_parse_expression(pc, cursor, &cursor);
        params->append(argument);

        // After each argument comes either ')' (done) or ',' (another one).
        Token *separator = &pc->tokens->at(cursor);
        cursor += 1;
        if (separator->id == TokenIdRParen) {
            break;
        }
        ast_expect_token(pc, separator, TokenIdComma);
    }

    *new_token_index = cursor;
}
|
|
|
|
/*
|
|
FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
|
|
*/
|
|
static AstNode *ast_parse_fn_call(ParseContext *pc, int token_index, int *new_token_index) {
|
|
Token *fn_name = &pc->tokens->at(token_index);
|
|
token_index += 1;
|
|
ast_expect_token(pc, fn_name, TokenIdSymbol);
|
|
|
|
AstNode *node = ast_create_node(NodeTypeFnCall, fn_name);
|
|
|
|
|
|
ast_buf_from_token(pc, fn_name, &node->data.fn_call.name);
|
|
|
|
ast_parse_fn_call_param_list(pc, token_index, &token_index, &node->data.fn_call.params);
|
|
|
|
*new_token_index = token_index;
|
|
return node;
|
|
}
|
|
|
|
/*
 * Expression : token(Number) | token(String) | FnCall ;
 *
 * Parses one expression starting at `token_index`. A Symbol starts a function
 * call, parsed by ast_parse_fn_call(); a number or string literal is a
 * single-token expression whose text is copied into the node. Any other
 * token is reported as invalid. On return, *new_token_index is the index of
 * the first token after the expression.
 */
static AstNode *ast_parse_expression(ParseContext *pc, int token_index, int *new_token_index) {
    Token *first = &pc->tokens->at(token_index);
    AstNode *expr = ast_create_node(NodeTypeExpression, first);
    int cursor = token_index;

    switch (first->id) {
        case TokenIdSymbol:
            expr->data.expression.type = AstNodeExpressionTypeFnCall;
            // The call parser starts at the symbol itself and advances the cursor.
            expr->data.expression.data.fn_call = ast_parse_fn_call(pc, cursor, &cursor);
            break;
        case TokenIdNumberLiteral:
            expr->data.expression.type = AstNodeExpressionTypeNumber;
            ast_buf_from_token(pc, first, &expr->data.expression.data.number);
            cursor += 1;
            break;
        case TokenIdStringLiteral:
            expr->data.expression.type = AstNodeExpressionTypeString;
            ast_buf_from_token(pc, first, &expr->data.expression.data.string);
            cursor += 1;
            break;
        default:
            ast_invalid_token_error(pc, first);
            break;
    }

    *new_token_index = cursor;
    return expr;
}
|
|
|
|
/*
|
|
Statement : ExpressionStatement | ReturnStatement ;
|
|
|
|
ExpressionStatement : Expression token(Semicolon) ;
|
|
|
|
ReturnStatement : token(Return) Expression token(Semicolon) ;
|
|
|
|
Expression : token(Number) | token(String) | FnCall ;
|
|
|
|
FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
|
|
*/
|
|
static AstNode *ast_parse_statement(ParseContext *pc, int token_index, int *new_token_index) {
|
|
Token *token = &pc->tokens->at(token_index);
|
|
AstNode *node = ast_create_node(NodeTypeStatement, token);
|
|
|
|
if (token->id == TokenIdKeywordReturn) {
|
|
token_index += 1;
|
|
node->data.statement.type = AstNodeStatementTypeReturn;
|
|
node->data.statement.data.retrn.expression = ast_parse_expression(pc, token_index, &token_index);
|
|
|
|
Token *semicolon = &pc->tokens->at(token_index);
|
|
token_index += 1;
|
|
ast_expect_token(pc, semicolon, TokenIdSemicolon);
|
|
} else if (token->id == TokenIdSymbol ||
|
|
token->id == TokenIdStringLiteral ||
|
|
token->id == TokenIdNumberLiteral)
|
|
{
|
|
node->data.statement.type = AstNodeStatementTypeExpression;
|
|
node->data.statement.data.expr.expression = ast_parse_expression(pc, token_index, &token_index);
|
|
|
|
Token *semicolon = &pc->tokens->at(token_index);
|
|
token_index += 1;
|
|
ast_expect_token(pc, semicolon, TokenIdSemicolon);
|
|
} else {
|
|
ast_invalid_token_error(pc, token);
|
|
}
|
|
|
|
*new_token_index = token_index;
|
|
return node;
|
|
}
|
|
|
|
/*
|
|
Block : token(LBrace) many(Statement) token(RBrace);
|
|
*/
|
|
static AstNode *ast_parse_block(ParseContext *pc, int token_index, int *new_token_index) {
|
|
Token *l_brace = &pc->tokens->at(token_index);
|
|
token_index += 1;
|
|
ast_expect_token(pc, l_brace, TokenIdLBrace);
|
|
|
|
AstNode *node = ast_create_node(NodeTypeBlock, l_brace);
|
|
|
|
|
|
for (;;) {
|
|
Token *token = &pc->tokens->at(token_index);
|
|
if (token->id == TokenIdRBrace) {
|
|
token_index += 1;
|
|
*new_token_index = token_index;
|
|
return node;
|
|
} else {
|
|
AstNode *statement_node = ast_parse_statement(pc, token_index, &token_index);
|
|
node->data.block.statements.append(statement_node);
|
|
}
|
|
}
|
|
zig_unreachable();
|
|
}
|
|
|
|
/*
|
|
FnDecl : token(Fn) token(Symbol) ParamDeclList option(token(Arrow) Type) Block;
|
|
*/
|
|
static AstNode *ast_parse_fn_decl(ParseContext *pc, int token_index, int *new_token_index) {
|
|
Token *fn_token = &pc->tokens->at(token_index);
|
|
token_index += 1;
|
|
ast_expect_token(pc, fn_token, TokenIdKeywordFn);
|
|
|
|
AstNode *node = ast_create_node(NodeTypeFnDecl, fn_token);
|
|
|
|
|
|
Token *fn_name = &pc->tokens->at(token_index);
|
|
token_index += 1;
|
|
ast_expect_token(pc, fn_name, TokenIdSymbol);
|
|
|
|
ast_buf_from_token(pc, fn_name, &node->data.fn_decl.name);
|
|
|
|
|
|
ast_parse_param_decl_list(pc, token_index, &token_index, &node->data.fn_decl.params);
|
|
|
|
Token *arrow = &pc->tokens->at(token_index);
|
|
token_index += 1;
|
|
if (arrow->id == TokenIdArrow) {
|
|
node->data.fn_decl.return_type = ast_parse_type(pc, token_index, &token_index);
|
|
} else if (arrow->id == TokenIdLBrace) {
|
|
node->data.fn_decl.return_type = nullptr;
|
|
} else {
|
|
ast_invalid_token_error(pc, arrow);
|
|
}
|
|
|
|
node->data.fn_decl.body = ast_parse_block(pc, token_index, &token_index);
|
|
|
|
*new_token_index = token_index;
|
|
return node;
|
|
}
|
|
|
|
|
|
/*
 * Parse consecutive function declarations starting at `token_index`.
 *
 * As long as the next token is the 'fn' keyword, one declaration is parsed
 * and appended to `fn_decls`. Parsing stops at the first token that is not
 * 'fn'; *new_token_index receives that token's index.
 */
static void ast_parse_fn_decl_list(ParseContext *pc, int token_index, ZigList<AstNode *> *fn_decls,
        int *new_token_index)
{
    int cursor = token_index;

    while (pc->tokens->at(cursor).id == TokenIdKeywordFn) {
        AstNode *fn_decl = ast_parse_fn_decl(pc, cursor, &cursor);
        fn_decls->append(fn_decl);
    }

    *new_token_index = cursor;
}
|
|
|
|
/*
 * Parse `tokens` (whose positions refer into `buf`) into an AST and return
 * its root node.
 *
 * The root is a list of function declarations. Parsing must stop exactly at
 * the last token of the list; otherwise the token where it stopped is
 * reported as invalid, which terminates the process.
 * NOTE(review): the last token is presumably an end-of-file marker emitted by
 * the tokenizer -- confirm against the tokenizer.
 */
AstNode *ast_parse(Buf *buf, ZigList<Token> *tokens) {
    ParseContext pc = {0};
    pc.buf = buf;
    pc.tokens = tokens;
    // The root node takes its source position from the very first token.
    pc.root = ast_create_node(NodeTypeRoot, &tokens->at(0));

    int stop_index;
    ast_parse_fn_decl_list(&pc, 0, &pc.root->data.root.fn_decls, &stop_index);

    const bool stopped_at_last_token = (stop_index == tokens->length - 1);
    if (!stopped_at_last_token) {
        ast_invalid_token_error(&pc, &tokens->at(stop_index));
    }

    return pc.root;
}
|