#include "parser.h" #include "ast.h" #include "token.h" #include "vector.h" #include #include enum precedence { PRE_LOWEST = 1, PRE_EQUALS, PRE_LG, PRE_SUM, PRE_PROD, PRE_PREFIX, PRE_CALL, }; static enum precedence precedences[] = { 0, PRE_LOWEST, PRE_EQUALS, PRE_LG, PRE_SUM, PRE_PROD, PRE_PREFIX, PRE_CALL, }; static inline void parser_register_prefix(struct parser *parser, enum token_type t, prefix_parse_f fn) { hmap_set(parser->prefix_fns, token_type_print(t), fn); } static inline void parser_register_infix(struct parser *parser, enum token_type t, infix_parse_f fn) { hmap_set(parser->infix_fns, token_type_print(t), fn); } static inline void parser_register_precedence(struct parser *parser, enum token_type t, enum precedence pre) { hmap_set(parser->precedences, token_type_print(t), &precedences[pre]); } static inline prefix_parse_f parser_get_prefix(struct parser *parser, enum token_type t) { return hmap_get(parser->prefix_fns, token_type_print(t)); } static inline infix_parse_f parser_get_infix(struct parser *parser, enum token_type t) { return hmap_get(parser->infix_fns, token_type_print(t)); } static inline enum precedence parser_get_precedence(struct parser *parser, enum token_type t) { enum precedence *pre = hmap_get(parser->precedences, token_type_print(t)); if (!pre) return PRE_LOWEST; return *pre; } static inline bool parser_cur_token_is(struct parser *parser, enum token_type t) { return parser->cur_token.type == t; } static inline bool parser_peek_token_is(struct parser *parser, enum token_type t) { return parser->peek_token.type == t; } static inline enum precedence parser_peek_precedence(struct parser *parser) { return parser_get_precedence(parser, parser->peek_token.type); } static inline enum precedence parser_cur_precedence(struct parser *parser) { return parser_get_precedence(parser, parser->cur_token.type); } static inline void parser_peek_error(struct parser *parser, enum token_type t) { char *msg = malloc(sizeof(*msg) * 1024); if (msg) { sprintf(msg, "expected token %s, got %s", token_type_print(t), token_type_print(parser->peek_token.type)); vector_push(parser->errors, msg); } } static inline bool parser_expect_peek(struct parser *parser, enum token_type t) { if (parser_peek_token_is(parser, t)) { parser_next_token(parser); return true; } parser_peek_error(parser, t); return false; } static inline void parser_no_prefix_fn_error(struct parser *parser, enum token_type t) { char *msg = malloc(256); sprintf(msg, "%s not recognized as prefix", token_type_print(t)); vector_push(parser->errors, msg); } static struct expression * parser_parse_expression(struct parser *parser, enum precedence pre) { prefix_parse_f prefix = parser_get_prefix(parser, parser->cur_token.type); if (!prefix) { parser_no_prefix_fn_error(parser, parser->cur_token.type); return NULL; } struct expression *lexpr = prefix(parser); while (pre < parser_peek_precedence(parser) && !parser_peek_token_is(parser, TOKEN_SEMICOLON)) { infix_parse_f infix = parser_get_infix(parser, parser->peek_token.type); if (!infix) { return lexpr; } parser_next_token(parser); lexpr = infix(parser, lexpr); } return lexpr; } static struct expression * parser_parse_identifier(struct parser *parser) { struct expression *expr = malloc(sizeof(*expr)); expr->type = EXPRESSION_IDENT; expr->token = parser->cur_token; expr->ident.value = parser->cur_token.literal; return expr; } static struct expression * parser_parse_integer(struct parser *parser) { struct expression *expr = malloc(sizeof(*expr)); expr->type = EXPRESSION_INT; expr->token = parser->cur_token; char *end; expr->integer.value = strtol(expr->token.literal.str + expr->token.literal.start, &end, 0); if (*end != '\0' && (expr->token.literal.str + expr->token.literal.end) < end) { char *msg = malloc(256); char litbuf[32]; sprintf(msg, "%s is not a valid integer", slice_sprint(&expr->token.literal, litbuf)); vector_push(parser->errors, msg); free(expr); return NULL; } return expr; } static struct expression * parser_parse_boolean(struct parser *parser) { struct expression *expr = malloc(sizeof(*expr)); expr->type = EXPRESSION_BOOL; expr->token = parser->cur_token; expr->boolean.value = expr->token.type == TOKEN_TRUE; return expr; } static struct expression * parser_parse_grouped(struct parser *parser) { parser_next_token(parser); struct expression *expr = parser_parse_expression(parser, PRE_LOWEST); if (!parser_expect_peek(parser, TOKEN_RPAREN)) { return NULL; } return expr; } static struct expression * parser_parse_prefix(struct parser *parser) { struct expression *expr = malloc(sizeof(*expr)); expr->type = EXPRESSION_PREFIX; expr->token = parser->cur_token; expr->prefix.operator = parser->cur_token.literal; parser_next_token(parser); expr->prefix.right = parser_parse_expression(parser, PRE_PREFIX); return expr; } static struct expression * parser_parse_infix(struct parser *parser, struct expression *lexpr) { struct expression *expr = malloc(sizeof(*expr)); expr->type = EXPRESSION_INFIX; expr->token = parser->cur_token; expr->infix.operator = parser->cur_token.literal; expr->infix.left = lexpr; enum precedence pre = parser_cur_precedence(parser); parser_next_token(parser); expr->infix.right = parser_parse_expression(parser, pre); return expr; } static struct statement * parser_parse_let_statement(struct parser *parser) { struct statement *st = calloc(1, sizeof(*st)); if (!st) return NULL; st->type = STATEMENT_LET; st->token = parser->cur_token; if (!parser_expect_peek(parser, TOKEN_IDENT)) goto fail; st->let.name = malloc(sizeof(*st->let.name)); if (!st->let.name) goto fail; st->let.name->token = parser->cur_token; st->let.name->value = parser->cur_token.literal; if (!parser_expect_peek(parser, TOKEN_ASSIGN)) goto ass_fail; parser_next_token(parser); st->let.value = parser_parse_expression(parser, PRE_LOWEST); if (parser_peek_token_is(parser, TOKEN_SEMICOLON)) { parser_next_token(parser); } return st; ass_fail: free(st->let.name); fail: free(st); return NULL; } static struct statement * parser_parse_return_statement(struct parser *parser) { struct statement *st = calloc(1, sizeof(*st)); if (!st) return NULL; st->type = STATEMENT_RETURN; st->token = parser->cur_token; parser_next_token(parser); st->retrn.value = parser_parse_expression(parser, PRE_LOWEST); if (parser_peek_token_is(parser, TOKEN_SEMICOLON)) { parser_next_token(parser); } return st; } static struct statement * parser_parse_expression_statement(struct parser *parser) { struct statement *st = calloc(1, sizeof(*st)); if (!st) return NULL; st->type = STATEMENT_EXPRESSION; st->token = parser->cur_token; st->expr.expr = parser_parse_expression(parser, PRE_LOWEST); if (parser_peek_token_is(parser, TOKEN_SEMICOLON)) { parser_next_token(parser); } return st; } static struct statement *parser_parse_statement(struct parser *); static struct statement * parser_parse_block_statement(struct parser *parser) { struct statement *st = calloc(1, sizeof(*st)); if (!st) return NULL; st->type = STATEMENT_BLOCK; st->block.statements = vector_new(); parser_next_token(parser); while (!parser_cur_token_is(parser, TOKEN_RBRACE) && !parser_cur_token_is(parser, TOKEN_EOF)) { struct statement *subst = parser_parse_statement(parser); if (subst) { vector_push(st->block.statements, subst); } parser_next_token(parser); } return st; } static struct expression * parser_parse_if(struct parser *parser) { struct expression *expr = calloc(1, sizeof(*expr)); expr->type = EXPRESSION_IF; expr->token = parser->cur_token; if (!parser_expect_peek(parser, TOKEN_LPAREN)) { goto fail; } parser_next_token(parser); expr->cond.condition = parser_parse_expression(parser, PRE_LOWEST); if (!parser_expect_peek(parser, TOKEN_RPAREN)) { goto fail; } if (!parser_expect_peek(parser, TOKEN_LBRACE)) { goto fail; } expr->cond.consequence = parser_parse_block_statement(parser); if (parser_peek_token_is(parser, TOKEN_ELSE)) { parser_next_token(parser); if (!parser_expect_peek(parser, TOKEN_LBRACE)) { goto fail; } expr->cond.alternative = parser_parse_block_statement(parser); } return expr; fail: free(expr); return NULL; } static struct vector * parser_parse_func_params(struct parser *parser) { struct vector *idents = vector_new_with_cap(8); if (parser_peek_token_is(parser, TOKEN_RPAREN)) { parser_next_token(parser); return idents; } parser_next_token(parser); struct expression *ident = calloc(1, sizeof(*ident)); ident->token = parser->cur_token; ident->ident.value = parser->cur_token.literal; vector_push(idents, ident); while (parser_peek_token_is(parser, TOKEN_COMMA)) { parser_next_token(parser); parser_next_token(parser); ident = calloc(1, sizeof(*ident)); ident->token = parser->cur_token; ident->ident.value = parser->cur_token.literal; vector_push(idents, ident); } if (!parser_expect_peek(parser, TOKEN_RPAREN)) { size_t i; vector_foreach(idents, i, ident) { expression_destroy(ident); } vector_free(idents); return NULL; } return idents; } static struct expression * parser_parse_func_literal(struct parser *parser) { struct expression *expr = calloc(1, sizeof(*expr)); expr->type = EXPRESSION_FUNC; expr->token = parser->cur_token; if (!parser_expect_peek(parser, TOKEN_LPAREN)) { goto fail; } expr->func.parameters = parser_parse_func_params(parser); if (!parser_expect_peek(parser, TOKEN_LBRACE)) { goto fail; } expr->func.body = parser_parse_block_statement(parser); return expr; fail: free(expr); return NULL; } static struct vector * parser_parse_call_arguments(struct parser *parser) { struct vector *args = vector_new_with_cap(8); if (parser_peek_token_is(parser, TOKEN_RPAREN)) { parser_next_token(parser); return args; } parser_next_token(parser); vector_push(args, parser_parse_expression(parser, PRE_LOWEST)); while (parser_peek_token_is(parser, TOKEN_COMMA)) { parser_next_token(parser); parser_next_token(parser); vector_push(args, parser_parse_expression(parser, PRE_LOWEST)); } if (!parser_expect_peek(parser, TOKEN_RPAREN)) { size_t i; struct expression *expr; vector_foreach(args, i, expr) { expression_destroy(expr); } vector_free(args); return NULL; } return args; } static struct expression * parser_parse_call_expression(struct parser *parser, struct expression *func) { struct expression *expr = calloc(1, sizeof(*expr)); expr->token = parser->cur_token; expr->type = EXPRESSION_CALL; expr->call.func = func; expr->call.arguments = parser_parse_call_arguments(parser); return expr; } static struct statement * parser_parse_statement(struct parser *parser) { switch (parser->cur_token.type) { case TOKEN_LET: return parser_parse_let_statement(parser); case TOKEN_RETURN: return parser_parse_return_statement(parser); case TOKEN_LBRACE: return parser_parse_block_statement(parser); default: return parser_parse_expression_statement(parser); } } struct parser * parser_new() { struct parser *parser = malloc(sizeof *parser); if (!parser) return NULL; struct lexer *lex = lexer_new(); if (!lex) goto fail; parser->lexer = lex; parser->errors = vector_new(); if (!parser->errors) goto fail2; parser->prefix_fns = hmap_new(); if (!parser->prefix_fns) goto fail3; parser_register_prefix(parser, TOKEN_IDENT, parser_parse_identifier); parser_register_prefix(parser, TOKEN_INT, parser_parse_integer); parser_register_prefix(parser, TOKEN_BANG, parser_parse_prefix); parser_register_prefix(parser, TOKEN_MINUS, parser_parse_prefix); parser_register_prefix(parser, TOKEN_TRUE, parser_parse_boolean); parser_register_prefix(parser, TOKEN_FALSE, parser_parse_boolean); parser_register_prefix(parser, TOKEN_LPAREN, parser_parse_grouped); parser_register_prefix(parser, TOKEN_RPAREN, parser_parse_grouped); parser_register_prefix(parser, TOKEN_IF, parser_parse_if); parser_register_prefix(parser, TOKEN_FUNC, parser_parse_func_literal); parser->infix_fns = hmap_new(); if (!parser->infix_fns) goto fail4; parser_register_infix(parser, TOKEN_PLUS, parser_parse_infix); parser_register_infix(parser, TOKEN_MINUS, parser_parse_infix); parser_register_infix(parser, TOKEN_SLASH, parser_parse_infix); parser_register_infix(parser, TOKEN_ASTERISK, parser_parse_infix); parser_register_infix(parser, TOKEN_EQ, parser_parse_infix); parser_register_infix(parser, TOKEN_NOTEQ, parser_parse_infix); parser_register_infix(parser, TOKEN_LT, parser_parse_infix); parser_register_infix(parser, TOKEN_GT, parser_parse_infix); parser_register_infix(parser, TOKEN_LPAREN, parser_parse_call_expression); parser->precedences = hmap_new(); if (!parser->precedences) goto fail5; parser_register_precedence(parser, TOKEN_EQ, PRE_EQUALS); parser_register_precedence(parser, TOKEN_NOTEQ, PRE_EQUALS); parser_register_precedence(parser, TOKEN_LT, PRE_LG); parser_register_precedence(parser, TOKEN_GT, PRE_LG); parser_register_precedence(parser, TOKEN_PLUS, PRE_SUM); parser_register_precedence(parser, TOKEN_MINUS, PRE_SUM); parser_register_precedence(parser, TOKEN_ASTERISK, PRE_PROD); parser_register_precedence(parser, TOKEN_SLASH, PRE_PROD); parser_register_precedence(parser, TOKEN_LPAREN, PRE_CALL); return parser; fail5: hmap_free(parser->infix_fns); fail4: hmap_free(parser->prefix_fns); fail3: vector_free(parser->errors); fail2: lexer_destroy(lex); fail: free(parser); return NULL; } void parser_reset(struct parser *parser, const char *input) { size_t i; char *val; vector_foreach (parser->errors, i, val) { free(val); } parser->errors->len = 0; lexer_reset(parser->lexer, input); parser_next_token(parser); parser_next_token(parser); } void parser_next_token(struct parser *parser) { parser->cur_token = parser->peek_token; parser->peek_token = lexer_next_token(parser->lexer); } struct program * parser_parse_program(struct parser *parser) { struct program *program = malloc(sizeof *program); if (!program) return NULL; program->statements = vector_new(); if (!program->statements) { free(program); return NULL; } while (!parser_cur_token_is(parser, TOKEN_EOF)) { struct statement *st = parser_parse_statement(parser); if (st != NULL) { vector_push(program->statements, st); } parser_next_token(parser); } return program; } void parser_destroy(struct parser *parser) { size_t i; char *val; vector_foreach (parser->errors, i, val) { free(val); } vector_free(parser->errors); hmap_free(parser->infix_fns); hmap_free(parser->prefix_fns); hmap_free(parser->precedences); lexer_destroy(parser->lexer); free(parser); }