From c0cd4e5f199e8567ec3b5e216fbee27837d21bea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yaroslav=20de=20la=20Pe=C3=B1a=20Smirnov?= Date: Thu, 20 Jan 2022 02:34:32 +0300 Subject: init --- src/parser.c | 605 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 605 insertions(+) create mode 100644 src/parser.c (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..e8c44df --- /dev/null +++ b/src/parser.c @@ -0,0 +1,605 @@ +#include "parser.h" +#include "ast.h" +#include "token.h" +#include "vector.h" + +#include +#include + +enum precedence { + PRE_LOWEST = 1, + PRE_EQUALS, + PRE_LG, + PRE_SUM, + PRE_PROD, + PRE_PREFIX, + PRE_CALL, +}; + +static enum precedence precedences[] = { + 0, + PRE_LOWEST, + PRE_EQUALS, + PRE_LG, + PRE_SUM, + PRE_PROD, + PRE_PREFIX, + PRE_CALL, +}; + +static inline void +parser_register_prefix(struct parser *parser, enum token_type t, + prefix_parse_f fn) +{ + hmap_set(parser->prefix_fns, token_type_print(t), fn); +} + +static inline void +parser_register_infix(struct parser *parser, enum token_type t, + infix_parse_f fn) +{ + hmap_set(parser->infix_fns, token_type_print(t), fn); +} + +static inline void +parser_register_precedence(struct parser *parser, enum token_type t, + enum precedence pre) +{ + hmap_set(parser->precedences, token_type_print(t), &precedences[pre]); +} + +static inline prefix_parse_f +parser_get_prefix(struct parser *parser, enum token_type t) +{ + return hmap_get(parser->prefix_fns, token_type_print(t)); +} + +static inline infix_parse_f +parser_get_infix(struct parser *parser, enum token_type t) +{ + return hmap_get(parser->infix_fns, token_type_print(t)); +} + +static inline enum precedence +parser_get_precedence(struct parser *parser, enum token_type t) +{ + enum precedence *pre = hmap_get(parser->precedences, token_type_print(t)); + if (!pre) return PRE_LOWEST; + return *pre; +} + +static inline bool +parser_cur_token_is(struct parser *parser, enum token_type t) +{ + return parser->cur_token.type == t; +} + +static inline bool +parser_peek_token_is(struct parser *parser, enum token_type t) +{ + return parser->peek_token.type == t; +} + +static inline enum precedence +parser_peek_precedence(struct parser *parser) +{ + return parser_get_precedence(parser, parser->peek_token.type); +} + +static inline enum precedence +parser_cur_precedence(struct parser *parser) +{ + return parser_get_precedence(parser, parser->cur_token.type); +} + +static inline void +parser_peek_error(struct parser *parser, enum token_type t) +{ + char *msg = malloc(sizeof(*msg) * 1024); + if (msg) { + sprintf(msg, "expected token %s, got %s", token_type_print(t), + token_type_print(parser->peek_token.type)); + vector_push(parser->errors, msg); + } +} + +static inline bool +parser_expect_peek(struct parser *parser, enum token_type t) +{ + if (parser_peek_token_is(parser, t)) { + parser_next_token(parser); + return true; + } + parser_peek_error(parser, t); + return false; +} + +static inline void +parser_no_prefix_fn_error(struct parser *parser, enum token_type t) +{ + char *msg = malloc(256); + sprintf(msg, "%s not recognized as prefix", token_type_print(t)); + vector_push(parser->errors, msg); +} + +static struct expression * +parser_parse_expression(struct parser *parser, enum precedence pre) +{ + prefix_parse_f prefix = parser_get_prefix(parser, parser->cur_token.type); + if (!prefix) { + parser_no_prefix_fn_error(parser, parser->cur_token.type); + return NULL; + } + struct expression *lexpr = prefix(parser); + + while (pre < parser_peek_precedence(parser) + && !parser_peek_token_is(parser, TOKEN_SEMICOLON)) { + infix_parse_f infix = parser_get_infix(parser, parser->peek_token.type); + if (!infix) { + return lexpr; + } + + parser_next_token(parser); + lexpr = infix(parser, lexpr); + } + + return lexpr; +} + +static struct expression * +parser_parse_identifier(struct parser *parser) +{ + struct expression *expr = malloc(sizeof(*expr)); + expr->type = EXPRESSION_IDENT; + expr->token = parser->cur_token; + expr->ident.value = parser->cur_token.literal; + + return expr; +} + +static struct expression * +parser_parse_integer(struct parser *parser) +{ + struct expression *expr = malloc(sizeof(*expr)); + expr->type = EXPRESSION_INT; + expr->token = parser->cur_token; + + char *end; + expr->integer.value = strtol(expr->token.literal.str + + expr->token.literal.start, + &end, 0); + if (*end != '\0' && (expr->token.literal.str + expr->token.literal.end) < end) { + char *msg = malloc(256); + char litbuf[32]; + sprintf(msg, "%s is not a valid integer", + slice_sprint(&expr->token.literal, litbuf)); + vector_push(parser->errors, msg); + free(expr); + return NULL; + } + + return expr; +} + +static struct expression * +parser_parse_boolean(struct parser *parser) +{ + struct expression *expr = malloc(sizeof(*expr)); + expr->type = EXPRESSION_BOOL; + expr->token = parser->cur_token; + expr->boolean.value = expr->token.type == TOKEN_TRUE; + + return expr; +} + +static struct expression * +parser_parse_grouped(struct parser *parser) +{ + parser_next_token(parser); + struct expression *expr = parser_parse_expression(parser, PRE_LOWEST); + if (!parser_expect_peek(parser, TOKEN_RPAREN)) { + return NULL; + } + + return expr; +} + +static struct expression * +parser_parse_prefix(struct parser *parser) +{ + struct expression *expr = malloc(sizeof(*expr)); + expr->type = EXPRESSION_PREFIX; + expr->token = parser->cur_token; + expr->prefix.operator = parser->cur_token.literal; + + parser_next_token(parser); + expr->prefix.right = parser_parse_expression(parser, PRE_PREFIX); + + return expr; +} + +static struct expression * +parser_parse_infix(struct parser *parser, struct expression *lexpr) +{ + struct expression *expr = malloc(sizeof(*expr)); + expr->type = EXPRESSION_INFIX; + expr->token = parser->cur_token; + expr->infix.operator = parser->cur_token.literal; + expr->infix.left = lexpr; + + enum precedence pre = parser_cur_precedence(parser); + parser_next_token(parser); + expr->infix.right = parser_parse_expression(parser, pre); + + return expr; +} + +static struct statement * +parser_parse_let_statement(struct parser *parser) +{ + struct statement *st = calloc(1, sizeof(*st)); + if (!st) return NULL; + st->type = STATEMENT_LET; + st->token = parser->cur_token; + + if (!parser_expect_peek(parser, TOKEN_IDENT)) goto fail; + + st->let.name = malloc(sizeof(*st->let.name)); + if (!st->let.name) goto fail; + st->let.name->token = parser->cur_token; + st->let.name->value = parser->cur_token.literal; + + if (!parser_expect_peek(parser, TOKEN_ASSIGN)) goto ass_fail; + + parser_next_token(parser); + + st->let.value = parser_parse_expression(parser, PRE_LOWEST); + if (parser_peek_token_is(parser, TOKEN_SEMICOLON)) { + parser_next_token(parser); + } + + return st; +ass_fail: + free(st->let.name); +fail: + free(st); + return NULL; +} + +static struct statement * +parser_parse_return_statement(struct parser *parser) +{ + struct statement *st = calloc(1, sizeof(*st)); + if (!st) return NULL; + st->type = STATEMENT_RETURN; + st->token = parser->cur_token; + + parser_next_token(parser); + + st->retrn.value = parser_parse_expression(parser, PRE_LOWEST); + if (parser_peek_token_is(parser, TOKEN_SEMICOLON)) { + parser_next_token(parser); + } + + return st; +} + +static struct statement * +parser_parse_expression_statement(struct parser *parser) +{ + struct statement *st = calloc(1, sizeof(*st)); + if (!st) return NULL; + st->type = STATEMENT_EXPRESSION; + st->token = parser->cur_token; + st->expr.expr = parser_parse_expression(parser, PRE_LOWEST); + + if (parser_peek_token_is(parser, TOKEN_SEMICOLON)) { + parser_next_token(parser); + } + + return st; +} + +static struct statement *parser_parse_statement(struct parser *); + +static struct statement * +parser_parse_block_statement(struct parser *parser) +{ + struct statement *st = calloc(1, sizeof(*st)); + if (!st) return NULL; + st->type = STATEMENT_BLOCK; + st->block.statements = vector_new(); + parser_next_token(parser); + + while (!parser_cur_token_is(parser, TOKEN_RBRACE) + && !parser_cur_token_is(parser, TOKEN_EOF)) { + struct statement *subst = parser_parse_statement(parser); + if (subst) { + vector_push(st->block.statements, subst); + } + parser_next_token(parser); + } + + return st; +} + +static struct expression * +parser_parse_if(struct parser *parser) +{ + struct expression *expr = calloc(1, sizeof(*expr)); + expr->type = EXPRESSION_IF; + expr->token = parser->cur_token; + + if (!parser_expect_peek(parser, TOKEN_LPAREN)) { + goto fail; + } + + parser_next_token(parser); + expr->cond.condition = parser_parse_expression(parser, PRE_LOWEST); + + if (!parser_expect_peek(parser, TOKEN_RPAREN)) { + goto fail; + } + if (!parser_expect_peek(parser, TOKEN_LBRACE)) { + goto fail; + } + + expr->cond.consequence = parser_parse_block_statement(parser); + + if (parser_peek_token_is(parser, TOKEN_ELSE)) { + parser_next_token(parser); + if (!parser_expect_peek(parser, TOKEN_LBRACE)) { + goto fail; + } + expr->cond.alternative = parser_parse_block_statement(parser); + } + + return expr; +fail: + free(expr); + return NULL; +} + +static struct vector * +parser_parse_func_params(struct parser *parser) +{ + struct vector *idents = vector_new_with_cap(8); + + if (parser_peek_token_is(parser, TOKEN_RPAREN)) { + parser_next_token(parser); + return idents; + } + + parser_next_token(parser); + + struct expression *ident = calloc(1, sizeof(*ident)); + ident->token = parser->cur_token; + ident->ident.value = parser->cur_token.literal; + vector_push(idents, ident); + while (parser_peek_token_is(parser, TOKEN_COMMA)) { + parser_next_token(parser); + parser_next_token(parser); + ident = calloc(1, sizeof(*ident)); + ident->token = parser->cur_token; + ident->ident.value = parser->cur_token.literal; + vector_push(idents, ident); + } + + if (!parser_expect_peek(parser, TOKEN_RPAREN)) { + size_t i; + vector_foreach(idents, i, ident) { + expression_destroy(ident); + } + vector_free(idents); + return NULL; + } + + return idents; +} + +static struct expression * +parser_parse_func_literal(struct parser *parser) +{ + struct expression *expr = calloc(1, sizeof(*expr)); + expr->type = EXPRESSION_FUNC; + expr->token = parser->cur_token; + + if (!parser_expect_peek(parser, TOKEN_LPAREN)) { + goto fail; + } + expr->func.parameters = parser_parse_func_params(parser); + if (!parser_expect_peek(parser, TOKEN_LBRACE)) { + goto fail; + } + + expr->func.body = parser_parse_block_statement(parser); + + return expr; +fail: + free(expr); + return NULL; +} + +static struct vector * +parser_parse_call_arguments(struct parser *parser) +{ + struct vector *args = vector_new_with_cap(8); + + if (parser_peek_token_is(parser, TOKEN_RPAREN)) { + parser_next_token(parser); + return args; + } + + parser_next_token(parser); + vector_push(args, parser_parse_expression(parser, PRE_LOWEST)); + + while (parser_peek_token_is(parser, TOKEN_COMMA)) { + parser_next_token(parser); + parser_next_token(parser); + vector_push(args, parser_parse_expression(parser, PRE_LOWEST)); + } + + if (!parser_expect_peek(parser, TOKEN_RPAREN)) { + size_t i; + struct expression *expr; + vector_foreach(args, i, expr) { + expression_destroy(expr); + } + vector_free(args); + return NULL; + } + + return args; +} + +static struct expression * +parser_parse_call_expression(struct parser *parser, struct expression *func) +{ + struct expression *expr = calloc(1, sizeof(*expr)); + expr->token = parser->cur_token; + expr->type = EXPRESSION_CALL; + expr->call.func = func; + expr->call.arguments = parser_parse_call_arguments(parser); + + return expr; +} + +static struct statement * +parser_parse_statement(struct parser *parser) +{ + switch (parser->cur_token.type) { + case TOKEN_LET: + return parser_parse_let_statement(parser); + case TOKEN_RETURN: + return parser_parse_return_statement(parser); + case TOKEN_LBRACE: + return parser_parse_block_statement(parser); + default: + return parser_parse_expression_statement(parser); + } +} + +struct parser * +parser_new() +{ + struct parser *parser = malloc(sizeof *parser); + if (!parser) return NULL; + + struct lexer *lex = lexer_new(); + if (!lex) goto fail; + parser->lexer = lex; + + parser->errors = vector_new(); + if (!parser->errors) goto fail2; + + parser->prefix_fns = hmap_new(); + if (!parser->prefix_fns) goto fail3; + parser_register_prefix(parser, TOKEN_IDENT, parser_parse_identifier); + parser_register_prefix(parser, TOKEN_INT, parser_parse_integer); + parser_register_prefix(parser, TOKEN_BANG, parser_parse_prefix); + parser_register_prefix(parser, TOKEN_MINUS, parser_parse_prefix); + parser_register_prefix(parser, TOKEN_TRUE, parser_parse_boolean); + parser_register_prefix(parser, TOKEN_FALSE, parser_parse_boolean); + parser_register_prefix(parser, TOKEN_LPAREN, parser_parse_grouped); + parser_register_prefix(parser, TOKEN_RPAREN, parser_parse_grouped); + parser_register_prefix(parser, TOKEN_IF, parser_parse_if); + parser_register_prefix(parser, TOKEN_FUNC, parser_parse_func_literal); + + parser->infix_fns = hmap_new(); + if (!parser->infix_fns) goto fail4; + parser_register_infix(parser, TOKEN_PLUS, parser_parse_infix); + parser_register_infix(parser, TOKEN_MINUS, parser_parse_infix); + parser_register_infix(parser, TOKEN_SLASH, parser_parse_infix); + parser_register_infix(parser, TOKEN_ASTERISK, parser_parse_infix); + parser_register_infix(parser, TOKEN_EQ, parser_parse_infix); + parser_register_infix(parser, TOKEN_NOTEQ, parser_parse_infix); + parser_register_infix(parser, TOKEN_LT, parser_parse_infix); + parser_register_infix(parser, TOKEN_GT, parser_parse_infix); + parser_register_infix(parser, TOKEN_LPAREN, parser_parse_call_expression); + + parser->precedences = hmap_new(); + if (!parser->precedences) goto fail5; + parser_register_precedence(parser, TOKEN_EQ, PRE_EQUALS); + parser_register_precedence(parser, TOKEN_NOTEQ, PRE_EQUALS); + parser_register_precedence(parser, TOKEN_LT, PRE_LG); + parser_register_precedence(parser, TOKEN_GT, PRE_LG); + parser_register_precedence(parser, TOKEN_PLUS, PRE_SUM); + parser_register_precedence(parser, TOKEN_MINUS, PRE_SUM); + parser_register_precedence(parser, TOKEN_ASTERISK, PRE_PROD); + parser_register_precedence(parser, TOKEN_SLASH, PRE_PROD); + parser_register_precedence(parser, TOKEN_LPAREN, PRE_CALL); + + return parser; +fail5: + hmap_free(parser->infix_fns); +fail4: + hmap_free(parser->prefix_fns); +fail3: + vector_free(parser->errors); +fail2: + lexer_destroy(lex); +fail: + free(parser); + return NULL; +} + +void +parser_reset(struct parser *parser, const char *input) +{ + size_t i; + char *val; + vector_foreach (parser->errors, i, val) { + free(val); + } + parser->errors->len = 0; + + lexer_reset(parser->lexer, input); + + parser_next_token(parser); + parser_next_token(parser); +} + +void +parser_next_token(struct parser *parser) +{ + parser->cur_token = parser->peek_token; + parser->peek_token = lexer_next_token(parser->lexer); +} + +struct program * +parser_parse_program(struct parser *parser) +{ + struct program *program = malloc(sizeof *program); + if (!program) return NULL; + program->statements = vector_new(); + if (!program->statements) { + free(program); + return NULL; + } + + while (!parser_cur_token_is(parser, TOKEN_EOF)) { + struct statement *st = parser_parse_statement(parser); + if (st != NULL) { + vector_push(program->statements, st); + } + parser_next_token(parser); + } + + return program; +} + +void +parser_destroy(struct parser *parser) +{ + size_t i; + char *val; + vector_foreach (parser->errors, i, val) { + free(val); + } + vector_free(parser->errors); + hmap_free(parser->infix_fns); + hmap_free(parser->prefix_fns); + hmap_free(parser->precedences); + lexer_destroy(parser->lexer); + free(parser); +} -- cgit v1.2.3