aboutsummaryrefslogtreecommitdiff
path: root/src/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/parser.c')
-rw-r--r--src/parser.c605
1 files changed, 605 insertions, 0 deletions
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..e8c44df
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,605 @@
+#include "parser.h"
+#include "ast.h"
+#include "token.h"
+#include "vector.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+enum precedence {
+ PRE_LOWEST = 1,
+ PRE_EQUALS,
+ PRE_LG,
+ PRE_SUM,
+ PRE_PROD,
+ PRE_PREFIX,
+ PRE_CALL,
+};
+
+static enum precedence precedences[] = {
+ 0,
+ PRE_LOWEST,
+ PRE_EQUALS,
+ PRE_LG,
+ PRE_SUM,
+ PRE_PROD,
+ PRE_PREFIX,
+ PRE_CALL,
+};
+
+static inline void
+parser_register_prefix(struct parser *parser, enum token_type t,
+ prefix_parse_f fn)
+{
+ hmap_set(parser->prefix_fns, token_type_print(t), fn);
+}
+
+static inline void
+parser_register_infix(struct parser *parser, enum token_type t,
+ infix_parse_f fn)
+{
+ hmap_set(parser->infix_fns, token_type_print(t), fn);
+}
+
+static inline void
+parser_register_precedence(struct parser *parser, enum token_type t,
+ enum precedence pre)
+{
+ hmap_set(parser->precedences, token_type_print(t), &precedences[pre]);
+}
+
+static inline prefix_parse_f
+parser_get_prefix(struct parser *parser, enum token_type t)
+{
+ return hmap_get(parser->prefix_fns, token_type_print(t));
+}
+
+static inline infix_parse_f
+parser_get_infix(struct parser *parser, enum token_type t)
+{
+ return hmap_get(parser->infix_fns, token_type_print(t));
+}
+
+static inline enum precedence
+parser_get_precedence(struct parser *parser, enum token_type t)
+{
+ enum precedence *pre = hmap_get(parser->precedences, token_type_print(t));
+ if (!pre) return PRE_LOWEST;
+ return *pre;
+}
+
+static inline bool
+parser_cur_token_is(struct parser *parser, enum token_type t)
+{
+ return parser->cur_token.type == t;
+}
+
+static inline bool
+parser_peek_token_is(struct parser *parser, enum token_type t)
+{
+ return parser->peek_token.type == t;
+}
+
+static inline enum precedence
+parser_peek_precedence(struct parser *parser)
+{
+ return parser_get_precedence(parser, parser->peek_token.type);
+}
+
+static inline enum precedence
+parser_cur_precedence(struct parser *parser)
+{
+ return parser_get_precedence(parser, parser->cur_token.type);
+}
+
+static inline void
+parser_peek_error(struct parser *parser, enum token_type t)
+{
+ char *msg = malloc(sizeof(*msg) * 1024);
+ if (msg) {
+ sprintf(msg, "expected token %s, got %s", token_type_print(t),
+ token_type_print(parser->peek_token.type));
+ vector_push(parser->errors, msg);
+ }
+}
+
+static inline bool
+parser_expect_peek(struct parser *parser, enum token_type t)
+{
+ if (parser_peek_token_is(parser, t)) {
+ parser_next_token(parser);
+ return true;
+ }
+ parser_peek_error(parser, t);
+ return false;
+}
+
+static inline void
+parser_no_prefix_fn_error(struct parser *parser, enum token_type t)
+{
+ char *msg = malloc(256);
+ sprintf(msg, "%s not recognized as prefix", token_type_print(t));
+ vector_push(parser->errors, msg);
+}
+
+static struct expression *
+parser_parse_expression(struct parser *parser, enum precedence pre)
+{
+ prefix_parse_f prefix = parser_get_prefix(parser, parser->cur_token.type);
+ if (!prefix) {
+ parser_no_prefix_fn_error(parser, parser->cur_token.type);
+ return NULL;
+ }
+ struct expression *lexpr = prefix(parser);
+
+ while (pre < parser_peek_precedence(parser)
+ && !parser_peek_token_is(parser, TOKEN_SEMICOLON)) {
+ infix_parse_f infix = parser_get_infix(parser, parser->peek_token.type);
+ if (!infix) {
+ return lexpr;
+ }
+
+ parser_next_token(parser);
+ lexpr = infix(parser, lexpr);
+ }
+
+ return lexpr;
+}
+
+static struct expression *
+parser_parse_identifier(struct parser *parser)
+{
+ struct expression *expr = malloc(sizeof(*expr));
+ expr->type = EXPRESSION_IDENT;
+ expr->token = parser->cur_token;
+ expr->ident.value = parser->cur_token.literal;
+
+ return expr;
+}
+
+static struct expression *
+parser_parse_integer(struct parser *parser)
+{
+ struct expression *expr = malloc(sizeof(*expr));
+ expr->type = EXPRESSION_INT;
+ expr->token = parser->cur_token;
+
+ char *end;
+ expr->integer.value = strtol(expr->token.literal.str
+ + expr->token.literal.start,
+ &end, 0);
+ if (*end != '\0' && (expr->token.literal.str + expr->token.literal.end) < end) {
+ char *msg = malloc(256);
+ char litbuf[32];
+ sprintf(msg, "%s is not a valid integer",
+ slice_sprint(&expr->token.literal, litbuf));
+ vector_push(parser->errors, msg);
+ free(expr);
+ return NULL;
+ }
+
+ return expr;
+}
+
+static struct expression *
+parser_parse_boolean(struct parser *parser)
+{
+ struct expression *expr = malloc(sizeof(*expr));
+ expr->type = EXPRESSION_BOOL;
+ expr->token = parser->cur_token;
+ expr->boolean.value = expr->token.type == TOKEN_TRUE;
+
+ return expr;
+}
+
+static struct expression *
+parser_parse_grouped(struct parser *parser)
+{
+ parser_next_token(parser);
+ struct expression *expr = parser_parse_expression(parser, PRE_LOWEST);
+ if (!parser_expect_peek(parser, TOKEN_RPAREN)) {
+ return NULL;
+ }
+
+ return expr;
+}
+
+static struct expression *
+parser_parse_prefix(struct parser *parser)
+{
+ struct expression *expr = malloc(sizeof(*expr));
+ expr->type = EXPRESSION_PREFIX;
+ expr->token = parser->cur_token;
+ expr->prefix.operator = parser->cur_token.literal;
+
+ parser_next_token(parser);
+ expr->prefix.right = parser_parse_expression(parser, PRE_PREFIX);
+
+ return expr;
+}
+
+static struct expression *
+parser_parse_infix(struct parser *parser, struct expression *lexpr)
+{
+ struct expression *expr = malloc(sizeof(*expr));
+ expr->type = EXPRESSION_INFIX;
+ expr->token = parser->cur_token;
+ expr->infix.operator = parser->cur_token.literal;
+ expr->infix.left = lexpr;
+
+ enum precedence pre = parser_cur_precedence(parser);
+ parser_next_token(parser);
+ expr->infix.right = parser_parse_expression(parser, pre);
+
+ return expr;
+}
+
+static struct statement *
+parser_parse_let_statement(struct parser *parser)
+{
+ struct statement *st = calloc(1, sizeof(*st));
+ if (!st) return NULL;
+ st->type = STATEMENT_LET;
+ st->token = parser->cur_token;
+
+ if (!parser_expect_peek(parser, TOKEN_IDENT)) goto fail;
+
+ st->let.name = malloc(sizeof(*st->let.name));
+ if (!st->let.name) goto fail;
+ st->let.name->token = parser->cur_token;
+ st->let.name->value = parser->cur_token.literal;
+
+ if (!parser_expect_peek(parser, TOKEN_ASSIGN)) goto ass_fail;
+
+ parser_next_token(parser);
+
+ st->let.value = parser_parse_expression(parser, PRE_LOWEST);
+ if (parser_peek_token_is(parser, TOKEN_SEMICOLON)) {
+ parser_next_token(parser);
+ }
+
+ return st;
+ass_fail:
+ free(st->let.name);
+fail:
+ free(st);
+ return NULL;
+}
+
+static struct statement *
+parser_parse_return_statement(struct parser *parser)
+{
+ struct statement *st = calloc(1, sizeof(*st));
+ if (!st) return NULL;
+ st->type = STATEMENT_RETURN;
+ st->token = parser->cur_token;
+
+ parser_next_token(parser);
+
+ st->retrn.value = parser_parse_expression(parser, PRE_LOWEST);
+ if (parser_peek_token_is(parser, TOKEN_SEMICOLON)) {
+ parser_next_token(parser);
+ }
+
+ return st;
+}
+
+static struct statement *
+parser_parse_expression_statement(struct parser *parser)
+{
+ struct statement *st = calloc(1, sizeof(*st));
+ if (!st) return NULL;
+ st->type = STATEMENT_EXPRESSION;
+ st->token = parser->cur_token;
+ st->expr.expr = parser_parse_expression(parser, PRE_LOWEST);
+
+ if (parser_peek_token_is(parser, TOKEN_SEMICOLON)) {
+ parser_next_token(parser);
+ }
+
+ return st;
+}
+
+static struct statement *parser_parse_statement(struct parser *);
+
+static struct statement *
+parser_parse_block_statement(struct parser *parser)
+{
+ struct statement *st = calloc(1, sizeof(*st));
+ if (!st) return NULL;
+ st->type = STATEMENT_BLOCK;
+ st->block.statements = vector_new();
+ parser_next_token(parser);
+
+ while (!parser_cur_token_is(parser, TOKEN_RBRACE)
+ && !parser_cur_token_is(parser, TOKEN_EOF)) {
+ struct statement *subst = parser_parse_statement(parser);
+ if (subst) {
+ vector_push(st->block.statements, subst);
+ }
+ parser_next_token(parser);
+ }
+
+ return st;
+}
+
+static struct expression *
+parser_parse_if(struct parser *parser)
+{
+ struct expression *expr = calloc(1, sizeof(*expr));
+ expr->type = EXPRESSION_IF;
+ expr->token = parser->cur_token;
+
+ if (!parser_expect_peek(parser, TOKEN_LPAREN)) {
+ goto fail;
+ }
+
+ parser_next_token(parser);
+ expr->cond.condition = parser_parse_expression(parser, PRE_LOWEST);
+
+ if (!parser_expect_peek(parser, TOKEN_RPAREN)) {
+ goto fail;
+ }
+ if (!parser_expect_peek(parser, TOKEN_LBRACE)) {
+ goto fail;
+ }
+
+ expr->cond.consequence = parser_parse_block_statement(parser);
+
+ if (parser_peek_token_is(parser, TOKEN_ELSE)) {
+ parser_next_token(parser);
+ if (!parser_expect_peek(parser, TOKEN_LBRACE)) {
+ goto fail;
+ }
+ expr->cond.alternative = parser_parse_block_statement(parser);
+ }
+
+ return expr;
+fail:
+ free(expr);
+ return NULL;
+}
+
+static struct vector *
+parser_parse_func_params(struct parser *parser)
+{
+ struct vector *idents = vector_new_with_cap(8);
+
+ if (parser_peek_token_is(parser, TOKEN_RPAREN)) {
+ parser_next_token(parser);
+ return idents;
+ }
+
+ parser_next_token(parser);
+
+ struct expression *ident = calloc(1, sizeof(*ident));
+ ident->token = parser->cur_token;
+ ident->ident.value = parser->cur_token.literal;
+ vector_push(idents, ident);
+ while (parser_peek_token_is(parser, TOKEN_COMMA)) {
+ parser_next_token(parser);
+ parser_next_token(parser);
+ ident = calloc(1, sizeof(*ident));
+ ident->token = parser->cur_token;
+ ident->ident.value = parser->cur_token.literal;
+ vector_push(idents, ident);
+ }
+
+ if (!parser_expect_peek(parser, TOKEN_RPAREN)) {
+ size_t i;
+ vector_foreach(idents, i, ident) {
+ expression_destroy(ident);
+ }
+ vector_free(idents);
+ return NULL;
+ }
+
+ return idents;
+}
+
+static struct expression *
+parser_parse_func_literal(struct parser *parser)
+{
+ struct expression *expr = calloc(1, sizeof(*expr));
+ expr->type = EXPRESSION_FUNC;
+ expr->token = parser->cur_token;
+
+ if (!parser_expect_peek(parser, TOKEN_LPAREN)) {
+ goto fail;
+ }
+ expr->func.parameters = parser_parse_func_params(parser);
+ if (!parser_expect_peek(parser, TOKEN_LBRACE)) {
+ goto fail;
+ }
+
+ expr->func.body = parser_parse_block_statement(parser);
+
+ return expr;
+fail:
+ free(expr);
+ return NULL;
+}
+
+static struct vector *
+parser_parse_call_arguments(struct parser *parser)
+{
+ struct vector *args = vector_new_with_cap(8);
+
+ if (parser_peek_token_is(parser, TOKEN_RPAREN)) {
+ parser_next_token(parser);
+ return args;
+ }
+
+ parser_next_token(parser);
+ vector_push(args, parser_parse_expression(parser, PRE_LOWEST));
+
+ while (parser_peek_token_is(parser, TOKEN_COMMA)) {
+ parser_next_token(parser);
+ parser_next_token(parser);
+ vector_push(args, parser_parse_expression(parser, PRE_LOWEST));
+ }
+
+ if (!parser_expect_peek(parser, TOKEN_RPAREN)) {
+ size_t i;
+ struct expression *expr;
+ vector_foreach(args, i, expr) {
+ expression_destroy(expr);
+ }
+ vector_free(args);
+ return NULL;
+ }
+
+ return args;
+}
+
+static struct expression *
+parser_parse_call_expression(struct parser *parser, struct expression *func)
+{
+ struct expression *expr = calloc(1, sizeof(*expr));
+ expr->token = parser->cur_token;
+ expr->type = EXPRESSION_CALL;
+ expr->call.func = func;
+ expr->call.arguments = parser_parse_call_arguments(parser);
+
+ return expr;
+}
+
+static struct statement *
+parser_parse_statement(struct parser *parser)
+{
+ switch (parser->cur_token.type) {
+ case TOKEN_LET:
+ return parser_parse_let_statement(parser);
+ case TOKEN_RETURN:
+ return parser_parse_return_statement(parser);
+ case TOKEN_LBRACE:
+ return parser_parse_block_statement(parser);
+ default:
+ return parser_parse_expression_statement(parser);
+ }
+}
+
+struct parser *
+parser_new()
+{
+ struct parser *parser = malloc(sizeof *parser);
+ if (!parser) return NULL;
+
+ struct lexer *lex = lexer_new();
+ if (!lex) goto fail;
+ parser->lexer = lex;
+
+ parser->errors = vector_new();
+ if (!parser->errors) goto fail2;
+
+ parser->prefix_fns = hmap_new();
+ if (!parser->prefix_fns) goto fail3;
+ parser_register_prefix(parser, TOKEN_IDENT, parser_parse_identifier);
+ parser_register_prefix(parser, TOKEN_INT, parser_parse_integer);
+ parser_register_prefix(parser, TOKEN_BANG, parser_parse_prefix);
+ parser_register_prefix(parser, TOKEN_MINUS, parser_parse_prefix);
+ parser_register_prefix(parser, TOKEN_TRUE, parser_parse_boolean);
+ parser_register_prefix(parser, TOKEN_FALSE, parser_parse_boolean);
+ parser_register_prefix(parser, TOKEN_LPAREN, parser_parse_grouped);
+ parser_register_prefix(parser, TOKEN_RPAREN, parser_parse_grouped);
+ parser_register_prefix(parser, TOKEN_IF, parser_parse_if);
+ parser_register_prefix(parser, TOKEN_FUNC, parser_parse_func_literal);
+
+ parser->infix_fns = hmap_new();
+ if (!parser->infix_fns) goto fail4;
+ parser_register_infix(parser, TOKEN_PLUS, parser_parse_infix);
+ parser_register_infix(parser, TOKEN_MINUS, parser_parse_infix);
+ parser_register_infix(parser, TOKEN_SLASH, parser_parse_infix);
+ parser_register_infix(parser, TOKEN_ASTERISK, parser_parse_infix);
+ parser_register_infix(parser, TOKEN_EQ, parser_parse_infix);
+ parser_register_infix(parser, TOKEN_NOTEQ, parser_parse_infix);
+ parser_register_infix(parser, TOKEN_LT, parser_parse_infix);
+ parser_register_infix(parser, TOKEN_GT, parser_parse_infix);
+ parser_register_infix(parser, TOKEN_LPAREN, parser_parse_call_expression);
+
+ parser->precedences = hmap_new();
+ if (!parser->precedences) goto fail5;
+ parser_register_precedence(parser, TOKEN_EQ, PRE_EQUALS);
+ parser_register_precedence(parser, TOKEN_NOTEQ, PRE_EQUALS);
+ parser_register_precedence(parser, TOKEN_LT, PRE_LG);
+ parser_register_precedence(parser, TOKEN_GT, PRE_LG);
+ parser_register_precedence(parser, TOKEN_PLUS, PRE_SUM);
+ parser_register_precedence(parser, TOKEN_MINUS, PRE_SUM);
+ parser_register_precedence(parser, TOKEN_ASTERISK, PRE_PROD);
+ parser_register_precedence(parser, TOKEN_SLASH, PRE_PROD);
+ parser_register_precedence(parser, TOKEN_LPAREN, PRE_CALL);
+
+ return parser;
+fail5:
+ hmap_free(parser->infix_fns);
+fail4:
+ hmap_free(parser->prefix_fns);
+fail3:
+ vector_free(parser->errors);
+fail2:
+ lexer_destroy(lex);
+fail:
+ free(parser);
+ return NULL;
+}
+
+void
+parser_reset(struct parser *parser, const char *input)
+{
+ size_t i;
+ char *val;
+ vector_foreach (parser->errors, i, val) {
+ free(val);
+ }
+ parser->errors->len = 0;
+
+ lexer_reset(parser->lexer, input);
+
+ parser_next_token(parser);
+ parser_next_token(parser);
+}
+
+void
+parser_next_token(struct parser *parser)
+{
+ parser->cur_token = parser->peek_token;
+ parser->peek_token = lexer_next_token(parser->lexer);
+}
+
+struct program *
+parser_parse_program(struct parser *parser)
+{
+ struct program *program = malloc(sizeof *program);
+ if (!program) return NULL;
+ program->statements = vector_new();
+ if (!program->statements) {
+ free(program);
+ return NULL;
+ }
+
+ while (!parser_cur_token_is(parser, TOKEN_EOF)) {
+ struct statement *st = parser_parse_statement(parser);
+ if (st != NULL) {
+ vector_push(program->statements, st);
+ }
+ parser_next_token(parser);
+ }
+
+ return program;
+}
+
+void
+parser_destroy(struct parser *parser)
+{
+ size_t i;
+ char *val;
+ vector_foreach (parser->errors, i, val) {
+ free(val);
+ }
+ vector_free(parser->errors);
+ hmap_free(parser->infix_fns);
+ hmap_free(parser->prefix_fns);
+ hmap_free(parser->precedences);
+ lexer_destroy(parser->lexer);
+ free(parser);
+}