From c0cd4e5f199e8567ec3b5e216fbee27837d21bea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yaroslav=20de=20la=20Pe=C3=B1a=20Smirnov?= Date: Thu, 20 Jan 2022 02:34:32 +0300 Subject: init --- .exrc | 1 + .gitignore | 3 + Makefile | 46 ++++ README.md | 7 + compile_commands.json | 167 +++++++++++++ include/ast.h | 170 ++++++++++++++ include/eval.h | 20 ++ include/hmap.h | 53 +++++ include/lexer.h | 25 ++ include/object.h | 81 +++++++ include/parser.h | 33 +++ include/repl.h | 8 + include/slice.h | 27 +++ include/tests/tests.h | 45 ++++ include/token.h | 55 +++++ include/vector.h | 27 +++ src/ast.c | 381 ++++++++++++++++++++++++++++++ src/cmonkey.c | 16 ++ src/eval.c | 333 ++++++++++++++++++++++++++ src/hmap.c | 225 ++++++++++++++++++ src/lexer.c | 187 +++++++++++++++ src/object.c | 195 +++++++++++++++ src/parser.c | 605 +++++++++++++++++++++++++++++++++++++++++++++++ src/repl.c | 67 ++++++ src/slice.c | 77 ++++++ src/tests/ast.c | 42 ++++ src/tests/eval.c | 332 ++++++++++++++++++++++++++ src/tests/lexer.c | 126 ++++++++++ src/tests/parser.c | 640 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/tests/slice.c | 41 ++++ src/token.c | 106 +++++++++ src/vector.c | 42 ++++ 32 files changed, 4183 insertions(+) create mode 100644 .exrc create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 README.md create mode 100644 compile_commands.json create mode 100644 include/ast.h create mode 100644 include/eval.h create mode 100644 include/hmap.h create mode 100644 include/lexer.h create mode 100644 include/object.h create mode 100644 include/parser.h create mode 100644 include/repl.h create mode 100644 include/slice.h create mode 100644 include/tests/tests.h create mode 100644 include/token.h create mode 100644 include/vector.h create mode 100644 src/ast.c create mode 100644 src/cmonkey.c create mode 100644 src/eval.c create mode 100644 src/hmap.c create mode 100644 src/lexer.c create mode 100644 src/object.c create mode 100644 src/parser.c create mode 100644 src/repl.c create mode 100644 src/slice.c create mode 100644 src/tests/ast.c create mode 100644 src/tests/eval.c create mode 100644 src/tests/lexer.c create mode 100644 src/tests/parser.c create mode 100644 src/tests/slice.c create mode 100644 src/token.c create mode 100644 src/vector.c diff --git a/.exrc b/.exrc new file mode 100644 index 0000000..236a385 --- /dev/null +++ b/.exrc @@ -0,0 +1 @@ +let ch_syntax_for_h = 1 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b0b5a06 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.cache +obj/ +build/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..60481b3 --- /dev/null +++ b/Makefile @@ -0,0 +1,46 @@ +CC?=gcc +XFLAGS= +CFLAGS?=-std=c11 -O2 -Wall $(XFLAGS) + +LIBS:= +IDIRS:=$(addprefix -iquote,include) + +BUILDIR?=build/release + +ifdef DEBUG +BUILDIR:=build/debug +CFLAGS:=-std=c11 -O0 -DDEBUG $(XFLAGS) -g +endif +ifdef ASAN +CFLAGS+= -fsanitize=address -fno-omit-frame-pointer +endif + +OBJDIR=$(BUILDIR)/obj + +CMONKEY_SRCS:=$(shell find src -name '*.c' -not -path '*/tests/*') +CMONKEY_OBJS:=$(CMONKEY_SRCS:%.c=$(OBJDIR)/%.o) +ALL_OBJS:=$(CMONKEY_OBJS) +TEST_OBJS:=$(filter-out $(OBJDIR)/src/cmonkey.o,$(ALL_OBJS)) + +all: cmonkey + +test: tests/lexer tests/slice tests/parser tests/ast tests/eval + +tests/%: $(OBJDIR)/src/tests/%.o $(TEST_OBJS) + mkdir -p $(BUILDIR)/$(@D) + $(CC) -o $(BUILDIR)/$@ $^ $(IDIRS) $(LIBS) $(CFLAGS) + +$(OBJDIR)/%.o: %.c + mkdir -p $(@D) + $(CC) -c $(IDIRS) -o $@ $< $(LIBS) $(CFLAGS) + +cmonkey: $(ALL_OBJS) + mkdir -p $(@D) + $(CC) -o $(BUILDIR)/$@ $^ $(LIBS) $(CFLAGS) + +clean: + rm -r build + +.PHONY: clean all test + +.PRECIOUS: $(OBJDIR)/src/tests/%.o diff --git a/README.md b/README.md new file mode 100644 index 0000000..f684d21 --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# cmonkey + +A monkey programming language interpreter written in C11. From Thorsten Ball's +[Writing an Interpreter in Go](https://interpreterbook.com/) book, except I +wrote mine in C, just for lulz. + +TODO: chapter 4, GC, debug diff --git a/compile_commands.json b/compile_commands.json new file mode 100644 index 0000000..0dd5bca --- /dev/null +++ b/compile_commands.json @@ -0,0 +1,167 @@ +[ + { + "directory": "/home/yaroslav/repos/personal/cmonkey", + "arguments": [ + "cc", + "-c", + "-iquoteinclude", + "-o", + "build/release/obj/src/lexer.o", + "src/lexer.c", + "-std=c11", + "-O2", + "-Wall" + ], + "file": "src/lexer.c" + }, + { + "directory": "/home/yaroslav/repos/personal/cmonkey", + "arguments": [ + "cc", + "-c", + "-iquoteinclude", + "-o", + "build/release/obj/src/token.o", + "src/token.c", + "-std=c11", + "-O2", + "-Wall" + ], + "file": "src/token.c" + }, + { + "directory": "/home/yaroslav/repos/personal/cmonkey", + "arguments": [ + "cc", + "-c", + "-iquoteinclude", + "-o", + "build/release/obj/src/cmonkey.o", + "src/cmonkey.c", + "-std=c11", + "-O2", + "-Wall" + ], + "file": "src/cmonkey.c" + }, + { + "directory": "/home/yaroslav/repos/personal/cmonkey", + "arguments": [ + "cc", + "-c", + "-iquoteinclude", + "-o", + "build/release/obj/src/hmap.o", + "src/hmap.c", + "-std=c11", + "-O2", + "-Wall" + ], + "file": "src/hmap.c" + }, + { + "directory": "/home/yaroslav/repos/personal/cmonkey", + "arguments": [ + "cc", + "-c", + "-iquoteinclude", + "-o", + "build/release/obj/src/slice.o", + "src/slice.c", + "-std=c11", + "-O2", + "-Wall" + ], + "file": "src/slice.c" + }, + { + "directory": "/home/yaroslav/repos/personal/cmonkey", + "arguments": [ + "cc", + "-c", + "-iquoteinclude", + "-o", + "build/release/obj/src/repl.o", + "src/repl.c", + "-std=c11", + "-O2", + "-Wall" + ], + "file": "src/repl.c" + }, + { + "directory": "/home/yaroslav/repos/personal/cmonkey", + "arguments": [ + "cc", + "-c", + "-iquoteinclude", + "-o", + "build/release/obj/src/parser.o", + "src/parser.c", + "-std=c11", + "-O2", + "-Wall" + ], + "file": "src/parser.c" + }, + { + "directory": "/home/yaroslav/repos/personal/cmonkey", + "arguments": [ + "cc", + "-c", + "-iquoteinclude", + "-o", + "build/release/obj/src/eval.o", + "src/eval.c", + "-std=c11", + "-O2", + "-Wall" + ], + "file": "src/eval.c" + }, + { + "directory": "/home/yaroslav/repos/personal/cmonkey", + "arguments": [ + "cc", + "-c", + "-iquoteinclude", + "-o", + "build/release/obj/src/vector.o", + "src/vector.c", + "-std=c11", + "-O2", + "-Wall" + ], + "file": "src/vector.c" + }, + { + "directory": "/home/yaroslav/repos/personal/cmonkey", + "arguments": [ + "cc", + "-c", + "-iquoteinclude", + "-o", + "build/release/obj/src/ast.o", + "src/ast.c", + "-std=c11", + "-O2", + "-Wall" + ], + "file": "src/ast.c" + }, + { + "directory": "/home/yaroslav/repos/personal/cmonkey", + "arguments": [ + "cc", + "-c", + "-iquoteinclude", + "-o", + "build/release/obj/src/object.o", + "src/object.c", + "-std=c11", + "-O2", + "-Wall" + ], + "file": "src/object.c" + } +] diff --git a/include/ast.h b/include/ast.h new file mode 100644 index 0000000..eb3e1fc --- /dev/null +++ b/include/ast.h @@ -0,0 +1,170 @@ +#ifndef CMONKEY_AST_H +#define CMONKEY_AST_H + +#include "token.h" +#include "vector.h" + +enum node_type { + NODE_PROGRAM, + NODE_STATEMENT, + NODE_EXPRESSION, +}; + +enum statement_type { + STATEMENT_LET, + STATEMENT_RETURN, + STATEMENT_EXPRESSION, + STATEMENT_BLOCK, +}; + +enum expression_type { + EXPRESSION_IDENT, + EXPRESSION_INT, + EXPRESSION_BOOL, + EXPRESSION_PREFIX, + EXPRESSION_INFIX, + EXPRESSION_IF, + EXPRESSION_FUNC, + EXPRESSION_CALL, +}; + +struct identifier { + struct token token; + struct slice value; +}; + +struct prefix_expression { + struct token token; + struct slice operator; + struct expression *right; +}; + +struct infix_expression { + struct token token; + struct slice operator; + struct expression *left; + struct expression *right; +}; + +struct integer_expression { + struct token token; + int64_t value; +}; + +struct boolean_expression { + struct token token; + bool value; +}; + +struct if_expression { + struct token token; + struct expression *condition; + struct statement *consequence; + struct statement *alternative; +}; + +struct func_literal { + struct token token; + struct vector *parameters; // expression type ident + struct statement *body; +}; + +struct call_expression { + struct token token; + struct expression *func; + struct vector *arguments; // expressions +}; + +struct expression { + enum expression_type type; + union { + struct token token; // Common initial sequence + struct identifier ident; + struct integer_expression integer; + struct boolean_expression boolean; + struct prefix_expression prefix; + struct infix_expression infix; + struct if_expression cond; + struct func_literal func; + struct call_expression call; + }; +}; + +struct expression_statement { + struct token token; + struct expression *expr; +}; + +struct return_statement { + struct token token; + struct expression *value; +}; + +struct let_statement { + struct token token; + struct identifier *name; + struct expression *value; +}; + +struct block_statement { + struct token token; + struct vector *statements; +}; + +struct program { + struct vector *statements; +}; + +struct statement { + enum statement_type type; + union { + struct token token; // Common initial sequence + struct let_statement let; + struct return_statement retrn; + struct expression_statement expr; + struct block_statement block; + }; +}; + +struct slice expression_token_literal(struct expression *); +struct slice statement_token_literal(struct statement *); +struct slice program_token_literal(struct program *); + +#define node_token_literal(n) _Generic((n), \ + struct program *: program_token_literal, \ + struct statement *: statement_token_literal, \ + struct expression *: expression_token_literal \ + )(n) + +char *expression_sprint(struct expression *, char *str); +char *statement_sprint(struct statement *, char *str); +char *program_sprint(struct program *, char *str); + +#define node_sprint(n, a) _Generic((n), \ + struct expression *: expression_sprint, \ + struct statement *: statement_sprint, \ + struct program *: program_sprint \ + )(n, a) + +struct vector *expression_vector_dup(const struct vector *); +struct vector *statement_vector_dup(const struct vector *); + +struct expression *expression_dup(const struct expression *); +struct statement *statement_dup(const struct statement *); + +#define node_dup(n) _Generic((n), \ + struct expression *: expression_dup, \ + struct statement *: statement_dup \ + )(n) + +void expression_destroy(struct expression *); +void statement_destroy(struct statement *); +void program_destroy(struct program *); + +#define node_destroy(n) _Generic((n), \ + struct expression *: expression_destroy, \ + struct statement *: statement_destroy, \ + struct program *: program_destroy \ + )(n) + +#endif diff --git a/include/eval.h b/include/eval.h new file mode 100644 index 0000000..5f19271 --- /dev/null +++ b/include/eval.h @@ -0,0 +1,20 @@ +#ifndef CMONKEY_EVAL_H +#define CMONKEY_EVAL_H + +#include "ast.h" +#include "parser.h" +#include "object.h" + +struct object *eval_program(struct environment *, struct program *); + +struct object *eval_statement(struct environment *, struct statement *); + +struct object *eval_expression(struct environment *, struct expression *); + +#define eval(e, n) _Generic((n), \ + struct program *: eval_program, \ + struct statement *: eval_statement, \ + struct expression *: eval_expression \ + )(e, n) + +#endif diff --git a/include/hmap.h b/include/hmap.h new file mode 100644 index 0000000..853df1c --- /dev/null +++ b/include/hmap.h @@ -0,0 +1,53 @@ +#ifndef UNJA_HASHMAP_H +#define UNJA_HASHMAP_H +#include +#include + +#include "slice.h" + +#ifndef HASHMAP_CAP +#define HASHMAP_CAP 32 +#endif + +typedef void (hmap_cb)(const void *key, void *value); + +struct hmap { + struct hnode **buckets; + size_t cap; + size_t size; +}; + +/* allocate a new hmap */ +struct hmap *hmap_new_with_cap(size_t cap); + +#define hmap_new() hmap_new_with_cap(HASHMAP_CAP) + + +/* + * Inserts a key-value pair into the map. Returns NULL if map did not have key, + * old value if it did. + */ +void *hmap_sets(struct hmap *hm, struct slice key, void *value); +void *hmap_set(struct hmap *hm, const char *key, void *value); + +/* Returns a pointer to the value corresponding to the key. */ +void *hmap_gets(struct hmap *hm, const struct slice *key); +void *hmap_get(struct hmap *hm, const char *key); + +/* + * Removes a key from the map, returning the value at the key if the key was + * previously in the map. + */ +void *hmap_removes(struct hmap *hm, const struct slice *key); +void *hmap_remove(struct hmap *hm, const char *key); + +/* Iterate over keys in the hmap */ +void hmap_walk(struct hmap *hm, hmap_cb); + +/* free hmap related memory calling a function before freeing each node */ +void hmap_destroy(struct hmap *hm, hmap_cb cb); + +/* free hmap related memory */ +void hmap_free(struct hmap *hm); + +#endif diff --git a/include/lexer.h b/include/lexer.h new file mode 100644 index 0000000..d693354 --- /dev/null +++ b/include/lexer.h @@ -0,0 +1,25 @@ +#ifndef CMONKEY_LEXER_H +#define CMONKEY_LEXER_H + +#include "slice.h" +#include "token.h" + +#include + +struct lexer { + const char *input; + size_t len; + struct slice word; +}; + +struct lexer *lexer_new(); + +void lexer_reset(struct lexer *, const char *input); + +void lexer_read_char(struct lexer *); + +struct token lexer_next_token(struct lexer *); + +void lexer_destroy(struct lexer *); + +#endif diff --git a/include/object.h b/include/object.h new file mode 100644 index 0000000..55ac741 --- /dev/null +++ b/include/object.h @@ -0,0 +1,81 @@ +#ifndef CMONKEY_OBJECT_H +#define CMONKEY_OBJECT_H + +#include "ast.h" +#include "hmap.h" + +#include +#include + +enum object_type { + OBJECT_ERROR, + OBJECT_NULL, + OBJECT_INT, + OBJECT_BOOL, + OBJECT_RETURN, + OBJECT_FUNC, +}; + +struct error_object { + char *msg; +}; + +struct return_object { + struct object *value; +}; + +struct func_object { + struct vector *params; // identifier_expressions + struct statement *body; +}; + +struct object { + enum object_type type; + size_t refcount; + union { + bool boolean; + int64_t integer; + struct return_object retrn; + struct error_object error; + struct func_object func; + }; +}; + +struct environment { + struct hmap *store; + struct environment *outer; +}; + +char *object_sprint(struct object *, char *str); + +inline const char *object_type_print(enum object_type); + +struct object *object_new_int(int64_t val); +struct object *object_new_error(char *msg); +struct object *object_new_return(struct object *val); +struct object *object_new_func(struct expression *); + +#define object_new(v) _Generic((v), \ + int: object_new_int, \ + int64_t: object_new_int, \ + char *: object_new_error, \ + struct object *: object_new_return, \ + struct expression *: object_new_func \ + )(v) + +void object_ref(struct object *); + +void object_unref(struct object *); + +struct environment *environment_new_enclosed(struct environment *outer); + +#define environment_new() environment_new_enclosed(NULL) + +struct object *environment_set(struct environment *, + struct slice key, struct object *val); + +struct object *environment_get(struct environment *, const struct slice *key); + +void environment_destroy(struct environment *); + +#endif diff --git a/include/parser.h b/include/parser.h new file mode 100644 index 0000000..b344a7b --- /dev/null +++ b/include/parser.h @@ -0,0 +1,33 @@ +#ifndef CMONKEY_PARSER_H +#define CMONKEY_PARSER_H + +#include "ast.h" +#include "hmap.h" +#include "lexer.h" +#include "token.h" +#include "vector.h" + +struct parser { + struct lexer *lexer; + struct token cur_token; + struct token peek_token; + struct vector *errors; + struct hmap *prefix_fns; + struct hmap *infix_fns; + struct hmap *precedences; +}; + +typedef struct expression *(*prefix_parse_f)(struct parser *); +typedef struct expression *(*infix_parse_f)(struct parser *, struct expression *); + +struct parser *parser_new(); + +void parser_reset(struct parser *, const char *input); + +void parser_next_token(struct parser *); + +struct program *parser_parse_program(struct parser *); + +void parser_destroy(struct parser *); + +#endif diff --git a/include/repl.h b/include/repl.h new file mode 100644 index 0000000..f7aeafc --- /dev/null +++ b/include/repl.h @@ -0,0 +1,8 @@ +#ifndef CMONKEY_REPL_H +#define CMONKEY_REPL_H + +#include + +void repl_start(FILE *in, FILE *out); + +#endif diff --git a/include/slice.h b/include/slice.h new file mode 100644 index 0000000..26f9fd7 --- /dev/null +++ b/include/slice.h @@ -0,0 +1,27 @@ +#ifndef CMONKEY_SLICE_H +#define CMONKEY_SLICE_H + +#include + +struct slice { + const char *str; + size_t start; + size_t end; +}; + +struct slice slice_new(const char *str, size_t start, size_t end); + +struct slice slice_fullstr(const char *str); + +void slice_set(struct slice *, const char *str, size_t start, size_t end); + +size_t slice_len(const struct slice *); + +/* Returns 0 if equal, 1 if a > b, -1 if a < b */ +int slice_cmp(const struct slice *restrict a, const struct slice *restrict b); + +void slice_cpy(struct slice *dst, const struct slice *src); + +char *slice_sprint(struct slice *, char *str); + +#endif diff --git a/include/tests/tests.h b/include/tests/tests.h new file mode 100644 index 0000000..8c89fcc --- /dev/null +++ b/include/tests/tests.h @@ -0,0 +1,45 @@ +#ifndef TESTS_H +#define TESTS_H +#include +#include + +#ifndef NOCOLOR +#define TBLD "\033[1m" +#define TRED "\033[31m" +#define TGRN "\033[32m" +#define TBLU "\033[34m" +#define TRST "\033[0m" +#else +#define TBLD "" +#define TRED "" +#define TGRN "" +#define TBLU "" +#define TRST "" +#endif + +#define RUN_TEST(test_func) \ + printf("%s:\t", #test_func); \ + fflush(stdout); \ + test_func(); \ + printf(TGRN "OK!\n" TRST) + +#define INIT_TESTS() \ + printf(TBLD "running %s tests\n" TRST, __FILE__) + +#define FAIL_TEST(reason) \ + printf(TBLD TRED "FAIL!\n" TRST); \ + printf("%s:%d: %s: ", __FILE__, __LINE__, __func__); \ + printf(reason); \ + abort() + +#define asserteq(a, b) \ + if (a != b) { \ + FAIL_TEST("assertion " TBLD TBLU #a " == " #b TRST " failed\n"); \ + } + +#define assertneq(a, b) \ + if (a == b) { \ + FAIL_TEST("assertion " TBLD TBLU #a " != " #b TRST " failed\n"); \ + } + +#endif diff --git a/include/token.h b/include/token.h new file mode 100644 index 0000000..2f3cbb3 --- /dev/null +++ b/include/token.h @@ -0,0 +1,55 @@ +#ifndef CMONKEY_TOKEN_H +#define CMONKEY_TOKEN_H + +#include "slice.h" + +enum token_type { + TOKEN_ILLEGAL, + TOKEN_EOF, + /* Identifiers/Literals */ + TOKEN_IDENT, + TOKEN_INT, + /* Operators */ + TOKEN_ASSIGN, + TOKEN_PLUS, + TOKEN_MINUS, + TOKEN_BANG, + TOKEN_ASTERISK, + TOKEN_SLASH, + TOKEN_LT, + TOKEN_GT, + TOKEN_EQ, + TOKEN_NOTEQ, + /* Delimiters */ + TOKEN_COMMA, + TOKEN_SEMICOLON, + TOKEN_LPAREN, + TOKEN_RPAREN, + TOKEN_LBRACE, + TOKEN_RBRACE, + /* Keywords */ + TOKEN_FUNC, + TOKEN_LET, + TOKEN_TRUE, + TOKEN_FALSE, + TOKEN_IF, + TOKEN_ELSE, + TOKEN_RETURN, +}; + +struct token { + enum token_type type; + struct slice literal; +}; + +void token_init_keywords(void); + +enum token_type token_lookup_ident(const struct slice *ident); + +inline const char *token_type_print(enum token_type); + +char *token_sprint(struct token *, char *str); + +void token_free_keywords(void); + +#endif diff --git a/include/vector.h b/include/vector.h new file mode 100644 index 0000000..a34ecea --- /dev/null +++ b/include/vector.h @@ -0,0 +1,27 @@ +#ifndef VECTOR_H +#define VECTOR_H + +#include +#include +#include + +#define VEC_CAP 32 + +struct vector { + size_t cap; + size_t len; + void **values; +}; + +struct vector *vector_new_with_cap(size_t cap); + +#define vector_new() vector_new_with_cap(VEC_CAP) + +ssize_t vector_push(struct vector *, void *val); + +void vector_free(struct vector *); + +#define vector_foreach(vec, i, val) \ + for (i = 0, val = vec->values[i]; i < vec->len; i++, val = vec->values[i]) + +#endif diff --git a/src/ast.c b/src/ast.c new file mode 100644 index 0000000..c86414e --- /dev/null +++ b/src/ast.c @@ -0,0 +1,381 @@ +#include "ast.h" +#include "slice.h" +#include "vector.h" + +#include +#include + +#define MEMCPY(d, s) memcpy(d, s, sizeof(*d)) + +#define MEMDUP(T, d, s) T d = malloc(sizeof(*d)); memcpy(d, s, sizeof(*d)) + +struct slice +expression_token_literal(struct expression *expr) +{ + return expr->token.literal; +} + +struct slice +statement_token_literal(struct statement *st) +{ + return st->token.literal; +} + +struct slice +program_token_literal(struct program *prog) +{ + if (prog->statements->len > 0) { + return node_token_literal( + (struct statement *)prog->statements->values[0]); + } else { + return slice_new("", 0, 0); + } +} + +static inline char * +let_statement_sprint(struct let_statement *st, char *str) +{ + char litbuf[32]; + char namebuf[32]; + sprintf(str, "%s %s = ", slice_sprint(&st->token.literal, litbuf), + slice_sprint(&st->name->value, namebuf)); + char *cur = str + strlen(str); + expression_sprint(st->value, cur); + cur += strlen(cur); + cur[0] = ';', cur[1] = '\0'; + return str; +} + +static inline char * +return_statement_sprint(struct return_statement *st, char *str) +{ + char litbuf[32]; + sprintf(str, "%s ", slice_sprint(&st->token.literal, litbuf)); + char *cur = str + strlen(str); + expression_sprint(st->value, cur); + cur += strlen(cur); + cur[0] = ';', cur[1] = '\0'; + return str; +} + +static inline char * +block_statement_sprint(struct block_statement *st, char *str) +{ + size_t i; + struct statement *v; + char *cur = str; + vector_foreach(st->statements, i, v) { + statement_sprint(v, cur); + cur += strlen(cur); + } + return str; +} + +static inline char * +expression_statement_sprint(struct expression_statement *st, char *str) +{ + return expression_sprint(st->expr, str); +} + +static inline char * +prefix_expression_sprint(struct prefix_expression *expr, char *str) +{ + *str = '('; + char *cur = str + 1; + slice_sprint(&expr->operator, cur); + cur += strlen(cur); + expression_sprint(expr->right, cur); + cur += strlen(cur); + cur[0] = ')', cur[1] = '\0'; + return str; +} + +static inline char * +infix_expression_sprint(struct infix_expression *expr, char *str) +{ + *str = '('; + char *cur = str + 1; + expression_sprint(expr->left, cur); + cur += strlen(cur); + *cur = ' ', cur++; + slice_sprint(&expr->operator, cur); + cur += strlen(cur); + *cur = ' ', cur++; + expression_sprint(expr->right, cur); + cur += strlen(cur); + cur[0] = ')', cur[1] = '\0'; + return str; +} + +static inline char * +if_expression_sprint(struct if_expression *expr, char *str) +{ + char *cur = str; + strcpy(cur, "if "); + cur += strlen(cur); + expression_sprint(expr->condition, cur); + cur += strlen(cur); + *cur = ' ', cur++; + statement_sprint(expr->consequence, cur); + if (expr->alternative) { + cur += strlen(cur); + strcpy(cur, "else "); + cur += strlen(cur); + statement_sprint(expr->alternative, cur); + } + + return str; +} + +static inline char * +func_literal_sprint(struct func_literal *expr, char *str) +{ + char *cur = str; + slice_sprint(&expr->token.literal, cur); + cur += strlen(cur); + *cur = '(', cur++; + size_t i; + struct expression *ident; + vector_foreach(expr->parameters, i, ident) { + slice_sprint(&ident->token.literal, cur); + cur += strlen(cur); + if (i < (expr->parameters->len - 1)) { + cur[0] = ',', cur[1] = ' ', cur += 2; + } + } + *cur = ')', cur++; + statement_sprint(expr->body, cur); + + return str; +} + +static inline char * +call_expression_sprint(struct call_expression *expr, char *str) +{ + char *cur = str; + expression_sprint(expr->func, cur); + cur += strlen(cur); + *cur = '(', cur++; + size_t i; + struct expression *arg; + vector_foreach(expr->arguments, i, arg) { + expression_sprint(arg, cur); + cur += strlen(cur); + if (i < (expr->arguments->len - 1)) { + cur[0] = ',', cur[1] = ' ', cur += 2; + } + } + cur[0] = ')', cur[1] = '\0'; + + return str; +} + +char * +expression_sprint(struct expression *expr, char *str) +{ + switch (expr->type) { + case EXPRESSION_PREFIX: + return prefix_expression_sprint(&expr->prefix, str); + case EXPRESSION_INFIX: + return infix_expression_sprint(&expr->infix, str); + case EXPRESSION_IF: + return if_expression_sprint(&expr->cond, str); + case EXPRESSION_FUNC: + return func_literal_sprint(&expr->func, str); + case EXPRESSION_CALL: + return call_expression_sprint(&expr->call, str); + case EXPRESSION_IDENT: + case EXPRESSION_INT: + case EXPRESSION_BOOL: + return slice_sprint(&expr->token.literal, str); + } +}; + +char * +statement_sprint(struct statement *st, char *str) +{ + switch (st->type) { + case STATEMENT_LET: + return let_statement_sprint(&st->let, str); + case STATEMENT_RETURN: + return return_statement_sprint(&st->retrn, str); + case STATEMENT_EXPRESSION: + return expression_statement_sprint(&st->expr, str); + case STATEMENT_BLOCK: + return block_statement_sprint(&st->block, str); + } +} + +char * +program_sprint(struct program *prog, char *str) +{ + size_t i; + struct statement *st; + char *cur = str; + vector_foreach(prog->statements, i, st) { + statement_sprint(st, cur); + cur += strlen(cur); + } + return str; +} + +struct vector * +expression_vector_dup(const struct vector *vec) +{ + struct vector *dupv = vector_new_with_cap(vec->cap); + size_t i; + struct expression *expr; + vector_foreach(vec, i, expr) { + struct expression *dupexpr = node_dup(expr); + vector_push(dupv, dupexpr); + } + + return dupv; +} + +struct vector * +statement_vector_dup(const struct vector *vec) +{ + struct vector *dupv = vector_new_with_cap(vec->cap); + size_t i; + struct statement *subst; + vector_foreach(vec, i, subst) { + struct statement *dupsub = node_dup(subst); + vector_push(dupv, dupsub); + } + + return dupv; +} + +struct expression * +expression_dup(const struct expression *expr) +{ + MEMDUP(struct expression *, dupd, expr); + switch (expr->type) { + case EXPRESSION_IDENT: + case EXPRESSION_INT: + case EXPRESSION_BOOL: + return dupd; + case EXPRESSION_PREFIX: + dupd->prefix.right = node_dup(expr->prefix.right); + return dupd; + case EXPRESSION_INFIX: + dupd->infix.left = node_dup(expr->infix.left); + dupd->infix.right = node_dup(expr->infix.right); + return dupd; + case EXPRESSION_IF: + dupd->cond.condition = node_dup(expr->cond.condition); + dupd->cond.consequence = node_dup(expr->cond.consequence); + dupd->cond.alternative = node_dup(expr->cond.alternative); + return dupd; + case EXPRESSION_FUNC: + dupd->func.parameters = expression_vector_dup(expr->func.parameters); + dupd->func.body = node_dup(expr->func.body); + return dupd; + case EXPRESSION_CALL: + dupd->call.func = node_dup(expr->call.func); + dupd->call.arguments = expression_vector_dup(expr->call.arguments); + return dupd; + } +} + +struct statement * +statement_dup(const struct statement *st) +{ + MEMDUP(struct statement *, dupd, st); + switch (st->type) { + case STATEMENT_LET: + MEMDUP(, dupd->let.name, st->let.name); + dupd->let.value = node_dup(st->let.value); + return dupd; + case STATEMENT_RETURN: + dupd->retrn.value = node_dup(st->retrn.value); + return dupd; + case STATEMENT_EXPRESSION: + dupd->expr.expr = node_dup(st->expr.expr); + return dupd; + case STATEMENT_BLOCK: + dupd->block.statements = statement_vector_dup(st->block.statements); + return dupd; + } +} + +void +expression_destroy(struct expression *expr) +{ + switch (expr->type) { + case EXPRESSION_PREFIX: + expression_destroy(expr->prefix.right); + break; + case EXPRESSION_INFIX: + expression_destroy(expr->infix.right); + expression_destroy(expr->infix.left); + break; + case EXPRESSION_IF: + expression_destroy(expr->cond.condition); + statement_destroy(expr->cond.consequence); + if (expr->cond.alternative) statement_destroy(expr->cond.alternative); + break; + case EXPRESSION_FUNC: { + size_t i; + struct expression *param; + vector_foreach(expr->func.parameters, i ,param) { + node_destroy(param); + } + vector_free(expr->func.parameters); + node_destroy(expr->func.body); + break; + } + case EXPRESSION_CALL: { + size_t i; + struct expression *arg; + vector_foreach(expr->call.arguments, i ,arg) { + node_destroy(arg); + } + expression_destroy(expr->call.func); + vector_free(expr->call.arguments); + break; + } + default: + break; + } + free(expr); +} + +void +statement_destroy(struct statement *st) +{ + switch (st->type) { + case STATEMENT_LET: + free(st->let.name); + node_destroy(st->let.value); + break; + case STATEMENT_RETURN: + node_destroy(st->retrn.value); + break; + case STATEMENT_EXPRESSION: + if (st->expr.expr) node_destroy(st->expr.expr); + break; + case STATEMENT_BLOCK:; + size_t i; + struct statement *subst; + vector_foreach(st->block.statements, i ,subst) { + node_destroy(subst); + } + vector_free(st->block.statements); + break; + } + free(st); +} + +void +program_destroy(struct program *prog) +{ + size_t i; + struct statement *st; + vector_foreach(prog->statements, i ,st) { + node_destroy(st); + } + vector_free(prog->statements); + free(prog); +} diff --git a/src/cmonkey.c b/src/cmonkey.c new file mode 100644 index 0000000..64e61d6 --- /dev/null +++ b/src/cmonkey.c @@ -0,0 +1,16 @@ +#include "repl.h" + +#include + +int +main(int argc, char *argv[]) +{ + /* TODO: read from file, interpret and exit + if (argc > 1) { + FILE *in = fopen(argv[1], "r"); + } + */ + printf("Welcome to the CMonkey interpreter of the Monkey language!\n"); + printf("Enter any commands to begin\n"); + repl_start(stdin, stdout); +} diff --git a/src/eval.c b/src/eval.c new file mode 100644 index 0000000..c1a6a10 --- /dev/null +++ b/src/eval.c @@ -0,0 +1,333 @@ +#include "eval.h" +#include "ast.h" +#include "object.h" +#include "token.h" + +#include +#include + +static struct object obj_null = { + OBJECT_NULL, + 2, + { 0 }, +}; +static struct object obj_true = { + OBJECT_BOOL, + 2, + .boolean = true, +}; +static struct object obj_false = { + OBJECT_BOOL, + 2, + .boolean = false, +}; + +static inline struct object * +get_bool_object(bool val) +{ + struct object *obj = val ? &obj_true : &obj_false; + return obj; +} + +static struct object * +new_error(char *fmt, ...) +{ + char *msg = malloc(1024); + struct object *obj = object_new(msg); + + va_list args; + va_start(args, fmt); + vsprintf(obj->error.msg, fmt, args); + va_end(args); + + return obj; +} + +static struct object * +eval_statements(struct environment *env, struct vector *sts) +{ + size_t i; + struct object *res; + struct statement *st; + vector_foreach(sts, i, st) { + res = eval(env, st); + if (res->type == OBJECT_RETURN || res->type == OBJECT_ERROR) { + return res; + } + } + return res; +} + +static inline struct object * +eval_bang_prefix_expression(struct environment *env, struct object *right) +{ + struct object *res; + res = get_bool_object(!right->integer); + object_unref(right); + return res; +} + +static inline struct object * +eval_minus_prefix_expression(struct environment *env, struct object *right) +{ + struct object *res; + if (right->type != OBJECT_INT) { + struct object *error = new_error("unknown operator: -%s", + object_type_print(right->type)); + object_unref(right); + return error; + } + res = object_new(-right->integer); + object_unref(right); + return res; +} + +static struct object * +eval_prefix_expression(struct environment *env, struct token op, + struct object *right) +{ + struct object *res; + switch (op.type) { + case TOKEN_BANG: + return eval_bang_prefix_expression(env, right); + case TOKEN_MINUS: + return eval_minus_prefix_expression(env, right); + default:; + char buf[256]; + struct object *error = new_error("unknown operator: %s%s", + slice_sprint(&op.literal, buf), object_type_print(right->type)); + object_unref(right); + return error; + } +} + +static inline struct object * +eval_integer_infix_expression(struct environment *env, struct token op, + struct object *left, struct object *right) +{ + bool bres; + struct object *ires; + switch (op.type) { + case TOKEN_PLUS: + ires = object_new(left->integer + right->integer); + break; + case TOKEN_MINUS: + ires = object_new(left->integer - right->integer); + break; + case TOKEN_ASTERISK: + ires = object_new(left->integer * right->integer); + break; + case TOKEN_SLASH: + ires = object_new(left->integer / right->integer); + break; + case TOKEN_LT: + bres = left->integer < right->integer; + goto boolres; + case TOKEN_GT: + bres = left->integer > right->integer; + goto boolres; + case TOKEN_EQ: + bres = left->integer == right->integer; + goto boolres; + case TOKEN_NOTEQ: + bres = left->integer != right->integer; + goto boolres; + default:; + char buf[256]; + struct object *error = new_error("unknown operator: %s %s %s", + object_type_print(left->type), slice_sprint(&op.literal, buf), + object_type_print(right->type)); + object_unref(left); + object_unref(right); + return error; + } + object_unref(left); + object_unref(right); + return ires; +boolres: + object_unref(left); + object_unref(right); + return get_bool_object(bres); +} + +static inline struct object * +eval_infix_expression(struct environment *env, struct token op, + struct object *left, struct object *right) +{ + if (left->type == OBJECT_INT && right->type == OBJECT_INT) { + return eval_integer_infix_expression(env, op, left, right); + } + bool res; + switch (op.type) { + case TOKEN_EQ: + res = left->integer == right->integer; + break; + case TOKEN_NOTEQ: + res = left->integer != right->integer; + break; + default:; + char buf[256]; + struct object *error; + if (left->type != right->type) { + error = new_error("type mismatch: %s %s %s", + object_type_print(left->type), slice_sprint(&op.literal, buf), + object_type_print(right->type)); + } else { + error = new_error("unknown operator: %s %s %s", + object_type_print(left->type), slice_sprint(&op.literal, buf), + object_type_print(right->type)); + } + object_unref(left); + object_unref(right); + return error; + } + object_unref(left); + object_unref(right); + return get_bool_object(res); +} + +static inline struct object * +eval_if_expression(struct environment *env, struct if_expression *expr) +{ + struct object *cond = eval(env, expr->condition); + struct object *res; + if (cond->boolean) { + res = eval(env, expr->consequence); + } else if (expr->alternative) { + res = eval(env, expr->alternative); + } else { + res = &obj_null; + } + object_unref(cond); + return res; +} + +static inline struct object * +eval_call_expression(struct environment *env, struct expression *expr) +{ + struct object *func = eval(env, expr->call.func); + if (func->type == OBJECT_ERROR) { + return func; + } else if (func->type != OBJECT_FUNC) { + char buf[256]; + struct object *err = new_error("not a function: %s", + object_sprint(func, buf)); + object_unref(func); + return err; + } + size_t i; + struct expression *arg; + struct environment *fenv = environment_new_enclosed(env); + vector_foreach(expr->call.arguments, i, arg) { + struct object *obj = eval(env, arg); + if (obj->type == OBJECT_ERROR) { + object_unref(func); + return obj; + } + struct expression *param = func->func.params->values[i]; + environment_set(fenv, param->ident.value, obj); + object_unref(obj); + } + struct object *res = eval(fenv, func->func.body); + if (res->type == OBJECT_RETURN) { + struct object *ret = res; + res = ret->retrn.value; + object_ref(res); + object_unref(ret); + } + object_unref(func); + environment_destroy(fenv); + return res; +} + +struct object * +eval_expression(struct environment *env, struct expression *expr) +{ + struct object *res = NULL; + switch (expr->type) { + case EXPRESSION_INT: + res = object_new(expr->integer.value); + return res; + case EXPRESSION_BOOL: + return expr->boolean.value ? &obj_true : &obj_false; + case EXPRESSION_PREFIX: + res = eval(env, expr->prefix.right); + if (res == OBJECT_ERROR) return res; + return eval_prefix_expression(env, expr->token, res); + case EXPRESSION_INFIX:; + struct object *left = eval(env, expr->infix.left); + if (left == OBJECT_ERROR) return res; + struct object *right = eval(env, expr->infix.right); + if (right == OBJECT_ERROR) { + object_unref(left); + return right; + } + return eval_infix_expression(env, expr->token, left, right); + case EXPRESSION_IF: + return eval_if_expression(env, &expr->cond); + case EXPRESSION_IDENT: + res = environment_get(env, &expr->ident.value); + if (!res) { + char ibuf[256]; + return new_error("not declared: %s", + slice_sprint(&expr->ident.value, ibuf)); + } + object_ref(res); + return res; + case EXPRESSION_FUNC: + res = object_new(expr); + return res; + case EXPRESSION_CALL: + return eval_call_expression(env, expr); + default: + return NULL; + } +} + +struct object * +eval_statement(struct environment *env, struct statement *st) +{ + struct object *val; + switch (st->type) { + case STATEMENT_EXPRESSION: + return eval(env, st->expr.expr); + case STATEMENT_BLOCK: + return eval_statements(env, st->block.statements); + case STATEMENT_RETURN: + val = eval(env, st->retrn.value); + if (val->type == OBJECT_ERROR) return val; + struct object *res = object_new(val); + return res; + case STATEMENT_LET: + val = eval(env, st->let.value); + if (val->type == OBJECT_ERROR) return val; + struct object *old = environment_set(env, st->let.name->value, val); + if (old) object_unref(old); + object_unref(val); + default: + return NULL; + } +} + +struct object * +eval_program(struct environment *env, struct program *prog) +{ + size_t i; + struct object *res = NULL; + struct statement *st; + vector_foreach(prog->statements, i, st) { + if (res) object_unref(res); + res = eval(env, st); + if (res) { + if (res->type == OBJECT_RETURN) { + struct object *val = res->retrn.value; + object_ref(val); + object_unref(res); + return val; + } else if (res->type == OBJECT_ERROR) { + return res; + } + } + } + return res; +} diff --git a/src/hmap.c b/src/hmap.c new file mode 100644 index 0000000..c605fad --- /dev/null +++ b/src/hmap.c @@ -0,0 +1,225 @@ +#include "hmap.h" +#include "slice.h" + +#include +#include +#include +#include + +#if SIZE_MAX == 0xFFFFFFFF +/* If size_t is 32bit */ +static const size_t fnv_prime = 16777619u; +static const size_t fnv_offsetb = 2166136261u; +#elif SIZE_MAX == 0xFFFFFFFFFFFFFFFF +/* If size_t is 64bit */ +static const size_t fnv_prime = 1099511628211u; +static const size_t fnv_offsetb = 14695981039346656037u; +#else +/* If size_t is 128bit. Maybe this is overdoing it? */ +static const size_t fnv_prime = 309485009821345068724781371u; +static const size_t fnv_offsetb = 144066263297769815596495629667062367629u; +#endif + +struct hnode { + struct slice key; + void *value; + struct hnode *next; +}; + +/* FNV1a */ +static size_t +hash(const char *str) +{ + size_t hash = fnv_offsetb; + size_t i = 0; + while (str[i] != '\0') { + hash ^= str[i]; + hash *= fnv_prime; + i++; + } + + return hash; +} + +static size_t +hash_slice(const struct slice *slice) +{ + size_t hash = fnv_offsetb; + size_t i = slice->start; + while (i < slice->end) { + hash ^= slice->str[i]; + hash *= fnv_prime; + i++; + } + + return hash; +} + +struct hmap * +hmap_new_with_cap(size_t cap) +{ + struct hmap *hm = malloc(sizeof *hm); + hm->cap = cap; + hm->size = 0; + if (hm == NULL) return NULL; + hm->buckets = calloc(cap, sizeof hm->buckets); + if (hm->buckets == NULL) { + free(hm); + return NULL; + } + + return hm; +} + +void * +hmap_sets(struct hmap *hm, struct slice key, void *value) +{ + int pos = hash_slice(&key) % hm->cap; + struct hnode *head = hm->buckets[pos]; + struct hnode *node = head; + void *old_value = NULL; + + while (node) { + if (slice_cmp(&node->key, &key) == 0) { + old_value = node->value; + node->value = value; + return old_value; + } + node = node->next; + } + + node = malloc(sizeof *node); + node->key = key; + node->value = value; + node->next = head; + hm->buckets[pos] = node; + hm->size++; + return old_value; +} + +void * +hmap_set(struct hmap *hm, const char *k, void *value) +{ + struct slice key = slice_fullstr(k); + return hmap_sets(hm, key, value); +} + +void * +hmap_gets(struct hmap *hm, const struct slice *key) +{ + size_t pos = hash_slice(key) % hm->cap; + struct hnode *node = hm->buckets[pos]; + while (node != NULL) { + if (slice_cmp(&node->key, key) == 0) { + return node->value; + } + + node = node->next; + } + + return NULL; +} + +void * +hmap_get(struct hmap *hm, const char *k) +{ + struct slice key = slice_fullstr(k); + return hmap_gets(hm, &key); +} + +void * +hmap_resolve(struct hmap *hm, const char *key) +{ + char tmp_key[64]; + int i = 0; + int j = 0; + + while (1) { + for (j=0; key[i] != '.' && key[i] != '\0'; i++, j++) { + tmp_key[j] = key[i]; + } + tmp_key[j] = '\0'; + hm = hmap_get(hm, tmp_key); + + // stop if we read key to end of string + if (key[i] == '\0') { + break; + } + + // otherwise, continue reading keys + i++; + } + + return hm; +} + +void * +hmap_removes(struct hmap *hm, const struct slice *key) +{ + int pos = hash_slice(key) % hm->cap; + struct hnode *node = hm->buckets[pos]; + struct hnode *prev = NULL; + void *old_value; + + while (node) { + if (slice_cmp(&node->key, key) == 0) { + if (prev) { + prev->next = node->next; + } else { + hm->buckets[pos] = node->next; + } + old_value = node->value; + free(node); + hm->size--; + return old_value; + } + + prev = node; + node = node->next; + } + + return NULL; +} + +void * +hmap_remove(struct hmap *hm, const char *k) +{ + struct slice key = slice_fullstr(k); + return hmap_removes(hm, &key); +} + +#define HMAP_WALK(hm, ...) \ + struct hnode *node; \ + struct hnode *next; \ + for (size_t i = 0; i < hm->cap; i++) { \ + node = hm->buckets[i]; \ + while (node) { \ + next = node->next; \ + __VA_ARGS__; \ + node = next; \ + } \ + } + +void +hmap_walk(struct hmap *hm, hmap_cb cb) +{ + HMAP_WALK(hm, cb(&node->key, node->value)); +} + +void +hmap_destroy(struct hmap *hm, hmap_cb cb) +{ + HMAP_WALK(hm, cb(&node->key, node->value), free(node)); + + free(hm->buckets); + free(hm); +} + +void +hmap_free(struct hmap *hm) +{ + HMAP_WALK(hm, free(node)); + + free(hm->buckets); + free(hm); +} diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..7ee9cbd --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,187 @@ +#include "lexer.h" +#include "token.h" + +#include +#include +#include +#include + +static bool +isidentc(char c) +{ + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_'; +} + +static void +lexer_eatspace(struct lexer *lexer) +{ + while(isspace(lexer->input[lexer->word.start])) { + lexer_read_char(lexer); + } +} + +static void +set_token(struct token *token, enum token_type t, const struct slice *s) +{ + token->type = t; + if (s == NULL) { + token->literal.str = ""; + token->literal.start = 0; + token->literal.end = 0; + } else { + slice_cpy(&token->literal, s); + } +} + +static void +lexer_read_ident(struct lexer *lexer, struct token *token) +{ + size_t start = lexer->word.start; + token->literal.str = lexer->input; + while (isidentc(lexer->input[lexer->word.start])) { + lexer_read_char(lexer); + } + token->literal.start = start; + token->literal.end = lexer->word.start; +} + +static void +lexer_read_num(struct lexer *lexer, struct token *token) +{ + size_t start = lexer->word.start; + token->literal.str = lexer->input; + while (isdigit(lexer->input[lexer->word.start])) { + lexer_read_char(lexer); + } + token->literal.start = start; + token->literal.end = lexer->word.start; +} + +static char +lexer_peek_char(struct lexer *lexer) +{ + if (lexer->word.start >= lexer->len) { + return 0; + } + return lexer->input[lexer->word.start + 1]; +} + +struct lexer * +lexer_new() +{ + struct lexer *lexer = malloc(sizeof *lexer); + token_init_keywords(); + + return lexer; +} + +void +lexer_read_char(struct lexer *lexer) +{ + lexer->word.start = lexer->word.end; + if (lexer->word.end >= lexer->len) { + lexer->word.end = 0; + return; + } + lexer->word.end++; +} + +struct token +lexer_next_token(struct lexer *lexer) +{ + lexer_eatspace(lexer); + + struct token token; + char c = lexer->input[lexer->word.start]; + + switch (c) { + case '=': + if (lexer_peek_char(lexer) == '=') { + lexer->word.end++; + set_token(&token, TOKEN_EQ, &lexer->word); + } else { + set_token(&token, TOKEN_ASSIGN, &lexer->word); + } + break; + case '+': + set_token(&token, TOKEN_PLUS, &lexer->word); + break; + case '-': + set_token(&token, TOKEN_MINUS, &lexer->word); + break; + case '!': + if (lexer_peek_char(lexer) == '=') { + lexer->word.end++; + set_token(&token, TOKEN_NOTEQ, &lexer->word); + } else { + set_token(&token, TOKEN_BANG, &lexer->word); + } + break; + case '/': + set_token(&token, TOKEN_SLASH, &lexer->word); + break; + case '*': + set_token(&token, TOKEN_ASTERISK, &lexer->word); + break; + case '<': + set_token(&token, TOKEN_LT, &lexer->word); + break; + case '>': + set_token(&token, TOKEN_GT, &lexer->word); + break; + case ';': + set_token(&token, TOKEN_SEMICOLON, &lexer->word); + break; + case '(': + set_token(&token, TOKEN_LPAREN, &lexer->word); + break; + case ')': + set_token(&token, TOKEN_RPAREN, &lexer->word); + break; + case ',': + set_token(&token, TOKEN_COMMA, &lexer->word); + break; + case '{': + set_token(&token, TOKEN_LBRACE, &lexer->word); + break; + case '}': + set_token(&token, TOKEN_RBRACE, &lexer->word); + break; + case 0: + set_token(&token, TOKEN_EOF, NULL); + break; + default: + if (isidentc(c)) { + lexer_read_ident(lexer, &token); + token.type = token_lookup_ident(&token.literal); + return token; + } else if (isdigit(c)) { + lexer_read_num(lexer, &token); + token.type = TOKEN_INT; + return token; + } + set_token(&token, TOKEN_ILLEGAL, &lexer->word); + } + + lexer_read_char(lexer); + + return token; +} + +void +lexer_reset(struct lexer *lexer, const char *input) +{ + lexer->input = input; + lexer->len = strlen(lexer->input); + lexer->word.str = lexer->input; + lexer->word.start = 0; + lexer->word.end = 0; + lexer_read_char(lexer); +} + +void +lexer_destroy(struct lexer *lexer) +{ + free(lexer); + token_free_keywords(); +} diff --git a/src/object.c b/src/object.c new file mode 100644 index 0000000..d3f589d --- /dev/null +++ b/src/object.c @@ -0,0 +1,195 @@ +#include "object.h" +#include "ast.h" +#include "hmap.h" +#include "vector.h" + +#include +#include +#include + +static const char *object_types[] = { + "error", + "null", + "int", + "bool", + "return", +}; + +static void +environment_destroy_cb(const void *key, void *val) +{ + struct object *obj = val; + object_unref(obj); +} + +static inline char * +bool_sprint(bool val, char *str) +{ + sprintf(str, val ? "true" : "false"); + return str; +} + +extern inline const char * +object_type_print(enum object_type type) +{ + return object_types[type]; +} + +static inline char * +object_func_sprint(struct object *obj, char *str) +{ + char *cur = str; + sprintf(cur, "fn("); + cur += strlen(cur); + size_t i; + struct expression *ident; + vector_foreach(obj->func.params, i, ident) { + node_sprint(ident, cur); + cur += strlen(cur); + if (i < (obj->func.params->len - 1)) { + cur[0] = ',', cur[1] = ' ', cur += 2; + } + } + sprintf(cur, ") {\n"); + cur += strlen(cur); + node_sprint(obj->func.body, cur); + cur += strlen(cur); + sprintf(cur, "\n}"); + + return str; +} + +char * +object_sprint(struct object *obj, char *str) +{ + switch (obj->type) { + case OBJECT_NULL: + sprintf(str, "null"); + break; + case OBJECT_BOOL: + return bool_sprint(obj->boolean, str); + case OBJECT_INT: + sprintf(str, "%ld", obj->integer); + break; + case OBJECT_RETURN: + object_sprint(obj->retrn.value, str); + case OBJECT_ERROR: + strcpy(str, obj->error.msg); + case OBJECT_FUNC: + return object_func_sprint(obj, str); + } + return str; +} + +struct object * +object_new_int(int64_t val) +{ + struct object *obj = malloc(sizeof(*obj)); + obj->type = OBJECT_INT; + obj->refcount = 1; + obj->integer = val; + return obj; +} + +struct object * +object_new_error(char *msg) +{ + struct object *obj = malloc(sizeof(*obj)); + obj->type = OBJECT_ERROR; + obj->refcount = 1; + obj->error.msg = msg; + return obj; +} + +struct object * +object_new_return(struct object *val) +{ + struct object *obj = malloc(sizeof(*obj)); + obj->type = OBJECT_RETURN; + obj->refcount = 1; + obj->retrn.value = val; + return obj; +} + +struct object * +object_new_func(struct expression *expr) +{ + struct object *obj = malloc(sizeof(*obj)); + obj->type = OBJECT_FUNC; + obj->refcount = 1; + obj->func.body = node_dup(expr->func.body); + obj->func.params = expression_vector_dup(expr->func.parameters); + return obj; +} + +void +object_ref(struct object *obj) +{ + obj->refcount++; +} + +void +object_unref(struct object *obj) +{ + if (obj->type == OBJECT_NULL || obj->type == OBJECT_BOOL) return; + if (--obj->refcount < 1) { + switch (obj->type) { + case OBJECT_RETURN: + object_unref(obj->retrn.value); + break; + case OBJECT_ERROR: + free(obj->error.msg); + break; + case OBJECT_FUNC: + node_destroy(obj->func.body); + size_t i; + struct expression *param; + vector_foreach(obj->func.params, i, param) { + node_destroy(param); + } + vector_free(obj->func.params); + break; + default: + break; + } + free(obj); + } +} + +struct environment * +environment_new_enclosed(struct environment *outer) +{ + struct environment *env = malloc(sizeof(*env)); + env->store = hmap_new(); + if (!env->store) { + free(env); + return NULL; + } + env->outer = outer; + + return env; +} + +struct object * +environment_set(struct environment *env, struct slice key, struct object *val) +{ + object_ref(val); + return hmap_sets(env->store, key, val); +} + +struct object * +environment_get(struct environment *env, const struct slice *key) +{ + struct object *obj = hmap_gets(env->store, key); + if (!obj && env->outer) { + obj = environment_get(env->outer, key); + } + return obj; +} + +void +environment_destroy(struct environment *env) +{ + hmap_destroy(env->store, environment_destroy_cb); + free(env); +} diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..e8c44df --- /dev/null +++ b/src/parser.c @@ -0,0 +1,605 @@ +#include "parser.h" +#include "ast.h" +#include "token.h" +#include "vector.h" + +#include +#include + +enum precedence { + PRE_LOWEST = 1, + PRE_EQUALS, + PRE_LG, + PRE_SUM, + PRE_PROD, + PRE_PREFIX, + PRE_CALL, +}; + +static enum precedence precedences[] = { + 0, + PRE_LOWEST, + PRE_EQUALS, + PRE_LG, + PRE_SUM, + PRE_PROD, + PRE_PREFIX, + PRE_CALL, +}; + +static inline void +parser_register_prefix(struct parser *parser, enum token_type t, + prefix_parse_f fn) +{ + hmap_set(parser->prefix_fns, token_type_print(t), fn); +} + +static inline void +parser_register_infix(struct parser *parser, enum token_type t, + infix_parse_f fn) +{ + hmap_set(parser->infix_fns, token_type_print(t), fn); +} + +static inline void +parser_register_precedence(struct parser *parser, enum token_type t, + enum precedence pre) +{ + hmap_set(parser->precedences, token_type_print(t), &precedences[pre]); +} + +static inline prefix_parse_f +parser_get_prefix(struct parser *parser, enum token_type t) +{ + return hmap_get(parser->prefix_fns, token_type_print(t)); +} + +static inline infix_parse_f +parser_get_infix(struct parser *parser, enum token_type t) +{ + return hmap_get(parser->infix_fns, token_type_print(t)); +} + +static inline enum precedence +parser_get_precedence(struct parser *parser, enum token_type t) +{ + enum precedence *pre = hmap_get(parser->precedences, token_type_print(t)); + if (!pre) return PRE_LOWEST; + return *pre; +} + +static inline bool +parser_cur_token_is(struct parser *parser, enum token_type t) +{ + return parser->cur_token.type == t; +} + +static inline bool +parser_peek_token_is(struct parser *parser, enum token_type t) +{ + return parser->peek_token.type == t; +} + +static inline enum precedence +parser_peek_precedence(struct parser *parser) +{ + return parser_get_precedence(parser, parser->peek_token.type); +} + +static inline enum precedence +parser_cur_precedence(struct parser *parser) +{ + return parser_get_precedence(parser, parser->cur_token.type); +} + +static inline void +parser_peek_error(struct parser *parser, enum token_type t) +{ + char *msg = malloc(sizeof(*msg) * 1024); + if (msg) { + sprintf(msg, "expected token %s, got %s", token_type_print(t), + token_type_print(parser->peek_token.type)); + vector_push(parser->errors, msg); + } +} + +static inline bool +parser_expect_peek(struct parser *parser, enum token_type t) +{ + if (parser_peek_token_is(parser, t)) { + parser_next_token(parser); + return true; + } + parser_peek_error(parser, t); + return false; +} + +static inline void +parser_no_prefix_fn_error(struct parser *parser, enum token_type t) +{ + char *msg = malloc(256); + sprintf(msg, "%s not recognized as prefix", token_type_print(t)); + vector_push(parser->errors, msg); +} + +static struct expression * +parser_parse_expression(struct parser *parser, enum precedence pre) +{ + prefix_parse_f prefix = parser_get_prefix(parser, parser->cur_token.type); + if (!prefix) { + parser_no_prefix_fn_error(parser, parser->cur_token.type); + return NULL; + } + struct expression *lexpr = prefix(parser); + + while (pre < parser_peek_precedence(parser) + && !parser_peek_token_is(parser, TOKEN_SEMICOLON)) { + infix_parse_f infix = parser_get_infix(parser, parser->peek_token.type); + if (!infix) { + return lexpr; + } + + parser_next_token(parser); + lexpr = infix(parser, lexpr); + } + + return lexpr; +} + +static struct expression * +parser_parse_identifier(struct parser *parser) +{ + struct expression *expr = malloc(sizeof(*expr)); + expr->type = EXPRESSION_IDENT; + expr->token = parser->cur_token; + expr->ident.value = parser->cur_token.literal; + + return expr; +} + +static struct expression * +parser_parse_integer(struct parser *parser) +{ + struct expression *expr = malloc(sizeof(*expr)); + expr->type = EXPRESSION_INT; + expr->token = parser->cur_token; + + char *end; + expr->integer.value = strtol(expr->token.literal.str + + expr->token.literal.start, + &end, 0); + if (*end != '\0' && (expr->token.literal.str + expr->token.literal.end) < end) { + char *msg = malloc(256); + char litbuf[32]; + sprintf(msg, "%s is not a valid integer", + slice_sprint(&expr->token.literal, litbuf)); + vector_push(parser->errors, msg); + free(expr); + return NULL; + } + + return expr; +} + +static struct expression * +parser_parse_boolean(struct parser *parser) +{ + struct expression *expr = malloc(sizeof(*expr)); + expr->type = EXPRESSION_BOOL; + expr->token = parser->cur_token; + expr->boolean.value = expr->token.type == TOKEN_TRUE; + + return expr; +} + +static struct expression * +parser_parse_grouped(struct parser *parser) +{ + parser_next_token(parser); + struct expression *expr = parser_parse_expression(parser, PRE_LOWEST); + if (!parser_expect_peek(parser, TOKEN_RPAREN)) { + return NULL; + } + + return expr; +} + +static struct expression * +parser_parse_prefix(struct parser *parser) +{ + struct expression *expr = malloc(sizeof(*expr)); + expr->type = EXPRESSION_PREFIX; + expr->token = parser->cur_token; + expr->prefix.operator = parser->cur_token.literal; + + parser_next_token(parser); + expr->prefix.right = parser_parse_expression(parser, PRE_PREFIX); + + return expr; +} + +static struct expression * +parser_parse_infix(struct parser *parser, struct expression *lexpr) +{ + struct expression *expr = malloc(sizeof(*expr)); + expr->type = EXPRESSION_INFIX; + expr->token = parser->cur_token; + expr->infix.operator = parser->cur_token.literal; + expr->infix.left = lexpr; + + enum precedence pre = parser_cur_precedence(parser); + parser_next_token(parser); + expr->infix.right = parser_parse_expression(parser, pre); + + return expr; +} + +static struct statement * +parser_parse_let_statement(struct parser *parser) +{ + struct statement *st = calloc(1, sizeof(*st)); + if (!st) return NULL; + st->type = STATEMENT_LET; + st->token = parser->cur_token; + + if (!parser_expect_peek(parser, TOKEN_IDENT)) goto fail; + + st->let.name = malloc(sizeof(*st->let.name)); + if (!st->let.name) goto fail; + st->let.name->token = parser->cur_token; + st->let.name->value = parser->cur_token.literal; + + if (!parser_expect_peek(parser, TOKEN_ASSIGN)) goto ass_fail; + + parser_next_token(parser); + + st->let.value = parser_parse_expression(parser, PRE_LOWEST); + if (parser_peek_token_is(parser, TOKEN_SEMICOLON)) { + parser_next_token(parser); + } + + return st; +ass_fail: + free(st->let.name); +fail: + free(st); + return NULL; +} + +static struct statement * +parser_parse_return_statement(struct parser *parser) +{ + struct statement *st = calloc(1, sizeof(*st)); + if (!st) return NULL; + st->type = STATEMENT_RETURN; + st->token = parser->cur_token; + + parser_next_token(parser); + + st->retrn.value = parser_parse_expression(parser, PRE_LOWEST); + if (parser_peek_token_is(parser, TOKEN_SEMICOLON)) { + parser_next_token(parser); + } + + return st; +} + +static struct statement * +parser_parse_expression_statement(struct parser *parser) +{ + struct statement *st = calloc(1, sizeof(*st)); + if (!st) return NULL; + st->type = STATEMENT_EXPRESSION; + st->token = parser->cur_token; + st->expr.expr = parser_parse_expression(parser, PRE_LOWEST); + + if (parser_peek_token_is(parser, TOKEN_SEMICOLON)) { + parser_next_token(parser); + } + + return st; +} + +static struct statement *parser_parse_statement(struct parser *); + +static struct statement * +parser_parse_block_statement(struct parser *parser) +{ + struct statement *st = calloc(1, sizeof(*st)); + if (!st) return NULL; + st->type = STATEMENT_BLOCK; + st->block.statements = vector_new(); + parser_next_token(parser); + + while (!parser_cur_token_is(parser, TOKEN_RBRACE) + && !parser_cur_token_is(parser, TOKEN_EOF)) { + struct statement *subst = parser_parse_statement(parser); + if (subst) { + vector_push(st->block.statements, subst); + } + parser_next_token(parser); + } + + return st; +} + +static struct expression * +parser_parse_if(struct parser *parser) +{ + struct expression *expr = calloc(1, sizeof(*expr)); + expr->type = EXPRESSION_IF; + expr->token = parser->cur_token; + + if (!parser_expect_peek(parser, TOKEN_LPAREN)) { + goto fail; + } + + parser_next_token(parser); + expr->cond.condition = parser_parse_expression(parser, PRE_LOWEST); + + if (!parser_expect_peek(parser, TOKEN_RPAREN)) { + goto fail; + } + if (!parser_expect_peek(parser, TOKEN_LBRACE)) { + goto fail; + } + + expr->cond.consequence = parser_parse_block_statement(parser); + + if (parser_peek_token_is(parser, TOKEN_ELSE)) { + parser_next_token(parser); + if (!parser_expect_peek(parser, TOKEN_LBRACE)) { + goto fail; + } + expr->cond.alternative = parser_parse_block_statement(parser); + } + + return expr; +fail: + free(expr); + return NULL; +} + +static struct vector * +parser_parse_func_params(struct parser *parser) +{ + struct vector *idents = vector_new_with_cap(8); + + if (parser_peek_token_is(parser, TOKEN_RPAREN)) { + parser_next_token(parser); + return idents; + } + + parser_next_token(parser); + + struct expression *ident = calloc(1, sizeof(*ident)); + ident->token = parser->cur_token; + ident->ident.value = parser->cur_token.literal; + vector_push(idents, ident); + while (parser_peek_token_is(parser, TOKEN_COMMA)) { + parser_next_token(parser); + parser_next_token(parser); + ident = calloc(1, sizeof(*ident)); + ident->token = parser->cur_token; + ident->ident.value = parser->cur_token.literal; + vector_push(idents, ident); + } + + if (!parser_expect_peek(parser, TOKEN_RPAREN)) { + size_t i; + vector_foreach(idents, i, ident) { + expression_destroy(ident); + } + vector_free(idents); + return NULL; + } + + return idents; +} + +static struct expression * +parser_parse_func_literal(struct parser *parser) +{ + struct expression *expr = calloc(1, sizeof(*expr)); + expr->type = EXPRESSION_FUNC; + expr->token = parser->cur_token; + + if (!parser_expect_peek(parser, TOKEN_LPAREN)) { + goto fail; + } + expr->func.parameters = parser_parse_func_params(parser); + if (!parser_expect_peek(parser, TOKEN_LBRACE)) { + goto fail; + } + + expr->func.body = parser_parse_block_statement(parser); + + return expr; +fail: + free(expr); + return NULL; +} + +static struct vector * +parser_parse_call_arguments(struct parser *parser) +{ + struct vector *args = vector_new_with_cap(8); + + if (parser_peek_token_is(parser, TOKEN_RPAREN)) { + parser_next_token(parser); + return args; + } + + parser_next_token(parser); + vector_push(args, parser_parse_expression(parser, PRE_LOWEST)); + + while (parser_peek_token_is(parser, TOKEN_COMMA)) { + parser_next_token(parser); + parser_next_token(parser); + vector_push(args, parser_parse_expression(parser, PRE_LOWEST)); + } + + if (!parser_expect_peek(parser, TOKEN_RPAREN)) { + size_t i; + struct expression *expr; + vector_foreach(args, i, expr) { + expression_destroy(expr); + } + vector_free(args); + return NULL; + } + + return args; +} + +static struct expression * +parser_parse_call_expression(struct parser *parser, struct expression *func) +{ + struct expression *expr = calloc(1, sizeof(*expr)); + expr->token = parser->cur_token; + expr->type = EXPRESSION_CALL; + expr->call.func = func; + expr->call.arguments = parser_parse_call_arguments(parser); + + return expr; +} + +static struct statement * +parser_parse_statement(struct parser *parser) +{ + switch (parser->cur_token.type) { + case TOKEN_LET: + return parser_parse_let_statement(parser); + case TOKEN_RETURN: + return parser_parse_return_statement(parser); + case TOKEN_LBRACE: + return parser_parse_block_statement(parser); + default: + return parser_parse_expression_statement(parser); + } +} + +struct parser * +parser_new() +{ + struct parser *parser = malloc(sizeof *parser); + if (!parser) return NULL; + + struct lexer *lex = lexer_new(); + if (!lex) goto fail; + parser->lexer = lex; + + parser->errors = vector_new(); + if (!parser->errors) goto fail2; + + parser->prefix_fns = hmap_new(); + if (!parser->prefix_fns) goto fail3; + parser_register_prefix(parser, TOKEN_IDENT, parser_parse_identifier); + parser_register_prefix(parser, TOKEN_INT, parser_parse_integer); + parser_register_prefix(parser, TOKEN_BANG, parser_parse_prefix); + parser_register_prefix(parser, TOKEN_MINUS, parser_parse_prefix); + parser_register_prefix(parser, TOKEN_TRUE, parser_parse_boolean); + parser_register_prefix(parser, TOKEN_FALSE, parser_parse_boolean); + parser_register_prefix(parser, TOKEN_LPAREN, parser_parse_grouped); + parser_register_prefix(parser, TOKEN_RPAREN, parser_parse_grouped); + parser_register_prefix(parser, TOKEN_IF, parser_parse_if); + parser_register_prefix(parser, TOKEN_FUNC, parser_parse_func_literal); + + parser->infix_fns = hmap_new(); + if (!parser->infix_fns) goto fail4; + parser_register_infix(parser, TOKEN_PLUS, parser_parse_infix); + parser_register_infix(parser, TOKEN_MINUS, parser_parse_infix); + parser_register_infix(parser, TOKEN_SLASH, parser_parse_infix); + parser_register_infix(parser, TOKEN_ASTERISK, parser_parse_infix); + parser_register_infix(parser, TOKEN_EQ, parser_parse_infix); + parser_register_infix(parser, TOKEN_NOTEQ, parser_parse_infix); + parser_register_infix(parser, TOKEN_LT, parser_parse_infix); + parser_register_infix(parser, TOKEN_GT, parser_parse_infix); + parser_register_infix(parser, TOKEN_LPAREN, parser_parse_call_expression); + + parser->precedences = hmap_new(); + if (!parser->precedences) goto fail5; + parser_register_precedence(parser, TOKEN_EQ, PRE_EQUALS); + parser_register_precedence(parser, TOKEN_NOTEQ, PRE_EQUALS); + parser_register_precedence(parser, TOKEN_LT, PRE_LG); + parser_register_precedence(parser, TOKEN_GT, PRE_LG); + parser_register_precedence(parser, TOKEN_PLUS, PRE_SUM); + parser_register_precedence(parser, TOKEN_MINUS, PRE_SUM); + parser_register_precedence(parser, TOKEN_ASTERISK, PRE_PROD); + parser_register_precedence(parser, TOKEN_SLASH, PRE_PROD); + parser_register_precedence(parser, TOKEN_LPAREN, PRE_CALL); + + return parser; +fail5: + hmap_free(parser->infix_fns); +fail4: + hmap_free(parser->prefix_fns); +fail3: + vector_free(parser->errors); +fail2: + lexer_destroy(lex); +fail: + free(parser); + return NULL; +} + +void +parser_reset(struct parser *parser, const char *input) +{ + size_t i; + char *val; + vector_foreach (parser->errors, i, val) { + free(val); + } + parser->errors->len = 0; + + lexer_reset(parser->lexer, input); + + parser_next_token(parser); + parser_next_token(parser); +} + +void +parser_next_token(struct parser *parser) +{ + parser->cur_token = parser->peek_token; + parser->peek_token = lexer_next_token(parser->lexer); +} + +struct program * +parser_parse_program(struct parser *parser) +{ + struct program *program = malloc(sizeof *program); + if (!program) return NULL; + program->statements = vector_new(); + if (!program->statements) { + free(program); + return NULL; + } + + while (!parser_cur_token_is(parser, TOKEN_EOF)) { + struct statement *st = parser_parse_statement(parser); + if (st != NULL) { + vector_push(program->statements, st); + } + parser_next_token(parser); + } + + return program; +} + +void +parser_destroy(struct parser *parser) +{ + size_t i; + char *val; + vector_foreach (parser->errors, i, val) { + free(val); + } + vector_free(parser->errors); + hmap_free(parser->infix_fns); + hmap_free(parser->prefix_fns); + hmap_free(parser->precedences); + lexer_destroy(parser->lexer); + free(parser); +} diff --git a/src/repl.c b/src/repl.c new file mode 100644 index 0000000..236d9ec --- /dev/null +++ b/src/repl.c @@ -0,0 +1,67 @@ +#include "repl.h" + +#include "ast.h" +#include "object.h" +#include "token.h" +#include "vector.h" +#include "parser.h" +#include "eval.h" + +#define PROMPT ">> " + +static void +print_parser_errors(FILE *out, struct vector *errors) +{ + size_t i; + char *msg; + vector_foreach(errors, i, msg) { + fprintf(out, "\t%s\n", msg); + } +} + +void +repl_start(FILE *in, FILE *out) +{ + char obuf[2048]; + struct parser *parser = parser_new(); + struct environment *env = environment_new(); + struct vector *history = vector_new(); + for (;;) { + fprintf(out, PROMPT); + char *input = malloc(1024); + if (!fgets(input, 1024, in)) { + if (ferror(in)) { + perror("REPL: can't read"); + free(input); + goto end; + } + fprintf(out, "EOF\n"); + free(input); + goto end; + } + parser_reset(parser, input); + struct program *prog = parser_parse_program(parser); + if (parser->errors->len > 0) { + print_parser_errors(out, parser->errors); + goto skip; + } + struct object *res = eval(env, prog); + if (res) { + fprintf(out, "%s\n", object_sprint(res, obuf)); + object_unref(res); + } + vector_push(history, input); +skip: + node_destroy(prog); + } + +end:; + size_t i; + char *input; + vector_foreach(history, i, input) { + free(input); + } + vector_free(history); + environment_destroy(env); + parser_destroy(parser); +} diff --git a/src/slice.c b/src/slice.c new file mode 100644 index 0000000..eb88f93 --- /dev/null +++ b/src/slice.c @@ -0,0 +1,77 @@ +#include "slice.h" + +#include +#include + +struct slice +slice_new(const char *str, size_t start, size_t end) +{ + struct slice slice = { + .str = str, + .start = start, + .end = end, + }; + + return slice; +} + +struct slice +slice_fullstr(const char *str) +{ + struct slice slice = { + .str = str, + .start = 0, + .end = strlen(str), + }; + + return slice; +} + +void +slice_set(struct slice *slice, const char *str, size_t start, size_t end) +{ + slice->str = str; + slice->start = start; + slice->end = end; +} + +size_t +slice_len(const struct slice *slice) +{ + return slice->end - slice->start; +} + +int +slice_cmp(const struct slice *restrict a, const struct slice *restrict b) +{ + size_t lena = slice_len(a), lenb = slice_len(b); + int lencmp = (lena > lenb) - (lena < lenb); + if (lencmp) { + return lencmp; + } + + for (size_t i = 0; i < lena; i++) { + char ca = a->str[a->start + i], cb = b->str[b->start + i]; + int cmp = (ca > cb) - (ca < cb); + if (cmp) return cmp; + } + + return 0; +} + +void +slice_cpy(struct slice *dst, const struct slice *src) +{ + dst->str = src->str; + dst->start = src->start; + dst->end = src->end; +} + +char * +slice_sprint(struct slice *slice, char *str) +{ + size_t len = slice->end - slice->start; + strncpy(str, slice->str + slice->start, len); + str[len] = '\0'; + return str; +} diff --git a/src/tests/ast.c b/src/tests/ast.c new file mode 100644 index 0000000..eb20e03 --- /dev/null +++ b/src/tests/ast.c @@ -0,0 +1,42 @@ +#include "tests/tests.h" +#include "ast.h" + +#include + +#include "parser.h" + +static void +check_parser_errors(struct parser *parser) +{ + if (parser->errors->len > 0) { + printf("\n"); + size_t i; + char *val; + vector_foreach (parser->errors, i, val) { + printf("parser error %lu: %s\n", i, val); + } + FAIL_TEST("parser encountered errors\n"); + } +} + +static void +test_string(void) +{ + char *input = "let var = anotherVar;"; + struct parser *parser = parser_new(input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + char progbuf[1024]; + node_sprint(prog, progbuf); + asserteq(strcmp(input, progbuf), 0); + node_destroy(prog); + parser_destroy(parser); +} + +int +main(void) +{ + INIT_TESTS(); + RUN_TEST(test_string); +} diff --git a/src/tests/eval.c b/src/tests/eval.c new file mode 100644 index 0000000..b341181 --- /dev/null +++ b/src/tests/eval.c @@ -0,0 +1,332 @@ +#include "eval.h" +#include "tests/tests.h" + +#include "parser.h" +#include "object.h" + +#include + +struct parser *parser; + +static struct object * +test_eval(const char *input) +{ + parser_reset(parser, input); + struct program *prog = parser_parse_program(parser); + struct environment *env = environment_new(); + struct object *obj = eval(env, prog); + environment_destroy(env); + node_destroy(prog); + + return obj; +} + +static void +test_eval_integer_expressions(void) +{ + struct { + char *input; + int64_t expect; + } tests[] = { + { "5", 5 }, + { "69", 69 }, + { "-5", -5 }, + { "-69", -69 }, + { "5 + 5 + 5 + 5 - 10", 10 }, + { "2 * 2 * 2 * 2 * 2", 32 }, + { "-50 + 100 + -50", 0 }, + { "5 * 2 + 10", 20 }, + { "5 + 2 * 10", 25 }, + { "20 + 2 * -10", 0 }, + { "50 / 2 * 2 + 10", 60 }, + { "2 * (5 + 10)", 30 }, + { "3 * 3 * 3 + 10", 37 }, + { "3 * (3 * 3) + 10", 37 }, + { "(5 + 10 * 2 + 15 / 3) * 2 + -10", 50 }, + { 0 }, + }; + + for (size_t i = 0; tests[i].input != NULL; i++) { + struct object *res = test_eval(tests[i].input); + assertneq(res, NULL); + asserteq(res->type, OBJECT_INT); + asserteq(tests[i].expect, res->integer); + object_unref(res); + } +} + +static void +test_eval_boolean_expressions(void) +{ + struct { + char *input; + bool expect; + } tests[] = { + { "true", true }, + { "false", false }, + { "1 < 2", true }, + { "1 > 2", false }, + { "1 < 1", false }, + { "1 > 1", false }, + { "1 == 1", true }, + { "1 != 1", false }, + { "1 == 2", false }, + { "1 != 2", true }, + { "true == true", true }, + { "false == false", true }, + { "true == false", false }, + { "true != false", true }, + { "false != true", true }, + { "(1 < 2) == true", true }, + { "(1 < 2) == false", false }, + { "(1 > 2) == true", false }, + { "(1 > 2) == false", true }, + { 0 }, + }; + + for (size_t i = 0; tests[i].input != NULL; i++) { + struct object *res = test_eval(tests[i].input); + assertneq(res, NULL); + asserteq(res->type, OBJECT_BOOL); + asserteq(tests[i].expect, res->boolean); + } +} + +static void +test_eval_bang_operators(void) +{ + struct { + char *input; + bool expect; + } tests[] = { + { "!true", false }, + { "!false", true }, + { "!5", false }, + { "!!true", true }, + { "!!false", false }, + { "!!5", true }, + { 0 }, + }; + + for (size_t i = 0; tests[i].input != NULL; i++) { + struct object *res = test_eval(tests[i].input); + assertneq(res, NULL); + asserteq(res->type, OBJECT_BOOL); + asserteq(tests[i].expect, res->boolean); + object_unref(res); + } +} + +#define OBJ_INT(v) (struct object) { OBJECT_INT, .integer = v } +#define OBJ_BOOL(v) (struct object) { OBJECT_BOOL, .boolean = v } +#define OBJ_NULL() (struct object) { OBJECT_NULL, 0, } + +#define OBJ(v) _Generic((v), \ + int64_t: OBJ_INT(v), \ + int: OBJ_INT(v), \ + bool: OBJ_BOOL(v) \ + ) + +static void +test_eval_ifelse_expressions(void) +{ + struct { + char *input; + struct object expect; + } tests[] = { + { "if (true) { 10 }", OBJ(10) }, + { "if (false) { 10 }", OBJ_NULL() }, + { "if (1) { 10 }", OBJ(10) }, + { "if (1 < 2) { 10 }", OBJ(10) }, + { "if (1 > 2) { 10 }", OBJ_NULL() }, + { "if (1 > 2) { 10 } else { 20 }", OBJ(20) }, + { "if (1 < 2) { 10 } else { 20 }", OBJ(10) }, + { 0 }, + }; + + for (size_t i = 0; tests[i].input != NULL; i++) { + struct object *res = test_eval(tests[i].input); + asserteq(tests[i].expect.type, res->type); + asserteq(tests[i].expect.integer, res->integer); + object_unref(res); + } +} + +static void +test_eval_return_statements(void) +{ + struct { + char *input; + int64_t expect; + } tests[] = { + { "return 10;", 10 }, + { "return 10; 9;", 10 }, + { "return 2 * 5; 9;", 10 }, + { "9; return 2 * 5; 9;", 10 }, + { + "if (10 > 1) {\n" + "\tif(10 > 1) {\n" + "\t\treturn 10;\n" + "\t\n}" + "return 1;\n" + "}", + 10, + }, + { 0 }, + }; + + for (size_t i = 0; tests[i].input != NULL; i++) { + struct object *res = test_eval(tests[i].input); + assertneq(res, NULL); + asserteq(res->type, OBJECT_INT); + asserteq(tests[i].expect, res->integer); + object_unref(res); + } +} + +static void +test_errors(void) +{ + struct { + char *input; + char *expect; + } tests[] = { + { + "5 + true;", + "type mismatch: int + bool", + }, + { + "5 + true; 5;", + "type mismatch: int + bool", + }, + { + "-true", + "unknown operator: -bool", + }, + { + "true + false;", + "unknown operator: bool + bool", + }, + { + "5; true + false; 5;", + "unknown operator: bool + bool", + }, + { + "if (10 > 1) { true + false; }", + "unknown operator: bool + bool", + }, + { + "if (10 > 1) {\n" + "\tif(10 > 1) {\n" + "\t\treturn true + false;\n" + "\t\n}" + "return 1;\n" + "}", + "unknown operator: bool + bool", + }, + { + "foobar;", + "not declared: foobar", + }, + { 0 }, + }; + + for (size_t i = 0; tests[i].input != NULL; i++) { + struct object *res = test_eval(tests[i].input); + assertneq(res, NULL); + asserteq(res->type, OBJECT_ERROR); + asserteq(strcmp(tests[i].expect, res->error.msg), 0); + object_unref(res); + } +} + +static void +test_eval_let_statements(void) +{ + struct { + char *input; + int64_t expect; + } tests[] = { + { "let a = 5; a;", 5 }, + { "let a = 5 * 5; a;", 25 }, + { "let a = 5; let b = a; b;", 5 }, + { "let a = 5; let b = a; let c = a + b + 5; c;", 15 }, + { 0 }, + }; + + for (size_t i = 0; tests[i].input != NULL; i++) { + struct object *res = test_eval(tests[i].input); + assertneq(res, NULL); + asserteq(res->type, OBJECT_INT); + asserteq(tests[i].expect, res->integer); + object_unref(res); + } +} + +static void +test_eval_funcs(void) +{ + char *input = "fn(x) { x + 2; };"; + char *expect = "fn(x) {\n(x + 2)\n}"; + struct object *res = test_eval(input); + assertneq(res, NULL); + asserteq(res->type, OBJECT_FUNC); + char fbuf[128]; + object_sprint(res, fbuf); + asserteq(strcmp(fbuf, expect), 0); + object_unref(res); +} + +static void +test_call_funcs(void) +{ + struct { + char *input; + int64_t expect; + } tests[] = { + { "let identity = fn(x) { x; }; identity(5);", 5 }, + { "let identity = fn(x) { return x; }; identity(5);", 5 }, + { "let double = fn(x) { x * 2; }; double(5);", 10 }, + { "let add = fn(x, y) { x + y; }; add(5, 5);", 10 }, + { "let add = fn(x, y) { x + y; }; add(5 + 5, add(5, 5));", 20 }, + { "fn(x) { x; }(5)", 5 }, + { 0 }, + }; + + for (size_t i = 0; tests[i].input != NULL; i++) { + struct object *res = test_eval(tests[i].input); + assertneq(res, NULL); + asserteq(res->type, OBJECT_INT); + asserteq(tests[i].expect, res->integer); + object_unref(res); + } +} + +static void +init(void) +{ + parser = parser_new(); +} + +static void +cleanup(void) +{ + parser_destroy(parser); +} + +int +main(void) +{ + init(); + INIT_TESTS(); + RUN_TEST(test_eval_integer_expressions); + RUN_TEST(test_eval_boolean_expressions); + RUN_TEST(test_eval_bang_operators); + RUN_TEST(test_eval_ifelse_expressions); + RUN_TEST(test_eval_return_statements); + RUN_TEST(test_errors); + RUN_TEST(test_eval_let_statements); + RUN_TEST(test_eval_funcs); + RUN_TEST(test_call_funcs); + cleanup(); +} diff --git a/src/tests/lexer.c b/src/tests/lexer.c new file mode 100644 index 0000000..3700ae6 --- /dev/null +++ b/src/tests/lexer.c @@ -0,0 +1,126 @@ +#include "tests/tests.h" +#include "lexer.h" + +#include + +#include "slice.h" +#include "token.h" + +static void +test_next_token(void) +{ + char *input = "let five = 5;\n" + "let ten = 10;\n" + "\n" + "let add = fn(x, y) {\n" + "\tx + y;\n" + "};\n" + "\n" + "let result = add(five, ten);" + "!-/*5;\n" + "5 < 10 > 5;\n" + "\n" + "if (5 < 10) {\n" + "\treturn true;\n" + "} else {\n" + "\treturn false;\n" + "}\n" + "\n" + "10 == 10;\n" + "10 != 9;\n"; + + struct lexer *lexer = lexer_new(input); + struct token expected[] = { + { TOKEN_LET, slice_fullstr("let") }, + { TOKEN_IDENT, slice_fullstr("five") }, + { TOKEN_ASSIGN, slice_fullstr("=") }, + { TOKEN_INT, slice_fullstr("5") }, + { TOKEN_SEMICOLON, slice_fullstr(";") }, + { TOKEN_LET, slice_fullstr("let") }, + { TOKEN_IDENT, slice_fullstr("ten") }, + { TOKEN_ASSIGN, slice_fullstr("=") }, + { TOKEN_INT, slice_fullstr("10") }, + { TOKEN_SEMICOLON, slice_fullstr(";") }, + { TOKEN_LET, slice_fullstr("let") }, + { TOKEN_IDENT, slice_fullstr("add") }, + { TOKEN_ASSIGN, slice_fullstr("=") }, + { TOKEN_FUNC, slice_fullstr("fn") }, + { TOKEN_LPAREN, slice_fullstr("(") }, + { TOKEN_IDENT, slice_fullstr("x") }, + { TOKEN_COMMA, slice_fullstr(",") }, + { TOKEN_IDENT, slice_fullstr("y") }, + { TOKEN_RPAREN, slice_fullstr(")") }, + { TOKEN_LBRACE, slice_fullstr("{") }, + { TOKEN_IDENT, slice_fullstr("x") }, + { TOKEN_PLUS, slice_fullstr("+") }, + { TOKEN_IDENT, slice_fullstr("y") }, + { TOKEN_SEMICOLON, slice_fullstr(";") }, + { TOKEN_RBRACE, slice_fullstr("}") }, + { TOKEN_SEMICOLON, slice_fullstr(";") }, + { TOKEN_LET, slice_fullstr("let") }, + { TOKEN_IDENT, slice_fullstr("result") }, + { TOKEN_ASSIGN, slice_fullstr("=") }, + { TOKEN_IDENT, slice_fullstr("add") }, + { TOKEN_LPAREN, slice_fullstr("(") }, + { TOKEN_IDENT, slice_fullstr("five") }, + { TOKEN_COMMA, slice_fullstr(",") }, + { TOKEN_IDENT, slice_fullstr("ten") }, + { TOKEN_RPAREN, slice_fullstr(")") }, + { TOKEN_SEMICOLON, slice_fullstr(";") }, + { TOKEN_BANG, slice_fullstr("!") }, + { TOKEN_MINUS, slice_fullstr("-") }, + { TOKEN_SLASH, slice_fullstr("/") }, + { TOKEN_ASTERISK, slice_fullstr("*") }, + { TOKEN_INT, slice_fullstr("5") }, + { TOKEN_SEMICOLON, slice_fullstr(";") }, + { TOKEN_INT, slice_fullstr("5") }, + { TOKEN_LT, slice_fullstr("<") }, + { TOKEN_INT, slice_fullstr("10") }, + { TOKEN_GT, slice_fullstr(">") }, + { TOKEN_INT, slice_fullstr("5") }, + { TOKEN_SEMICOLON, slice_fullstr(";") }, + { TOKEN_IF, slice_fullstr("if") }, + { TOKEN_LPAREN, slice_fullstr("(") }, + { TOKEN_INT, slice_fullstr("5") }, + { TOKEN_LT, slice_fullstr("<") }, + { TOKEN_INT, slice_fullstr("10") }, + { TOKEN_RPAREN, slice_fullstr(")") }, + { TOKEN_LBRACE, slice_fullstr("{") }, + { TOKEN_RETURN, slice_fullstr("return") }, + { TOKEN_TRUE, slice_fullstr("true") }, + { TOKEN_SEMICOLON, slice_fullstr(";") }, + { TOKEN_RBRACE, slice_fullstr("}") }, + { TOKEN_ELSE, slice_fullstr("else") }, + { TOKEN_LBRACE, slice_fullstr("{") }, + { TOKEN_RETURN, slice_fullstr("return") }, + { TOKEN_FALSE, slice_fullstr("false") }, + { TOKEN_SEMICOLON, slice_fullstr(";") }, + { TOKEN_RBRACE, slice_fullstr("}") }, + { TOKEN_INT, slice_fullstr("10") }, + { TOKEN_EQ, slice_fullstr("==") }, + { TOKEN_INT, slice_fullstr("10") }, + { TOKEN_SEMICOLON, slice_fullstr(";") }, + { TOKEN_INT, slice_fullstr("10") }, + { TOKEN_NOTEQ, slice_fullstr("!=") }, + { TOKEN_INT, slice_fullstr("9") }, + { TOKEN_SEMICOLON, slice_fullstr(";") }, + { TOKEN_EOF, slice_fullstr("") }, + }; + size_t i = 0; + + do { + struct token token = lexer_next_token(lexer); + asserteq(token.type, expected[i].type); + asserteq(slice_cmp(&token.literal, &expected[i].literal), 0); + i++; + } while (expected[i].type != TOKEN_EOF); + + lexer_destroy(lexer); +} + +int +main(void) +{ + INIT_TESTS(); + RUN_TEST(test_next_token); +} diff --git a/src/tests/parser.c b/src/tests/parser.c new file mode 100644 index 0000000..49847ec --- /dev/null +++ b/src/tests/parser.c @@ -0,0 +1,640 @@ +#include "parser.h" +#include "tests/tests.h" + +#include "ast.h" +#include "slice.h" + +#include + +enum value_type { + VALUE_IDENT, + VALUE_INT, + VALUE_BOOL, +}; + +struct value { + enum value_type type; + union { + struct slice ident; + int64_t integer; + bool boolean; + }; +}; + +struct parser *parser; + +static void +check_parser_errors(struct parser *parser, const char *file, int line, + const char *func) +{ + if (parser->errors->len > 0) { + printf("\n"); + size_t i; + char *val; + vector_foreach (parser->errors, i, val) { + printf("parser error %lu: %s\n", i, val); + } + printf(TBLD TRED "FAIL!\n" TRST); + printf("%s:%d: %s: ", file, line, func); + printf("parser encountered errors\n"); + abort(); + } +} + +#define check_parser_errors(p) \ + check_parser_errors(p, __FILE__, __LINE__, __func__) + +static void +test_return_statement(struct statement *st) +{ + struct slice retrn_lit = slice_fullstr("return"); + struct slice st_lit = node_token_literal(st); + asserteq(st->type, STATEMENT_RETURN); + asserteq(slice_cmp(&retrn_lit, &st_lit), 0); +} + +static void +test_integer_literal(struct expression *expr, int64_t val) +{ + char buf[128]; + struct slice sval; + asserteq(expr->type, EXPRESSION_INT); + asserteq(expr->integer.value, val); + sprintf(buf, "%ld", val); + sval = slice_fullstr(buf); + asserteq(slice_cmp(&expr->token.literal, &sval), 0); +} + +static void +test_identifier(struct expression *expr, struct slice val) +{ + asserteq(expr->type, EXPRESSION_IDENT); +} + +static void +test_boolean_literal(struct expression *expr, bool val) +{ + char *str = val ? "true" : "false"; + struct slice sval = slice_fullstr(str); + asserteq(expr->type, EXPRESSION_BOOL); + asserteq(expr->boolean.value, val); + asserteq(slice_cmp(&expr->token.literal, &sval), 0); +} + +#define test_literal_expression(expr, expect) _Generic((expect), \ + int64_t: test_integer_literal, \ + struct slice: test_identifier, \ + bool: test_boolean_literal \ + )(expr, expect) + +static inline void +test_expected(struct expression *expr, struct value v) +{ + switch(v.type) { + case VALUE_IDENT: + test_literal_expression(expr, v.ident); + break; + case VALUE_INT: + test_literal_expression(expr, v.integer); + break; + case VALUE_BOOL: + test_literal_expression(expr, v.boolean); + break; + } +} + +#define VIDENT(v) \ + (struct value){ .type = VALUE_IDENT, .ident = slice_fullstr(v) } + +#define VINT(v) \ + (struct value){ .type = VALUE_INT, .integer = v } + +#define VBOOL(v) \ + (struct value){ .type = VALUE_BOOL, .boolean = v } + +static inline void +test_infix(struct infix_expression *expr, struct value lval, struct slice *op, + struct value rval) +{ + test_expected(expr->left, lval); + asserteq(slice_cmp(&expr->operator, op), 0); + test_expected(expr->right, rval); +} + +static void +test_let_statement(struct statement *st, struct slice *name) +{ + struct slice let_lit = slice_fullstr("let"); + struct slice st_lit = node_token_literal(st); + asserteq(slice_cmp(&let_lit, &st_lit), 0); + asserteq(st->type, STATEMENT_LET); + asserteq(slice_cmp(&st->let.name->value, name), 0); + asserteq(slice_cmp(&st->let.name->token.literal, name), 0); +} + +static void +test_let_statements(void) +{ + struct { + char *input; + struct slice ident; + struct value val; + } tests[] = { + { "let x = 5;", slice_fullstr("x"), VINT(5) }, + { "let y = true;", slice_fullstr("y"), VBOOL(true) }, + { "let foo = bar;", slice_fullstr("foo"), VIDENT("bar") }, + { 0 }, + }; + for (size_t i = 0; tests[i].input != NULL; i++) { + parser_reset(parser, tests[i].input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + asserteq(prog->statements->len, 1); + struct statement *st = prog->statements->values[0]; + test_let_statement(st, &tests[i].ident); + test_expected(st->let.value, tests[i].val); + node_destroy(prog); + } + +} + +static void +test_return_statements(void) +{ + struct { + char *input; + struct slice ident; + struct value val; + } tests[] = { + { "return 5;", slice_fullstr("x"), VINT(5) }, + { "return true;", slice_fullstr("y"), VBOOL(true) }, + { "return bar;", slice_fullstr("foo"), VIDENT("bar") }, + { 0 }, + }; + for (size_t i = 0; tests[i].input != NULL; i++) { + parser_reset(parser, tests[i].input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + asserteq(prog->statements->len, 1); + struct statement *st = prog->statements->values[0]; + test_return_statement(st); + test_expected(st->retrn.value, tests[i].val); + node_destroy(prog); + } +} + +static void +test_identifier_expression_statements(void) +{ + char *input = "foobar;\n"; + struct slice val = slice_fullstr("foobar"); + parser_reset(parser, input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + asserteq(prog->statements->len, 1); + struct statement *st = prog->statements->values[0]; + asserteq(st->type, STATEMENT_EXPRESSION); + test_literal_expression(st->expr.expr, val); + + node_destroy(prog); +} + +static void +test_integer_expression_statements(void) +{ + char *input = "469;\n"; + int64_t val = 469; + parser_reset(parser, input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + asserteq(prog->statements->len, 1); + struct statement *st = prog->statements->values[0]; + asserteq(st->type, STATEMENT_EXPRESSION); + test_literal_expression(st->expr.expr, val); + + node_destroy(prog); +} + +static void +test_prefix_expression_statements(void) +{ + struct { + char *input; + struct slice operator; + struct value val; + } tests[] = { + { "!5;", slice_fullstr("!"), VINT(5) }, + { "-15;", slice_fullstr("-"), VINT(15) }, + { "!foo;", slice_fullstr("!"), VIDENT("foo") }, + { "-bar;", slice_fullstr("-"), VIDENT("bar") }, + { "!true;", slice_fullstr("!"), VBOOL(true) }, + { "!false;", slice_fullstr("!"), VBOOL(false) }, + { 0 }, + }; + for (size_t i = 0; tests[i].input != NULL; i++) { + parser_reset(parser, tests[i].input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + asserteq(prog->statements->len, 1); + struct statement *st = prog->statements->values[0]; + asserteq(st->type, STATEMENT_EXPRESSION); + asserteq(st->expr.expr->type, EXPRESSION_PREFIX); + test_expected(st->expr.expr->prefix.right, tests[i].val); + node_destroy(prog); + } +} + +static void +test_infix_expression_statements(void) +{ + struct { + char *input; + struct value lval; + struct slice operator; + struct value rval; + } tests[] = { + { "5 + 5;", VINT(5), slice_fullstr("+"), VINT(5) }, + { "5 - 5;", VINT(5), slice_fullstr("-"), VINT(5) }, + { "5 * 5;", VINT(5), slice_fullstr("*"), VINT(5) }, + { "5 / 5;", VINT(5), slice_fullstr("/"), VINT(5) }, + { "5 > 5;", VINT(5), slice_fullstr(">"), VINT(5) }, + { "5 < 5;", VINT(5), slice_fullstr("<"), VINT(5) }, + { "foo + bar;", VIDENT("foo"), slice_fullstr("+"), VIDENT("bar") }, + { "foo - bar;", VIDENT("foo"), slice_fullstr("-"), VIDENT("bar") }, + { "foo * bar;", VIDENT("foo"), slice_fullstr("*"), VIDENT("bar") }, + { "foo / bar;", VIDENT("foo"), slice_fullstr("/"), VIDENT("bar") }, + { "foo > bar;", VIDENT("foo"), slice_fullstr(">"), VIDENT("bar") }, + { "foo < bar;", VIDENT("foo"), slice_fullstr("<"), VIDENT("bar") }, + { "true == true;", VBOOL(true), slice_fullstr("=="), VBOOL(true) }, + { "true != false;", VBOOL(true), slice_fullstr("!="), VBOOL(false) }, + { "false != false;", VBOOL(false), slice_fullstr("!="), VBOOL(false) }, + { 0 }, + }; + for (size_t i = 0; tests[i].input != NULL; i++) { + parser_reset(parser, tests[i].input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + asserteq(prog->statements->len, 1); + struct statement *st = prog->statements->values[0]; + asserteq(st->type, STATEMENT_EXPRESSION); + asserteq(st->expr.expr->type, EXPRESSION_INFIX); + test_infix(&st->expr.expr->infix, tests[i].lval, &tests[i].operator, + tests[i].rval); + node_destroy(prog); + } +} + +static void +test_operator_precedence(void) +{ + struct { + char *input; + char *expect; + } tests[] = { + { + "-a * b", + "((-a) * b)", + }, + { + "!-a", + "(!(-a))", + }, + { + "a + b + c", + "((a + b) + c)", + }, + { + "a + b - c", + "((a + b) - c)", + }, + { + "a * b * c", + "((a * b) * c)", + }, + { + "a * b / c", + "((a * b) / c)", + }, + { + "a + b / c", + "(a + (b / c))", + }, + { + "a + b * c + d / e - f", + "(((a + (b * c)) + (d / e)) - f)", + }, + { + "3 + 4; -5 * 5", + "(3 + 4)((-5) * 5)", + }, + { + "5 > 4 == 3 < 4", + "((5 > 4) == (3 < 4))", + }, + { + "5 < 4 != 3 > 4", + "((5 < 4) != (3 > 4))", + }, + { + "3 + 4 * 5 == 3 * 1 + 4 * 5", + "((3 + (4 * 5)) == ((3 * 1) + (4 * 5)))", + }, + { + "true", + "true", + }, + { + "false", + "false", + }, + { + "3 > 5 == false", + "((3 > 5) == false)", + }, + { + "3 < 5 == true", + "((3 < 5) == true)", + }, + { + "1 + (2 + 3) + 4", + "((1 + (2 + 3)) + 4)", + }, + { + "(5 + 5) * 2", + "((5 + 5) * 2)", + }, + { + "2 / (5 + 5)", + "(2 / (5 + 5))", + }, + { + "(5 + 5) * 2 * (5 + 5)", + "(((5 + 5) * 2) * (5 + 5))", + }, + { + "-(5 + 5)", + "(-(5 + 5))", + }, + { + "!(true == true)", + "(!(true == true))", + }, + { + "a + add(b * c) + d", + "((a + add((b * c))) + d)", + }, + { + "add(a, b, 1, 2 * 3, 4 + 5, add(6, 7 * 8))", + "add(a, b, 1, (2 * 3), (4 + 5), add(6, (7 * 8)))", + }, + { + "add(a + b + c * d / f + g)", + "add((((a + b) + ((c * d) / f)) + g))", + }, + { 0 }, + }; + for (size_t i = 0; tests[i].input != NULL; i++) { + char buf[128]; + parser_reset(parser, tests[i].input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + node_sprint(prog, buf); + asserteq(strcmp(tests[i].expect, buf), 0); + node_destroy(prog); + } +} + +static void +test_boolean_expression(void) +{ + struct { + char *input; + bool value; + } tests[] = { + { "true;", true }, + { "false;", false }, + { 0 }, + }; + for (size_t i = 0; tests[i].input != NULL; i++) { + parser_reset(parser, tests[i].input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + asserteq(prog->statements->len, 1); + struct statement *st = prog->statements->values[0]; + asserteq(st->type, STATEMENT_EXPRESSION); + test_literal_expression(st->expr.expr, tests[i].value); + node_destroy(prog); + } +} + +static void +test_if_expression(void) +{ + char *input = "if (x < y) { x }"; + struct value x = VIDENT("x"); + struct slice op = slice_fullstr("<"); + struct value y = VIDENT("y"); + parser_reset(parser, input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + asserteq(prog->statements->len, 1); + struct statement *st = prog->statements->values[0]; + asserteq(st->type, STATEMENT_EXPRESSION); + asserteq(st->expr.expr->type, EXPRESSION_IF); + struct if_expression *ifexpr = &st->expr.expr->cond; + asserteq(ifexpr->condition->type, EXPRESSION_INFIX); + test_infix(&ifexpr->condition->infix, x, &op, y); + asserteq(ifexpr->consequence->type, STATEMENT_BLOCK); + asserteq(ifexpr->consequence->block.statements->len, 1); + struct statement *ifst = ifexpr->consequence->block.statements->values[0]; + asserteq(ifst->type, STATEMENT_EXPRESSION); + test_identifier(ifst->expr.expr, x.ident); + asserteq(ifexpr->alternative, NULL); + node_destroy(prog); +} + +static void +test_if_else_expression(void) +{ + char *input = "if (x < y) { x } else { y }"; + struct value x = VIDENT("x"); + struct slice op = slice_fullstr("<"); + struct value y = VIDENT("y"); + parser_reset(parser, input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + asserteq(prog->statements->len, 1); + struct statement *st = prog->statements->values[0]; + asserteq(st->type, STATEMENT_EXPRESSION); + asserteq(st->expr.expr->type, EXPRESSION_IF); + struct if_expression *ifexpr = &st->expr.expr->cond; + asserteq(ifexpr->condition->type, EXPRESSION_INFIX); + test_infix(&ifexpr->condition->infix, x, &op, y); + asserteq(ifexpr->consequence->type, STATEMENT_BLOCK); + asserteq(ifexpr->consequence->block.statements->len, 1); + struct statement *ifst = ifexpr->consequence->block.statements->values[0]; + asserteq(ifst->type, STATEMENT_EXPRESSION); + test_identifier(ifst->expr.expr, x.ident); + assertneq(ifexpr->alternative, NULL); + asserteq(ifexpr->alternative->block.statements->len, 1); + asserteq(ifexpr->alternative->type, STATEMENT_BLOCK); + struct statement *elsest = ifexpr->alternative->block.statements->values[0]; + asserteq(elsest->type, STATEMENT_EXPRESSION); + test_identifier(elsest->expr.expr, y.ident); + node_destroy(prog); +} + +static void +test_func_literal(void) +{ + char *input = "fn(x, y) { x + y; }"; + struct value x = VIDENT("x"); + struct slice op = slice_fullstr("+"); + struct value y = VIDENT("y"); + parser_reset(parser, input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + asserteq(prog->statements->len, 1); + struct statement *st = prog->statements->values[0]; + asserteq(st->type, STATEMENT_EXPRESSION); + asserteq(st->expr.expr->type, EXPRESSION_FUNC); + struct func_literal *func = &st->expr.expr->func; + asserteq(func->parameters->len, 2); + test_identifier(func->parameters->values[0], x.ident); + test_identifier(func->parameters->values[1], y.ident); + asserteq(func->body->type, STATEMENT_BLOCK); + asserteq(func->body->block.statements->len, 1); + struct statement *fst = func->body->block.statements->values[0]; + test_infix(&fst->expr.expr->infix, x, &op, y); + node_destroy(prog); +} + +static void +test_func_parameters(void) +{ + struct { + char *input; + struct { + struct slice vals[4]; + size_t len; + } params; + } tests[] = { + { + "fn() {};", + { 0 }, + }, + { + "fn(x) {};", + { + { slice_fullstr("x") }, + 1 + }, + }, + { + "fn(x, y) {};", + { + { slice_fullstr("x"), slice_fullstr("y") }, + 2, + } + }, + { + "fn(x, y, z) {};", + { + { slice_fullstr("x"), slice_fullstr("y"), slice_fullstr("z") }, + 3, + } + }, + { 0 }, + }; + for (size_t i = 0; tests[i].input != NULL; i++) { + parser_reset(parser, tests[i].input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + asserteq(prog->statements->len, 1); + struct statement *st = prog->statements->values[0]; + asserteq(st->type, STATEMENT_EXPRESSION); + asserteq(st->expr.expr->type, EXPRESSION_FUNC); + struct func_literal *func = &st->expr.expr->func; + asserteq(func->parameters->len, tests[i].params.len); + for (size_t j = 0; j < tests[i].params.len; j++) { + struct expression *param = func->parameters->values[j]; + asserteq(slice_cmp(¶m->ident.token.literal, + &tests[i].params.vals[j]), 0); + } + node_destroy(prog); + } +} + +static void +test_call_expression(void) +{ + char *input = "add(1, x * y, add(x, y, z));"; + int64_t par1 = 1; + struct value x = VIDENT("x"); + struct value y = VIDENT("y"); + struct slice op1 = slice_fullstr("*"); + parser_reset(parser, input); + struct program *prog = parser_parse_program(parser); + check_parser_errors(parser); + assertneq(prog, NULL); + asserteq(prog->statements->len, 1); + struct statement *st = prog->statements->values[0]; + asserteq(st->type, STATEMENT_EXPRESSION); + asserteq(st->expr.expr->type, EXPRESSION_CALL); + struct call_expression *call = &st->expr.expr->call; + test_identifier(call->func, slice_fullstr("add")); + asserteq(call->arguments->len, 3); + struct expression *expr1 = call->arguments->values[0], + *expr2 = call->arguments->values[1], + *expr3 = call->arguments->values[2]; + test_literal_expression(expr1, par1); + asserteq(expr2->type, EXPRESSION_INFIX); + test_infix(&expr2->infix, x, &op1, y); + asserteq(expr3->type, EXPRESSION_CALL); + asserteq(expr3->call.arguments->len, 3); +} + +static void +init(void) +{ + parser = parser_new(); +} + +static void +cleanup(void) +{ + parser_destroy(parser); +} + +int +main(void) +{ + INIT_TESTS(); + init(); + RUN_TEST(test_let_statements); + RUN_TEST(test_return_statements); + RUN_TEST(test_identifier_expression_statements); + RUN_TEST(test_integer_expression_statements); + RUN_TEST(test_prefix_expression_statements); + RUN_TEST(test_infix_expression_statements); + RUN_TEST(test_operator_precedence); + RUN_TEST(test_boolean_expression); + RUN_TEST(test_if_expression); + RUN_TEST(test_if_else_expression); + RUN_TEST(test_func_literal); + RUN_TEST(test_func_parameters); + RUN_TEST(test_call_expression); + cleanup(); +} diff --git a/src/tests/slice.c b/src/tests/slice.c new file mode 100644 index 0000000..e2382e8 --- /dev/null +++ b/src/tests/slice.c @@ -0,0 +1,41 @@ +#include "tests/tests.h" +#include "slice.h" + +#include + +static void +test_slice_cmp(void) +{ + struct slice s1a = { + .str = "hello world", + .start = 6, + .end = 11, + }; + struct slice s1b = { + .str = "world", + .start = 0, + .end = 5, + }; + asserteq(slice_cmp(&s1a, &s1b), 0); +} + +static void +test_slice_sprint(void) +{ + struct slice slice = { + .str = "hello world", + .start = 6, + .end = 11, + }; + char buf[32]; + slice_sprint(&slice, buf); + asserteq(strcmp("world", buf), 0); +} + +int +main(void) +{ + INIT_TESTS(); + RUN_TEST(test_slice_cmp); + RUN_TEST(test_slice_sprint); +} diff --git a/src/token.c b/src/token.c new file mode 100644 index 0000000..300fa8c --- /dev/null +++ b/src/token.c @@ -0,0 +1,106 @@ +#include "token.h" + +#include +#include + +#include "hmap.h" + +static struct hmap *keywords = NULL; +static const char *keys[] = { + "fn", + "let", + "true", + "false", + "if", + "else", + "return", + NULL, +}; +enum token_type values[] = { + TOKEN_FUNC, + TOKEN_LET, + TOKEN_TRUE, + TOKEN_FALSE, + TOKEN_IF, + TOKEN_ELSE, + TOKEN_RETURN, + TOKEN_EOF, +}; + +const char *token_types[] = { + "ILLEGAL", + "EOF", + /* Identifiers/Literals */ + "identifier", + "int", + /* Operators */ + "=", + "+", + "-", + "!", + "*", + "/", + "<", + ">", + "==", + "!=", + /* Delimiters */ + ",", + ";", + "(", + ")", + "{", + "}", + /* Keywords */ + "fn", + "let", + "true", + "false", + "if", + "else", + "return", +}; + +void +token_init_keywords(void) +{ + if (keywords == NULL) { + keywords = hmap_new(); + for (size_t i = 0; keys[i] != NULL; i++) { + hmap_set(keywords, keys[i], values + i); + } + } +} + +enum token_type +token_lookup_ident(const struct slice *ident) +{ + enum token_type *t = hmap_gets(keywords, ident); + if (t) { + return *t; + } + + return TOKEN_IDENT; +} + +extern inline const char * +token_type_print(enum token_type t) +{ + return token_types[t]; +} + +char * +token_sprint(struct token *token, char *str) +{ + const char *type = token_type_print(token->type); + char slicebuf[256]; + sprintf(str, "TOKEN: type: %s, literal: %s", type, + slice_sprint(&token->literal, slicebuf)); + return str; +} + +void +token_free_keywords(void) +{ + hmap_free(keywords); +} diff --git a/src/vector.c b/src/vector.c new file mode 100644 index 0000000..cdd1977 --- /dev/null +++ b/src/vector.c @@ -0,0 +1,42 @@ +#include "vector.h" + +inline static bool +vector_grow(struct vector *vec) +{ + vec->cap *= 2; + vec->values = realloc(vec->values, sizeof(vec->values) * vec->cap); + return vec->values != NULL; +} + +struct vector * +vector_new_with_cap(size_t cap) +{ + struct vector *vec = malloc(sizeof(*vec)); + if (!vec) return NULL; + vec->values = malloc(sizeof(vec->values) * cap); + if (!vec->values) { + free(vec); + return NULL; + } + vec->cap = cap; + vec->len = 0; + + return vec; +} + +ssize_t +vector_push(struct vector *vec, void *val) +{ + if (vec->len >= vec->cap && !vector_grow(vec)) return -1; + vec->values[vec->len] = val; + vec->len++; + + return vec->len - 1; +} + +void +vector_free(struct vector *vec) +{ + free(vec->values); + free(vec); +} -- cgit v1.2.3