From c0cd4e5f199e8567ec3b5e216fbee27837d21bea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yaroslav=20de=20la=20Pe=C3=B1a=20Smirnov?= Date: Thu, 20 Jan 2022 02:34:32 +0300 Subject: init --- include/ast.h | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/eval.h | 20 ++++++ include/hmap.h | 53 ++++++++++++++++ include/lexer.h | 25 ++++++++ include/object.h | 81 ++++++++++++++++++++++++ include/parser.h | 33 ++++++++++ include/repl.h | 8 +++ include/slice.h | 27 ++++++++ include/tests/tests.h | 45 +++++++++++++ include/token.h | 55 ++++++++++++++++ include/vector.h | 27 ++++++++ 11 files changed, 544 insertions(+) create mode 100644 include/ast.h create mode 100644 include/eval.h create mode 100644 include/hmap.h create mode 100644 include/lexer.h create mode 100644 include/object.h create mode 100644 include/parser.h create mode 100644 include/repl.h create mode 100644 include/slice.h create mode 100644 include/tests/tests.h create mode 100644 include/token.h create mode 100644 include/vector.h (limited to 'include') diff --git a/include/ast.h b/include/ast.h new file mode 100644 index 0000000..eb3e1fc --- /dev/null +++ b/include/ast.h @@ -0,0 +1,170 @@ +#ifndef CMONKEY_AST_H +#define CMONKEY_AST_H + +#include "token.h" +#include "vector.h" + +enum node_type { + NODE_PROGRAM, + NODE_STATEMENT, + NODE_EXPRESSION, +}; + +enum statement_type { + STATEMENT_LET, + STATEMENT_RETURN, + STATEMENT_EXPRESSION, + STATEMENT_BLOCK, +}; + +enum expression_type { + EXPRESSION_IDENT, + EXPRESSION_INT, + EXPRESSION_BOOL, + EXPRESSION_PREFIX, + EXPRESSION_INFIX, + EXPRESSION_IF, + EXPRESSION_FUNC, + EXPRESSION_CALL, +}; + +struct identifier { + struct token token; + struct slice value; +}; + +struct prefix_expression { + struct token token; + struct slice operator; + struct expression *right; +}; + +struct infix_expression { + struct token token; + struct slice operator; + struct expression *left; + struct expression *right; +}; + +struct integer_expression { + struct token 
token; + int64_t value; +}; + +struct boolean_expression { + struct token token; + bool value; +}; + +struct if_expression { + struct token token; + struct expression *condition; + struct statement *consequence; + struct statement *alternative; +}; + +struct func_literal { + struct token token; + struct vector *parameters; // expression type ident + struct statement *body; +}; + +struct call_expression { + struct token token; + struct expression *func; + struct vector *arguments; // expressions +}; + +struct expression { + enum expression_type type; + union { + struct token token; // Common initial sequence + struct identifier ident; + struct integer_expression integer; + struct boolean_expression boolean; + struct prefix_expression prefix; + struct infix_expression infix; + struct if_expression cond; + struct func_literal func; + struct call_expression call; + }; +}; + +struct expression_statement { + struct token token; + struct expression *expr; +}; + +struct return_statement { + struct token token; + struct expression *value; +}; + +struct let_statement { + struct token token; + struct identifier *name; + struct expression *value; +}; + +struct block_statement { + struct token token; + struct vector *statements; +}; + +struct program { + struct vector *statements; +}; + +struct statement { + enum statement_type type; + union { + struct token token; // Common initial sequence + struct let_statement let; + struct return_statement retrn; + struct expression_statement expr; + struct block_statement block; + }; +}; + +struct slice expression_token_literal(struct expression *); +struct slice statement_token_literal(struct statement *); +struct slice program_token_literal(struct program *); + +#define node_token_literal(n) _Generic((n), \ + struct program *: program_token_literal, \ + struct statement *: statement_token_literal, \ + struct expression *: expression_token_literal \ + )(n) + +char *expression_sprint(struct expression *, char *str); +char 
/*
 * Duplication helpers for AST nodes and for vectors of AST nodes.
 *
 * NOTE(review): the definitions are not part of this header. The returned
 * copies are presumably owned by the caller and released with the matching
 * *_destroy function -- confirm against the implementation.
 */
struct vector *expression_vector_dup(const struct vector *);
struct vector *statement_vector_dup(const struct vector *);

struct expression *expression_dup(const struct expression *);
struct statement *statement_dup(const struct statement *);

/*
 * Type-generic front end: dispatches to expression_dup() or statement_dup()
 * based on the static type of `n` and calls it with `n`.
 *
 * _Generic selects on the exact (unqualified-pointer) type, so pointer-to-const
 * arguments need their own associations. The underlying functions already take
 * const pointers, so both spellings map to the same function.
 */
#define node_dup(n) _Generic((n), \
		struct expression *: expression_dup, \
		const struct expression *: expression_dup, \
		struct statement *: statement_dup, \
		const struct statement *: statement_dup \
	)(n)
/*
 * Create a lexer. Takes no arguments.
 *
 * Declared with an explicit (void) parameter list: an empty `()` in C11 is an
 * old-style declaration without a prototype, so the compiler would accept
 * (and not check) calls that pass arguments.
 *
 * NOTE(review): presumably returns a heap-allocated lexer that must be
 * released with lexer_destroy(), and that lexer_reset() supplies the input --
 * confirm against the implementation.
 */
struct lexer *lexer_new(void);
/*
 * Print an object into the caller-supplied buffer `str`.
 *
 * NOTE(review): no buffer length is passed, so the caller presumably has to
 * guarantee that `str` is large enough; the meaning of the returned pointer is
 * defined by the implementation, which is not visible here -- confirm.
 */
char *object_sprint(struct object *, char *str);

/*
 * Printable name for an object type.
 *
 * Intentionally declared without `inline`: an inline declaration with
 * external linkage requires a definition in the same translation unit
 * (C11 6.7.4p7), which a header-only declaration cannot provide for the
 * files that merely include it.
 */
const char *object_type_print(enum object_type);

/*
 * Object constructors, one per payload kind.
 *
 * NOTE(review): ownership of `msg`, `val` and of the returned object (see
 * `refcount` in struct object) is decided by the implementation -- confirm
 * before relying on it.
 */
struct object *object_new_int(int64_t val);
struct object *object_new_error(char *msg);
struct object *object_new_return(struct object *val);
struct object *object_new_func(struct expression *);

/*
 * Type-generic constructor: picks the object_new_* function from the static
 * type of `v` and calls it with `v`.
 *
 * `long` and `long long` are both listed instead of `int64_t`: int64_t is a
 * typedef for one of them, and _Generic does no integer conversion, so
 * listing only int64_t rejects the other 64-bit-capable type. All integer
 * spellings convert to the int64_t parameter of object_new_int().
 */
#define object_new(v) _Generic((v), \
		int: object_new_int, \
		long: object_new_int, \
		long long: object_new_int, \
		char *: object_new_error, \
		struct object *: object_new_return, \
		struct expression *: object_new_func \
	)(v)
/*
 * Terminal colour escapes used by the test macros below.
 * Define NOCOLOR before including this header to get plain output.
 */
#ifndef NOCOLOR
#define TBLD "\033[1m"
#define TRED "\033[31m"
#define TGRN "\033[32m"
#define TBLU "\033[34m"
#define TRST "\033[0m"
#else
#define TBLD ""
#define TRED ""
#define TGRN ""
#define TBLU ""
#define TRST ""
#endif

/*
 * Print the test name, run `test_func()`, then print "OK!".
 * The name is flushed before the call so it is visible even if the test
 * aborts. Wrapped in do/while(0) so the whole sequence behaves as a single
 * statement (e.g. under an unbraced `if`).
 */
#define RUN_TEST(test_func) \
	do { \
		printf("%s:\t", #test_func); \
		fflush(stdout); \
		test_func(); \
		printf(TGRN "OK!\n" TRST); \
	} while (0)

/* Print a banner naming the current source file. */
#define INIT_TESTS() \
	printf(TBLD "running %s tests\n" TRST, __FILE__)

/*
 * Report a failure at the current file/line/function and abort().
 * `reason` is printed through "%s" rather than used as the format string:
 * asserteq/assertneq build it from stringified operands, which may contain
 * '%' characters.
 */
#define FAIL_TEST(reason) \
	do { \
		printf(TBLD TRED "FAIL!\n" TRST); \
		printf("%s:%d: %s: ", __FILE__, __LINE__, __func__); \
		printf("%s", (reason)); \
		abort(); \
	} while (0)

/*
 * Fail the current test unless `a == b`.
 * Operands are parenthesized so expressions with lower-precedence operators
 * (e.g. `x & 1`) compare as written. Each operand is evaluated once.
 * Kept as a plain `if` block so existing call sites with or without a
 * trailing semicolon keep compiling; brace it when used under if/else.
 */
#define asserteq(a, b) \
	if ((a) != (b)) { \
		FAIL_TEST("assertion " TBLD TBLU #a " == " #b TRST " failed\n"); \
	}

/* Fail the current test unless `a != b`. Same conventions as asserteq. */
#define assertneq(a, b) \
	if ((a) == (b)) { \
		FAIL_TEST("assertion " TBLD TBLU #a " != " #b TRST " failed\n"); \
	}
/*
 * Iterate over a vector: for each index `i` in [0, vec->len), assign
 * vec->values[i] to `val` and run the loop body.
 *
 *   vec  pointer to a struct vector (any expression, e.g. `&v`)
 *   i    an lvalue usable as an index (e.g. a size_t variable)
 *   val  an lvalue that can be assigned from a void *
 *
 * The element is loaded only after the bounds check has passed, so an empty
 * vector never touches `values`, and nothing past the last element is read
 * when the loop finishes. After a complete run `i == vec->len` and `val`
 * holds the last element visited (it is left untouched for an empty vector).
 * `vec` is evaluated more than once; avoid side effects in it.
 */
#define vector_foreach(vec, i, val) \
	for ((i) = 0; \
	     (i) < (vec)->len && ((val) = (vec)->values[(i)], 1); \
	     (i)++)