From 5d66c96a190a396a1535c89bed4e33c2a005fe8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yaroslav=20de=20la=20Pe=C3=B1a=20Smirnov?=
Date: Thu, 24 Mar 2022 01:04:02 +0300
Subject: Initial commit

Basically it works, just needs some polishing and maybe a couple of features
that I could actually use. Also probably better docs. Not sure if it will be
of use to anybody besides me.
---
 include/ast.h         | 221 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/hmap.h        |  72 ++++++++++++++++
 include/lexer.h       |  33 ++++++++
 include/object.h      | 171 ++++++++++++++++++++++++++++++++++++++
 include/parser.h      |  54 ++++++++++++
 include/roscha.h      |  54 ++++++++++++
 include/slice.h       |  37 +++++++++
 include/tests/tests.h |  45 ++++++++++
 include/token.h       |  84 +++++++++++++++++++
 include/vector.h      |  29 +++++++
 10 files changed, 800 insertions(+)
 create mode 100644 include/ast.h
 create mode 100644 include/hmap.h
 create mode 100644 include/lexer.h
 create mode 100644 include/object.h
 create mode 100644 include/parser.h
 create mode 100644 include/roscha.h
 create mode 100644 include/slice.h
 create mode 100644 include/tests/tests.h
 create mode 100644 include/token.h
 create mode 100644 include/vector.h

(limited to 'include')

diff --git a/include/ast.h b/include/ast.h
new file mode 100644
index 0000000..fe722bf
--- /dev/null
+++ b/include/ast.h
@@ -0,0 +1,221 @@
+#ifndef ROSCHA_AST_H
+#define ROSCHA_AST_H
+
+#include "hmap.h"
+#include "token.h"
+#include "vector.h"
+
+#include "sds/sds.h"
+
+/* AST node structures */
+
+enum block_type {
+	BLOCK_CONTENT, /* plain text content that needs no evaluation */
+	BLOCK_VARIABLE, /* {{ ... }} */
+	BLOCK_TAG, /* {% ... %} */
+};
+
+enum tag_type {
+	TAG_IF, /* start of if, elif, else */
+	TAG_FOR, /* for loop */
+	TAG_BLOCK, /* {% block ... %} */
+	TAG_EXTENDS, /* {% extends ... %} */
+	/* keyword-only tags */
+	TAG_BREAK,
+	TAG_CLOSE,
+};
+
+enum expression_type {
+	EXPRESSION_IDENT,
+	EXPRESSION_INT,
+	EXPRESSION_BOOL,
+	EXPRESSION_STRING,
+	EXPRESSION_PREFIX,
+	EXPRESSION_INFIX,
+	EXPRESSION_MAPKEY, /* map.k */
+	EXPRESSION_INDEX, /* arr[i] */
+};
+
+struct ident {
+	struct token token;
+};
+
+struct integer {
+	struct token token;
+	int64_t value;
+};
+
+struct boolean {
+	struct token token;
+	bool value;
+};
+
+struct string {
+	struct token token;
+	struct slice value;
+};
+
+struct prefix {
+	struct token token;
+	struct slice operator;
+	struct expression *right;
+};
+
+struct infix {
+	struct token token;
+	struct slice operator;
+	struct expression *left;
+	struct expression *right;
+};
+
+/* Either a map key (map.k) or an array/vector index (arr[i]) */
+struct indexkey {
+	struct token token;
+	struct expression *left;
+	struct expression *key;
+};
+
+struct expression {
+	enum expression_type type; /* selects the active union member */
+	union {
+		struct token token;
+		struct ident ident;
+		struct integer integer;
+		struct boolean boolean;
+		struct string string;
+		struct prefix prefix;
+		struct infix infix;
+		struct indexkey indexkey;
+	};
+};
+
+/* if, elif, else branch */
+struct branch {
+	struct token token;
+	/* if condition is null it means it is an else branch */
+	struct expression *condition;
+	struct vector *subblocks;
+	/* elif or else */
+	struct branch *next;
+};
+
+/* start of if, elif, else */
+struct cond {
+	struct token token;
+	struct branch *root;
+};
+
+/* for loop */
+struct loop {
+	struct token token;
+	struct ident item;
+	struct expression *seq;
+	struct vector *subblocks;
+};
+
+/* template block {% block ... %} */
+struct tblock {
+	struct token token;
+	struct ident name;
+	struct vector *subblocks;
+};
+
+/* {% extends ... %} */
+struct parent {
+	struct token token;
+	struct string *name;
+};
+
+/* {% ... %} blocks */
+struct tag {
+	union{
+		struct token token;
+		struct cond cond;
+		struct loop loop;
+		struct tblock tblock;
+		struct parent parent;
+	};
+	enum tag_type type; /* selects the active union member */
+};
+
+/* {{ ... }} blocks */
+struct variable {
+	struct token token;
+	struct expression *expression;
+};
+
+/* blocks with content that doesn't need evaluation */
+struct content {
+	struct token token;
+};
+
+/*
+ * The template is divided into blocks or chunks which are either plain text
+ * content, {% %} tags or {{ }} variables. Not to be confused with
+ * {% block ... %} tags.
+ */
+struct block {
+	union {
+		struct token token;
+		struct content content;
+		struct tag tag;
+		struct variable variable;
+	};
+	enum block_type type; /* selects the active union member */
+};
+
+/* Root of the AST */
+struct template {
+	/*
+	 * The name of the template, might be a file name; used to identify the
+	 * template in error messages. Will be free'd by template_destroy function
+	 * so a copy should be made if it is needed after destroying the AST.
+	 */
+	char *name;
+	/*
+	 * The source text of the template before parsing. Should be free'd manually
+	 * by the caller of roscha_env_render.
+	 */
+	char *source;
+	/*
+	 * struct that holds references to {% block ... %} tags, for easier/faster
+	 * access to said blocks.
+	 */
+	struct hmap *tblocks;
+	/*
+	 * Holds a child template if there is one. Populated during evaluation,
+	 * NULL'ed after evaluation, since a parent template can have different
+	 * children depending on the context.
+	 */
+	struct template *child;
+	/* vector of blocks */
+	struct vector *blocks;
+};
+
+/* Concatenate to an SDS string a human friendly representation of the node */
+
+sds expression_string(struct expression *, sds str);
+
+sds tag_string(struct tag *, sds str);
+
+sds variable_string(struct variable *, sds str);
+
+sds content_string(struct content *, sds str);
+
+sds block_string(struct block *, sds str);
+
+sds template_string(struct template *, sds str);
+
+/* Free all memory related with the objects */
+
+void branch_destroy(struct branch *);
+
+void tag_destroy(struct tag *);
+
+void expression_destroy(struct expression *);
+
+void block_destroy(struct block *);
+
+void template_destroy(struct template *);
+
+#endif
diff --git a/include/hmap.h b/include/hmap.h
new file mode 100644
index 0000000..fa24982
--- /dev/null
+++ b/include/hmap.h
@@ -0,0 +1,72 @@
+#ifndef ROSCHA_HASHMAP_H
+#define ROSCHA_HASHMAP_H
+#include <stdbool.h> /* bool (hmap_iter_next) */
+#include <stddef.h> /* size_t */
+
+#include "slice.h"
+
+#ifndef HASHMAP_CAP
+#define HASHMAP_CAP 32
+#endif
+
+typedef void (hmap_cb)(const struct slice *key, void *value);
+
+struct hmap {
+	struct hnode **buckets;
+	size_t cap;
+	size_t size;
+};
+
+struct hmap_iter;
+
+/* allocate a new hmap */
+struct hmap *hmap_new_with_cap(size_t cap);
+
+#define hmap_new() hmap_new_with_cap(HASHMAP_CAP)
+
+/*
+ * Inserts a key-value pair into the map. Returns NULL if map did not have key,
+ * old value if it did.
+ */
+void *hmap_sets(struct hmap *hm, struct slice key, void *value);
+
+/* Same as hmap_sets but pass a C string instead */
+#define hmap_set(h, k, v) hmap_sets(h, slice_whole(k), v)
+
+/* Returns a pointer to the value corresponding to the key. */
+void *hmap_gets(struct hmap *hm, const struct slice *key);
+
+/* Same as hmap_gets but pass a C string instead */
+void *hmap_get(struct hmap *hm, const char *key);
+
+/*
+ * Removes a key from the map, returning the value at the key if the key was
+ * previously in the map.
+ */
+void *hmap_removes(struct hmap *hm, const struct slice *key);
+
+/* Same as hmap_removes but pass a C string instead */
+void *hmap_remove(struct hmap *hm, const char *key);
+
+/* Iterate over keys in the hmap */
+void hmap_walk(struct hmap *hm, hmap_cb cb);
+
+/* Allocate a new hmap iterator */
+struct hmap_iter *hmap_iter_new(struct hmap *);
+
+/* Get the next key, value */
+bool hmap_iter_next(struct hmap_iter *iter, const struct slice **key,
+		void **value);
+
+#define hmap_iter_foreach(it, k, v) while (hmap_iter_next(it, k, v))
+
+/* Free a hmap iterator */
+void hmap_iter_free(struct hmap_iter *iter);
+
+/* free hmap related memory calling a function before freeing each node */
+void hmap_destroy(struct hmap *hm, hmap_cb cb);
+
+/* free hmap related memory */
+void hmap_free(struct hmap *hm);
+
+#endif
diff --git a/include/lexer.h b/include/lexer.h
new file mode 100644
index 0000000..8491c5a
--- /dev/null
+++ b/include/lexer.h
@@ -0,0 +1,33 @@
+#ifndef ROSCHA_LEXER_H
+#define ROSCHA_LEXER_H
+
+#include "slice.h"
+#include "token.h"
+
+#include <stdbool.h> /* bool (struct lexer.in_content) */
+#include <stddef.h> /* size_t */
+
+/* The lexer */
+struct lexer {
+	/* Source input */
+	const char *input;
+	/* Length of input */
+	size_t len;
+	/* The current slice of the input string that will be tokenized */
+	struct slice word;
+	/* The current character belongs to content and should not be tokenized */
+	bool in_content;
+	size_t line;
+	size_t column;
+};
+
+/* Allocate a new lexer with input as the source */
+struct lexer *lexer_new(const char *input);
+
+/* Get the next token from the lexer */
+struct token lexer_next_token(struct lexer *);
+
+/* Free all memory related to the lexer */
+void lexer_destroy(struct lexer *);
+
+#endif
diff --git a/include/object.h b/include/object.h
new file mode 100644
index 0000000..157ac0a
--- /dev/null
+++ b/include/object.h
@@ -0,0 +1,171 @@
+#ifndef ROSCHA_OBJECT_H
+#define ROSCHA_OBJECT_H
+
+#include "hmap.h"
+#include "slice.h"
+#include "vector.h"
+
+#include "sds/sds.h"
+
+#include <stdbool.h> /* bool */
+#include <stdint.h> /* int64_t */
+
+/* Types of roscha objects */
+enum roscha_type {
+	/* Only used internally; a variable that hasn't been set or defined. Does
+	 * not have a corresponding field in the union.
+	 */
+	ROSCHA_NULL,
+	/* an integer number. */
+	ROSCHA_INT,
+	/* Only used internally; a boolean value. */
+	ROSCHA_BOOL,
+	/* A text string */
+	ROSCHA_STRING,
+	/* A slice of a string; basically functions the same as a string */
+	ROSCHA_SLICE,
+	/* A vector of roscha objects */
+	ROSCHA_VECTOR,
+	/* A hashmap of roscha objects */
+	ROSCHA_HMAP,
+};
+
+/* A reference counted object for use in the environment */
+struct roscha_object {
+	enum roscha_type type;
+	size_t refcount;
+	union {
+		/* booleans are only used internally */
+		bool boolean;
+		/* integer numbers */
+		int64_t integer;
+		/* A dynamic string using the sds library */
+		sds string;
+		/* String slice */
+		struct slice slice;
+		/* vector of roscha_objects */
+		struct vector *vector;
+		/* hashmap of roscha_objects */
+		struct hmap *hmap;
+	};
+};
+
+/* Concatenate the textual representation of the object to an sds string */
+sds roscha_object_string(const struct roscha_object *, sds str);
+
+/* Return the textual representation of the type (defined out of line) */
+const char *roscha_type_print(enum roscha_type);
+
+/* Create a new roscha object based on its type */
+struct roscha_object *roscha_object_new_int(int64_t val);
+struct roscha_object *roscha_object_new_string(sds str);
+struct roscha_object *roscha_object_new_slice(struct slice);
+struct roscha_object *roscha_object_new_vector(struct vector *);
+struct roscha_object *roscha_object_new_hmap(struct hmap *);
+
+#define roscha_object_new(v) _Generic((v), \
+		int: roscha_object_new_int, \
+		int64_t: roscha_object_new_int, \
+		sds: roscha_object_new_string, \
+		struct slice: roscha_object_new_slice, \
+		struct vector *: roscha_object_new_vector, \
+		struct hmap *: roscha_object_new_hmap \
+		)(v)
+
+/* Increment reference count of object */
+void roscha_object_ref(struct roscha_object *);
+
+/* Decrement reference count of object */
+void roscha_object_unref(struct roscha_object *);
+
+/*
+ * Helper macro to create a roscha object wrapper and push to the vector in one
+ * line.
+ */
+#define roscha_vector_push_new(vec, val) \
+	vector_push((vec)->vector, roscha_object_new(val))
+
+/*
+ * Helper function to push a value to a reference counted vector; increments the
+ * count after adding the value to it.
+ */
+void roscha_vector_push(struct roscha_object *vec, struct roscha_object *val);
+
+/*
+ * Removes and returns the last value from a reference counted vector; doesn't
+ * decrement the reference count since the value is returned.
+ */
+struct roscha_object *roscha_vector_pop(struct roscha_object *vec);
+
+/*
+ * Helper macro to create a roscha object wrapper and insert it to the hmap in
+ * one line.
+ */
+#define roscha_hmap_set_new(h, k, v) hmap_set((h)->hmap, k, roscha_object_new(v))
+
+/*
+ * Helper function to add a value to reference counted hmap; increments the
+ * count after adding the value; returns the old value if it was present in the
+ * hmap.
+ */
+struct roscha_object *roscha_hmap_sets(struct roscha_object *hmap,
+		struct slice key, struct roscha_object *value);
+
+/* Same as roscha_hmap_sets but use a C string instead */
+struct roscha_object *roscha_hmap_setstr(struct roscha_object *hmap,
+		const char *key, struct roscha_object *value);
+
+#define roscha_hmap_set(h, k, v) _Generic((k), \
+		char *: roscha_hmap_setstr, \
+		struct slice: roscha_hmap_sets \
+		)(h, k, v)
+
+/*
+ * Get a value from a reference counted hmap; the value's reference count is not
+ * incremented, should be incremented by the receiver if needed.
+ */
+struct roscha_object *roscha_hmap_gets(struct roscha_object *hmap,
+		const struct slice *key);
+
+/* Same as roscha_hmap_gets but use a C string instead */
+struct roscha_object *roscha_hmap_getstr(struct roscha_object *hmap,
+		const char *key);
+
+#define roscha_hmap_get(h, k) _Generic((k), \
+		char *: roscha_hmap_getstr, \
+		struct slice *: roscha_hmap_gets \
+		)(h, k)
+
+/*
+ * Remove a value from a reference counted hmap; the value's reference count is
+ * not decremented, since the value is returned.
+ */
+struct roscha_object *roscha_hmap_pops(struct roscha_object *hmap,
+		const struct slice *key);
+
+/* Same as roscha_hmap_pops but use a C string instead */
+struct roscha_object *roscha_hmap_popstr(struct roscha_object *hmap,
+		const char *key);
+
+#define roscha_hmap_pop(h, k) _Generic((k), \
+		char *: roscha_hmap_popstr, \
+		struct slice *: roscha_hmap_pops \
+		)(h, k)
+
+/*
+ * Remove a value from a reference counted hmap; the value's reference count is
+ * decremented.
+ */
+void roscha_hmap_unsets(struct roscha_object *hmap,
+		const struct slice *key);
+
+/* Same as roscha_hmap_unsets but use a C string instead */
+void roscha_hmap_unsetstr(struct roscha_object *hmap,
+		const char *key);
+
+#define roscha_hmap_unset(h, k) _Generic((k), \
+		char *: roscha_hmap_unsetstr, \
+		struct slice *: roscha_hmap_unsets \
+		)(h, k)
+
+#endif
diff --git a/include/parser.h b/include/parser.h
new file mode 100644
index 0000000..bdf052f
--- /dev/null
+++ b/include/parser.h
@@ -0,0 +1,54 @@
+#ifndef ROSCHA_PARSER_H
+#define ROSCHA_PARSER_H
+
+#include "ast.h"
+#include "hmap.h"
+#include "lexer.h"
+#include "token.h"
+#include "vector.h"
+#include "sds/sds.h"
+
+struct parser {
+	/* The name of the template; transferred to resulting template AST */
+	char *name;
+	/* The lexer that tokenizes our input */
+	struct lexer *lexer;
+	/* Current token */
+	struct token cur_token;
+	/* Next token */
+	struct token peek_token;
+	/*
+	 * Temporary field that holds {% block ... %} tags, for easier/faster
+	 * access to said blocks without having to traverse all the AST, in case the
+	 * template is a child template. This hashmap will be transferred to the
+	 * resulting AST upon finishing parsing.
+	 */
+	struct hmap *tblocks;
+	/* vector of sds */
+	struct vector *errors;
+};
+
+typedef struct expression *(*prefix_parse_f)(struct parser *);
+typedef struct expression *(*infix_parse_f)(struct parser *, struct expression *);
+
+/* Allocate a new parser */
+struct parser *parser_new(char *name, char *input);
+
+/* Parse template into an AST */
+struct template *parser_parse_template(struct parser *);
+
+/* Free all memory associated with the parser */
+void parser_destroy(struct parser *);
+
+/*
+ * Initialize variables needed for parsing; may be used by several parsers.
+ */
+void parser_init(void);
+
+/*
+ * Free all static memory related to parsing; called when parsing/evaluation is
+ * no longer needed
+ */
+void parser_deinit(void);
+
+#endif
diff --git a/include/roscha.h b/include/roscha.h
new file mode 100644
index 0000000..82a2062
--- /dev/null
+++ b/include/roscha.h
@@ -0,0 +1,54 @@
+#ifndef ROSCHA_H
+#define ROSCHA_H
+
+#include "object.h"
+
+/* The environment for evaluating templates */
+struct roscha_env {
+	/* Template variables; reference counted hmap of roscha objects */
+	struct roscha_object *vars;
+	/* vector of sds with error messages */
+	struct vector *errors;
+	/* internal */
+	struct roscha_ *internal;
+};
+
+/*
+ * Initialize variables needed for parsing; may be used by several roscha
+ * parsers.
+ */
+void roscha_init(void);
+
+/*
+ * Free all static memory related to roscha; called when parsing/evaluation is
+ * no longer needed
+ */
+void roscha_deinit(void);
+
+/* Allocate a new environment */
+struct roscha_env *roscha_env_new(void);
+
+/*
+ * Parse and add a template to the environment. Returns false upon encountering
+ * a parsing error.
+ */
+bool roscha_env_add_template(struct roscha_env *, char *name, char *body);
+
+/*
+ * Load and parse templates from dir (non-recursively). All non-dir files are
+ * read and parsed. Returns false if an error occurred.
+ */
+bool roscha_env_load_dir(struct roscha_env *, const char *path);
+
+/* Render/evaluate the template */
+sds roscha_env_render(struct roscha_env *, const char *name);
+
+struct vector *roscha_env_check_errors(struct roscha_env *env);
+
+/*
+ * Free all memory associated with the environment, including parsed templates,
+ * and reducing reference counts of objects.
+ */
+void roscha_env_destroy(struct roscha_env *);
+
+#endif
diff --git a/include/slice.h b/include/slice.h
new file mode 100644
index 0000000..add82d5
--- /dev/null
+++ b/include/slice.h
@@ -0,0 +1,37 @@
+#ifndef CMONKEY_SLICE_H
+#define CMONKEY_SLICE_H
+
+#include "sds/sds.h"
+
+#include <stddef.h> /* size_t */
+#include <string.h> /* strlen (slice_whole) */
+
+/* A slice of a C string */
+struct slice {
+	const char *str;
+	size_t start;
+	size_t end;
+};
+
+/* Create a new slice from an existing string indicating its bounds */
+struct slice slice_new(const char *str, size_t start, size_t end);
+
+/* Create a new slice spanning a whole C string; evaluates s twice */
+#define slice_whole(s) (struct slice){ (s), 0, strlen(s), }
+
+/* Set a slice to a new string and bounds */
+void slice_set(struct slice *, const char *str, size_t start, size_t end);
+
+/* Get the length of the slice */
+size_t slice_len(const struct slice *);
+
+/* Returns 0 if equal, 1 if a > b, -1 if a < b */
+int slice_cmp(const struct slice *restrict a, const struct slice *restrict b);
+
+/* Copy the slice from src to dst; dst should already be allocated */
+void slice_cpy(struct slice *dst, const struct slice *src);
+
+/* Concatenate the slice to an SDS string */
+sds slice_string(const struct slice *, sds str);
+
+#endif
diff --git a/include/tests/tests.h b/include/tests/tests.h
new file mode 100644
index 0000000..8c89fcc
--- /dev/null
+++ b/include/tests/tests.h
@@ -0,0 +1,45 @@
+#ifndef TESTS_H
+#define TESTS_H
+#include <stdio.h> /* printf, fflush */
+#include <stdlib.h> /* abort */
+
+#ifndef NOCOLOR
+#define TBLD "\033[1m"
+#define TRED "\033[31m"
+#define TGRN "\033[32m"
+#define TBLU "\033[34m"
+#define TRST "\033[0m"
+#else
+#define TBLD ""
+#define TRED ""
+#define TGRN ""
+#define TBLU ""
+#define TRST ""
+#endif
+
+#define RUN_TEST(test_func) do { /* single statement; needs a trailing ';' */ \
+	printf("%s:\t", #test_func); \
+	fflush(stdout); \
+	test_func(); \
+	printf(TGRN "OK!\n" TRST); } while (0)
+
+#define INIT_TESTS() \
+	printf(TBLD "running %s tests\n" TRST, __FILE__)
+
+#define FAIL_TEST(reason) do { /* reason is printed verbatim, never as a format */ \
+	printf(TBLD TRED "FAIL!\n" TRST); \
+	printf("%s:%d: %s: ", __FILE__, __LINE__, __func__); \
+	printf("%s", reason); \
+	abort(); } while (0)
+
+#define asserteq(a, b) do { /* abort with a message unless a == b */ \
+	if ((a) != (b)) { \
+		FAIL_TEST("assertion " TBLD TBLU #a " == " #b TRST " failed\n"); \
+	} } while (0)
+
+#define assertneq(a, b) do { /* abort with a message unless a != b */ \
+	if ((a) == (b)) { \
+		FAIL_TEST("assertion " TBLD TBLU #a " != " #b TRST " failed\n"); \
+	} } while (0)
+
+#endif
diff --git a/include/token.h b/include/token.h
new file mode 100644
index 0000000..edff1d2
--- /dev/null
+++ b/include/token.h
@@ -0,0 +1,84 @@
+#ifndef ROSCHA_TOKEN_H
+#define ROSCHA_TOKEN_H
+
+#include "slice.h"
+
+#include <stddef.h> /* size_t */
+
+enum token_type {
+	TOKEN_ILLEGAL,
+	TOKEN_EOF,
+	/* Identifiers/Literals */
+	TOKEN_IDENT,
+	TOKEN_INT,
+	TOKEN_STRING,
+	/* Operators */
+	TOKEN_ASSIGN,
+	TOKEN_PLUS,
+	TOKEN_MINUS,
+	TOKEN_BANG,
+	TOKEN_ASTERISK,
+	TOKEN_SLASH,
+	TOKEN_LT,
+	TOKEN_GT,
+	TOKEN_LTE,
+	TOKEN_GTE,
+	TOKEN_EQ,
+	TOKEN_NOTEQ,
+	/* Keyword-like operators */
+	TOKEN_AND,
+	TOKEN_OR,
+	TOKEN_NOT,
+	/* Delimiters */
+	TOKEN_DOT,
+	TOKEN_COMMA,
+	TOKEN_LPAREN,
+	TOKEN_RPAREN,
+	TOKEN_LBRACE,
+	TOKEN_RBRACE,
+	TOKEN_LBRACKET,
+	TOKEN_RBRACKET,
+	TOKEN_POUND,
+	TOKEN_PERCENT,
+	/* Keywords */
+	TOKEN_FOR,
+	TOKEN_IN,
+	TOKEN_BREAK,
+	TOKEN_ENDFOR,
+	TOKEN_TRUE,
+	TOKEN_FALSE,
+	TOKEN_IF,
+	TOKEN_ELIF,
+	TOKEN_ELSE,
+	TOKEN_ENDIF,
+	TOKEN_EXTENDS,
+	TOKEN_BLOCK,
+	TOKEN_ENDBLOCK,
+	/* The document content */
+	TOKEN_CONTENT,
+};
+
+/* A token in our template */
+struct token {
+	enum token_type type;
+	struct slice literal;
+	size_t line;
+	size_t column;
+};
+
+/* Initialize our keywords hashmap */
+void token_init_keywords(void);
+
+/* Get the token type for a keyword, if it is a registered keyword. */
+enum token_type token_lookup_ident(const struct slice *ident);
+
+/* Return a C string with the token type name (defined out of line) */
+const char *token_type_print(enum token_type);
+
+/* Concatenate this token to a sds string */
+sds token_string(struct token *, sds str);
+
+/* Free memory allocated by the keywords hashmap */
+void token_free_keywords(void);
+
+#endif
diff --git a/include/vector.h b/include/vector.h
new file mode 100644
index 0000000..fc0eb4c
--- /dev/null
+++ b/include/vector.h
@@ -0,0 +1,29 @@
+#ifndef VECTOR_H
+#define VECTOR_H
+
+#include <stdbool.h> /* NOTE(review): header name lost in extraction; confirm */
+#include <stddef.h> /* size_t */
+#include <sys/types.h> /* ssize_t (POSIX) */
+
+#define VEC_CAP 32
+
+struct vector {
+	size_t cap;
+	size_t len;
+	void **values;
+};
+
+struct vector *vector_new_with_cap(size_t cap);
+
+#define vector_new() vector_new_with_cap(VEC_CAP)
+
+ssize_t vector_push(struct vector *, void *val);
+
+void *vector_pop(struct vector *);
+
+void vector_free(struct vector *);
+
+#define vector_foreach(vec, i, val) /* values[i] is read only while i < len */ \
+	for ((i) = 0; (i) < (vec)->len && ((val) = (vec)->values[(i)], 1); (i)++)
+
+#endif
-- 
cgit v1.2.3