diff options
Diffstat (limited to 'src/lexer.c')
-rw-r--r-- | src/lexer.c | 187 |
1 files changed, 187 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..7ee9cbd --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,187 @@ +#include "lexer.h" +#include "token.h" + +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> + +static bool +isidentc(char c) +{ + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_'; +} + +static void +lexer_eatspace(struct lexer *lexer) +{ + while(isspace(lexer->input[lexer->word.start])) { + lexer_read_char(lexer); + } +} + +static void +set_token(struct token *token, enum token_type t, const struct slice *s) +{ + token->type = t; + if (s == NULL) { + token->literal.str = ""; + token->literal.start = 0; + token->literal.end = 0; + } else { + slice_cpy(&token->literal, s); + } +} + +static void +lexer_read_ident(struct lexer *lexer, struct token *token) +{ + size_t start = lexer->word.start; + token->literal.str = lexer->input; + while (isidentc(lexer->input[lexer->word.start])) { + lexer_read_char(lexer); + } + token->literal.start = start; + token->literal.end = lexer->word.start; +} + +static void +lexer_read_num(struct lexer *lexer, struct token *token) +{ + size_t start = lexer->word.start; + token->literal.str = lexer->input; + while (isdigit(lexer->input[lexer->word.start])) { + lexer_read_char(lexer); + } + token->literal.start = start; + token->literal.end = lexer->word.start; +} + +static char +lexer_peek_char(struct lexer *lexer) +{ + if (lexer->word.start >= lexer->len) { + return 0; + } + return lexer->input[lexer->word.start + 1]; +} + +struct lexer * +lexer_new() +{ + struct lexer *lexer = malloc(sizeof *lexer); + token_init_keywords(); + + return lexer; +} + +void +lexer_read_char(struct lexer *lexer) +{ + lexer->word.start = lexer->word.end; + if (lexer->word.end >= lexer->len) { + lexer->word.end = 0; + return; + } + lexer->word.end++; +} + +struct token +lexer_next_token(struct lexer *lexer) +{ + lexer_eatspace(lexer); + + struct token token; + char c = lexer->input[lexer->word.start]; + + switch (c) { + case '=': + if (lexer_peek_char(lexer) == '=') { + lexer->word.end++; + set_token(&token, TOKEN_EQ, &lexer->word); + } else { + set_token(&token, TOKEN_ASSIGN, &lexer->word); + } + break; + case '+': + set_token(&token, TOKEN_PLUS, &lexer->word); + break; + case '-': + set_token(&token, TOKEN_MINUS, &lexer->word); + break; + case '!': + if (lexer_peek_char(lexer) == '=') { + lexer->word.end++; + set_token(&token, TOKEN_NOTEQ, &lexer->word); + } else { + set_token(&token, TOKEN_BANG, &lexer->word); + } + break; + case '/': + set_token(&token, TOKEN_SLASH, &lexer->word); + break; + case '*': + set_token(&token, TOKEN_ASTERISK, &lexer->word); + break; + case '<': + set_token(&token, TOKEN_LT, &lexer->word); + break; + case '>': + set_token(&token, TOKEN_GT, &lexer->word); + break; + case ';': + set_token(&token, TOKEN_SEMICOLON, &lexer->word); + break; + case '(': + set_token(&token, TOKEN_LPAREN, &lexer->word); + break; + case ')': + set_token(&token, TOKEN_RPAREN, &lexer->word); + break; + case ',': + set_token(&token, TOKEN_COMMA, &lexer->word); + break; + case '{': + set_token(&token, TOKEN_LBRACE, &lexer->word); + break; + case '}': + set_token(&token, TOKEN_RBRACE, &lexer->word); + break; + case 0: + set_token(&token, TOKEN_EOF, NULL); + break; + default: + if (isidentc(c)) { + lexer_read_ident(lexer, &token); + token.type = token_lookup_ident(&token.literal); + return token; + } else if (isdigit(c)) { + lexer_read_num(lexer, &token); + token.type = TOKEN_INT; + return token; + } + set_token(&token, TOKEN_ILLEGAL, &lexer->word); + } + + lexer_read_char(lexer); + + return token; +} + +void +lexer_reset(struct lexer *lexer, const char *input) +{ + lexer->input = input; + lexer->len = strlen(lexer->input); + lexer->word.str = lexer->input; + lexer->word.start = 0; + lexer->word.end = 0; + lexer_read_char(lexer); +} + +void +lexer_destroy(struct lexer *lexer) +{ + free(lexer); + token_free_keywords(); +} |