#include "lexer.h"
#include "token.h"

#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

/*
 * Lexer implementation.
 *
 * The lexer keeps a cursor in lexer->word, a slice over lexer->input:
 * word.start indexes the character currently being examined and word.end
 * is one past it.  Once the input is exhausted both indices stay parked at
 * lexer->len, where input[len] is the terminating NUL.
 */

/* True for characters allowed in an identifier: ASCII letters and '_'. */
static bool
isidentc(char c)
{
	return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_';
}

/*
 * True for ASCII decimal digits.  The cast to unsigned char is required:
 * passing a negative char value to a <ctype.h> function is undefined.
 */
static bool
isdigitc(char c)
{
	return isdigit((unsigned char)c) != 0;
}

/* Advance the cursor past any whitespace at the current position. */
static void
lexer_eatspace(struct lexer *lexer)
{
	while (isspace((unsigned char)lexer->input[lexer->word.start])) {
		lexer_read_char(lexer);
	}
}

/*
 * Fill in `token` with type `t` and literal `s`.
 * When `s` is NULL the literal becomes an empty slice over "".
 * Otherwise `s` is copied into the token with slice_cpy().
 */
static void
set_token(struct token *token, enum token_type t, const struct slice *s)
{
	token->type = t;
	if (s != NULL) {
		slice_cpy(&token->literal, s);
		return;
	}
	token->literal.str = "";
	token->literal.start = 0;
	token->literal.end = 0;
}

/*
 * Consume characters while `accept` returns true and record the consumed
 * range [start, end) of lexer->input as the token's literal.
 * The loop always terminates: the NUL at input[len] is rejected by both
 * predicates used in this file.  token->type is left for the caller.
 */
static void
lexer_read_while(struct lexer *lexer, struct token *token,
    bool (*accept)(char))
{
	size_t first = lexer->word.start;

	while (accept(lexer->input[lexer->word.start])) {
		lexer_read_char(lexer);
	}
	token->literal.str = lexer->input;
	token->literal.start = first;
	token->literal.end = lexer->word.start;
}

/* Read a run of identifier characters into token->literal. */
static void
lexer_read_ident(struct lexer *lexer, struct token *token)
{
	lexer_read_while(lexer, token, isidentc);
}

/* Read a run of decimal digits into token->literal. */
static void
lexer_read_num(struct lexer *lexer, struct token *token)
{
	lexer_read_while(lexer, token, isdigitc);
}

/*
 * Return the character after the current one without consuming anything.
 * Returns 0 when the cursor is already at the end of input.  Otherwise the
 * read at start + 1 is at worst the terminating NUL, so it is in bounds.
 */
static char
lexer_peek_char(struct lexer *lexer)
{
	if (lexer->word.start >= lexer->len) {
		return 0;
	}
	return lexer->input[lexer->word.start + 1];
}

/*
 * Allocate a lexer and initialise the keyword table.
 *
 * The new lexer starts on an empty input, so calling lexer_next_token()
 * before lexer_reset() yields TOKEN_EOF instead of reading uninitialised
 * memory.  Returns NULL if allocation fails; in that case the keyword
 * table is not initialised.  Release with lexer_destroy().
 */
struct lexer *
lexer_new(void)
{
	struct lexer *lexer = malloc(sizeof *lexer);

	if (lexer == NULL) {
		return NULL;
	}
	lexer->input = "";
	lexer->len = 0;
	lexer->word.str = lexer->input;
	lexer->word.start = 0;
	lexer->word.end = 0;

	token_init_keywords();
	return lexer;
}

/*
 * Advance the cursor by one character.
 *
 * At the end of input the cursor parks at [len, len) and stays there, so
 * every later lexer_next_token() call keeps returning TOKEN_EOF.  (Resetting
 * word.end to 0 here would make the next call rewind to the beginning of
 * the input.)
 */
void
lexer_read_char(struct lexer *lexer)
{
	lexer->word.start = lexer->word.end;
	if (lexer->word.end >= lexer->len) {
		lexer->word.start = lexer->len;
		lexer->word.end = lexer->len;
		return;
	}
	lexer->word.end++;
}

/*
 * Scan and return the next token.
 *
 * Leading whitespace is skipped.  For operators and punctuation the
 * literal is the current cursor slice; identifiers and integers get a
 * slice covering the whole run.  Identifier types are resolved through
 * token_lookup_ident().  At end of input TOKEN_EOF with an empty literal is
 * returned.  Unrecognised characters produce TOKEN_ILLEGAL.
 */
struct token
lexer_next_token(struct lexer *lexer)
{
	struct token token;
	char c;

	lexer_eatspace(lexer);
	c = lexer->input[lexer->word.start];

	switch (c) {
	case '=':
		if (lexer_peek_char(lexer) == '=') {
			/* Widen the slice to cover both characters of "==". */
			lexer->word.end++;
			set_token(&token, TOKEN_EQ, &lexer->word);
		} else {
			set_token(&token, TOKEN_ASSIGN, &lexer->word);
		}
		break;
	case '+':
		set_token(&token, TOKEN_PLUS, &lexer->word);
		break;
	case '-':
		set_token(&token, TOKEN_MINUS, &lexer->word);
		break;
	case '!':
		if (lexer_peek_char(lexer) == '=') {
			/* Widen the slice to cover both characters of "!=". */
			lexer->word.end++;
			set_token(&token, TOKEN_NOTEQ, &lexer->word);
		} else {
			set_token(&token, TOKEN_BANG, &lexer->word);
		}
		break;
	case '/':
		set_token(&token, TOKEN_SLASH, &lexer->word);
		break;
	case '*':
		set_token(&token, TOKEN_ASTERISK, &lexer->word);
		break;
	case '<':
		set_token(&token, TOKEN_LT, &lexer->word);
		break;
	case '>':
		set_token(&token, TOKEN_GT, &lexer->word);
		break;
	case ';':
		set_token(&token, TOKEN_SEMICOLON, &lexer->word);
		break;
	case '(':
		set_token(&token, TOKEN_LPAREN, &lexer->word);
		break;
	case ')':
		set_token(&token, TOKEN_RPAREN, &lexer->word);
		break;
	case ',':
		set_token(&token, TOKEN_COMMA, &lexer->word);
		break;
	case '{':
		set_token(&token, TOKEN_LBRACE, &lexer->word);
		break;
	case '}':
		set_token(&token, TOKEN_RBRACE, &lexer->word);
		break;
	case 0:
		set_token(&token, TOKEN_EOF, NULL);
		break;
	default:
		/*
		 * Identifier and number readers leave the cursor on the first
		 * character after the run, so they return directly and skip
		 * the trailing lexer_read_char() below.
		 */
		if (isidentc(c)) {
			lexer_read_ident(lexer, &token);
			token.type = token_lookup_ident(&token.literal);
			return token;
		}
		if (isdigitc(c)) {
			lexer_read_num(lexer, &token);
			token.type = TOKEN_INT;
			return token;
		}
		set_token(&token, TOKEN_ILLEGAL, &lexer->word);
		break;
	}

	/* Step past the token that was just produced. */
	lexer_read_char(lexer);
	return token;
}

/*
 * Point the lexer at a new NUL-terminated input and rewind the cursor to
 * its first character.  The string is not copied, so it must outlive the
 * lexer's use of it.  A NULL input is treated as an empty string.
 */
void
lexer_reset(struct lexer *lexer, const char *input)
{
	if (input == NULL) {
		input = "";
	}
	lexer->input = input;
	lexer->len = strlen(input);
	lexer->word.str = input;
	lexer->word.start = 0;
	lexer->word.end = 0;
	/* Load the first character so that word covers [0, 1). */
	lexer_read_char(lexer);
}

/*
 * Free a lexer obtained from lexer_new() and release the keyword table.
 * Passing NULL is a no-op: a failed lexer_new() never initialised the
 * keyword table, so there is nothing to release.
 */
void
lexer_destroy(struct lexer *lexer)
{
	if (lexer == NULL) {
		return;
	}
	free(lexer);
	token_free_keywords();
}