about summary refs log tree commit diff
path: root/src/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lexer.c')
-rw-r--r-- src/lexer.c 187
1 files changed, 187 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..7ee9cbd
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,187 @@
+#include "lexer.h"
+#include "token.h"
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+/*
+ * Report whether c may appear in an identifier: an ASCII letter
+ * ('a'-'z', 'A'-'Z') or an underscore. Digits are not accepted.
+ */
+static bool
+isidentc(char c)
+{
+	if (c == '_') {
+		return true;
+	}
+	if (c >= 'a' && c <= 'z') {
+		return true;
+	}
+	return c >= 'A' && c <= 'Z';
+}
+
+/*
+ * Advance the lexer past any whitespace at the current position
+ * (lexer->word.start), one character at a time via lexer_read_char().
+ */
+static void
+lexer_eatspace(struct lexer *lexer)
+{
+	/*
+	 * <ctype.h> classifiers require a value representable as unsigned
+	 * char (or EOF); passing a negative plain char is undefined
+	 * behaviour, so convert first.
+	 */
+	while (isspace((unsigned char)lexer->input[lexer->word.start])) {
+		lexer_read_char(lexer);
+	}
+}
+
+/*
+ * Fill in token with type t and a literal.
+ *
+ * When s is NULL the literal becomes empty (str "", start 0, end 0);
+ * otherwise the literal is copied from s with slice_cpy().
+ */
+static void
+set_token(struct token *token, enum token_type t, const struct slice *s)
+{
+	token->type = t;
+
+	if (s != NULL) {
+		slice_cpy(&token->literal, s);
+		return;
+	}
+
+	token->literal.str = "";
+	token->literal.start = 0;
+	token->literal.end = 0;
+}
+
+/*
+ * Consume a run of identifier characters (see isidentc()) starting at the
+ * lexer's current position and record it as token's literal: a slice of
+ * lexer->input covering [start, end). token->type is left for the caller.
+ *
+ * On return lexer->word.start is at the first character after the run.
+ */
+static void
+lexer_read_ident(struct lexer *lexer, struct token *token)
+{
+	const size_t first = lexer->word.start;
+
+	for (; isidentc(lexer->input[lexer->word.start]); lexer_read_char(lexer)) {
+		/* nothing to do: the loop header consumes one character per pass */
+	}
+
+	token->literal.str = lexer->input;
+	token->literal.start = first;
+	token->literal.end = lexer->word.start;
+}
+
+/*
+ * Consume a run of decimal digits starting at the lexer's current position
+ * and record it as token's literal: a slice of lexer->input covering
+ * [start, end). token->type is left for the caller.
+ *
+ * On return lexer->word.start is at the first character after the run.
+ */
+static void
+lexer_read_num(struct lexer *lexer, struct token *token)
+{
+	size_t start = lexer->word.start;
+
+	token->literal.str = lexer->input;
+	/*
+	 * isdigit() requires a value representable as unsigned char (or EOF);
+	 * a negative plain char would be undefined behaviour.
+	 */
+	while (isdigit((unsigned char)lexer->input[lexer->word.start])) {
+		lexer_read_char(lexer);
+	}
+	token->literal.start = start;
+	token->literal.end = lexer->word.start;
+}
+
+/*
+ * Return the character after the current one without advancing the lexer,
+ * or 0 when the current position is at or past the end of the input.
+ *
+ * When word.start < len, index word.start + 1 is at most len, which is the
+ * string's NUL terminator (len is computed with strlen() in lexer_reset()).
+ */
+static char
+lexer_peek_char(struct lexer *lexer)
+{
+	return (lexer->word.start < lexer->len)
+		? lexer->input[lexer->word.start + 1]
+		: 0;
+}
+
+/*
+ * Allocate a lexer and initialise the keyword table.
+ *
+ * Returns NULL if the allocation fails; in that case the keyword table is
+ * not initialised, so there is nothing for the caller to release.
+ *
+ * The returned lexer's fields are not initialised: call lexer_reset() to
+ * attach an input before calling lexer_next_token(). Release the lexer with
+ * lexer_destroy().
+ */
+struct lexer *
+lexer_new(void)
+{
+	struct lexer *lexer = malloc(sizeof *lexer);
+
+	if (lexer == NULL) {
+		return NULL;
+	}
+	token_init_keywords();
+
+	return lexer;
+}
+
+/*
+ * Step the lexer forward by one character.
+ *
+ * The current position (word.start) moves to word.end, and word.end grows
+ * by one unless it has already reached the end of the input. Once the end
+ * is reached both indices stay at lexer->len, so input[word.start] is the
+ * NUL terminator and further calls change nothing.
+ */
+void
+lexer_read_char(struct lexer *lexer)
+{
+	lexer->word.start = lexer->word.end;
+	/*
+	 * word.end is deliberately left at len when the input is exhausted:
+	 * resetting it to 0 would make the next call rewind word.start to the
+	 * beginning, so the token after EOF would be the first token again.
+	 */
+	if (lexer->word.end < lexer->len) {
+		lexer->word.end++;
+	}
+}
+
+/*
+ * Scan and return the next token from the lexer's input.
+ *
+ * Leading whitespace is skipped first. Operators and punctuation produce a
+ * token whose literal is a copy of the lexer's current word slice; "==" and
+ * "!=" are recognised by peeking one character ahead and widening the word
+ * by one. A NUL byte yields TOKEN_EOF with an empty literal. A run of
+ * identifier characters is classified with token_lookup_ident(), a run of
+ * digits yields TOKEN_INT, and anything else yields TOKEN_ILLEGAL.
+ */
+struct token
+lexer_next_token(struct lexer *lexer)
+{
+	lexer_eatspace(lexer);
+
+	struct token token;
+	char c = lexer->input[lexer->word.start];
+
+	switch (c) {
+	case '=':
+		if (lexer_peek_char(lexer) == '=') {
+			/* widen the word so the literal covers both characters */
+			lexer->word.end++;
+			set_token(&token, TOKEN_EQ, &lexer->word);
+		} else {
+			set_token(&token, TOKEN_ASSIGN, &lexer->word);
+		}
+		break;
+	case '+':
+		set_token(&token, TOKEN_PLUS, &lexer->word);
+		break;
+	case '-':
+		set_token(&token, TOKEN_MINUS, &lexer->word);
+		break;
+	case '!':
+		if (lexer_peek_char(lexer) == '=') {
+			/* widen the word so the literal covers both characters */
+			lexer->word.end++;
+			set_token(&token, TOKEN_NOTEQ, &lexer->word);
+		} else {
+			set_token(&token, TOKEN_BANG, &lexer->word);
+		}
+		break;
+	case '/':
+		set_token(&token, TOKEN_SLASH, &lexer->word);
+		break;
+	case '*':
+		set_token(&token, TOKEN_ASTERISK, &lexer->word);
+		break;
+	case '<':
+		set_token(&token, TOKEN_LT, &lexer->word);
+		break;
+	case '>':
+		set_token(&token, TOKEN_GT, &lexer->word);
+		break;
+	case ';':
+		set_token(&token, TOKEN_SEMICOLON, &lexer->word);
+		break;
+	case '(':
+		set_token(&token, TOKEN_LPAREN, &lexer->word);
+		break;
+	case ')':
+		set_token(&token, TOKEN_RPAREN, &lexer->word);
+		break;
+	case ',':
+		set_token(&token, TOKEN_COMMA, &lexer->word);
+		break;
+	case '{':
+		set_token(&token, TOKEN_LBRACE, &lexer->word);
+		break;
+	case '}':
+		set_token(&token, TOKEN_RBRACE, &lexer->word);
+		break;
+	case 0:
+		set_token(&token, TOKEN_EOF, NULL);
+		break;
+	default:
+		/*
+		 * Identifier and number readers advance the lexer past the
+		 * lexeme themselves, so these paths return without the
+		 * trailing lexer_read_char() below.
+		 */
+		if (isidentc(c)) {
+			lexer_read_ident(lexer, &token);
+			token.type = token_lookup_ident(&token.literal);
+			return token;
+		} else if (isdigit((unsigned char)c)) {
+			/* cast: isdigit() on a negative plain char is undefined */
+			lexer_read_num(lexer, &token);
+			token.type = TOKEN_INT;
+			return token;
+		}
+		set_token(&token, TOKEN_ILLEGAL, &lexer->word);
+		break;
+	}
+
+	/* step past the character(s) just consumed */
+	lexer_read_char(lexer);
+
+	return token;
+}
+
+/*
+ * Point the lexer at a new NUL-terminated input string and rewind it.
+ *
+ * The string is not copied: the lexer and its word slice keep the pointer.
+ * The word slice is reset to offset 0 and then advanced once with
+ * lexer_read_char() to prime it on the first character.
+ */
+void
+lexer_reset(struct lexer *lexer, const char *input)
+{
+	struct slice *word = &lexer->word;
+
+	lexer->input = input;
+	lexer->len = strlen(input);
+
+	word->str = input;
+	word->start = 0;
+	word->end = 0;
+
+	lexer_read_char(lexer);
+}
+
+/*
+ * Free a lexer obtained from lexer_new() and release the keyword table
+ * via token_free_keywords(). The input string passed to lexer_reset() is
+ * not freed here.
+ */
+void
+lexer_destroy(struct lexer *lexer)
+{
+ free(lexer);
+ /* NOTE(review): presumably pairs with token_init_keywords() in lexer_new() — confirm. */
+ token_free_keywords();
+}