From 1e53c78157bc00ec7b8f25e453206ddfef2e026e Mon Sep 17 00:00:00 2001
From: Thomas Bruce
Date: Fri, 21 Feb 2025 11:39:59 -0500
Subject: [PATCH] add bi-token equalities (GTEQ, LTEQ)

Lex "<", "<=", ">" and ">=" and accept all of them in parseEquality().
Lex floating-point literals (TK_FLOATLIT) through the new lexNum() and
carry them into the parser as AST_FLOAT nodes.
---
Review notes (not part of the commit message):
- lexNum(): every append to buffer is bounds-checked; the '.', exponent
  and sign appends could otherwise write past the end of buffer.
- parseFactor(): the TK_FLOATLIT branch calls nextToken() so the literal
  is consumed.
- printAST(): AST_FLOAT is printed with %f; %d with a double argument is
  undefined behavior.
- parseEquality(): TK_LT is handled alongside TK_GT, since the lexer
  emits it.
- getNextToken(): no commented-out '>' / '<' handlers are left behind.
- Line breaks and indentation were reconstructed and the blob ids on the
  "index" lines are carried over unchanged; if a hunk does not match, try
  `git apply --ignore-whitespace`.

 lexer.c  | 116 +++++++++++++++++++++++++++++++++++++++++++++++++------
 lexer.h  |   8 +++-
 parser.c |  29 +++++++++++++-
 3 files changed, 140 insertions(+), 13 deletions(-)

diff --git a/lexer.c b/lexer.c
index 9098221..1c56c67 100644
--- a/lexer.c
+++ b/lexer.c
@@ -34,6 +34,7 @@ static Token makeToken(TokenKind kind, const char* lexeme) {
     t.kind = kind;
     t.lexeme = strdup(lexeme);
     t.intValue = 0;
+    t.floatValue = 0.0;
     return t;
 }
 
@@ -43,9 +44,74 @@ static Token makeNumberToken(int value, const char *lexeme) {
     t.kind = TK_NUMBER;
     t.intValue = value;
     t.lexeme = strdup(lexeme);
+    t.floatValue = 0.0;
     return t;
 }
 
+/* makeFloatToken: create a TK_FLOATLIT token holding value; lexeme is copied */
+static Token makeFloatToken(double value, const char* lexeme) {
+    Token t;
+    t.kind = TK_FLOATLIT;
+    t.floatValue = value;
+    t.lexeme = strdup(lexeme);
+    t.intValue = 0;
+    return t;
+}
+
+/*
+ * lexNum: lex a numeric literal starting at g_currentChar.
+ * A '.' or an exponent yields a TK_FLOATLIT token, otherwise TK_NUMBER.
+ * Every append is bounds-checked so buffer always has room for the '\0'.
+ */
+static Token lexNum(void) {
+    char buffer[256];
+    int idx = 0;
+    bool hasDot = false;
+    bool hasExponent = false;
+
+    /* capture digits */
+    while (isdigit(g_currentChar) && idx < 255) {
+        buffer[idx++] = (char)g_currentChar;
+        advance();
+    }
+
+    /* check for dot */
+    if (g_currentChar == '.' && idx < 255) {
+        hasDot = true;
+        buffer[idx++] = (char)g_currentChar;
+        advance();
+        while (isdigit(g_currentChar) && idx < 255) {
+            buffer[idx++] = (char)g_currentChar;
+            advance();
+        }
+    }
+
+    /* check for exponent */
+    if ((g_currentChar == 'e' || g_currentChar == 'E') && idx < 255) {
+        hasExponent = true;
+        buffer[idx++] = (char)g_currentChar;
+        advance();
+        if ((g_currentChar == '+' || g_currentChar == '-') && idx < 255) {
+            buffer[idx++] = (char)g_currentChar;
+            advance();
+        }
+        while (isdigit(g_currentChar) && idx < 255) {
+            buffer[idx++] = (char)g_currentChar;
+            advance();
+        }
+    }
+
+    buffer[idx] = '\0';
+
+    if (hasDot || hasExponent) {
+        double value = strtod(buffer, NULL);
+        return makeFloatToken(value, buffer);
+    } else {
+        int value = atoi(buffer);
+        return makeNumberToken(value, buffer);
+    }
+}
+
 /* getNextToken: return next token from stream */
 Token getNextToken(void) {
     skipWhitespace();
@@ -65,7 +131,7 @@ Token getNextToken(void) {
         }
     }
 
-    /* Handle operators and punctuation */
+    /* handle operators and punctuation */
     if (g_currentChar == '=') {
         int nextChar = fgetc(g_input);
         if (nextChar == '=') {
@@ -79,6 +145,37 @@ Token getNextToken(void) {
         }
     }
 
+    /*
+     * comparison operators: peek one character to tell "<=" / ">=" from "<" / ">".
+     * NOTE(review): fgetc() has already consumed the '=' when it matches, so
+     * confirm against advance() that calling it twice does not skip a character.
+     */
+    if (g_currentChar == '<') {
+        int nextChar = fgetc(g_input);
+        if (nextChar == '=') {
+            advance();
+            advance();
+            return makeToken(TK_LTEQ, "<=");
+        } else {
+            ungetc(nextChar, g_input);
+            advance();
+            return makeToken(TK_LT, "<");
+        }
+    }
+
+    if (g_currentChar == '>') {
+        int nextChar = fgetc(g_input);
+        if (nextChar == '=') {
+            advance();
+            advance();
+            return makeToken(TK_GTEQ, ">=");
+        } else {
+            ungetc(nextChar, g_input);
+            advance();
+            return makeToken(TK_GT, ">");
+        }
+    }
+
     if (g_currentChar == '+') { advance(); return makeToken(TK_PLUS, "+"); }
     if (g_currentChar == '-') { advance(); return makeToken(TK_MINUS, "-"); }
     if (g_currentChar == '*') { advance(); return makeToken(TK_STAR, "*"); }
@@ -88,19 +185,16 @@ Token getNextToken(void) {
     if (g_currentChar == '}') { advance(); return makeToken(TK_RBRACE, "}"); }
     if (g_currentChar == ';') { advance(); return makeToken(TK_SEMICOLON, ";"); }
     if (g_currentChar == ',') { advance(); return makeToken(TK_COMMA, ","); }
-    if (g_currentChar == '>') { advance(); return makeToken(TK_GT, ">"); }
 
     /* handle numeric literal */
     if (isdigit(g_currentChar)) {
-        char buffer[64];
-        int idx = 0;
-        while (isdigit(g_currentChar) && idx < 63) {
-            buffer[idx++] = (char)g_currentChar;
-            advance();
-        }
-        buffer[idx] = '\0';
-        int value = atoi(buffer);
-        return makeNumberToken(value, buffer);
+        return lexNum();
+    }
+    /* a literal may start with '.', e.g. ".5": peek one character to check */
+    if (g_currentChar == '.') {
+        int next = fgetc(g_input);
+        ungetc(next, g_input);
+        if (isdigit(next)) return lexNum();
     }
 
     /* handle ident or keyword */
diff --git a/lexer.h b/lexer.h
index 4fc77dd..b12fb0d 100644
--- a/lexer.h
+++ b/lexer.h
@@ -17,7 +17,10 @@ typedef enum {
     TK_WHILE, // `while`
     TK_IDENT, // identifier
     TK_NUMBER, // integer literal
+    TK_FLOATLIT, // fp numeric literal
     TK_EQ, // `==`
+    TK_GTEQ, // `>=`
+    TK_LTEQ, // `<=`
     TK_ASSIGN, // `=`
     TK_PLUS, // `+`
     TK_MINUS, // `-`
@@ -28,8 +31,10 @@ typedef enum {
     TK_LBRACE, // `{`
     TK_RBRACE, // `}`
     TK_SEMICOLON, // `;`
-    TK_COMMA,
+    TK_COMMA, // `,`
+    TK_DOT, // `.`
     TK_GT, // `>`
+    TK_LT, // `<`
     TK_EOF, // EOF
     TK_UNKNOWN // UNKNOWN
 } TokenKind;
@@ -39,6 +44,7 @@ typedef struct {
     TokenKind kind;
     char* lexeme; // textual representation
     int intValue; // numeric literal value
+    double floatValue; // floating literal value
 } Token;
 
 /* initialize the lexer; take source filename */
diff --git a/parser.c b/parser.c
index 0bac1fa..e78f972 100644
--- a/parser.c
+++ b/parser.c
@@ -19,6 +19,7 @@ typedef enum {
     AST_EXPR_STMT, // expresison statement
     AST_BINARY, // bunary expression
     AST_NUM, // numeric literal
+    AST_FLOAT, // fp literal
     AST_IDENT, // identifier
     AST_DECL, // variable declaration
     AST_CALL // function calls
@@ -35,6 +36,7 @@ struct ASTNode {
     char* funcName; // for function definitions
     ASTNode* funcBody; // body block
     int intValue; // numeric literals
+    double floatValue; // fp literals
     char* identName; // identifiers and operator string in binary nodes
     ASTNode** args; // array of argument nodes
     int argCount; // number of arguments
@@ -63,6 +65,13 @@ static ASTNode* newNumNode(int value) {
     return node;
 }
 
+/* newFloatNode: create an AST_FLOAT node holding value */
+static ASTNode* newFloatNode(double value) {
+    ASTNode* node = newASTNode(AST_FLOAT);
+    node->floatValue = value;
+    return node;
+}
+
 static ASTNode* newIdentNode(const char* name) {
     ASTNode* node = newASTNode(AST_IDENT);
     node->identName = strdup(name);
@@ -251,7 +260,10 @@ static ASTNode* parseExpression(void) {
 /* equality := additive ( "==" additive )* */
 static ASTNode* parseEquality(void) {
     ASTNode* node = parseAdditive();
-    while (g_currentToken.kind == TK_EQ || g_currentToken.kind == TK_GT) {
+    while (g_currentToken.kind == TK_EQ || g_currentToken.kind == TK_GT
+            || g_currentToken.kind == TK_LT
+            || g_currentToken.kind == TK_LTEQ
+            || g_currentToken.kind == TK_GTEQ) {
         TokenKind op = g_currentToken.kind;
         nextToken();
         ASTNode* rhs = parseAdditive();
@@ -259,6 +271,12 @@ static ASTNode* parseEquality(void) {
             node = newBinaryNode(node, rhs, "==");
         else if (op == TK_GT)
             node = newBinaryNode(node, rhs, ">");
+        else if (op == TK_LT)
+            node = newBinaryNode(node, rhs, "<");
+        else if (op == TK_LTEQ)
+            node = newBinaryNode(node, rhs, "<=");
+        else if (op == TK_GTEQ)
+            node = newBinaryNode(node, rhs, ">=");
     }
     return node;
 }
@@ -345,6 +363,11 @@ static ASTNode* parseFactor(void) {
         nextToken();
         return node;
     }
+    if (g_currentToken.kind == TK_FLOATLIT) {
+        ASTNode* node = newFloatNode(g_currentToken.floatValue);
+        nextToken(); /* consume the literal before returning */
+        return node;
+    }
     fprintf(stderr, "Parse err: unexpected token '%s' in factor\n", g_currentToken.lexeme);
     exit(1);
     return NULL; // unreachable, but silences compiler warnings.
@@ -454,6 +477,10 @@ static void printAST(ASTNode* node, int indent) {
             printIndent(indent);
             printf("Number (%d)\n", node->intValue);
             break;
+        case AST_FLOAT:
+            printIndent(indent);
+            printf("Float (%f)\n", node->floatValue);
+            break;
         case AST_IDENT:
             printIndent(indent);
             printf("Identifier (%s)\n", node->identName);
-- 
2.39.5