From: Thomas Bruce Date: Thu, 20 Feb 2025 05:56:24 +0000 (-0500) Subject: properly parse function calls X-Git-Url: https://git.deglebe.com/sitemap.xml?a=commitdiff_plain;h=d10c822e78b6b66ab6e3b0765e267501e26d5f14;p=barec%2Fbarec.git properly parse function calls --- diff --git a/docs/lexer-doc.txt b/docs/lexer-doc.txt new file mode 100644 index 0000000..5040973 --- /dev/null +++ b/docs/lexer-doc.txt @@ -0,0 +1,57 @@ +bareC lexer documentation +date: 20/02/2025 +========================= + +overview: +--------- +the bareC lexer converts the raw source code into a stream of tokens that the +parser can easily process. it reads the source file character-by-character, +skips whitespace and comments (single-line only), and recognizes: + - keywords + - identifiers + - literals (integers) + - operators (+, -, /, *, ==, =, >) + - punctuation + +token types: +------------ +the lexer recognizes the following token types: + - keywords: "int", "return", "if", "else", "while" + - identifiers: variable and function names + - literals: integer literals + - operators: "=", "==", ">", "+", "-", "/", "*" + - punctuation: "(", ")", "{", "}", ";", "," + - end-of-file: eof and unknown + +EBNF for lexical elements: +-------------------------- +1. letters and digits: + <letter> = "A" | "B" | ... | "Z" | "a" | "b" | ... | "z" ; + <digit> = "0" | "1" | ... | "9" ; + +2. identifier: + <identifier> = ( <letter> | "_" ) { <letter> | <digit> | "_" } ; + +3. integer literal: + <integer> = <digit> { <digit> }; + +TODO: keywords, operators + +whitespace and comments: +------------------------ + - whitespace (space, tab, newline) is skipped + - single-line comments start with "//" and terminate at line end + - unrecognized returns as TK_UNKNOWN + +processing flow: +---------------- +1. open the source file and prime the first character +2. repeat: + - skip whitespace and comments + - check current character: + - if digit -> accumulate a number + - if letter or _, accumulate an identifier and check reserved + - match multi-character operators + - directly return tokens +3. 
continue until EOF + diff --git a/lexer.c b/lexer.c index d259454..9098221 100644 --- a/lexer.c +++ b/lexer.c @@ -87,6 +87,7 @@ Token getNextToken(void) { if (g_currentChar == '{') { advance(); return makeToken(TK_LBRACE, "{"); } if (g_currentChar == '}') { advance(); return makeToken(TK_RBRACE, "}"); } if (g_currentChar == ';') { advance(); return makeToken(TK_SEMICOLON, ";"); } + if (g_currentChar == ',') { advance(); return makeToken(TK_COMMA, ","); } if (g_currentChar == '>') { advance(); return makeToken(TK_GT, ">"); } /* handle numeric literal */