--- /dev/null
+/* lexer.c
+ * bareC lexer implementation (c source)
+ */
+
+#include "lexer.h"
+
+#include <errno.h>
+#include <limits.h>
+
+/* lexer state shared by the routines in this file */
+static FILE *g_input = NULL;     /* stream characters are read from; opened by initLexer */
+static int g_currentChar = ' ';  /* one-character lookahead; an int so it can also hold EOF */
+
+/* advance: replace g_currentChar with the next character read from g_input
+ * (fgetc yields EOF once the input is exhausted) */
+static void advance(void)
+{
+    g_currentChar = fgetc(g_input);
+}
+
+/* isEOF: report whether the current character (g_currentChar) is the EOF marker */
+static bool isEOF(void)
+{
+    return g_currentChar == EOF;
+}
+
+/* skipWhitespace: advance until g_currentChar is no longer a whitespace character
+ * (stops on EOF as well, since isspace(EOF) is false) */
+static void skipWhitespace(void)
+{
+    while (isspace(g_currentChar)) {
+        advance();
+    }
+}
+
+/* skipSingleLineComment: discard the rest of a `//` comment.
+ * Called while g_currentChar is the first `/` and the second `/` is still
+ * unread in g_input. On return g_currentChar is the first character after the
+ * terminating newline, or EOF if the comment ran to the end of the input. */
+static void skipSingleLineComment(void) {
+    advance(); // step onto the second `/`
+    for (;;) {
+        if (isEOF())
+            return;             // comment ended with the input; nothing left to skip
+        if (g_currentChar == '\n')
+            break;
+        advance();
+    }
+    advance();                  // move past the newline itself
+}
+
+/* identifier helpers */
+
+/* isIdentStart: may `c` begin an identifier? (a letter or an underscore) */
+static bool isIdentStart(int c)
+{
+    if (c == '_')
+        return true;
+    return isalpha(c) != 0;
+}
+
+/* isIdentChar: may `c` appear inside an identifier? (letter, digit or underscore) */
+static bool isIdentChar(int c)
+{
+    if (c == '_')
+        return true;
+    return isalnum(c) != 0;
+}
+
+/* makeToken: build a token of the given kind.
+ * The lexeme text is copied with strdup, so the caller's buffer may be reused
+ * afterwards; intValue is set to 0. */
+static Token makeToken(TokenKind kind, const char* lexeme) {
+    Token tok;
+    tok.lexeme = strdup(lexeme);
+    tok.intValue = 0;
+    tok.kind = kind;
+    return tok;
+}
+
+/* makeNumberToken: build a TK_NUMBER token carrying `value`, with a private
+ * copy of the literal's text as its lexeme */
+static Token makeNumberToken(int value, const char *lexeme) {
+    Token numTok = makeToken(TK_NUMBER, lexeme);
+    numTok.intValue = value;
+    return numTok;
+}
+
+/* getNextToken: return next token from stream */
+Token getNextToken(void) {
+ skipWhitespace();
+
+ if (isEOF()) return makeToken(TK_EOF, "EOF");
+
+ /* handle comments: `/` check next char also equals `/` */
+ if (g_currentChar == '/') {
+ int nextChar = fgetc(g_input);
+ ungetc(nextChar, g_input);
+ if (nextChar == '/') {
+ skipSingleLineComment();
+ return getNextToken();
+ } else {
+ advance();
+ return makeToken(TK_SLASH, "/");
+ }
+ }
+
+ /* Handle operators and punctuation */
+ if (g_currentChar == '=') {
+ int nextChar = fgetc(g_input);
+ if (nextChar == '=') {
+ advance(); // consume '='
+ advance(); // consume second '='
+ return makeToken(TK_EQ, "==");
+ } else {
+ ungetc(nextChar, g_input);
+ advance();
+ return makeToken(TK_ASSIGN, "=");
+ }
+ }
+
+ if (g_currentChar == '+') { advance(); return makeToken(TK_PLUS, "+"); }
+ if (g_currentChar == '-') { advance(); return makeToken(TK_MINUS, "-"); }
+ if (g_currentChar == '*') { advance(); return makeToken(TK_STAR, "*"); }
+ if (g_currentChar == '(') { advance(); return makeToken(TK_LPAREN, "("); }
+ if (g_currentChar == ')') { advance(); return makeToken(TK_RPAREN, ")"); }
+ if (g_currentChar == '{') { advance(); return makeToken(TK_LBRACE, "{"); }
+ if (g_currentChar == '}') { advance(); return makeToken(TK_RBRACE, "}"); }
+ if (g_currentChar == ';') { advance(); return makeToken(TK_SEMICOLON, ";"); }
+ if (g_currentChar == '>') { advance(); return makeToken(TK_GT, ">"); }
+
+ /* handle numeric literal */
+ if (isdigit(g_currentChar)) {
+ char buffer[64];
+ int idx = 0;
+ while (isdigit(g_currentChar) && idx < 63) {
+ buffer[idx++] = (char)g_currentChar;
+ advance();
+ }
+ buffer[idx] = '\0';
+ int value = atoi(buffer);
+ return makeNumberToken(value, buffer);
+ }
+
+ /* handle ident or keyword */
+ if (isIdentStart(g_currentChar)) {
+ char buffer[128];
+ int idx = 0;
+ while (isIdentChar(g_currentChar) && idx < 127) {
+ buffer[idx++] = (char)g_currentChar;
+ advance();
+ }
+ buffer[idx] = '\0';
+
+ if (strcmp(buffer, "int") == 0)
+ return makeToken(TK_INT, "int");
+ if (strcmp(buffer, "return") == 0)
+ return makeToken(TK_RETURN, "return");
+ if (strcmp(buffer, "if") == 0)
+ return makeToken(TK_IF, "if");
+ if (strcmp(buffer, "else") == 0)
+ return makeToken(TK_ELSE, "else");
+ if (strcmp(buffer, "while") == 0)
+ return makeToken(TK_WHILE, "while");
+ return makeToken(TK_IDENT, buffer);
+ }
+
+ /* else: unknown */
+ char unknownLexeme[2] = { (char)g_currentChar, '\0' };
+ advance();
+ return makeToken(TK_UNKNOWN, unknownLexeme);
+}
+
+/* initLexer: open the source file named by argv[1] and load its first
+ * character into the lookahead.
+ * Prints a usage message (no file argument) or an open error to stderr and
+ * exits with status 1 on failure. */
+void initLexer(int argc, char** argv) {
+    if (argc >= 2) {
+        g_input = fopen(argv[1], "r");
+        if (g_input != NULL) {
+            advance();  // prime g_currentChar with the first character
+            return;
+        }
+        fprintf(stderr, "Err: could not open '%s'\n", argv[1]);
+    } else {
+        fprintf(stderr, "Usage: %s <source file>\n", argv[0]);
+    }
+    exit(1);
+}
--- /dev/null
+/* parser.c
+ * bareC parser and ast structure (c source)
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include "lexer.h"
+
+/* ast definitions */
+/* ASTKind: tag stored in ASTNode.kind identifying the construct a node represents */
+typedef enum {
+ AST_FUNCDEF, // function definition
+ AST_BLOCK, // compound statement block (also used as the program root)
+ AST_RETURN, // return statement
+ AST_IF, // if statement
+ AST_WHILE, // while loop
+ AST_EXPR_STMT, // expression statement
+ AST_BINARY, // binary expression
+ AST_NUM, // numeric literal
+ AST_IDENT, // identifier
+ AST_DECL // variable declaration
+} ASTKind;
+
+/* ASTNode: one node of the syntax tree. The same struct is used for every
+ * ASTKind; which fields are meaningful depends on `kind`. */
+typedef struct ASTNode ASTNode;
+struct ASTNode {
+ ASTKind kind; // which construct this node represents
+ ASTNode* left; // binary lhs; if/while condition; return / expression-statement value; declaration initializer
+ ASTNode* right; // binary rhs; "then" branch of if; body of while
+ ASTNode* third; // for if: else branch
+ ASTNode** blockStatements; // compound blocks: array of child statements
+ int blockCount; // number of entries in blockStatements
+ char* funcName; // for function definitions
+ ASTNode* funcBody; // function definitions: body block
+ int intValue; // numeric literals
+ char* identName; // identifier / declared variable name; operator string in binary nodes
+};
+
+/* utilities for creating new ast nodes */
+
+/* newASTNode: allocate a zero-filled node of the given kind.
+ * All child pointers, counts and values start out as 0/NULL.
+ * Running out of memory is reported on stderr and terminates the program, so
+ * the returned pointer is never NULL. */
+static ASTNode* newASTNode(ASTKind kind) {
+    ASTNode* node = calloc(1, sizeof *node);
+    if (node == NULL) {
+        fprintf(stderr, "Err: out of memory while allocating AST node\n");
+        exit(1);
+    }
+    node->kind = kind;
+    return node;
+}
+
+/* newBinaryNode: create an AST_BINARY node with operands `lhs` and `rhs`.
+ * The operator text `op` is copied and kept in identName. */
+static ASTNode* newBinaryNode(ASTNode* lhs, ASTNode* rhs, const char* op) {
+    ASTNode* bin = newASTNode(AST_BINARY);
+    bin->identName = strdup(op);
+    bin->right = rhs;
+    bin->left = lhs;
+    return bin;
+}
+
+/* newNumNode: create an AST_NUM node holding the literal `value` */
+static ASTNode* newNumNode(int value)
+{
+    ASTNode* literal = newASTNode(AST_NUM);
+
+    literal->intValue = value;
+    return literal;
+}
+
+/* newIdentNode: create an AST_IDENT node that owns a copy of `name` */
+static ASTNode* newIdentNode(const char* name)
+{
+    ASTNode* ident = newASTNode(AST_IDENT);
+
+    ident->identName = strdup(name);
+    return ident;
+}
+
+/* parsing state and utility */
+
+/* The token the parser is currently looking at. It has static storage, so its
+ * lexeme pointer is NULL until the first call to nextToken(). */
+static Token g_currentToken;
+
+/* nextToken: advance to the next token.
+ * The lexer hands out a freshly strdup'ed lexeme with every token, so the
+ * previous one is released here before it is overwritten; otherwise each
+ * token would leak its text. Code that needs a lexeme beyond the current
+ * token must copy it before calling this function. */
+static void nextToken(void) {
+    free((void*)g_currentToken.lexeme);   /* free(NULL) is a no-op on the first call */
+    g_currentToken = getNextToken();
+}
+
+/* match: if the current token has the given kind, consume it and return true;
+ * otherwise leave the token stream untouched and return false */
+static bool match(TokenKind kind) {
+    if (g_currentToken.kind != kind)
+        return false;
+
+    nextToken();
+    return true;
+}
+
+/* expect: require the current token to have the given kind and consume it.
+ * On a mismatch, print both kind numbers to stderr and exit with status 1. */
+static void expect(TokenKind kind) {
+    if (g_currentToken.kind != kind) {
+        fprintf(stderr, "Parse err: expected kind %d, got %d\n",
+                kind, g_currentToken.kind);
+        exit(1);
+    }
+    nextToken();
+}
+
+/* forward declarations: the parse functions call one another recursively
+ * (for example parseFactor calls parseExpression for parenthesised
+ * expressions), so they are all declared before the first definition */
+static ASTNode* parseExpression(void);
+static ASTNode* parseEquality(void);
+static ASTNode* parseAdditive(void);
+static ASTNode* parseTerm(void);
+static ASTNode* parseFactor(void);
+static ASTNode* parseDeclaration(void);
+static ASTNode* parseStatement(void);
+static ASTNode* parseCompoundStatement(void);
+static ASTNode* parseFunctionDefinition(void);
+static ASTNode* parseProgram(void);
+
+/* parsing */
+
+/* program := (function-definition)* EOF
+ *
+ * parseProgram: parse function definitions until TK_EOF and return them as the
+ * children of an AST_BLOCK root node, in source order.
+ * Running out of memory while growing the child array is reported on stderr
+ * and terminates the program. */
+static ASTNode* parseProgram(void) {
+    ASTNode* root = newASTNode(AST_BLOCK);
+    root->blockStatements = NULL;
+    root->blockCount = 0;
+    while (g_currentToken.kind != TK_EOF) {
+        ASTNode* func = parseFunctionDefinition();
+
+        /* Grow through a temporary so a failed realloc neither loses the old
+         * array nor leads to a write through NULL. */
+        ASTNode** grown = realloc(root->blockStatements,
+                                  sizeof *grown * ((size_t)root->blockCount + 1));
+        if (grown == NULL) {
+            fprintf(stderr, "Err: out of memory while parsing program\n");
+            exit(1);
+        }
+        grown[root->blockCount] = func;
+        root->blockStatements = grown;
+        root->blockCount++;
+    }
+    return root;
+}
+
+/* function-definition := "int" IDENT "(" ")" compound-statement
+ *
+ * parseFunctionDefinition: parse one parameterless int function and return an
+ * AST_FUNCDEF node holding a copy of its name and its body block.
+ * A missing identifier is reported on stderr and ends the program. */
+static ASTNode* parseFunctionDefinition(void) {
+    char* name;
+    ASTNode* bodyBlock;
+    ASTNode* def;
+
+    expect(TK_INT);
+    if (g_currentToken.kind != TK_IDENT) {
+        fprintf(stderr, "Parse error: expected identifier after 'int'\n");
+        exit(1);
+    }
+
+    /* copy the name now: the current token is replaced by nextToken() */
+    name = strdup(g_currentToken.lexeme);
+    nextToken();
+
+    expect(TK_LPAREN);
+    expect(TK_RPAREN);
+    bodyBlock = parseCompoundStatement();
+
+    def = newASTNode(AST_FUNCDEF);
+    def->funcBody = bodyBlock;
+    def->funcName = name;
+    return def;
+}
+
+/* compound-statement := "{" statement* "}"
+ *
+ * parseCompoundStatement: parse a brace-delimited statement list and return an
+ * AST_BLOCK node whose blockStatements hold the statements in source order.
+ * Running out of memory while growing the statement array is reported on
+ * stderr and terminates the program. */
+static ASTNode* parseCompoundStatement(void) {
+    ASTNode* blockNode = newASTNode(AST_BLOCK);
+    blockNode->blockStatements = NULL;
+    blockNode->blockCount = 0;
+    expect(TK_LBRACE);
+    while (g_currentToken.kind != TK_RBRACE) {
+        ASTNode* stmt = parseStatement();
+
+        /* Grow through a temporary so a failed realloc neither loses the old
+         * array nor leads to a write through NULL. */
+        ASTNode** grown = realloc(blockNode->blockStatements,
+                                  sizeof *grown * ((size_t)blockNode->blockCount + 1));
+        if (grown == NULL) {
+            fprintf(stderr, "Err: out of memory while parsing block\n");
+            exit(1);
+        }
+        grown[blockNode->blockCount] = stmt;
+        blockNode->blockStatements = grown;
+        blockNode->blockCount++;
+    }
+    expect(TK_RBRACE);
+    return blockNode;
+}
+
+/* statement :=
+      return-statement
+    | if-statement
+    | while-statement
+    | compound-statement
+    | declaration
+    | expression-statement      (a lone ";" is an empty expression-statement)
+
+   parseStatement: dispatch on the current token and return the node for one
+   statement. The condition of if/while goes in `left`, the then-branch or
+   loop body in `right`, and an else-branch in `third`.
+*/
+static ASTNode* parseStatement(void) {
+    ASTNode* stmt;
+
+    switch (g_currentToken.kind) {
+    case TK_RETURN:
+        nextToken();
+        stmt = newASTNode(AST_RETURN);
+        if (g_currentToken.kind != TK_SEMICOLON)
+            stmt->left = parseExpression();     /* optional return value */
+        expect(TK_SEMICOLON);
+        return stmt;
+
+    case TK_IF:
+        nextToken();
+        stmt = newASTNode(AST_IF);
+        expect(TK_LPAREN);
+        stmt->left = parseExpression();
+        expect(TK_RPAREN);
+        stmt->right = parseStatement();
+        if (match(TK_ELSE))
+            stmt->third = parseStatement();
+        return stmt;
+
+    case TK_WHILE:
+        nextToken();
+        stmt = newASTNode(AST_WHILE);
+        expect(TK_LPAREN);
+        stmt->left = parseExpression();
+        expect(TK_RPAREN);
+        stmt->right = parseStatement();
+        return stmt;
+
+    case TK_LBRACE:
+        return parseCompoundStatement();
+
+    case TK_INT:
+        return parseDeclaration();
+
+    default:
+        /* expression statement, or an empty statement when only ";" follows */
+        stmt = newASTNode(AST_EXPR_STMT);
+        if (g_currentToken.kind != TK_SEMICOLON)
+            stmt->left = parseExpression();
+        expect(TK_SEMICOLON);
+        return stmt;
+    }
+}
+
+/* expression := equality
+ *
+ * parseExpression: entry point for expressions; delegates to the
+ * lowest-precedence level. */
+static ASTNode* parseExpression(void)
+{
+    ASTNode* expr = parseEquality();
+    return expr;
+}
+
+/* equality := additive ( ("==" | ">") additive )*
+ *
+ * parseEquality: parse a left-associative chain of "==" and ">" comparisons,
+ * both handled at this one precedence level. */
+static ASTNode* parseEquality(void) {
+    ASTNode* lhs = parseAdditive();
+
+    for (;;) {
+        const char* opText;
+
+        if (g_currentToken.kind == TK_EQ)
+            opText = "==";
+        else if (g_currentToken.kind == TK_GT)
+            opText = ">";
+        else
+            return lhs;
+
+        nextToken();                            /* consume the operator */
+        ASTNode* rhs = parseAdditive();
+        lhs = newBinaryNode(lhs, rhs, opText);
+    }
+}
+
+/* additive := term ( ("+" | "-") term )*
+ *
+ * parseAdditive: parse a left-associative chain of additions/subtractions. */
+static ASTNode* parseAdditive(void) {
+    ASTNode* lhs = parseTerm();
+
+    for (;;) {
+        const char* opText;
+
+        if (g_currentToken.kind == TK_PLUS)
+            opText = "+";
+        else if (g_currentToken.kind == TK_MINUS)
+            opText = "-";
+        else
+            return lhs;
+
+        nextToken();                            /* consume the operator */
+        ASTNode* rhs = parseTerm();
+        lhs = newBinaryNode(lhs, rhs, opText);
+    }
+}
+
+/* term := factor ( ("*" | "/") factor )*
+ *
+ * parseTerm: parse a left-associative chain of multiplications/divisions. */
+static ASTNode* parseTerm(void) {
+    ASTNode* lhs = parseFactor();
+
+    for (;;) {
+        const char* opText;
+
+        if (g_currentToken.kind == TK_STAR)
+            opText = "*";
+        else if (g_currentToken.kind == TK_SLASH)
+            opText = "/";
+        else
+            return lhs;
+
+        nextToken();                            /* consume the operator */
+        ASTNode* rhs = parseFactor();
+        lhs = newBinaryNode(lhs, rhs, opText);
+    }
+}
+
+/* factor := "(" expression ")" | IDENT | NUMBER
+ *
+ * parseFactor: parse a primary expression. Any other token is reported on
+ * stderr together with its lexeme and ends the program. */
+static ASTNode* parseFactor(void) {
+    switch (g_currentToken.kind) {
+    case TK_LPAREN: {
+        nextToken();
+        ASTNode* inner = parseExpression();
+        expect(TK_RPAREN);
+        return inner;
+    }
+    case TK_IDENT: {
+        /* build the node first: it copies the lexeme of the current token */
+        ASTNode* ident = newIdentNode(g_currentToken.lexeme);
+        nextToken();
+        return ident;
+    }
+    case TK_NUMBER: {
+        ASTNode* literal = newNumNode(g_currentToken.intValue);
+        nextToken();
+        return literal;
+    }
+    default:
+        break;
+    }
+
+    fprintf(stderr, "Parse err: unexpected token '%s' in factor\n", g_currentToken.lexeme);
+    exit(1);
+    return NULL; // never reached; keeps compilers from warning about a missing return
+}
+
+/* declaration := "int" IDENT ( "=" expression )? ";"
+ *
+ * parseDeclaration: parse a variable declaration and return an AST_DECL node
+ * with a copy of the variable name in identName and the optional initializer
+ * in left. A missing identifier is reported on stderr and ends the program. */
+static ASTNode* parseDeclaration(void) {
+    char* name;
+    ASTNode* decl;
+
+    expect(TK_INT);
+    if (g_currentToken.kind != TK_IDENT) {
+        fprintf(stderr, "Parse err: expected identifier in declaration, got '%s'\n", g_currentToken.lexeme);
+        exit(1);
+    }
+
+    /* copy the name now: the current token is replaced by nextToken() */
+    name = strdup(g_currentToken.lexeme);
+    nextToken();
+
+    decl = newASTNode(AST_DECL);
+    decl->identName = name;
+
+    if (g_currentToken.kind == TK_ASSIGN) {
+        nextToken();                            /* consume "=" */
+        decl->left = parseExpression();
+    }
+
+    expect(TK_SEMICOLON);
+    return decl;
+}
+
+
+/* demonstration: ast printing */
+
+/* printIndent: write `indent` indentation units to stdout
+ * (nothing is written when indent is zero or negative) */
+static void printIndent(int indent) {
+    int remaining = indent;
+
+    while (remaining > 0) {
+        printf(" ");
+        remaining--;
+    }
+}
+
+/* printAST: write an indented, human-readable dump of the tree rooted at
+ * `node` to stdout. `indent` is the nesting depth of `node`; children are
+ * printed one or two levels deeper. A NULL node prints nothing, which lets
+ * optional children (return value, else branch, initializer) be passed
+ * straight through. */
+static void printAST(ASTNode* node, int indent) {
+    if (!node) return;
+    switch (node->kind) {
+        case AST_FUNCDEF:
+            printIndent(indent);
+            printf("FunctionDef: name=%s\n", node->funcName);
+            printAST(node->funcBody, indent + 1);
+            break;
+        case AST_BLOCK:
+            printIndent(indent);
+            printf("{\n");
+            for (int i = 0; i < node->blockCount; i++)
+                printAST(node->blockStatements[i], indent + 1);
+            printIndent(indent);
+            printf("}\n");
+            break;
+        case AST_RETURN:
+            printIndent(indent);
+            printf("Return\n");
+            printAST(node->left, indent + 1);       /* optional return value */
+            break;
+        case AST_IF:
+            printIndent(indent);
+            printf("If\n");
+            printIndent(indent + 1);
+            printf("Condition:\n");
+            printAST(node->left, indent + 2);
+            printIndent(indent + 1);
+            printf("Then:\n");
+            printAST(node->right, indent + 2);
+            if (node->third) {
+                printIndent(indent + 1);
+                printf("Else:\n");
+                printAST(node->third, indent + 2);
+            }
+            break;
+        case AST_WHILE:
+            printIndent(indent);
+            printf("While\n");
+            printIndent(indent + 1);
+            printf("Condition:\n");
+            printAST(node->left, indent + 2);
+            printIndent(indent + 1);
+            printf("Body:\n");
+            printAST(node->right, indent + 2);
+            break;
+        case AST_EXPR_STMT:
+            printIndent(indent);
+            printf("ExprStmt\n");
+            printAST(node->left, indent + 1);       /* absent for an empty statement */
+            break;
+        case AST_BINARY:
+            printIndent(indent);
+            printf("BinaryOp (%s)\n", node->identName);
+            printAST(node->left, indent + 1);
+            printAST(node->right, indent + 1);
+            break;
+        case AST_NUM:
+            printIndent(indent);
+            printf("Number (%d)\n", node->intValue);
+            break;
+        case AST_IDENT:
+            printIndent(indent);
+            printf("Identifier (%s)\n", node->identName);
+            break;
+        case AST_DECL:
+            /* The parser emits these for "int x;" / "int x = expr;"; without
+             * this case they fell through to the "Unknown" branch. */
+            printIndent(indent);
+            printf("Declaration (%s)\n", node->identName);
+            printAST(node->left, indent + 1);       /* optional initializer */
+            break;
+        default:
+            printIndent(indent);
+            printf("Unknown AST node kind\n");
+            break;
+    }
+}
+
+/* main: parse the source file named on the command line and dump its AST.
+ * Lexer and parser errors terminate the program from inside the calls below. */
+int main(int argc, char** argv) {
+    ASTNode* program;
+
+    initLexer(argc, argv);      /* opens argv[1]; exits on usage or open errors */
+    nextToken();                /* load the first token before parsing starts */
+
+    program = parseProgram();
+
+    fputs("=== AST DUMP ===\n", stdout);
+    printAST(program, 0);
+
+    return 0;
+}