mrcrilly · September 4, 2024 01:57
diff --git a/ml_lexer.c b/ml_lexer.c
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>

 /* Capacity of the global tokens[] array (includes the slot used for TOKEN_EOF). */
 #define MAX_TOKENS 1000
 /* Size in bytes of each Token.value buffer, terminating NUL included. */
 #define MAX_TOKEN_LENGTH 100

 /* Kinds of lexical token the tokenizer can produce. */
 typedef enum {
    TOKEN_IDENTIFIER = 0, /* alphanumeric word that is not a keyword */
    TOKEN_NUMBER,         /* run of decimal digits */
    TOKEN_ASSIGN,         /* "<-" */
    TOKEN_PRINT,          /* keyword "print" */
    TOKEN_RETURN,         /* keyword "return" */
    TOKEN_MULTIPLY,       /* "*" */
    TOKEN_ADD,            /* "+" */
    TOKEN_SUBTRACT,       /* "-" */
    TOKEN_DIVIDE,         /* "/" */
    TOKEN_LPAREN,         /* "(" */
    TOKEN_RPAREN,         /* ")" */
    TOKEN_FUNCTION,       /* keyword "function" */
    TOKEN_EOF             /* marker stored after the last real token */
 } TokenType;

 /* One lexical token: its kind plus the source text it was read from. */
 typedef struct {
    TokenType type;                 /* which kind of token this is */
    char value[MAX_TOKEN_LENGTH];   /* NUL-terminated lexeme text */
 } Token;

 /* Token table filled by tokenize() and read by the parse* functions. */
 Token tokens[MAX_TOKENS];
 /* Index of the next slot tokenize() writes to. */
 int tokenIndex = 0;
 /* Index of the token the parser is currently looking at. */
 int currentToken = 0;

 void tokenize(const char *input) {
    const char *p = input;
    while (*p) {
        if (isspace(*p)) {
            p++;
        } else if (isdigit(*p)) {
            tokens[tokenIndex].type = TOKEN_NUMBER;
            int len = 0;
            while (isdigit(*p)) {
                tokens[tokenIndex].value[len++] = *p++;
            }
            tokens[tokenIndex].value[len] = '\0';
            tokenIndex++;
        } else if (isalpha(*p)) {
            int len = 0;
            while (isalnum(*p)) {
                tokens[tokenIndex].value[len++] = *p++;
            }
            tokens[tokenIndex].value[len] = '\0';
            if (strcmp(tokens[tokenIndex].value, "print") == 0) {
                tokens[tokenIndex].type = TOKEN_PRINT;
            } else if (strcmp(tokens[tokenIndex].value, "return") == 0) {
                tokens[tokenIndex].type = TOKEN_RETURN;
            } else if (strcmp(tokens[tokenIndex].value, "function") == 0) {
                tokens[tokenIndex].type = TOKEN_FUNCTION;
            } else {
                tokens[tokenIndex].type = TOKEN_IDENTIFIER;
            }
            tokenIndex++;
        } else if (*p == '<' && *(p+1) == '-') {
            tokens[tokenIndex].type = TOKEN_ASSIGN;
            strcpy(tokens[tokenIndex].value, "<-");
            tokenIndex++;
            p += 2;
        } else if (*p == '+') {
            tokens[tokenIndex].type = TOKEN_ADD;
            strcpy(tokens[tokenIndex].value, "+");
            tokenIndex++;
            p++;
        } else if (*p == '-') {
            tokens[tokenIndex].type = TOKEN_SUBTRACT;
            strcpy(tokens[tokenIndex].value, "-");
            tokenIndex++;
            p++;
        } else if (*p == '*') {
            tokens[tokenIndex].type = TOKEN_MULTIPLY;
            strcpy(tokens[tokenIndex].value, "*");
            tokenIndex++;
            p++;
        } else if (*p == '/') {
            tokens[tokenIndex].type = TOKEN_DIVIDE;
            strcpy(tokens[tokenIndex].value, "/");
            tokenIndex++;
            p++;
        } else if (*p == '(') {
            tokens[tokenIndex].type = TOKEN_LPAREN;
            strcpy(tokens[tokenIndex].value, "(");
            tokenIndex++;
            p++;
        } else if (*p == ')') {
            tokens[tokenIndex].type = TOKEN_RPAREN;
            strcpy(tokens[tokenIndex].value, ")");
            tokenIndex++;
            p++;
        } else {
            printf("Unexpected character: %c\n", *p);
            exit(1);
        }
    }
    tokens[tokenIndex].type = TOKEN_EOF;
 }

 #include <stdio.h>

 /*
  * Forward declarations for the parser functions. parseExpression, parseTerm
  * and parseFactor call each other, so all of them must be declared before
  * the first definition that uses them.
  */
 void parseProgram(void);
 void parseStatement(void);
 void parseExpression(void);
 void parseTerm(void);
 void parseFactor(void);

 /* Translates statements one after another until the TOKEN_EOF marker is reached. */
 void parseProgram() {
    for (;;) {
        if (tokens[currentToken].type == TOKEN_EOF) {
            return;
        }
        parseStatement();
    }
 }

 /*
  * Translates one statement starting at tokens[currentToken] and prints the
  * resulting text to stdout:
  *   identifier <- expr   ->  "identifier = expr;"
  *   print expr           ->  "printf("%d\n", expr);"
  *   return expr          ->  "return expr;"
  * Advances currentToken past the statement. Any other leading token, or an
  * identifier that is not followed by "<-", prints a message and exits with
  * status 1.
  */
 void parseStatement() {
    switch (tokens[currentToken].type) {
    case TOKEN_IDENTIFIER:
        /* Check for "<-" before printing anything, so a bare identifier is
         * rejected instead of leaving a dangling "name = " in the output. */
        if (tokens[currentToken + 1].type != TOKEN_ASSIGN) {
            printf("Expected '<-' after identifier '%s'.\n",
                   tokens[currentToken].value);
            exit(1);
        }
        printf("%s = ", tokens[currentToken].value);
        currentToken += 2; // consume identifier and <-
        parseExpression();
        printf(";\n");
        break;
    case TOKEN_PRINT:
        currentToken++; // consume print
        printf("printf(\"%%d\\n\", ");
        parseExpression();
        printf(");\n");
        break;
    case TOKEN_RETURN:
        currentToken++; // consume return
        printf("return ");
        parseExpression();
        printf(";\n");
        break;
    default:
        printf("Unexpected statement.\n");
        exit(1);
    }
 }

 void parseExpression() {
    parseTerm();
    while (tokens[currentToken].type == TOKEN_ADD || tokens[currentToken].type == TOKEN_SUBTRACT) {
        if (tokens[currentToken].type == TOKEN_ADD) {
            printf(" + ");
        } else if (tokens[currentToken].type == TOKEN_SUBTRACT) {
            printf(" - ");
        }
        currentToken++; // consume + or -
        parseTerm();
    }
 }

 void parseTerm() {
    parseFactor();
    while (tokens[currentToken].type == TOKEN_MULTIPLY || tokens[currentToken].type == TOKEN_DIVIDE) {
        if (tokens[currentToken].type == TOKEN_MULTIPLY) {
            printf(" * ");
        } else if (tokens[currentToken].type == TOKEN_DIVIDE) {
            printf(" / ");
        }
        currentToken++; // consume * or /
        parseFactor();
    }
 }

 void parseFactor() {
    if (tokens[currentToken].type == TOKEN_NUMBER) {
        printf("%s", tokens[currentToken].value);
        currentToken++; // consume number
    } else if (tokens[currentToken].type == TOKEN_IDENTIFIER) {
        printf("%s", tokens[currentToken].value);
        currentToken++; // consume identifier
    } else if (tokens[currentToken].type == TOKEN_LPAREN) {
        currentToken++; // consume (
        printf("(");
        parseExpression();
        if (tokens[currentToken].type == TOKEN_RPAREN) {
            printf(")");
            currentToken++; // consume )
        } else {
            printf("Expected closing parenthesis.\n");
            exit(1);
        }
    } else {
        printf("Unexpected factor.\n");
        exit(1);
    }
 }


 /* Tokenizes a fixed three-line sample program and prints its translation. */
 int main() {
    static const char mlProgram[] =
        "x <- 8\n"
        "y <- 3\n"
        "print x * y\n";

    tokenize(mlProgram);
    parseProgram();
    return 0;
 }
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <ctype.h>

	/* Capacity of the global tokens[] array (includes the slot used for TOKEN_EOF). */
	#define MAX_TOKENS 1000
	/* Size in bytes of each Token.value buffer, terminating NUL included. */
	#define MAX_TOKEN_LENGTH 100

	/* Kinds of lexical token the tokenizer can produce. */
	typedef enum {
	    TOKEN_IDENTIFIER = 0, /* alphanumeric word that is not a keyword */
	    TOKEN_NUMBER,         /* run of decimal digits */
	    TOKEN_ASSIGN,         /* "<-" */
	    TOKEN_PRINT,          /* keyword "print" */
	    TOKEN_RETURN,         /* keyword "return" */
	    TOKEN_MULTIPLY,       /* "*" */
	    TOKEN_ADD,            /* "+" */
	    TOKEN_SUBTRACT,       /* "-" */
	    TOKEN_DIVIDE,         /* "/" */
	    TOKEN_LPAREN,         /* "(" */
	    TOKEN_RPAREN,         /* ")" */
	    TOKEN_FUNCTION,       /* keyword "function" */
	    TOKEN_EOF             /* marker stored after the last real token */
	} TokenType;

	/* One lexical token: its kind plus the source text it was read from. */
	typedef struct {
	/* which kind of token this is */
	TokenType type;
	/* NUL-terminated lexeme text */
	char value[MAX_TOKEN_LENGTH];
	} Token;

	/* Token table filled by tokenize() and read by the parse* functions. */
	Token tokens[MAX_TOKENS];
	/* Index of the next slot tokenize() writes to. */
	int tokenIndex = 0;
	/* Index of the token the parser is currently looking at. */
	int currentToken = 0;

	void tokenize(const char *input) {
	const char *p = input;
	while (*p) {
	if (isspace(*p)) {
	p++;
	} else if (isdigit(*p)) {
	tokens[tokenIndex].type = TOKEN_NUMBER;
	int len = 0;
	while (isdigit(*p)) {
	tokens[tokenIndex].value[len++] = *p++;
	}
	tokens[tokenIndex].value[len] = '\0';
	tokenIndex++;
	} else if (isalpha(*p)) {
	int len = 0;
	while (isalnum(*p)) {
	tokens[tokenIndex].value[len++] = *p++;
	}
	tokens[tokenIndex].value[len] = '\0';
	if (strcmp(tokens[tokenIndex].value, "print") == 0) {
	tokens[tokenIndex].type = TOKEN_PRINT;
	} else if (strcmp(tokens[tokenIndex].value, "return") == 0) {
	tokens[tokenIndex].type = TOKEN_RETURN;
	} else if (strcmp(tokens[tokenIndex].value, "function") == 0) {
	tokens[tokenIndex].type = TOKEN_FUNCTION;
	} else {
	tokens[tokenIndex].type = TOKEN_IDENTIFIER;
	}
	tokenIndex++;
	} else if (p == '<' && (p+1) == '-') {
	tokens[tokenIndex].type = TOKEN_ASSIGN;
	strcpy(tokens[tokenIndex].value, "<-");
	tokenIndex++;
	p += 2;
	} else if (*p == '+') {
	tokens[tokenIndex].type = TOKEN_ADD;
	strcpy(tokens[tokenIndex].value, "+");
	tokenIndex++;
	p++;
	} else if (*p == '-') {
	tokens[tokenIndex].type = TOKEN_SUBTRACT;
	strcpy(tokens[tokenIndex].value, "-");
	tokenIndex++;
	p++;
	} else if (p == '') {
	tokens[tokenIndex].type = TOKEN_MULTIPLY;
	strcpy(tokens[tokenIndex].value, "*");
	tokenIndex++;
	p++;
	} else if (*p == '/') {
	tokens[tokenIndex].type = TOKEN_DIVIDE;
	strcpy(tokens[tokenIndex].value, "/");
	tokenIndex++;
	p++;
	} else if (*p == '(') {
	tokens[tokenIndex].type = TOKEN_LPAREN;
	strcpy(tokens[tokenIndex].value, "(");
	tokenIndex++;
	p++;
	} else if (*p == ')') {
	tokens[tokenIndex].type = TOKEN_RPAREN;
	strcpy(tokens[tokenIndex].value, ")");
	tokenIndex++;
	p++;
	} else {
	printf("Unexpected character: %c\n", *p);
	exit(1);
	}
	}
	tokens[tokenIndex].type = TOKEN_EOF;
	}

	#include <stdio.h>

	/*
	 * Forward declarations for the parser functions. parseExpression, parseTerm
	 * and parseFactor call each other, so all of them must be declared before
	 * the first definition that uses them.
	 */
	void parseProgram(void);
	void parseStatement(void);
	void parseExpression(void);
	void parseTerm(void);
	void parseFactor(void);

	/* Translates statements one after another until the TOKEN_EOF marker is reached. */
	void parseProgram() {
	    for (;;) {
	        if (tokens[currentToken].type == TOKEN_EOF) {
	            return;
	        }
	        parseStatement();
	    }
	}

	/*
	 * Translates one statement starting at tokens[currentToken] and prints the
	 * resulting text to stdout:
	 *   identifier <- expr   ->  "identifier = expr;"
	 *   print expr           ->  "printf("%d\n", expr);"
	 *   return expr          ->  "return expr;"
	 * Advances currentToken past the statement. Any other leading token, or an
	 * identifier that is not followed by "<-", prints a message and exits with
	 * status 1.
	 */
	void parseStatement() {
	    switch (tokens[currentToken].type) {
	    case TOKEN_IDENTIFIER:
	        /* Check for "<-" before printing anything, so a bare identifier is
	         * rejected instead of leaving a dangling "name = " in the output. */
	        if (tokens[currentToken + 1].type != TOKEN_ASSIGN) {
	            printf("Expected '<-' after identifier '%s'.\n",
	                   tokens[currentToken].value);
	            exit(1);
	        }
	        printf("%s = ", tokens[currentToken].value);
	        currentToken += 2; // consume identifier and <-
	        parseExpression();
	        printf(";\n");
	        break;
	    case TOKEN_PRINT:
	        currentToken++; // consume print
	        printf("printf(\"%%d\\n\", ");
	        parseExpression();
	        printf(");\n");
	        break;
	    case TOKEN_RETURN:
	        currentToken++; // consume return
	        printf("return ");
	        parseExpression();
	        printf(";\n");
	        break;
	    default:
	        printf("Unexpected statement.\n");
	        exit(1);
	    }
	}

	/*
	 * Translates a sequence of terms joined by '+' or '-', printing each
	 * operator as " + " or " - " between the terms.
	 */
	void parseExpression() {
	    parseTerm();
	    while (tokens[currentToken].type == TOKEN_ADD ||
	           tokens[currentToken].type == TOKEN_SUBTRACT) {
	        if (tokens[currentToken].type == TOKEN_ADD) {
	            printf(" + ");
	        } else {
	            printf(" - ");
	        }
	        currentToken++; // consume + or -
	        parseTerm();
	    }
	}

	/*
	 * Translates a sequence of factors joined by '*' or '/', printing each
	 * operator as " * " or " / " between the factors.
	 */
	void parseTerm() {
	    parseFactor();
	    while (tokens[currentToken].type == TOKEN_MULTIPLY ||
	           tokens[currentToken].type == TOKEN_DIVIDE) {
	        if (tokens[currentToken].type == TOKEN_MULTIPLY) {
	            printf(" * ");
	        } else {
	            printf(" / ");
	        }
	        currentToken++; // consume * or /
	        parseFactor();
	    }
	}

	void parseFactor() {
	if (tokens[currentToken].type == TOKEN_NUMBER) {
	printf("%s", tokens[currentToken].value);
	currentToken++; // consume number
	} else if (tokens[currentToken].type == TOKEN_IDENTIFIER) {
	printf("%s", tokens[currentToken].value);
	currentToken++; // consume identifier
	} else if (tokens[currentToken].type == TOKEN_LPAREN) {
	currentToken++; // consume (
	printf("(");
	parseExpression();
	if (tokens[currentToken].type == TOKEN_RPAREN) {
	printf(")");
	currentToken++; // consume )
	} else {
	printf("Expected closing parenthesis.\n");
	exit(1);
	}
	} else {
	printf("Unexpected factor.\n");
	exit(1);
	}
	}


	/* Tokenizes a fixed three-line sample program and prints its translation. */
	int main() {
	    static const char mlProgram[] =
	        "x <- 8\n"
	        "y <- 3\n"
	        "print x * y\n";

	    tokenize(mlProgram);
	    parseProgram();
	    return 0;
	}