Created
September 4, 2024 01:57
-
-
Save mrcrilly/65d48c9f4a6db204cf25628d82f8d3aa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Number of Token entries in the global tokens array. */
#define MAX_TOKENS 1000
/* Size in bytes of a token's text buffer, terminating NUL included. */
#define MAX_TOKEN_LENGTH 100
/* Kinds of lexical token produced by the tokenizer. */
typedef enum {
    TOKEN_IDENTIFIER, /* alphanumeric word that is not a keyword */
    TOKEN_NUMBER,     /* run of decimal digits */
    TOKEN_ASSIGN,     /* "<-" */
    TOKEN_PRINT,      /* keyword "print" */
    TOKEN_RETURN,     /* keyword "return" */
    TOKEN_MULTIPLY,   /* "*" */
    TOKEN_ADD,        /* "+" */
    TOKEN_SUBTRACT,   /* "-" */
    TOKEN_DIVIDE,     /* "/" */
    TOKEN_LPAREN,     /* "(" */
    TOKEN_RPAREN,     /* ")" */
    TOKEN_FUNCTION,   /* keyword "function" */
    TOKEN_EOF         /* sentinel stored after the last real token */
} TokenType;
typedef struct { | |
TokenType type; | |
char value[MAX_TOKEN_LENGTH]; | |
} Token; | |
/* Token stream shared by the tokenizer (writer) and the parser (reader). */
Token tokens[MAX_TOKENS];
/* Index of the next free entry in tokens; advanced by tokenize(). */
int tokenIndex = 0;
/* Index of the token the parser is currently examining. */
int currentToken = 0;
void tokenize(const char *input) { | |
const char *p = input; | |
while (*p) { | |
if (isspace(*p)) { | |
p++; | |
} else if (isdigit(*p)) { | |
tokens[tokenIndex].type = TOKEN_NUMBER; | |
int len = 0; | |
while (isdigit(*p)) { | |
tokens[tokenIndex].value[len++] = *p++; | |
} | |
tokens[tokenIndex].value[len] = '\0'; | |
tokenIndex++; | |
} else if (isalpha(*p)) { | |
int len = 0; | |
while (isalnum(*p)) { | |
tokens[tokenIndex].value[len++] = *p++; | |
} | |
tokens[tokenIndex].value[len] = '\0'; | |
if (strcmp(tokens[tokenIndex].value, "print") == 0) { | |
tokens[tokenIndex].type = TOKEN_PRINT; | |
} else if (strcmp(tokens[tokenIndex].value, "return") == 0) { | |
tokens[tokenIndex].type = TOKEN_RETURN; | |
} else if (strcmp(tokens[tokenIndex].value, "function") == 0) { | |
tokens[tokenIndex].type = TOKEN_FUNCTION; | |
} else { | |
tokens[tokenIndex].type = TOKEN_IDENTIFIER; | |
} | |
tokenIndex++; | |
} else if (*p == '<' && *(p+1) == '-') { | |
tokens[tokenIndex].type = TOKEN_ASSIGN; | |
strcpy(tokens[tokenIndex].value, "<-"); | |
tokenIndex++; | |
p += 2; | |
} else if (*p == '+') { | |
tokens[tokenIndex].type = TOKEN_ADD; | |
strcpy(tokens[tokenIndex].value, "+"); | |
tokenIndex++; | |
p++; | |
} else if (*p == '-') { | |
tokens[tokenIndex].type = TOKEN_SUBTRACT; | |
strcpy(tokens[tokenIndex].value, "-"); | |
tokenIndex++; | |
p++; | |
} else if (*p == '*') { | |
tokens[tokenIndex].type = TOKEN_MULTIPLY; | |
strcpy(tokens[tokenIndex].value, "*"); | |
tokenIndex++; | |
p++; | |
} else if (*p == '/') { | |
tokens[tokenIndex].type = TOKEN_DIVIDE; | |
strcpy(tokens[tokenIndex].value, "/"); | |
tokenIndex++; | |
p++; | |
} else if (*p == '(') { | |
tokens[tokenIndex].type = TOKEN_LPAREN; | |
strcpy(tokens[tokenIndex].value, "("); | |
tokenIndex++; | |
p++; | |
} else if (*p == ')') { | |
tokens[tokenIndex].type = TOKEN_RPAREN; | |
strcpy(tokens[tokenIndex].value, ")"); | |
tokenIndex++; | |
p++; | |
} else { | |
printf("Unexpected character: %c\n", *p); | |
exit(1); | |
} | |
} | |
tokens[tokenIndex].type = TOKEN_EOF; | |
} | |
#include <stdio.h>

/*
 * Forward declarations for the parser functions. They call each other
 * (parseFactor calls back into parseExpression for parenthesised
 * sub-expressions), so every one must be declared before first use.
 */
void parseProgram(void);
void parseStatement(void);
void parseExpression(void);
void parseTerm(void);
void parseFactor(void);
void parseProgram() { | |
while (tokens[currentToken].type != TOKEN_EOF) { | |
parseStatement(); | |
} | |
} | |
void parseStatement() { | |
if (tokens[currentToken].type == TOKEN_IDENTIFIER) { | |
printf("%s = ", tokens[currentToken].value); | |
currentToken++; // consume identifier | |
if (tokens[currentToken].type == TOKEN_ASSIGN) { | |
currentToken++; // consume <- | |
parseExpression(); | |
printf(";\n"); | |
} | |
} else if (tokens[currentToken].type == TOKEN_PRINT) { | |
currentToken++; // consume print | |
printf("printf(\"%%d\\n\", "); | |
parseExpression(); | |
printf(");\n"); | |
} else if (tokens[currentToken].type == TOKEN_RETURN) { | |
currentToken++; // consume return | |
printf("return "); | |
parseExpression(); | |
printf(";\n"); | |
} else { | |
printf("Unexpected statement.\n"); | |
exit(1); | |
} | |
} | |
void parseExpression() { | |
parseTerm(); | |
while (tokens[currentToken].type == TOKEN_ADD || tokens[currentToken].type == TOKEN_SUBTRACT) { | |
if (tokens[currentToken].type == TOKEN_ADD) { | |
printf(" + "); | |
} else if (tokens[currentToken].type == TOKEN_SUBTRACT) { | |
printf(" - "); | |
} | |
currentToken++; // consume + or - | |
parseTerm(); | |
} | |
} | |
void parseTerm() { | |
parseFactor(); | |
while (tokens[currentToken].type == TOKEN_MULTIPLY || tokens[currentToken].type == TOKEN_DIVIDE) { | |
if (tokens[currentToken].type == TOKEN_MULTIPLY) { | |
printf(" * "); | |
} else if (tokens[currentToken].type == TOKEN_DIVIDE) { | |
printf(" / "); | |
} | |
currentToken++; // consume * or / | |
parseFactor(); | |
} | |
} | |
void parseFactor() { | |
if (tokens[currentToken].type == TOKEN_NUMBER) { | |
printf("%s", tokens[currentToken].value); | |
currentToken++; // consume number | |
} else if (tokens[currentToken].type == TOKEN_IDENTIFIER) { | |
printf("%s", tokens[currentToken].value); | |
currentToken++; // consume identifier | |
} else if (tokens[currentToken].type == TOKEN_LPAREN) { | |
currentToken++; // consume ( | |
printf("("); | |
parseExpression(); | |
if (tokens[currentToken].type == TOKEN_RPAREN) { | |
printf(")"); | |
currentToken++; // consume ) | |
} else { | |
printf("Expected closing parenthesis.\n"); | |
exit(1); | |
} | |
} else { | |
printf("Unexpected factor.\n"); | |
exit(1); | |
} | |
} | |
/*
 * Demo driver: tokenizes a small hard-coded program (two assignments and a
 * print statement) and writes its C translation to stdout.
 */
int main(void) {
    const char *mlProgram =
        "x <- 8\n"
        "y <- 3\n"
        "print x * y\n";

    tokenize(mlProgram);
    parseProgram();
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment