Skip to content

Instantly share code, notes, and snippets.

@mrcrilly
Created September 4, 2024 01:57
Show Gist options
  • Save mrcrilly/65d48c9f4a6db204cf25628d82f8d3aa to your computer and use it in GitHub Desktop.
Save mrcrilly/65d48c9f4a6db204cf25628d82f8d3aa to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// Capacity limits of the token table.
#define MAX_TOKENS 1000      // slots in tokens[], including the TOKEN_EOF terminator
#define MAX_TOKEN_LENGTH 100 // bytes per token text, including the trailing '\0'

// Kinds of token produced by tokenize().
typedef enum {
    TOKEN_IDENTIFIER, // a letter followed by letters/digits that is not a keyword
    TOKEN_NUMBER,     // a run of decimal digits
    TOKEN_ASSIGN,     // "<-"
    TOKEN_PRINT,      // keyword "print"
    TOKEN_RETURN,     // keyword "return"
    TOKEN_MULTIPLY,   // "*"
    TOKEN_ADD,        // "+"
    TOKEN_SUBTRACT,   // "-"
    TOKEN_DIVIDE,     // "/"
    TOKEN_LPAREN,     // "("
    TOKEN_RPAREN,     // ")"
    TOKEN_FUNCTION,   // keyword "function"
    TOKEN_EOF         // marker stored after the last real token
} TokenType;

// One token: its kind plus the NUL-terminated source text it was read from.
typedef struct {
    TokenType type;
    char value[MAX_TOKEN_LENGTH];
} Token;

Token tokens[MAX_TOKENS];
int tokenIndex = 0;   // next free slot in tokens[]; advanced by tokenize()
int currentToken = 0; // read cursor into tokens[]; not modified by tokenize()

// Appends one token of kind `type` whose text is the `len` bytes starting at
// `text`. Prints a message and exits with status 1 when the table is full or
// the text does not fit, instead of writing past the end of either array.
static void appendToken(TokenType type, const char *text, size_t len) {
    // The last slot is kept free so tokenize() can always store TOKEN_EOF.
    if (tokenIndex >= MAX_TOKENS - 1) {
        printf("Too many tokens (limit %d).\n", MAX_TOKENS - 1);
        exit(1);
    }
    if (len >= MAX_TOKEN_LENGTH) {
        printf("Token too long (limit %d characters).\n", MAX_TOKEN_LENGTH - 1);
        exit(1);
    }
    Token *slot = &tokens[tokenIndex++];
    slot->type = type;
    memcpy(slot->value, text, len);
    slot->value[len] = '\0';
}

// Maps a NUL-terminated word to its keyword token kind, or to
// TOKEN_IDENTIFIER when it is not one of the three keywords.
static TokenType classifyWord(const char *word) {
    if (strcmp(word, "print") == 0) {
        return TOKEN_PRINT;
    }
    if (strcmp(word, "return") == 0) {
        return TOKEN_RETURN;
    }
    if (strcmp(word, "function") == 0) {
        return TOKEN_FUNCTION;
    }
    return TOKEN_IDENTIFIER;
}

// Splits the NUL-terminated string `input` into tokens, appending them to
// tokens[] starting at tokenIndex, and stores a TOKEN_EOF marker in the slot
// after the last token (tokenIndex is left pointing at that marker).
//
// Whitespace is skipped. Recognised tokens are digit runs, alphanumeric words
// starting with a letter, "<-", and the single characters + - * / ( ).
// Any other character, an over-long token, or a full token table prints a
// message to stdout and terminates the process with exit status 1.
void tokenize(const char *input) {
    const char *p = input;
    while (*p) {
        // <ctype.h> classifiers are only defined for unsigned char values
        // (or EOF), so a possibly negative plain char must be converted first.
        unsigned char c = (unsigned char)*p;
        if (isspace(c)) {
            p++;
        } else if (isdigit(c)) {
            const char *start = p;
            while (isdigit((unsigned char)*p)) {
                p++;
            }
            appendToken(TOKEN_NUMBER, start, (size_t)(p - start));
        } else if (isalpha(c)) {
            const char *start = p;
            while (isalnum((unsigned char)*p)) {
                p++;
            }
            appendToken(TOKEN_IDENTIFIER, start, (size_t)(p - start));
            // Classify using the stored copy, which is NUL-terminated.
            Token *word = &tokens[tokenIndex - 1];
            word->type = classifyWord(word->value);
        } else if (p[0] == '<' && p[1] == '-') {
            // Must be tested before the lone '-' case below.
            appendToken(TOKEN_ASSIGN, p, 2);
            p += 2;
        } else {
            TokenType type;
            switch (*p) {
            case '+': type = TOKEN_ADD; break;
            case '-': type = TOKEN_SUBTRACT; break;
            case '*': type = TOKEN_MULTIPLY; break;
            case '/': type = TOKEN_DIVIDE; break;
            case '(': type = TOKEN_LPAREN; break;
            case ')': type = TOKEN_RPAREN; break;
            default:
                printf("Unexpected character: %c\n", *p);
                exit(1);
            }
            appendToken(type, p, 1);
            p++;
        }
    }
    // appendToken() never hands out the last slot, so this index is in range.
    tokens[tokenIndex].type = TOKEN_EOF;
    tokens[tokenIndex].value[0] = '\0';
}
#include <stdio.h>
// Forward declarations for the parser functions defined below.
// parseExpression() calls parseTerm() and parseTerm() calls parseFactor()
// before either is defined, so both need a declaration here as well;
// C99 and later do not allow calling an undeclared function.
void parseProgram(void);
void parseStatement(void);
void parseExpression(void);
void parseTerm(void);
void parseFactor(void);
// Translates the whole token stream: parses one statement after another and
// stops once the cursor rests on the TOKEN_EOF marker.
void parseProgram() {
    for (;;) {
        if (tokens[currentToken].type == TOKEN_EOF) {
            return;
        }
        parseStatement();
    }
}
// Parses one statement at the cursor and prints its C translation to stdout:
//   IDENT <- expr   ->  "IDENT = expr;\n"
//   print expr      ->  "printf(\"%d\\n\", expr);\n"
//   return expr     ->  "return expr;\n"
// Any other leading token, or an identifier that is not followed by "<-",
// prints a message and terminates the process with exit status 1.
void parseStatement() {
    switch (tokens[currentToken].type) {
    case TOKEN_IDENTIFIER: {
        const char *name = tokens[currentToken].value;
        currentToken++; // consume identifier
        // Check for "<-" before emitting anything, so a bare identifier is
        // reported as an error instead of leaving a dangling "name = ".
        if (tokens[currentToken].type != TOKEN_ASSIGN) {
            printf("Expected '<-' after identifier '%s'.\n", name);
            exit(1);
        }
        currentToken++; // consume <-
        printf("%s = ", name);
        parseExpression();
        printf(";\n");
        break;
    }
    case TOKEN_PRINT:
        currentToken++; // consume print
        printf("printf(\"%%d\\n\", ");
        parseExpression();
        printf(");\n");
        break;
    case TOKEN_RETURN:
        currentToken++; // consume return
        printf("return ");
        parseExpression();
        printf(";\n");
        break;
    default:
        printf("Unexpected statement.\n");
        exit(1);
    }
}
// Parses  term { ("+" | "-") term }  and prints it to stdout, writing each
// operator with a space on either side.
void parseExpression() {
    parseTerm();
    for (;;) {
        if (tokens[currentToken].type == TOKEN_ADD) {
            fputs(" + ", stdout);
        } else if (tokens[currentToken].type == TOKEN_SUBTRACT) {
            fputs(" - ", stdout);
        } else {
            return; // no further additive operator
        }
        currentToken++; // step past the operator
        parseTerm();
    }
}
// Parses  factor { ("*" | "/") factor }  and prints it to stdout, writing each
// operator with a space on either side.
void parseTerm() {
    parseFactor();
    for (;;) {
        if (tokens[currentToken].type == TOKEN_MULTIPLY) {
            fputs(" * ", stdout);
        } else if (tokens[currentToken].type == TOKEN_DIVIDE) {
            fputs(" / ", stdout);
        } else {
            return; // no further multiplicative operator
        }
        currentToken++; // step past the operator
        parseFactor();
    }
}
// Parses a single factor and prints it to stdout: a number or identifier is
// echoed verbatim, and "(" expression ")" is echoed with its parentheses.
// A missing ")" or any other token prints a message and exits with status 1.
void parseFactor() {
    switch (tokens[currentToken].type) {
    case TOKEN_NUMBER:
    case TOKEN_IDENTIFIER:
        // Both kinds are emitted as their source text, unchanged.
        fputs(tokens[currentToken].value, stdout);
        currentToken++;
        return;
    case TOKEN_LPAREN:
        currentToken++; // step past (
        printf("(");
        parseExpression();
        if (tokens[currentToken].type != TOKEN_RPAREN) {
            printf("Expected closing parenthesis.\n");
            exit(1);
        }
        printf(")");
        currentToken++; // step past )
        return;
    default:
        printf("Unexpected factor.\n");
        exit(1);
    }
}
// Entry point: tokenizes a small built-in sample program and then parses it,
// which prints the translated statements to stdout.
int main() {
    static const char sample[] =
        "x <- 8\n"
        "y <- 3\n"
        "print x * y\n";

    tokenize(sample);
    parseProgram();
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment