Created
April 29, 2018 13:26
-
-
Save jstimpfle/5ebaa88315ae804563c231b316448a70 to your computer and use it in GitHub Desktop.
Example lexer: possible starting point for language experiments
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
f
Got KIND_NAME token
f f
Got KIND_NAME token
Got KIND_NAME token
f f"ab c" x
Got KIND_NAME token
Got KIND_NAME token
Got KIND_STRING token
Got KIND_NAME token
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <ctype.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
/* Boolean constants used for the lexer's flags and yes/no return values. */
#define FALSE 0
#define TRUE 1
/*
 * Token kinds produced by the lexer.  NUM_KINDS is not a kind itself: it
 * counts the kinds listed before it and sizes the kindstring table.
 */
enum {
    KIND_NAME,
    KIND_NUMBER,
    KIND_STRING,
    NUM_KINDS,
};

/*
 * Printable name of each token kind, indexed by the KIND_* value.
 *
 * Designated initializers bind every string to its own enumerator, so
 * reordering the enum above cannot silently mislabel a kind.  The helper
 * macro is undefined right after use so it does not leak into the rest of
 * the file.
 */
const char *kindstring[NUM_KINDS] = {
#define KINDSTRING_ENTRY(x) [x] = #x
    KINDSTRING_ENTRY(KIND_NAME),
    KINDSTRING_ENTRY(KIND_NUMBER),
    KINDSTRING_ENTRY(KIND_STRING),
#undef KINDSTRING_ENTRY
};
/* Capacity, in bytes, of the inline text buffer of name and string tokens. */
enum { TOKEN_TEXT_CAP = 24 };

/*
 * One lexed token.
 *
 * `kind` is a KIND_* value and selects which member of the anonymous union
 * carries the payload.  `pos`, `line` and `col` record the stream position
 * at which the token starts.
 */
struct Token {
    int kind;
    int pos;
    int line;
    int col;
    union {
        /* KIND_NUMBER: value of the decimal literal. */
        int number;
        /* KIND_NAME: `namelen` bytes of text, not NUL-terminated. */
        struct {
            char name[TOKEN_TEXT_CAP];
            int namelen;
        };
        /* KIND_STRING: `stringlen` bytes of text, not NUL-terminated. */
        struct {
            char string[TOKEN_TEXT_CAP];
            int stringlen;
        };
    };
};
/* Lexer input state.  For now there is a single, global stream. */
static FILE *fstream;   /* stream the characters are read from */
static int curchar;     /* most recently read character */
static int iseof;       /* set to TRUE once the stream reports end of input */
static int curpos;      /* number of characters read so far */
static int curline;     /* line counter, starts at 1 */
static int curcol;      /* column counter within the current line */
static void faillex(const char *msg, ...) | |
{ | |
va_list ap; | |
va_start(ap, msg); | |
fprintf(stderr, "At %d:%d: ", curline, curcol); | |
vfprintf(stderr, msg, ap); | |
fprintf(stderr, "\n"); | |
va_end(ap); | |
abort(); | |
} | |
static int endofstream(void) | |
{ | |
return curchar == EOF; | |
} | |
static void take(void) | |
{ | |
int c = fgetc(fstream); | |
if (c == EOF && ferror(fstream)) | |
faillex("I/O error while lexing"); | |
if (c == EOF) | |
iseof = TRUE; | |
else { | |
curpos++; | |
if (curchar == '\n') { | |
curline++; | |
curcol = 1; | |
} else { | |
curcol++; | |
} | |
curchar = c; | |
} | |
} | |
static void initstream(void) | |
{ | |
fstream = stdin; | |
curpos = 0; | |
curline = 1; | |
curcol = 1; | |
take(); //XXX | |
} | |
/*
 * Whitespace predicate used by the lexer; defers entirely to isspace().
 *
 * Returns nonzero when c is a whitespace character, 0 otherwise.
 */
static int iswhitespace(int c)
{
    int verdict = isspace(c);

    return verdict;
}
static int easylex(struct Token *out) | |
{ | |
int c; | |
while (!endofstream() && iswhitespace(c = curchar)) | |
take(); | |
if (endofstream()) | |
return FALSE; | |
out->pos = curpos; | |
out->line = curline; | |
out->col = curcol; | |
if (isalpha(c)) { | |
out->kind = KIND_NAME; | |
out->namelen = 0; | |
for (;;) { | |
out->name[out->namelen++] = c; | |
take(); | |
if (endofstream() || !isalpha(c = curchar)) | |
break; | |
} | |
} | |
else if (isdigit(c)) { | |
out->kind = KIND_NUMBER; | |
out->number = c - '0'; | |
for (;;) { | |
take(); | |
if (endofstream() || !isdigit((c = curchar))) | |
break; | |
out->number = out->number * 10 + c - '0'; | |
} | |
} | |
else if (c == '"') { | |
out->kind = KIND_STRING; | |
out->stringlen = 0; | |
for (;;) { | |
out->string[out->stringlen++] = c; | |
take(); | |
if (endofstream() || (c = curchar) == '"') | |
break; | |
} | |
if (endofstream()) | |
faillex("Unexpected end of stream: Expected " | |
"closing '\"' character"); | |
take(); | |
} | |
else { | |
faillex("Invalid token starting with '%c'", c); | |
} | |
return TRUE; | |
} | |
int main(void) | |
{ | |
struct Token tok; | |
initstream(); | |
while (easylex(&tok)) { | |
printf("Got %s token\n", kindstring[tok.kind]); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment