Skip to content

Instantly share code, notes, and snippets.

@jstimpfle
Created April 29, 2018 13:26
Show Gist options
  • Save jstimpfle/5ebaa88315ae804563c231b316448a70 to your computer and use it in GitHub Desktop.
Save jstimpfle/5ebaa88315ae804563c231b316448a70 to your computer and use it in GitHub Desktop.
Example lexer: possible starting point for language experiments
f
Got KIND_NAME token
f f
Got KIND_NAME token
Got KIND_NAME token
f f"ab c" x
Got KIND_NAME token
Got KIND_NAME token
Got KIND_STRING token
Got KIND_NAME token
#include <ctype.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
/* Boolean constants for the int-valued flags and predicates in this file. */
#define FALSE 0
#define TRUE 1
/*
 * Token kinds. The values double as indices into kindstring[], so
 * NUM_KINDS (the number of kinds, not a kind itself) must stay last.
 */
enum {
    KIND_NAME = 0,
    KIND_NUMBER,
    KIND_STRING,
    NUM_KINDS
};
const char *kindstring[NUM_KINDS] = {
#define MAKE(x) #x
MAKE(KIND_NAME),
MAKE(KIND_NUMBER),
MAKE(KIND_STRING),
};
/*
 * One lexed token: its kind, where it started in the input, and a
 * kind-dependent payload stored in the anonymous union.
 */
struct Token {
    int kind;   /* one of the KIND_* constants */
    int pos;    /* stream position of the token's first character */
    int line;   /* line of the token's first character */
    int col;    /* column of the token's first character */

    /* Only the member matching `kind` is meaningful. */
    union {
        /* KIND_NUMBER: decimal value */
        int number;

        /* KIND_NAME: characters (no NUL terminator) and their count */
        struct {
            char name[24];
            int namelen;
        };

        /* KIND_STRING: characters (no NUL terminator) and their count */
        struct {
            char string[24];
            int stringlen;
        };
    };
};
/*
 * Lexer input state. For now there is a single, global stream.
 * All of these are zero-initialized and set up by initstream().
 */
static FILE *fstream = NULL;  /* stream the characters are read from */
static int iseof = 0;         /* set to TRUE by take() once fgetc() reports EOF */
static int curchar = 0;       /* most recent character read by take() */
static int curpos = 0;        /* number of characters read so far */
static int curline = 0;       /* line counter maintained by take() */
static int curcol = 0;        /* column counter maintained by take() */
/*
 * Report a fatal lexing error and abort the program.
 *
 * Writes "At <line>:<col>: " using the current stream position, then the
 * printf-style message built from `msg` and the variadic arguments, then
 * a newline, all to stderr. Does not return.
 */
static void faillex(const char *msg, ...)
{
    va_list args;

    fprintf(stderr, "At %d:%d: ", curline, curcol);

    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);

    fprintf(stderr, "\n");
    abort();
}
static int endofstream(void)
{
return curchar == EOF;
}
/*
 * Advance the stream by one character.
 *
 * On a read error the lexer fails via faillex(). On end of input iseof is
 * set and the position and curchar are left untouched. Otherwise the
 * position counters are advanced and curchar becomes the new character.
 */
static void take(void)
{
    int next = fgetc(fstream);

    if (next == EOF) {
        if (ferror(fstream))
            faillex("I/O error while lexing");
        iseof = TRUE;
        return;
    }

    curpos++;
    /*
     * curchar still holds the previous character here: if that was a
     * newline, the character just read opens a new line.
     */
    if (curchar != '\n') {
        curcol++;
    } else {
        curline++;
        curcol = 1;
    }
    curchar = next;
}
static void initstream(void)
{
fstream = stdin;
curpos = 0;
curline = 1;
curcol = 1;
take(); //XXX
}
/*
 * Whitespace predicate used by the lexer to separate tokens.
 * Returns isspace()'s result unchanged: non-zero for whitespace, 0 otherwise.
 */
static int iswhitespace(int c)
{
    int verdict = isspace(c);

    return verdict;
}
/*
 * Read the next token from the global stream into *out.
 *
 * Leading whitespace is skipped. Recognised tokens:
 *   KIND_NAME    a run of alphabetic characters, stored in out->name
 *                with the count in out->namelen
 *   KIND_NUMBER  a run of decimal digits, accumulated into out->number
 *   KIND_STRING  the characters between two double quotes, stored in
 *                out->string with the count in out->stringlen; the
 *                quotes themselves are not part of the value
 *
 * out->pos, out->line and out->col are taken from the stream position at
 * the token's first character.
 *
 * Returns TRUE when a token was produced and FALSE at end of stream.
 * Lexical errors (invalid character, name or string longer than the token
 * buffer, number that does not fit in an int, unterminated string) are
 * reported through faillex(), which does not return.
 */
static int easylex(struct Token *out)
{
    int c;

    while (!endofstream() && iswhitespace(curchar))
        take();
    if (endofstream())
        return FALSE;

    c = curchar;
    out->pos = curpos;
    out->line = curline;
    out->col = curcol;

    if (isalpha(c)) {
        out->kind = KIND_NAME;
        out->namelen = 0;
        do {
            /* Refuse to write past the fixed-size token buffer. */
            if (out->namelen >= (int) sizeof out->name)
                faillex("Name too long (at most %d characters)",
                        (int) sizeof out->name);
            out->name[out->namelen++] = (char) c;
            take();
            c = curchar;
        } while (!endofstream() && isalpha(c));
    }
    else if (isdigit(c)) {
        out->kind = KIND_NUMBER;
        out->number = 0;
        do {
            int digit = c - '0';

            /* Check before multiplying: signed overflow is undefined. */
            if (out->number > (INT_MAX - digit) / 10)
                faillex("Number too large");
            out->number = out->number * 10 + digit;
            take();
            c = curchar;
        } while (!endofstream() && isdigit(c));
    }
    else if (c == '"') {
        out->kind = KIND_STRING;
        out->stringlen = 0;
        take(); /* skip the opening quote; it is not part of the value */
        while (!endofstream() && curchar != '"') {
            /* Refuse to write past the fixed-size token buffer. */
            if (out->stringlen >= (int) sizeof out->string)
                faillex("String too long (at most %d characters)",
                        (int) sizeof out->string);
            out->string[out->stringlen++] = (char) curchar;
            take();
        }
        if (endofstream())
            faillex("Unexpected end of stream: Expected "
                    "closing '\"' character");
        take(); /* skip the closing quote */
    }
    else {
        faillex("Invalid token starting with '%c'", c);
    }
    return TRUE;
}
int main(void)
{
struct Token tok;
initstream();
while (easylex(&tok)) {
printf("Got %s token\n", kindstring[tok.kind]);
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment