/*
 * Source: GitHub Gist by @Chubek
 * (gist ID da5524d9c39b17f9e7114c8a9d3b1614), created August 24, 2025 22:07.
 */
#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <uchar.h>
/* 32-bit FNV parameters used by fnv1a_hash32: multiplier and initial state. */
#define FNV_32_PRIME 0x1000193U
#define FNV_32_OFFSET_BASIS 0x811c9dc5U
/* Character under the read cursor and the one right after it.  Neither macro
   checks INPTR against INLEN; the caller must know the index is in range. */
#define TOP_CHAR (INPUT_STREAM[INPTR])
#define PEEK_CHAR (INPUT_STREAM[INPTR + 1])
/* Lower-cased look-ahead character.  The <ctype.h> functions are only defined
   for EOF and values representable as unsigned char, so a plain char (which
   may be negative) is converted first. */
#define PEEK_LOWER (tolower ((unsigned char)PEEK_CHAR))
/* Number of Token_t entries scan_program allocates for TOKEN_STREAM. */
#define NUM_TOKEN_SLOTS (1 << 14)
/* Body of the token record (typedef'd as Token_t).
     toktype  token class; the insert_* helpers store TOK_* constants here.
     lexeme   the token's payload, held by value.  Every writer in this file
              assigns through `.lexeme.as_*`, which needs a writable Value_t
              member rather than a pointer to a const one. */
#define Token \
  struct \
  { \
    int toktype; \
    Value_t lexeme; \
  }
/* Body of one symbol-table entry (typedef'd as Symbol_t).
     key       entry name; scan_program treats a NULL key as an unused slot.
     value     payload attached to the key.  It is not const-qualified because
               scan_program assigns `value.as_ident` / `value.as_string` on
               entries it creates.
     reserved  true for entries scan_program reports as keywords. */
#define Symbol \
  struct \
  { \
    const char *key; \
    Value_t value; \
    bool reserved; \
  }
/* Body of the value record (typedef'd as Value_t): a `type` tag followed by
   an anonymous union carrying the payload, so members are reached directly
   as `v.as_integer`, `v.as_string`, and so on.
   NOTE(review): the tag list has VAL_Rational while the union member is
   `as_real`, and `as_kw` has no tag of its own -- confirm the intended
   tag-to-member mapping.  Keyword_t is not declared in the visible part of
   this file. */
#define Value \
  struct \
  { \
    enum \
    { \
      VAL_String, \
      VAL_Integer, \
      VAL_Rational, \
      VAL_Char, \
      VAL_Id, \
    } type; \
    union \
    { \
      intmax_t as_integer; \
      double as_real; \
      const char32_t *as_string; \
      char32_t as_char; \
      const char *as_ident; \
      Keyword_t as_kw; \
    }; \
  }
/* Concrete types produced by expanding the Value, Token and Symbol macros.
   The order matters: the Token and Symbol bodies name Value_t, and the macros
   are expanded here, at their point of use, so Value_t must be declared
   before the other two typedefs. */
typedef Value Value_t;
typedef Token Token_t;
typedef Symbol Symbol_t;
/* Scanner state, private to this translation unit.  Objects with static
   storage duration start out as NULL / 0 without explicit initializers. */

/* Token output buffer; scan_program allocates NUM_TOKEN_SLOTS entries. */
static Token_t *TOKEN_STREAM;
/* Symbol table; scan_program passes its address to init_symtbl. */
static Symbol_t *SYMBOL_TABLE;
/* Text being scanned; scan_program passes its address to handle_input. */
static const char *INPUT_STREAM;
/* INLEN  upper bound for INPTR, passed by address to handle_input.
   SYMSZ  passed by address to init_symtbl (presumably the table's slot
          count -- confirm against init_symtbl).
   TSPTR  index of the next TOKEN_STREAM slot the insert_* helpers write.
   INPTR  read cursor into INPUT_STREAM. */
static unsigned INLEN, SYMSZ, TSPTR, INPTR;
static inline uint32_t
fnv1a_hash32 (const char *sym)
{
uint32_t h = FNV_32_OFFSET_BASIS;
char c = 0;
while ((c = *sym++))
{
h ^= c;
h *= FNV_32_PRIME;
}
return h % SYMSZE;
}
static inline void
insert_str_token (const char32_t *lexeme)
{
TOKEN_STREAM[TSPTR].toktype = TOK_String;
TOKEN_STREAM[TSPTR].lexeme.as_string = lexeme;
TSPTR++;
}
static inline void
insert_int_token (intmax_t lexeme)
{
TOKEN_STREAM[TSPTR].toktype = TOK_Integer;
TOKEN_STREAM[TSPTR].lexeme.as_integer = lexeme;
TSPTR++;
}
static inline void
insert_char_token (char32_t lexeme)
{
TOKEN_STREAM[TSPTR].toktype = TOK_Char;
TOKEN_STREAM[TSPTR].lexeme.as_char = lexeme;
TSPTR++;
}
static inline void
insert_real_token (double lexeme)
{
TOKEN_STREAM[TSPTR].toktype = TOK_Real;
TOKEN_STREAM[TSPTR].lexeme.as_real = lexeme;
TSPTR++;
}
static inline void
insert_ident_token (const char *lexeme)
{
TOKEN_STREAM[TSPTR].toktype = TOK_Ident;
TOKEN_STREAM[TSPTR].lexeme.as_ident = lexeme;
TSPTR++;
}
static inline void
insert_kw_token (Keyword_t lexeme)
{
TOKEN_STREAM[TSPTR].toktype = TOK_Keyword;
TOKEN_STREAM[TSPTR].lexeme.as_kw = lexeme;
TSPTR++;
}
void
cleanup_scanner (void)
{
free (TOKEN_STREAM);
free (INPUT_STREAM);
free (SYMBOL_TABLE);
}
void
scan_program (void)
{
atexit (cleanup_scanner);
TOKEN_STREAM = calloc (NUM_TOKEN_SLOTS, sizeof (Token_t));
init_symtbl (&SYMBOL_TABLE, &SYMSZ);
handle_input (&INPUT_STREAM, &INLEN);
TSPTR = 0;
INPTR = 0;
while (INPTR < INLEN && TOP_CHAR)
{
while (isblank (TOP_CHAR))
INPTR++;
if (isalpha (TOP_CHAR) || TOP_CHAR == '_')
{
const char *lexeme = lex_ident ();
uint32_t h = fnv1a_hash32 (lexeme);
if (SYMBOL_TABLE[h].reserved)
insert_kw_token (SYMBOL_TABLE[h].value.as_kw);
else if (SYMBOL_TABLE[h].key != NULL)
{
insert_ident_token (SYMBOL_TABLE[h].key);
free (lexeme);
}
else
{
SYMBOL_TABLE[h].key = lexeme;
SYMBOL_TABLE[h].value.as_ident = NULL;
SYMBOL_TABLE[h].reserved = false;
insert_ident_token (lexeme);
}
}
else if (isdigit (TOP_CHAR))
{
if (PEEK_LOWER == 'x')
{
INPTR += 2;
intmax_t lexeme = lex_hex ();
insert_int_token (lexeme);
}
else if (PEEK_LOWER == 'o')
{
INPTR += 2;
intmax_t lexeme = lex_oct ();
insert_int_token (lexeme);
}
else if (PEEK_LOWER == 'b')
{
INPTR += 2;
intmax_t lexeme = lex_bin ();
insert_int_token (lexeme);
}
else
{
const char *lexeme = lex_number ();
if (strchr (lexeme, '.') || strchr (lexeme, 'e')
|| strchr (lexeme, 'E'))
{
const double lexeme = lex_real ();
insert_real_token (lexeme);
}
else
{
intmax_t lexeme = lex_integer ();
insert_int_token (lexeme);
}
free (lexeme);
}
}
else if (TOP_CHAR == '\'')
{
INPTR++;
char32_t lexeme = lex_char ();
INPTR++;
insert_char_token (lexeme);
}
else if (TOP_CHAR == '"')
{
INPTR++;
const char32_t *lexeme = lex_string ();
INPTR++;
uint32_t h = fnv1a_hash32 ((const char *)lexeme);
if (SYMBOL_TALBE[h].key == NULL)
{
SYMBOL_TABLE[h].key = (const char *)lexeme;
SYMBOL_TABLE[h].value.as_string = lexeme;
SYMBOL_TABLE[h].reserved = false;
insert_str_token (lexeme);
}
else
{
insert_str_token (SYMBOL_TABLE[h].value.as_string);
free (lexeme);
}
}
}
}
/* Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment */