Created
January 4, 2013 10:15
-
-
Save xjia1/4451478 to your computer and use it in GitHub Desktop.
Try parser combinators in C
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Try parser combinators in C | |
#include <stdio.h> | |
#include <stdlib.h> | |
struct token | |
{ | |
const char *text; | |
}; | |
struct token_list | |
{ | |
struct token *token; | |
struct token_list *next; | |
}; | |
struct parse_node | |
{ | |
enum { LIT, VAR, PAIR } type; | |
union { | |
struct { | |
const char *text; | |
} as_lit; | |
struct { | |
const char *text; | |
} as_var; | |
struct { | |
struct parse_node *fst; | |
struct parse_node *snd; | |
} as_pair; | |
}; | |
}; | |
struct parse_list | |
{ | |
struct parse_node *a; | |
struct token_list *t; | |
struct parse_list *next; | |
}; | |
typedef struct parse_list* (*parse_func)(struct token_list *, void *); | |
struct parser | |
{ | |
void *data; | |
parse_func parse; | |
}; | |
struct parse_list* | |
parse(struct parser *p, struct token_list *toks) | |
{ | |
return p->parse(toks, p->data); | |
} | |
struct parse_list* | |
pAlt(struct parser *p1, struct parser *p2, struct token_list *toks) | |
{ | |
struct parse_list *p = parse(p1, toks); | |
struct parse_list **q = &p; | |
while (*q) q = &((*q)->next); | |
*q = parse(p2, toks); | |
return p; | |
} | |
typedef struct parse_node* (*zipper)(struct parse_node *, struct parse_node *); | |
struct parse_list* | |
pThen(zipper combine, struct parser *p1, struct parser *p2, struct token_list *toks) | |
{ | |
struct parse_list *ret = NULL; | |
struct parse_list *p; | |
for (p = parse(p1, toks); p; p = p->next) | |
{ | |
struct parse_list *q; | |
for (q = parse(p2, p->t); q; q = q->next) | |
{ | |
struct parse_list *n = malloc(sizeof(struct parse_list)); | |
n->a = combine(p->a, q->a); | |
n->t = q->t; | |
n->next = ret; | |
ret = n; | |
} | |
} | |
return ret; | |
} | |
struct parse_list* | |
pLit(struct token_list *toks, const char *s) | |
{ | |
if (!toks) return NULL; | |
if (strcmp(toks->token->text, s)) return NULL; | |
struct parse_list *n = malloc(sizeof(struct parse_list)); | |
n->a = malloc(sizeof(struct parse_node)); | |
n->a->type = LIT; | |
n->a->as_lit.text = toks->token->text; | |
n->t = toks->next; | |
n->next = NULL; | |
return n; | |
} | |
struct parse_list* | |
pVar(struct token_list *toks, void *_) | |
{ | |
if (!toks) return NULL; | |
struct parse_list *n = malloc(sizeof(struct parse_list)); | |
n->a = malloc(sizeof(struct parse_node)); | |
n->a->type = VAR; | |
n->a->as_var.text = toks->token->text; | |
n->t = toks->next; | |
n->next = NULL; | |
return n; | |
} | |
struct parser* | |
mk_parser2(parse_func f, void *d) | |
{ | |
struct parser *p = malloc(sizeof(struct parser)); | |
p->data = d; | |
p->parse = f; | |
return p; | |
} | |
struct parser* | |
mk_parser1(parse_func f) | |
{ | |
return mk_parser2(f, NULL); | |
} | |
struct parse_list* | |
pHelloOrGoodbye(struct token_list *toks, void *_) | |
{ | |
return pAlt(mk_parser2(pLit, "hello"), mk_parser2(pLit, "goodbye"), toks); | |
} | |
struct parse_node* | |
mk_pair(struct parse_node *fst, struct parse_node *snd) | |
{ | |
struct parse_node *n = malloc(sizeof(struct parse_node)); | |
n->type = PAIR; | |
n->as_pair.fst = fst; | |
n->as_pair.snd = snd; | |
return n; | |
} | |
struct parse_list* | |
pGreeting(struct token_list *toks) | |
{ | |
return pThen(mk_pair, mk_parser1(pHelloOrGoodbye), mk_parser1(pVar), toks); | |
} | |
void print_parse_node(struct parse_node *a) | |
{ | |
if (a->type == LIT) | |
{ | |
printf("\"%s\"", a->as_lit.text); | |
} | |
else if (a->type == VAR) | |
{ | |
printf("%s", a->as_var.text); | |
} | |
else if (a->type == PAIR) | |
{ | |
printf("("); | |
print_parse_node(a->as_pair.fst); | |
printf(", "); | |
print_parse_node(a->as_pair.snd); | |
printf(")"); | |
} | |
} | |
void print_token_list(struct token_list *t) | |
{ | |
printf("["); | |
for (t; t; t = t->next) | |
{ | |
printf("\"%s\" ", t->token->text); | |
} | |
printf("\b]"); | |
} | |
void print_parse_list(struct parse_list *p) | |
{ | |
printf("("); | |
print_parse_node(p->a); | |
printf(" "); | |
print_token_list(p->t); | |
printf(")"); | |
} | |
struct token* | |
mk_token(const char *s) | |
{ | |
struct token *t = malloc(sizeof(struct token)); | |
t->text = s; | |
return t; | |
} | |
struct token_list* | |
mk_token_list(struct token *t, struct token_list *next) | |
{ | |
struct token_list *tl = malloc(sizeof(struct token_list)); | |
tl->token = t; | |
tl->next = next; | |
return tl; | |
} | |
int main() | |
{ | |
struct token_list *toks = mk_token_list(mk_token("goodbye"), | |
mk_token_list(mk_token("James"), | |
mk_token_list(mk_token("!"), NULL))); | |
struct parse_list *p; | |
for (p = pGreeting(toks); p; p = p->next) | |
{ | |
print_parse_list(p); | |
printf("\n"); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment