Created
August 26, 2012 21:36
-
-
Save bl0ckeduser/3483748 to your computer and use it in GitHub Desktop.
NFA wannabe regex, revised
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* August 26, 2012 error fixes: | |
* | |
* Fixed * compilation to work properly with | |
* proper epsilon nodes. Fixed evaluator infinite | |
* loop check to work well with this. | |
* Removed large stack allocation in main(). | |
*/ | |
/* | |
* Wannabe regex (egrep-style) | |
* Wannabe automatic regex tokenizer | |
* Wannabe egrep command (./a.out --wgr 'pattern') | |
* | |
* The wannabe egrep can make a picture of the compiled NFA | |
* this way: | |
* ./a.out --wgr 'pattern' --pic | pic | troff -ms | |
* | |
* Supported 'special' characters: | |
* ? + * . ^ $ \ | |
* the backslash is to escape special status. | |
* For their exact meaning, see the compiler | |
* routine, add_token(). | |
* | |
* (troff output has to be converted to some | |
* useful format: e.g. on my computer I pipe troff | |
* to "grops" [provided by free package groff] | |
* to get postscript) | |
* | |
* Now with support for multiple epsilon edges | |
* as well as lazy/greedy/minimalist choice-mode | |
* choice. (Yeah, you get to choose how the program | |
* chooses). Please note that it uses NFAs, which | |
* are inferior to DFAs. Nasty backtracking involved. | |
* There's some algorithm with matrices and stuff to | |
* convert, but that'll be for another time. I'm not | |
* even a CS student or professional or teacher or | |
* anything, I'm a kid, give me a little time. | |
* | |
* One day I should try to rewrite this in Lisp. | |
* | |
* To respect a certain idiom, greedy is the default | |
* choice-mode. Minimalist mode tries to get as few | |
* characters as possible, while lazy just gives you | |
* the first thing it finds. | |
* (Are they actually the same ?) | |
* | |
* Bl0ckeduser | |
* August 2012 | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
int VERBOSE = 0; | |
/* | |
* Some literature | |
* | |
* "Each state of a nondeterministic automaton contains a list of `Accept' | |
* values, a list of epsilon transitions (an epsilon transition represents a | |
* transition to another state that can be made without reading a character) | |
* and a list of transitions qualified with a character predicate (the | |
* transition can only be made to the given state on input of a character | |
* permitted by the predicate). Although a list of `Accept' values is provided | |
* for, in actual fact each state will have zero or one of them (the `Maybe' | |
* type is not used because the flexibility offered by the list representation | |
* is useful)." | |
* Source: NFA.hs, part of Alex - (c) Chris Dornan 1995-2000, Simon Marlow 2003 | |
* http://hackage.haskell.org/packages/archive/alex-meta/latest/doc/html/src/NFA.html | |
*/ | |
/* | |
* Programming notes / to-do's | |
* | |
* - What does '.+?' mean ? GNU egrep seems to do it | |
* differently. Is it BS ? Or am I mis-compiling ? | |
* Should I check for stuff that makes no sense ? | |
* Is there a systematic, clean way to do this ? | |
* | |
* - NFAs / backtracking are said to be a poor way to | |
* do regex. DFAs are said to be better. DFA next | |
* time, I guess. (As restated in header above). | |
*/ | |
/* | |
* This is what gets returned when | |
* a string is found to match a | |
* regex pattern. The data therein | |
* is mostly useful for a tokenizer | |
* wanting the length of the match, | |
* since the wannabe egrep only needs to | |
* know whether match of its single loaded | |
* pattern succeded. If token == 0, | |
* no match was made. | |
*/ | |
typedef struct match_struct { | |
/* Accept number.*/ | |
int token; | |
/* Last character in the match. */ | |
char* pos; | |
} match_t; | |
/* | |
* This is the NFA node data structure. | |
* It includes outgoing edges as pointers | |
* to other structures, and an accept | |
* number. | |
*/ | |
typedef struct trie_struct { | |
/* | |
* The evaluator will try all these edges: | |
* | |
* - map[c] : standard character edge, | |
* where c is the current | |
* character | |
* | |
* - link[0], link[1], ..., link[links - 1]: | |
* these are in fact epsilon edges | |
* | |
* - caret (see below) | |
* | |
* When several choices exist, the evaluator | |
* will go through all possible paths and then | |
* choose the "best" successful match according | |
* to the chosen choice-mode (greed/lazy/mini). | |
*/ | |
struct trie_struct* map[256]; | |
struct trie_struct** link; | |
int links; | |
int link_alloc; | |
/* | |
* Special "character" edge for the | |
* beggining of the string, created | |
* by the '^' meta-character. | |
*/ | |
struct trie_struct* caret; | |
/* | |
* Evaluating a matching pattern eventually | |
* leads to a node with valid_token set | |
* to the a number associated with the originating | |
* regex. Other nodes have this set to 0. | |
*/ | |
int valid_token; | |
} trie; | |
void traverse(trie *); | |
void printout(void); | |
void pic_printout(void); | |
void clear_trie_list(void); | |
/* choice-modes */ | |
enum { | |
LAZY = 0, | |
MINIMALIST, | |
GREEDY | |
}; | |
void smartass_putchar(int c) | |
{ | |
if (!isprint(c)) | |
printf("[0x%x]", c); | |
else | |
putchar(c); | |
} | |
void fail(char* mesg) | |
{ | |
fprintf(stderr, "error: %s\n", mesg); | |
exit(1); | |
} | |
void sanity_requires(int exp) | |
{ | |
if(!exp) | |
fail("that doesn't make any sense"); | |
} | |
void add_link(trie* from, trie* to) | |
{ | |
int i; | |
/* check for redundant link */ | |
for (i = 0; i < from->links; i++) | |
if (from->link[i] == to) | |
return; | |
if (++from->links > from->link_alloc) | |
from->link = realloc(from->link, (from->link_alloc += 5) * sizeof(trie *)); | |
if (!from->link) | |
fail("link array allocation"); | |
from->link[from->links - 1] = to; | |
} | |
trie* new_trie() | |
{ | |
trie* t = malloc(sizeof(trie)); | |
int i; | |
if (!t) | |
fail("trie allocation"); | |
t->link = malloc(10 * sizeof(trie*)); | |
if (!t->link) | |
fail("trie links"); | |
t->links = 0; | |
t->link_alloc = 10; | |
for (i = 0; i < 10; i++) | |
t->link[i] = NULL; | |
t->valid_token = 0; | |
t->caret = NULL; | |
for(i = 0; i < 256; i++) | |
t->map[i] = NULL; | |
return t; | |
} | |
/* | |
* Compiler routine. | |
* | |
* Creates and adds the edges and nodes | |
* for regex pattern 'tok' to the | |
* starting node 't', as well as | |
* an accept state identified by | |
* number 'key'. | |
*/ | |
int add_token(trie* t, char* tok, int key) | |
{ | |
int i, j; | |
int k; | |
int c; | |
trie *hist[1024]; | |
trie *next; | |
int len = strlen(tok) + 1; | |
char* new_tok; | |
int dollar = 0; | |
int n; | |
new_tok = malloc(len + 32); | |
if (!new_tok) | |
fail("buffer allocation"); | |
strcpy(new_tok, tok); | |
/* Rewrite $ as terminating null */ | |
for (i = 0; i < len; ++i) { | |
switch (new_tok[i]) { | |
case '\\': | |
if (!new_tok[++i]) | |
fail("backslash expected char"); | |
break; | |
case '$': | |
new_tok[i] = 0; | |
len = i + 1; | |
dollar = 1; | |
} | |
} | |
/* Otherwise, make the terminating null | |
* optional to allow for sub-matches | |
*/ | |
if (!dollar) | |
new_tok[len++] = '?'; | |
for (i = 0, n = 0; c = new_tok[i], i < len && t; ++i) | |
{ | |
/* | |
* 'n' is node index, which is distinct | |
* from source regex character index ('i') because | |
* meta-characters do not create new nodes. | |
* | |
* Although matching traverses the tree in a | |
* messy, complicated fashion due to backtracking, | |
* which would make a node index nonsensical for it, | |
* compilation creates the nodes in a simple, linear | |
* fashion (this loop) and has to refer to previous nodes | |
* when compiling meta-characters. | |
*/ | |
hist[n] = t; | |
switch (c) { | |
case '?': /* optional character */ | |
sanity_requires(n > 0); | |
add_link(hist[n - 1], t); | |
break; | |
case '+': /* character can repeat | |
any number of times */ | |
sanity_requires(n > 0); | |
add_link(t, hist[n - 1]); | |
break; | |
case '*': /* optional character | |
with repetition allowed */ | |
sanity_requires(n > 0); | |
add_link(t, hist[++n] = new_trie()); | |
t = hist[n]; | |
add_link(hist[n - 1], t); | |
add_link(t, hist[n - 2]); | |
add_link(hist[n - 2], t); | |
add_link(t, hist[++n] = new_trie()); | |
t = hist[n]; | |
add_link(hist[n - 1], t); | |
break; | |
case '.': /* any normal character */ | |
next = new_trie(); | |
for(j = 0; j < 256; j++) | |
t->map[j] = next; | |
t = next; | |
++n; | |
break; | |
case '^': /* beggining of line */ | |
if (!t->caret) | |
t->caret = new_trie(); | |
t = t->caret; | |
++n; | |
break; | |
case '\\': /* escape to normal text */ | |
if (++i == len) | |
fail("backslash expected char"); | |
c = new_tok[i]; | |
default: /* normal text */ | |
if (!t->map[c]) | |
t->map[c] = new_trie(); | |
t = t->map[c]; | |
++n; | |
} | |
} | |
free(new_tok); | |
/* | |
* We have reached the accept-state node; | |
* mark it as such. | |
*/ | |
t->valid_token = key; | |
} | |
match_t match_setup(trie* t) | |
{ | |
clear_trie_list(); | |
traverse(t); | |
} | |
/* | |
* NFA trie evaluator, with backtracking, | |
* implementation-time hacks, parameters and all | |
* (aka bloat) | |
*/ | |
match_t match(trie* t, char* full_tok, char* tok, int btm, int vd, int mode, trie* led, int ledi) | |
{ | |
int i = 0; | |
int j; | |
char c; | |
/* choice according to mode */ | |
int record; | |
match_t m; | |
match_t ml[10]; | |
int mc = 0; | |
match_t* choice; | |
extern int trie_index(trie* t); | |
int len = strlen(tok) + 1; | |
int init = btm; | |
i = 0; do | |
{ | |
c = tok[i]; | |
/* | |
* Give some info on the current | |
* character and node for debugging | |
* purposes | |
*/ | |
if (VERBOSE) { | |
if (init) { | |
printf("[fork %d:%d - c=", vd, init); | |
smartass_putchar(c); | |
printf(",\tn=%d] ", trie_index(t)); | |
} else { | |
printf("At node %d; ", trie_index(t), t); | |
printf("have "); | |
smartass_putchar(c); | |
printf(" (abs. index %d)\n", &tok[i] - full_tok); | |
} | |
} | |
/* Follow caret edge */ | |
if (&tok[i] == full_tok && t->caret) | |
t = t->caret; | |
if (t->links && !btm) | |
{ | |
/* | |
* Ambiguous. Try all possibilites by "forking" | |
* (informal sense). | |
*/ | |
if (VERBOSE) | |
printf("Ambiguous fork; trying all edges\n"); | |
/* Try character edge if it exists */ | |
if (t->map[c]) | |
if ((m = match(t, full_tok, tok + i, 1, vd + 1, mode, led, ledi)).token) | |
ml[mc++] = m; | |
/* Try all the epsilon nodes */ | |
for (j = 0; j < t->links; j++) | |
if ((m = match(t, full_tok, tok + i, j + 2, vd + 1, mode, led, ledi)).token) | |
ml[mc++] = m; | |
/* | |
* If there are multiple matches, we | |
* choose based on the mode. | |
*/ | |
if (mc > 1) { | |
switch (mode) { | |
/* Just give the first match */ | |
case LAZY: | |
return *ml; | |
/* Give the shortest match */ | |
case MINIMALIST: | |
record = strlen(full_tok) + 1; | |
choice = NULL; | |
for (j = 0; j < mc; j++) | |
if (ml[j].pos - full_tok <= record) { | |
choice = &ml[j]; | |
record = ml[j].pos - full_tok; | |
} | |
if (!choice) fail("lazy choice"); | |
return *choice; | |
/* Give the longest match */ | |
case GREEDY: | |
record = 0; | |
choice = NULL; | |
for (j = 0; j < mc; j++) | |
if (ml[j].pos - full_tok >= record) { | |
choice = &ml[j]; | |
record = ml[j].pos - full_tok; | |
} | |
if (!choice) fail("greedy choice"); | |
return *choice; | |
default: | |
fail("match() call mode number"); | |
} | |
} | |
else if (mc == 1) /* No ambiguity, no time wasted choosing */ | |
return *ml; | |
else /* For non-matches (they are never pushed | |
to the decision list) */ | |
return m; | |
} | |
if (btm <= 1 && t->map[c]) { | |
if (VERBOSE) { | |
printf("Following "); | |
smartass_putchar(c); | |
printf(" edge\n"); | |
} | |
t = t->map[c]; | |
/* | |
* Does this have to fork ??? | |
*/ | |
if (t->valid_token) break; | |
++i; | |
goto valid; | |
} | |
else if (btm > 1) { | |
if (VERBOSE) | |
printf("Trying to follow epsilon edge %d\n", btm - 2 + 1); | |
/* prevents infinite loops */ | |
if (!(led && t->link[btm - 2] == led | |
&& (int)(&tok[i] - full_tok) == ledi)) { | |
led = t; /* Last Epsilon Departure | |
node */ | |
ledi = (int)(&tok[i] - full_tok); | |
t = t->link[btm - 2]; | |
goto valid; | |
} else { | |
if (VERBOSE) | |
printf("... won't, I just was there.\n"); | |
} | |
} | |
break; | |
/* The fork path choice is only | |
* relevant for the first ambiguous | |
* choice after the fork | |
*/ | |
valid: if (btm) | |
btm = 0; | |
} while (i < len && t); | |
if (t && t->valid_token) { | |
m.token = t->valid_token; | |
m.pos = tok + i; | |
if (VERBOSE) | |
printf("Matched !\n"); | |
} else { | |
m.token = 0; | |
if (VERBOSE) | |
printf("No good\n"); | |
} | |
return m; | |
} | |
char* theregex; | |
int main(int argc, char** argv) | |
{ | |
trie* t = new_trie(); | |
match_t m; | |
/* tokenizer shell */ | |
char buf[1024]; | |
char cmd[1024]; | |
char nam[1024]; | |
char** expnames = NULL; | |
int i = 0, j =0; | |
int mod = GREEDY; /* a Un*x idiom, innit ? */ | |
/* wannabe-egrep */ | |
int wgr = 0; | |
char *p; | |
if (!(expnames = malloc(1024 * sizeof(char *)))) | |
fail("malloc"); | |
for (i = 0; i < 1024; i++) | |
if (!(expnames[i] = malloc(1024))) | |
fail("malloc"); | |
if (argc > 1 && !strcmp(argv[1], "--wgr")) { | |
/* wannabe-egrep mode */ | |
wgr = 1; | |
if (argc == 2) | |
add_token(t, theregex = "", 1); | |
else | |
add_token(t, theregex = argv[2], 1); | |
} | |
for (i = 0; i < argc; i++) | |
if (!strcmp(argv[i], "--show")) | |
{ | |
traverse(t); | |
printout(); | |
return; | |
} else if (!strcmp(argv[i], "--pic")) | |
{ | |
traverse(t); | |
pic_printout(); | |
return; | |
} | |
strcpy(*expnames, "(failure)"); | |
while (1) { | |
if(!wgr) { | |
printf("%c ", '%'); | |
fflush(stdout); | |
} | |
fgets(buf, 1024, stdin); | |
if (feof(stdin)) break; | |
/* the meat of the wannabe-egrep */ | |
if (wgr) { | |
/* truncate newline */ | |
for (p = buf; *p; ++p) | |
if (*p == '\n') | |
*p = 0; | |
/* | |
* Traverse the tree and assign pretty | |
* numbers to all encountered node pointers | |
* to make debugging less painful. | |
* (You get to hear about "node 2" instead of | |
* "node pointed to by 0x129235". This scheme | |
* is also used by the picture mode, where | |
* you would e.g. see a circle with the number 2). | |
*/ | |
match_setup(t); | |
/* | |
* The loop pushes forward the | |
* substring index after every | |
* failed match attempt. | |
*/ | |
p = buf; | |
do { | |
if (VERBOSE && p != buf) | |
printf("\n--------\n" | |
"trying submatch - index %d\n", | |
(int)(p - buf)); | |
if (match(t, buf, p, 0, 0, GREEDY, NULL, 0).token) { | |
/* The line matches, so print it */ | |
puts(buf); | |
break; | |
} | |
} while (*p++); | |
} else { | |
/* regex tokenizer shell mode */ | |
*nam = *cmd = 0; | |
if(sscanf(buf, "%s %s", cmd, nam) <= 0) { | |
puts("sorry, I need a command and an argument"); | |
continue; | |
} | |
/* add a new regex token */ | |
if(!strcmp(cmd, "add")) { | |
add_token(t, nam, ++i); | |
strcpy(expnames[i], nam); | |
printf("ok\n", i); | |
} | |
/* set choice-mode */ | |
else if(!strcmp(cmd, "mode")) { | |
if (!strcmp(nam, "lazy")) | |
mod = LAZY; | |
else if (!strcmp(nam, "min")) | |
mod = MINIMALIST; | |
else if (!strcmp(nam, "greedy")) | |
mod = GREEDY; | |
else if (!*nam || *nam=='?') | |
puts(mod == LAZY ? "LAZY" : | |
mod == GREEDY ? "GREEDY": | |
"META_LAZY"); | |
else puts("???"); | |
} | |
/* flip verbosity */ | |
else if(!strcmp(cmd, "verbose")) { | |
VERBOSE = !!!VERBOSE; | |
} | |
/* try to match a string */ | |
else if(!strcmp(cmd, "match")) { | |
if (i == 1) { | |
printf("no matching patterns installed\n"); | |
continue; | |
} | |
match_setup(t); | |
m = match(t, nam, nam, 0, 0, mod, NULL, 0); | |
if (m.token == 0) { | |
puts("no match"); | |
continue; | |
} | |
printf("pattern '%s': ", expnames[m.token]); | |
putchar('\''); | |
for (j = 0; j < (int)(m.pos - nam); j++) | |
putchar(nam[j]); | |
putchar('\''); putchar('\n'); | |
} | |
else printf(" ??? \n"); | |
} | |
} | |
if(!wgr) | |
putchar('\n'); | |
return 0; | |
} | |
/* Down here we have the | |
* pretty-numbers-for-pointers code | |
* and routines that display either | |
* simple text representation of a compiled | |
* tree or a generate pic source for | |
* a nice picture of the tree (with arrows, | |
* circles, and all). | |
*/ | |
trie* tries[1024] = {NULL}; | |
int tc = 1; | |
int trie_index(trie* t) | |
{ | |
int i; | |
for (i = 1; i < tc; i++) | |
if (tries[i] == t) | |
return i; | |
return 0; | |
} | |
int trie_in_list(trie* t) | |
{ | |
int i; | |
for (i = 1; i < tc; i++) | |
if (tries[i] == t) | |
return 1; | |
return 0; | |
} | |
void clear_trie_list(void) | |
{ | |
tc = 1; | |
} | |
/* recursively look for all existing | |
* tree-nodes and assign unique pretty | |
* natural numbers to them, for | |
* human reader / picture viewer | |
* convenience. | |
*/ | |
void traverse(trie* t) | |
{ | |
int i, j; | |
int e; | |
if (!trie_in_list(t)) | |
tries[tc++] = t; | |
if (t->caret && !trie_in_list(t->caret)) | |
traverse(t->caret); | |
for (i = 0; i < 256; i++) | |
if (t->map[i] && !trie_in_list(t->map[i])) { | |
tries[tc++] = t->map[i]; | |
traverse(t->map[i]); | |
} | |
for (i = 0; i < t->links; i++) | |
if (!trie_in_list(t->link[i])) | |
traverse(t->link[i]); | |
} | |
/* pointer -> pretty number */ | |
int index_in_list(trie* t) | |
{ | |
int i; | |
for (i = 1; i < tc; i++) | |
if (tries[i] == t) | |
return i; | |
fail("trie list lookup"); | |
} | |
void printout() /* text representation of trie */ | |
{ | |
int i, j; | |
int chk; | |
trie* tt; | |
trie* t; | |
for (i = 1; i < tc; i++) | |
{ | |
printf("%d. ", i); | |
t = tries[i]; | |
if (!t) break; | |
if (t->links) /* [epsilon edge destination node number] */ | |
for (j = 0; j < t->links; j++) | |
printf (" [%d] ", index_in_list(t->link[j])); | |
if (t->valid_token) /* ::accept-state number */ | |
printf(" ::%d ", t->valid_token); | |
if (t->caret) /* caret alphabet edge -> dest. node num. */ | |
printf(" caret->%d", index_in_list(t->caret)); | |
chk = 1; | |
tt = t->map[0]; | |
for (j = 0; j < 256; j++) | |
if (t->map[j] != tt || !t->map[j]) | |
{ | |
chk = 0; | |
break; | |
} | |
if (chk) { /* all alphabet edges -> dest node num. */ | |
printf(" .-> %d ", index_in_list(t->map[0])); | |
} else /* specific characters -> dest nonde nums. */ | |
for (j = 0; j < 256; j++) | |
if (t->map[j]) | |
printf(" %c->%d ", j, index_in_list(t->map[j])); | |
putchar('\n'); | |
} | |
} | |
/* makes pic source for a picture of the tree */ | |
void pic_printout() | |
{ | |
/* registers, buffers, herp derp derp */ | |
int i, j; | |
int chk; | |
trie* tt; | |
trie* t; | |
int cc; | |
int no; | |
char buf[1024], buf2[1024]; | |
struct { | |
char links[1024]; /* caption listing | |
the characters that | |
share this edge */ | |
int lc; /* (count of above) */ | |
} refs[1024]; /* list of alphabet edges for this node, | |
indexed by destination node. */ | |
puts(".PS"); | |
/* Write out the originating regex pattern | |
and then move away to make space for the | |
actual picture */ | |
printf("\"%s\"\n", theregex); | |
puts("down; move 1.0; right"); | |
/* Draw circles for all of the | |
nodes */ | |
for (i = 1; i < tc; i++) | |
printf("circle \"%d\"; move 0.75\n", i); | |
/* Find the accept node and draw a | |
nested concentric circle there */ | |
for (i = 1; i < tc; i++) | |
if (tries[i]->valid_token) | |
printf("circle rad 0.2 at %dth circle\n", i); | |
/* Mark the starting node with a short arrow */ | |
printf("arrow from 1th circle - (0.5, 0) to 1th circle .w\n"); | |
/* For each node ... */ | |
for (i = 1; i < tc; i++) | |
{ | |
t = tries[i]; | |
if (!t) break; | |
if (t->links) { | |
/* ... draw all its epsilon nodes as | |
dashed, arrow-tailed arcs, or as | |
straight dashed lines | |
if we are going backwards | |
to the previous node | |
(arbitrary graphics decision) */ | |
for (j = 0; j < t->links; j++) { | |
no = index_in_list(t->link[j]); | |
if (i < no) { | |
printf("arc -> dashed "); | |
printf("ccw "); | |
printf("from %dth circle .s to %dth circle .s chop\n", | |
i, no); | |
} | |
else if (i - no == 1) | |
printf("arrow dashed from %dth circle to %dth circle chop\n", | |
i, no); | |
else { | |
printf("arc -> dashed "); | |
printf("cw "); | |
printf("from %dth circle .s to %dth circle .s chop\n", | |
i, no); | |
} | |
} | |
} | |
/* Clear up the alphabet edges list */ | |
for (j = 0; j < 1024; j++) { | |
refs[j].lc = 0; | |
strcpy(refs[j].links, ""); | |
} | |
/* Add caret edges to the list. | |
They get the caption "start". */ | |
if (t->caret) { | |
no = index_in_list(t->caret); | |
if (refs[no].lc++) | |
strcat(refs[no].links, ", "); | |
strcat(refs[no].links, "start"); | |
} | |
/* Here, it gets a bit ugly. I check to | |
see if all the characters edges exist | |
and all point to the same node. | |
If so, we're dealing with a '.', and | |
it's wise to draw that specially. | |
*/ | |
chk = 1; | |
tt = t->map[0]; | |
for (j = 0; j < 256; j++) | |
if (t->map[j] != tt || !t->map[j]) | |
{ | |
chk = 0; | |
break; | |
} | |
if (chk) { | |
/* yes, it's the '.' special case */ | |
no = index_in_list(t->map[0]); | |
if (refs[no].lc++) | |
strcat(refs[no].links, ", "); | |
strcat(refs[no].links, "."); | |
} else /* no, add all possible characters to their | |
destination-nodes' character lists */ | |
for (j = 0; j < 256; j++) | |
if (t->map[j]) { | |
no = index_in_list(t->map[j]); | |
if (!isprint(j)) | |
sprintf(buf2, "0x%x", j); | |
else | |
sprintf(buf2, "%c", j); | |
if (refs[no].lc++) | |
strcat(refs[no].links, ", "); | |
strcat(refs[no].links, buf2); | |
} | |
/* Draw the alphabet edges with their | |
character-list captions */ | |
for (j = 1; j < 1024; j++) { | |
if (refs[j].lc) | |
{ | |
printf("arc -> "); | |
if (i > j) | |
printf("ccw"); | |
else | |
printf("cw"); | |
printf(" from %dth circle .n to %dth circle .n chop\n", | |
i, j); | |
printf("\"%s\" at last arc + (0, 0.4)\n", | |
refs[j].links); | |
} | |
} | |
} | |
printf(".PE\n"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
to fix: the current
+
compilation is problematic when there's a*
or another+
nearby...