emanonwzy · December 24, 2020 23:57
diff --git a/c b/c
 /*
 * Regular expression implementation.
 * Supports only ( | ) * + ?.  No escapes.
 * Compiles to NFA and then simulates NFA
 * using Thompson's algorithm.
 *
 * See also http://swtch.com/~rsc/regexp/ and
 * Thompson, Ken.  Regular Expression Search Algorithm,
 * Communications of the ACM 11(6) (June 1968), pp. 419-422.
 * 
 * Copyright (c) 2007 Russ Cox.
 * Can be distributed under the MIT license, see bottom of file.
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>

 /*
 * Convert infix regexp re to postfix notation.
 * Insert . as explicit concatenation operator.
 * Cheesy parser, return static buffer.
 */
 char*
 re2post(char *re)
 {
 	int nalt, natom;
 	static char buf[8000];
 	char *dst;
 	struct {
 		int nalt;
 		int natom;
 	} paren[100], *p;
 	
 	p = paren;
 	dst = buf;
 	nalt = 0;
 	natom = 0;
 	if(strlen(re) >= sizeof buf/2)
 		return NULL;
 	for(; *re; re++){
 		switch(*re){
 		case '(':
 			if(natom > 1){
 				--natom;
 				*dst++ = '.';
 			}
 			if(p >= paren+100)
 				return NULL;
 			p->nalt = nalt;
 			p->natom = natom;
 			p++;
 			nalt = 0;
 			natom = 0;
 			break;
 		case '|':
 			if(natom == 0)
 				return NULL;
 			while(--natom > 0)
 				*dst++ = '.';
 			nalt++;
 			break;
 		case ')':
 			if(p == paren)
 				return NULL;
 			if(natom == 0)
 				return NULL;
 			while(--natom > 0)
 				*dst++ = '.';
 			for(; nalt > 0; nalt--)
 				*dst++ = '|';
 			--p;
 			nalt = p->nalt;
 			natom = p->natom;
 			natom++;
 			break;
 		case '*':
 		case '+':
 		case '?':
 			if(natom == 0)
 				return NULL;
 			*dst++ = *re;
 			break;
 		default:
 			if(natom > 1){
 				--natom;
 				*dst++ = '.';
 			}
 			*dst++ = *re;
 			natom++;
 			break;
 		}
 	}
 	if(p != paren)
 		return NULL;
 	while(--natom > 0)
 		*dst++ = '.';
 	for(; nalt > 0; nalt--)
 		*dst++ = '|';
 	*dst = 0;
 	return buf;
 }

 /*
 * Represents an NFA state plus zero or one or two arrows exiting.
 * if c == Match, no arrows out; matching state.
 * If c == Split, unlabeled arrows to out and out1 (if != NULL).
 * If c < 256, labeled arrow with character c to out.
 */
 enum
 {
 	Match = 256,
 	Split = 257
 };
 typedef struct State State;
 struct State
 {
 	int c;
 	State *out;
 	State *out1;
 	int lastlist;
 };
 State matchstate = { Match };	/* matching state */
 int nstate;

 /* Allocate and initialize State */
 State*
 state(int c, State *out, State *out1)
 {
 	State *s;
 	
 	nstate++;
 	s = malloc(sizeof *s);
 	s->lastlist = 0;
 	s->c = c;
 	s->out = out;
 	s->out1 = out1;
 	return s;
 }

 /*
 * A partially built NFA without the matching state filled in.
 * Frag.start points at the start state.
 * Frag.out is a list of places that need to be set to the
 * next state for this fragment.
 */
 typedef struct Frag Frag;
 typedef union Ptrlist Ptrlist;
 struct Frag
 {
 	State *start;
 	Ptrlist *out;
 };

 /* Initialize Frag struct. */
 Frag
 frag(State *start, Ptrlist *out)
 {
 	Frag n = { start, out };
 	return n;
 }

 /*
 * Since the out pointers in the list are always 
 * uninitialized, we use the pointers themselves
 * as storage for the Ptrlists.
 */
 union Ptrlist
 {
 	Ptrlist *next;
 	State *s;
 };

 /* Create singleton list containing just outp. */
 Ptrlist*
 list1(State **outp)
 {
 	Ptrlist *l;
 	
 	l = (Ptrlist*)outp;
 	l->next = NULL;
 	return l;
 }

 /* Patch the list of states at out to point to start. */
 void
 patch(Ptrlist *l, State *s)
 {
 	Ptrlist *next;
 	
 	for(; l; l=next){
 		next = l->next;
 		l->s = s;
 	}
 }

 /* Join the two lists l1 and l2, returning the combination. */
 Ptrlist*
 append(Ptrlist *l1, Ptrlist *l2)
 {
 	Ptrlist *oldl1;
 	
 	oldl1 = l1;
 	while(l1->next)
 		l1 = l1->next;
 	l1->next = l2;
 	return oldl1;
 }

 /*
 * Convert postfix regular expression to NFA.
 * Return start state.
 */
 State*
 post2nfa(char *postfix)
 {
 	char *p;
 	Frag stack[1000], *stackp, e1, e2, e;
 	State *s;
 	
 	// fprintf(stderr, "postfix: %s\n", postfix);

 	if(postfix == NULL)
 		return NULL;

 	#define push(s) *stackp++ = s
 	#define pop() *--stackp

 	stackp = stack;
 	for(p=postfix; *p; p++){
 		switch(*p){
 		default:
 			s = state(*p, NULL, NULL);
 			push(frag(s, list1(&s->out)));
 			break;
 		case '.':	/* catenate */
 			e2 = pop();
 			e1 = pop();
 			patch(e1.out, e2.start);
 			push(frag(e1.start, e2.out));
 			break;
 		case '|':	/* alternate */
 			e2 = pop();
 			e1 = pop();
 			s = state(Split, e1.start, e2.start);
 			push(frag(s, append(e1.out, e2.out)));
 			break;
 		case '?':	/* zero or one */
 			e = pop();
 			s = state(Split, e.start, NULL);
 			push(frag(s, append(e.out, list1(&s->out1))));
 			break;
 		case '*':	/* zero or more */
 			e = pop();
 			s = state(Split, e.start, NULL);
 			patch(e.out, s);
 			push(frag(s, list1(&s->out1)));
 			break;
 		case '+':	/* one or more */
 			e = pop();
 			s = state(Split, e.start, NULL);
 			patch(e.out, s);
 			push(frag(e.start, list1(&s->out1)));
 			break;
 		}
 	}

 	e = pop();
 	if(stackp != stack)
 		return NULL;

 	patch(e.out, &matchstate);
 	return e.start;
 #undef pop
 #undef push
 }

 typedef struct List List;
 struct List
 {
 	State **s;
 	int n;
 };
 List l1, l2;
 static int listid;

 void addstate(List*, State*);
 void step(List*, int, List*);

 /* Compute initial state list */
 List*
 startlist(State *start, List *l)
 {
 	l->n = 0;
 	listid++;
 	addstate(l, start);
 	return l;
 }

 /* Check whether state list contains a match. */
 int
 ismatch(List *l)
 {
 	int i;

 	for(i=0; i<l->n; i++)
 		if(l->s[i] == &matchstate)
 			return 1;
 	return 0;
 }

 /* Add s to l, following unlabeled arrows. */
 void
 addstate(List *l, State *s)
 {
 	if(s == NULL || s->lastlist == listid)
 		return;
 	s->lastlist = listid;
 	if(s->c == Split){
 		/* follow unlabeled arrows */
 		addstate(l, s->out);
 		addstate(l, s->out1);
 		return;
 	}
 	l->s[l->n++] = s;
 }

 /*
 * Step the NFA from the states in clist
 * past the character c,
 * to create next NFA state set nlist.
 */
 void
 step(List *clist, int c, List *nlist)
 {
 	int i;
 	State *s;

 	listid++;
 	nlist->n = 0;
 	for(i=0; i<clist->n; i++){
 		s = clist->s[i];
 		if(s->c == c)
 			addstate(nlist, s->out);
 	}
 }

 /* Run NFA to determine whether it matches s. */
 int
 match(State *start, char *s)
 {
 	int i, c;
 	List *clist, *nlist, *t;

 	clist = startlist(start, &l1);
 	nlist = &l2;
 	for(; *s; s++){
 		c = *s & 0xFF;
 		step(clist, c, nlist);
 		t = clist; clist = nlist; nlist = t;	/* swap clist, nlist */
 	}
 	return ismatch(clist);
 }

 int
 main(int argc, char **argv)
 {
 	int i;
 	char *post;
 	State *start;

 	if(argc < 3){
 		fprintf(stderr, "usage: nfa regexp string...\n");
 		return 1;
 	}
 	
 	post = re2post(argv[1]);
 	if(post == NULL){
 		fprintf(stderr, "bad regexp %s\n", argv[1]);
 		return 1;
 	}

 	start = post2nfa(post);
 	if(start == NULL){
 		fprintf(stderr, "error in post2nfa %s\n", post);
 		return 1;
 	}
 	
 	l1.s = malloc(nstate*sizeof l1.s[0]);
 	l2.s = malloc(nstate*sizeof l2.s[0]);
 	for(i=2; i<argc; i++)
 		if(match(start, argv[i]))
 			printf("%s\n", argv[i]);
 	return 0;
 }

 /*
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated
 * documentation files (the "Software"), to deal in the
 * Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute,
 * sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall
 * be included in all copies or substantial portions of the
 * Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
 * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
 * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS
 * OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
	/*
	* Regular expression implementation.
	* Supports only ( \| ) * + ?. No escapes.
	* Compiles to NFA and then simulates NFA
	* using Thompson's algorithm.
	*
	* See also http://swtch.com/~rsc/regexp/ and
	* Thompson, Ken. Regular Expression Search Algorithm,
	* Communications of the ACM 11(6) (June 1968), pp. 419-422.
	*
	* Copyright (c) 2007 Russ Cox.
	* Can be distributed under the MIT license, see bottom of file.
	*/
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	/*
	* Convert infix regexp re to postfix notation.
	* Insert . as explicit concatenation operator.
	* Cheesy parser, return static buffer.
	*/
	char*
	re2post(char *re)
	{
	int nalt, natom;
	static char buf[8000];
	char *dst;
	struct {
	int nalt;
	int natom;
	} paren[100], *p;

	p = paren;
	dst = buf;
	nalt = 0;
	natom = 0;
	if(strlen(re) >= sizeof buf/2)
	return NULL;
	for(; *re; re++){
	switch(*re){
	case '(':
	if(natom > 1){
	--natom;
	*dst++ = '.';
	}
	if(p >= paren+100)
	return NULL;
	p->nalt = nalt;
	p->natom = natom;
	p++;
	nalt = 0;
	natom = 0;
	break;
	case '\|':
	if(natom == 0)
	return NULL;
	while(--natom > 0)
	*dst++ = '.';
	nalt++;
	break;
	case ')':
	if(p == paren)
	return NULL;
	if(natom == 0)
	return NULL;
	while(--natom > 0)
	*dst++ = '.';
	for(; nalt > 0; nalt--)
	*dst++ = '\|';
	--p;
	nalt = p->nalt;
	natom = p->natom;
	natom++;
	break;
	case '*':
	case '+':
	case '?':
	if(natom == 0)
	return NULL;
	dst++ = re;
	break;
	default:
	if(natom > 1){
	--natom;
	*dst++ = '.';
	}
	dst++ = re;
	natom++;
	break;
	}
	}
	if(p != paren)
	return NULL;
	while(--natom > 0)
	*dst++ = '.';
	for(; nalt > 0; nalt--)
	*dst++ = '\|';
	*dst = 0;
	return buf;
	}

	/*
	* Represents an NFA state plus zero or one or two arrows exiting.
	* if c == Match, no arrows out; matching state.
	* If c == Split, unlabeled arrows to out and out1 (if != NULL).
	* If c < 256, labeled arrow with character c to out.
	*/
	enum
	{
	Match = 256,
	Split = 257
	};
	typedef struct State State;
	struct State
	{
	int c;
	State *out;
	State *out1;
	int lastlist;
	};
	State matchstate = { Match }; /* matching state */
	int nstate;

	/* Allocate and initialize State */
	State*
	state(int c, State out, State out1)
	{
	State *s;

	nstate++;
	s = malloc(sizeof *s);
	s->lastlist = 0;
	s->c = c;
	s->out = out;
	s->out1 = out1;
	return s;
	}

	/*
	* A partially built NFA without the matching state filled in.
	* Frag.start points at the start state.
	* Frag.out is a list of places that need to be set to the
	* next state for this fragment.
	*/
	typedef struct Frag Frag;
	typedef union Ptrlist Ptrlist;
	struct Frag
	{
	State *start;
	Ptrlist *out;
	};

	/* Initialize Frag struct. */
	Frag
	frag(State start, Ptrlist out)
	{
	Frag n = { start, out };
	return n;
	}

	/*
	* Since the out pointers in the list are always
	* uninitialized, we use the pointers themselves
	* as storage for the Ptrlists.
	*/
	union Ptrlist
	{
	Ptrlist *next;
	State *s;
	};

	/* Create singleton list containing just outp. */
	Ptrlist*
	list1(State **outp)
	{
	Ptrlist *l;

	l = (Ptrlist*)outp;
	l->next = NULL;
	return l;
	}

	/* Patch the list of states at out to point to start. */
	void
	patch(Ptrlist l, State s)
	{
	Ptrlist *next;

	for(; l; l=next){
	next = l->next;
	l->s = s;
	}
	}

	/* Join the two lists l1 and l2, returning the combination. */
	Ptrlist*
	append(Ptrlist l1, Ptrlist l2)
	{
	Ptrlist *oldl1;

	oldl1 = l1;
	while(l1->next)
	l1 = l1->next;
	l1->next = l2;
	return oldl1;
	}

	/*
	* Convert postfix regular expression to NFA.
	* Return start state.
	*/
	State*
	post2nfa(char *postfix)
	{
	char *p;
	Frag stack[1000], *stackp, e1, e2, e;
	State *s;

	// fprintf(stderr, "postfix: %s\n", postfix);

	if(postfix == NULL)
	return NULL;

	#define push(s) *stackp++ = s
	#define pop() *--stackp

	stackp = stack;
	for(p=postfix; *p; p++){
	switch(*p){
	default:
	s = state(*p, NULL, NULL);
	push(frag(s, list1(&s->out)));
	break;
	case '.': /* catenate */
	e2 = pop();
	e1 = pop();
	patch(e1.out, e2.start);
	push(frag(e1.start, e2.out));
	break;
	case '\|': /* alternate */
	e2 = pop();
	e1 = pop();
	s = state(Split, e1.start, e2.start);
	push(frag(s, append(e1.out, e2.out)));
	break;
	case '?': /* zero or one */
	e = pop();
	s = state(Split, e.start, NULL);
	push(frag(s, append(e.out, list1(&s->out1))));
	break;
	case '': / zero or more */
	e = pop();
	s = state(Split, e.start, NULL);
	patch(e.out, s);
	push(frag(s, list1(&s->out1)));
	break;
	case '+': /* one or more */
	e = pop();
	s = state(Split, e.start, NULL);
	patch(e.out, s);
	push(frag(e.start, list1(&s->out1)));
	break;
	}
	}

	e = pop();
	if(stackp != stack)
	return NULL;

	patch(e.out, &matchstate);
	return e.start;
	#undef pop
	#undef push
	}

	typedef struct List List;
	struct List
	{
	State **s;
	int n;
	};
	List l1, l2;
	static int listid;

	void addstate(List, State);
	void step(List, int, List);

	/* Compute initial state list */
	List*
	startlist(State start, List l)
	{
	l->n = 0;
	listid++;
	addstate(l, start);
	return l;
	}

	/* Check whether state list contains a match. */
	int
	ismatch(List *l)
	{
	int i;

	for(i=0; i<l->n; i++)
	if(l->s[i] == &matchstate)
	return 1;
	return 0;
	}

	/* Add s to l, following unlabeled arrows. */
	void
	addstate(List l, State s)
	{
	if(s == NULL \|\| s->lastlist == listid)
	return;
	s->lastlist = listid;
	if(s->c == Split){
	/* follow unlabeled arrows */
	addstate(l, s->out);
	addstate(l, s->out1);
	return;
	}
	l->s[l->n++] = s;
	}

	/*
	* Step the NFA from the states in clist
	* past the character c,
	* to create next NFA state set nlist.
	*/
	void
	step(List clist, int c, List nlist)
	{
	int i;
	State *s;

	listid++;
	nlist->n = 0;
	for(i=0; i<clist->n; i++){
	s = clist->s[i];
	if(s->c == c)
	addstate(nlist, s->out);
	}
	}

	/* Run NFA to determine whether it matches s. */
	int
	match(State start, char s)
	{
	int i, c;
	List clist, nlist, *t;

	clist = startlist(start, &l1);
	nlist = &l2;
	for(; *s; s++){
	c = *s & 0xFF;
	step(clist, c, nlist);
	t = clist; clist = nlist; nlist = t; /* swap clist, nlist */
	}
	return ismatch(clist);
	}

	int
	main(int argc, char **argv)
	{
	int i;
	char *post;
	State *start;

	if(argc < 3){
	fprintf(stderr, "usage: nfa regexp string...\n");
	return 1;
	}

	post = re2post(argv[1]);
	if(post == NULL){
	fprintf(stderr, "bad regexp %s\n", argv[1]);
	return 1;
	}

	start = post2nfa(post);
	if(start == NULL){
	fprintf(stderr, "error in post2nfa %s\n", post);
	return 1;
	}

	l1.s = malloc(nstate*sizeof l1.s[0]);
	l2.s = malloc(nstate*sizeof l2.s[0]);
	for(i=2; i<argc; i++)
	if(match(start, argv[i]))
	printf("%s\n", argv[i]);
	return 0;
	}

	/*
	* Permission is hereby granted, free of charge, to any person
	* obtaining a copy of this software and associated
	* documentation files (the "Software"), to deal in the
	* Software without restriction, including without limitation
	* the rights to use, copy, modify, merge, publish, distribute,
	* sublicense, and/or sell copies of the Software, and to
	* permit persons to whom the Software is furnished to do so,
	* subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall
	* be included in all copies or substantial portions of the
	* Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
	* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
	* WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
	* PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
	* OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
	* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
	* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
	*/
No results found