Last active
July 1, 2022 21:07
-
-
Save tkurtbond/bbc2d2da8eaf942cfcbc0a69ef1e14a3 to your computer and use it in GitHub Desktop.
Example regexp matcher from *The Practice of Programming*.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* tpop_match.c -- The regexp matcher from *The Practice of Programming*. */ | |
| /* See: http://genius.cat-v.org/brian-kernighan/articles/beautiful */ | |
| /* This code is a simple regular expression matcher that implements | |
| the following constructs: | |
| c matches any literal character c | |
| . matches any single character | |
| ^ matches the beginning of the input string | |
| $ matches the end of the input string | |
| * matches zero or more occurrences of the previous character | |
| */ | |
| #include <stdio.h> | |
| int matchhere(char *regexp, char *text); | |
| int matchstar(int c, char *regexp, char *text); | |
/* match: report whether regexp matches somewhere in text.
   Returns 1 on a match and 0 otherwise.  A leading '^' pins the
   match to the start of text, so only that one position is tried. */
int match(char *regexp, char *text)
{
    if (regexp[0] == '^') {
        return matchhere(regexp+1, text);
    }

    /* Try every starting position in turn.  The position of the
       terminating '\0' is tried as well, so an empty text (or a
       pattern that can match nothing) is still given one attempt. */
    for (;;) {
        if (matchhere(regexp, text)) {
            return 1;
        }
        if (*text == '\0') {
            return 0;
        }
        text++;
    }
}
/* matchhere: report whether regexp matches starting exactly at text[0].
   Returns 1 on a match and 0 otherwise.  The pattern is consumed one
   element per loop iteration; reaching its end means everything matched. */
int matchhere(char *regexp, char *text)
{
    while (regexp[0] != '\0') {
        /* "x*": hand the starred character and the rest of the
           pattern over to matchstar. */
        if (regexp[1] == '*') {
            return matchstar(regexp[0], regexp+2, text);
        }

        /* '$' is special only as the very last pattern character:
           it matches when the text is exhausted. */
        if (regexp[0] == '$' && regexp[1] == '\0') {
            return *text == '\0';
        }

        /* A single-character element needs a text character to consume. */
        if (*text == '\0') {
            return 0;
        }
        /* '.' accepts any character; anything else must be equal. */
        if (regexp[0] != '.' && regexp[0] != *text) {
            return 0;
        }

        regexp++;
        text++;
    }
    return 1;
}
/* matchstar: report whether c*regexp matches starting at text[0].
   c is the starred character ('.' stands for any character) and regexp
   is the remainder of the pattern after the '*'.
   Returns 1 on a match and 0 otherwise. */
int matchstar(int c, char *regexp, char *text)
{
    /* First try the remainder with zero repetitions of c, then with one
       more repetition each time round, for as long as the text keeps
       supplying characters that c accepts. */
    for (;;) {
        if (matchhere(regexp, text)) {
            return 1;
        }
        if (*text == '\0') {
            return 0;
        }
        if (*text != c && c != '.') {
            return 0;
        }
        text++;
    }
}
/* try: run match on one (regexp, text) pair and print the call
   together with TRUE or FALSE on standard output. */
void try (char *regexp, char *text)
{
    const char *verdict = "FALSE";

    if (match (regexp, text)) {
        verdict = "TRUE";
    }
    printf ("match (\"%s\", \"%s\") => %s\n", regexp, text, verdict);
}
/* main: feed a fixed table of (regexp, text) pairs to try, one per
   line of output.  argc and argv are not used. */
int main (int argc, char **argv)
{
    struct test { char *regexp; char *text; };

    /* The table ends with a NULL/NULL sentinel entry. */
    struct test tests[] = {
        { "",    ""     },
        { "",    "a"    },
        { "a",   ""     },
        { "a",   "a"    },
        { "aa",  "a"    },
        { "a",   "ba"   },
        { ".",   "a"    },
        { "a*b", "ab"   },
        { "a*b", "aab"  },
        { "a*b", "aaab" },
        { "a*c", "aaac" },
        { NULL,  NULL   },
    };

    struct test *t = tests;
    while (t->regexp != NULL) {
        try (t->regexp, t->text);
        t++;
    }
    return 0;
}
| // Local Variables: | |
| // compile-command: "cc -o tpop_match tpop_match.c && ./tpop_match" | |
| // End: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment