Created
November 30, 2012 03:13
-
-
Save willglynn/4173566 to your computer and use it in GitHub Desktop.
Ragel for classifying log lines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> | |
#include <string.h> | |
#include <stdio.h> | |
/* Result codes returned by classify_line(). */
#define LINE_IS_SIGNAL 1      /* line matched one of the signal patterns */
#define LINE_IS_NOISE 2       /* line matched one of the noise patterns */
/* Negative value is parenthesized so the macro expands safely inside any expression. */
#define LINE_IS_UNKNOWN (-1)  /* line matched neither pattern set */
/*
 * Ragel specification of the line_classifier state machine.
 * The machine is not prefixed with a wildcard, so each pattern is matched
 * starting at the first byte of the input handed to the generated code.
 */
%%{ | |
# Machine name; the "write" directives below generate code for this machine.
machine line_classifier; | |
# Each action is a C "return" statement, so it returns a LINE_IS_* code
# straight out of the host function that contains "%% write exec".
action is_signal { return LINE_IS_SIGNAL; } | |
action is_noise { return LINE_IS_NOISE; } | |
action is_unknown { return LINE_IS_UNKNOWN; } | |
# Signal patterns: input starting with ERROR or WARNING.
# "@" attaches is_signal to the transition that completes a match.
signal = ( | |
/ERROR/ | | |
/WARNING/ | |
) @(is_signal); | |
# Noise patterns: input starting with "blah".
noise = ( | |
/blah/ | |
) @(is_noise); | |
# "$err" installs is_unknown as the error action for every state, so input
# that cannot match any pattern is reported as unknown.
# NOTE(review): the outer "@(is_unknown)" sits on the same finishing
# transitions as is_signal/is_noise, which presumably run (and return) first,
# making it unreachable -- confirm against Ragel's action-ordering rules.
main := ( signal | noise ) @(is_unknown) $err(is_unknown); | |
}%% | |
/* Emit the static data the generated machine code needs, at file scope. */
%% write data; | |
int classify_line(const char * line, int length) { | |
const char *p = line; | |
const char *pe = line + length; | |
const char *eof = pe; | |
int cs; | |
%% write init; | |
%% write exec; | |
/* this should never happen, since the state machine should return a value in all cases */ | |
assert(0); | |
} | |
/* to run it */ | |
void classify(const char * line) { | |
const char * kind; | |
switch (classify_line(line, strlen(line))) { | |
case LINE_IS_NOISE: kind = "noise"; break; | |
case LINE_IS_SIGNAL: kind = "signal"; break; | |
default: kind = "unknown"; break; | |
} | |
printf("<%s> \"%s\"\n", kind, line); | |
} | |
/* Demo driver: classifies a fixed set of sample lines and prints each verdict. */
int main(int argc, char ** argv) {
    static const char *const samples[] = {
        "ERROR: this is important",
        "who knows what this is?",
        "blah blah blah",
        "WARNING FROM THE FUTURE: DON'T EAT THAT ICE CREAM",
    };
    const size_t sample_count = sizeof samples / sizeof samples[0];
    size_t i;

    for (i = 0; i < sample_count; i++) {
        classify(samples[i]);
    }

    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Link the demo binary; make's built-in rules compile line_classifier.c
# (generated by the pattern rule below) into line_classifier.o and link it.
line_classifier: line_classifier.o

# Generate C source from a Ragel specification.
# -G2 selects Ragel's goto-driven code style. $< is the single .rl source,
# so adding further prerequisites later will not leak them onto the command line.
%.c: %.rl
	ragel -G2 $< -o $@
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<signal> "ERROR: this is important" | |
<unknown> "who knows what this is?" | |
<noise> "blah blah blah" | |
<signal> "WARNING FROM THE FUTURE: DON'T EAT THAT ICE CREAM" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment