Created
March 19, 2012 04:11
-
-
Save Wollw/2094483 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * This is a parser intended for Common Log Format access logs.
 * It splits each line on whitespace, except for whitespace that appears
 * inside a field delimited by square brackets or double quotes.
 */
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include "parser.h" | |
#define BUFFER_SIZE 5000 | |
int main(int argc, char *argv[]) { | |
if (argc < 2) { | |
printf("Please provide a file name.\n"); | |
return 1; | |
} | |
FILE *fp = fopen(argv[1], "r"); | |
if (fp == NULL) { | |
perror("Failed to open file."); | |
return EXIT_FAILURE; | |
} | |
char *line = malloc(BUFFER_SIZE*sizeof(char)); | |
assert(line); | |
while (fgets(line, BUFFER_SIZE, fp) != NULL) { | |
line[strlen(line) - 1] = '\0'; | |
char **toks = parse_line(line); | |
int i = 0; | |
while (toks[i] != NULL) { | |
printf("%s\n", toks[i++]); | |
} | |
parse_free(toks); | |
} | |
free(line); | |
fclose(fp); | |
return EXIT_SUCCESS; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <assert.h> | |
#include "parser.h" | |
/*
 * Appends the character `c` to the heap-allocated, NUL-terminated string `s`.
 *
 * `s` must have been obtained from malloc/calloc/realloc; it is resized and
 * may move, so the caller must continue with the returned pointer only.
 * If memory cannot be obtained, a message is written to stderr and the
 * program is aborted (this holds even when assertions are compiled out).
 */
char * _parse_append_char(char *s, char c) {
    size_t len = strlen(s);

    /* Room for the existing text, the new character and the terminator. */
    char *grown = realloc(s, (len + 2) * sizeof(char));
    if (grown == NULL) {
        fputs("NULL Pointer.\n", stderr);
        abort();
    }

    grown[len] = c;
    grown[len + 1] = '\0';
    return grown;
}
/*
 * Releases a NULL-terminated array of heap-allocated strings: every string
 * up to the terminating NULL entry is freed, then the array itself.
 *
 * Passing NULL is allowed and does nothing, mirroring free().
 */
void parse_free(char **p) {
    if (p == NULL) {
        return;
    }
    for (char **tok = p; *tok != NULL; tok++) {
        free(*tok);
    }
    free(p);
}
/* Resizes `ptr` to `size` bytes; reports and aborts when memory runs out. */
static void *parse_xrealloc(void *ptr, size_t size) {
    void *grown = realloc(ptr, size);
    if (grown == NULL) {
        fputs("NULL Pointer.\n", stderr);
        abort();
    }
    return grown;
}

/*
 * Grows `tokens` to hold `count + 1` entries and fills entry `count` with a
 * heap copy of text[0..len), or with NULL when `text` is NULL.
 */
static char **parse_push_token(char **tokens, size_t count,
                               const char *text, size_t len) {
    tokens = parse_xrealloc(tokens, (count + 1) * sizeof *tokens);
    if (text == NULL) {
        tokens[count] = NULL;
        return tokens;
    }
    tokens[count] = parse_xrealloc(NULL, len + 1);
    memcpy(tokens[count], text, len);
    tokens[count][len] = '\0';
    return tokens;
}

/*
 * Splits `line` into tokens.
 *
 * Rules:
 *   - Outside a token, spaces and tabs are skipped.
 *   - A token that starts with '"' runs to the next unescaped '"'; a token
 *     that starts with '[' runs to the next unescaped ']'. The delimiters
 *     are not part of the token. Inside such a token a backslash is dropped
 *     and the character after it is taken literally.
 *   - Any other token runs to the next space or tab; quotes, brackets and
 *     backslashes inside it are ordinary characters.
 *   - A token still open at the end of the line is kept as it is.
 *
 * `line` is only read. The result is a newly allocated, NULL-terminated
 * array of newly allocated strings; release it with parse_free(). If memory
 * cannot be obtained the program is aborted.
 */
char ** parse_line(char line[]) {
    enum { ST_IDLE, ST_BARE, ST_DELIMITED } state = ST_IDLE;
    int escaped = 0;          /* previous char was an unescaped backslash */
    char end_delim = '\0';    /* closing character of the delimited token */
    char **tokens = NULL;
    size_t count = 0;         /* tokens stored so far */
    size_t len = 0;           /* characters collected for the current token */

    /* No token can be longer than the line, so one scratch buffer suffices. */
    char *scratch = parse_xrealloc(NULL, strlen(line) + 1);

    for (const char *c = line; *c != '\0'; c++) {
        const int blank = (*c == ' ' || *c == '\t');

        if (state == ST_IDLE) {
            if (blank) {
                continue;
            }
            /* First character of a new token. */
            len = 0;
            escaped = 0;
            if (*c == '"' || *c == '[') {
                end_delim = (*c == '"') ? '"' : ']';
                state = ST_DELIMITED;
            } else {
                scratch[len++] = *c;
                state = ST_BARE;
            }
        } else if (state == ST_BARE) {
            if (blank) {
                tokens = parse_push_token(tokens, count, scratch, len);
                count++;
                state = ST_IDLE;
            } else {
                scratch[len++] = *c;
            }
        } else { /* ST_DELIMITED */
            if (escaped) {
                scratch[len++] = *c;
                escaped = 0;
            } else if (*c == '\\') {
                escaped = 1;
            } else if (*c == end_delim) {
                tokens = parse_push_token(tokens, count, scratch, len);
                count++;
                state = ST_IDLE;
            } else {
                scratch[len++] = *c;
            }
        }
    }

    /* Keep a token that the end of the line cut short. */
    if (state != ST_IDLE) {
        tokens = parse_push_token(tokens, count, scratch, len);
        count++;
    }

    tokens = parse_push_token(tokens, count, NULL, 0);
    free(scratch);
    return tokens;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef PARSER_H
#define PARSER_H

/*
 * Flags for keeping track of the parser's state. Each value is a distinct
 * bit so that several of them can be combined in one unsigned integer.
 */
enum {
    _f_delimited_tok = 1,   /* inside a token opened by '"' or '[' */
    _f_bare_tok      = 2,   /* inside a token that ends at a space or tab */
    _f_escaped       = 4    /* the previous character was a backslash escape */
};

/*
 * Parses a line into tokens. It allocates its own memory.
 *
 * line: NUL-terminated text to split.
 * Returns a NULL-terminated array of heap-allocated strings; the caller
 * owns it and releases it with parse_free().
 */
char ** parse_line(char line[]);

/*
 * Frees the parsed tokens.
 *
 * p: array returned by parse_line(); each string and the array are freed.
 */
void parse_free(char **p);

#endif
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment