Last active
January 3, 2016 03:49
-
-
Save blakesmith/8404563 to your computer and use it in GitHub Desktop.
nginx log merger. Useful if you have logs distributed on many app nodes that you'd like unified into one big log file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build rules for the nginx_log_merger tool.
CC=gcc
# Warnings, strict ISO conformance, optimisation level 3, C99.
# (The flags were previously misspelled as "-pendatic" and "-03", which gcc rejects.)
CFLAGS=-Wall -pedantic -O3 -std=c99
PROG=nginx_log_merger

.PHONY: all clean

all: nginx_log_merger.o

# Despite the ".o" name, this rule compiles and links the final executable $(PROG).
nginx_log_merger.o:
	$(CC) $(CFLAGS) nginx_log_merger.c -o $(PROG)

clean:
	rm -rf nginx_log_merger
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <errno.h> | |
#include <time.h> | |
#include <getopt.h> | |
#define MAX_LINE 512 | |
#define DATE_SIZE 64 | |
#define MAX_FILE_COUNT 64 | |
#define MAX_FILE_LEN 512 | |
#define SECONDSPERDAY 86400 | |
struct opt { | |
char infile_names[MAX_FILE_COUNT][MAX_FILE_LEN]; | |
char outfile_name[MAX_FILE_LEN]; | |
int num_infiles; | |
int date_pos; | |
}; | |
typedef struct logfile { | |
const char *name; | |
FILE *infile; | |
char cur_position[MAX_LINE]; | |
time_t cur_date; | |
int done; | |
} logfile; | |
typedef struct logcollection { | |
logfile *logs; | |
int num_logs; | |
int num_done; | |
} logcollection; | |
/*
 * Copy the date field of a log line into buf.
 *
 * Fields are separated by `delim`; `pos` is the 0-based index of the field
 * to extract. The last 6 characters of the field are dropped before copying
 * (presumably a trailing UTC offset such as "+00:00" -- confirm against the
 * log format in use). The field must be followed by a delimiter, so the very
 * last field of a line that has no trailing delimiter is not found.
 *
 *   in    NUL-terminated log line (not modified)
 *   buf   destination buffer; always NUL-terminated when len > 0
 *   len   size of buf in bytes
 *   delim field separator
 *   pos   0-based index of the wanted field
 *
 * Returns 0 on success (the result is truncated to len-1 characters if
 * needed), or -1 if the field does not exist, is shorter than the 6 dropped
 * characters, or the arguments are invalid. On failure buf holds "".
 */
static int find_date(char *in, char *buf, int len, char delim, int pos) {
    enum { SUFFIX_LEN = 6 };
    const char *start;
    const char *ch;
    int cur_delim = 0;

    if (buf == NULL || len <= 0) {
        return -1;
    }
    buf[0] = '\0';
    if (in == NULL || pos < 0) {
        return -1;
    }

    /* Field 0 starts at the beginning of the line. */
    start = in;
    for (ch = in; *ch != '\0'; ch++) {
        if (*ch != delim) {
            continue;
        }
        cur_delim++;
        if (cur_delim == pos) {
            /* The wanted field begins right after its leading delimiter. */
            start = ch + 1;
        } else if (cur_delim > pos) {
            /* ch now sits on the delimiter that closes the wanted field. */
            long date_length = (long)(ch - start) - SUFFIX_LEN;
            size_t copy_length;

            if (date_length < 0) {
                return -1;
            }
            copy_length = (size_t)date_length;
            if (copy_length > (size_t)len - 1) {
                /* Leave room for the terminator. */
                copy_length = (size_t)len - 1;
            }
            memcpy(buf, start, copy_length);
            buf[copy_length] = '\0';
            return 0;
        }
    }
    return -1;
}
/*
 * Pick the log whose current line should be written next.
 *
 * Only logs that still have a pending line (done == 0) are considered.
 * A finished log keeps the date of its last line, so treating it as a
 * candidate would select it again and again.
 *
 * Returns the index of the unfinished log with the earliest cur_date
 * (the lowest index wins ties), or -1 when no unfinished log exists.
 */
static int find_next_log(logcollection *coll) {
    int pos = -1;
    int i;

    for (i = 0; i < coll->num_logs; ++i) {
        if (coll->logs[i].done) {
            continue;
        }
        if (pos < 0 || difftime(coll->logs[pos].cur_date, coll->logs[i].cur_date) > 0) {
            pos = i;
        }
    }
    return pos;
}
/* Report a malformed date/time on stderr and terminate with exit status 1. */
void invaliddate() {
    fputs("invalid date/time specification\n", stderr);
    exit(1);
}
/*
 * Convert an ISO 8601 point in time to a time_t.
 *
 * Accepted forms:
 *     CCYYMMDD              CCYY-MM-DD
 *     CCYYMMDDTHHMM         CCYY-MM-DDTHH:MM
 *     CCYYMMDDTHHMMSS       CCYY-MM-DDTHH:MM:SS
 *
 *   text     NUL-terminated date string (not modified)
 *   isotime  receives the result of mktime(), i.e. the fields are
 *            interpreted as local time
 *
 * Any other input is reported through invaliddate(), which terminates the
 * program. Field values are not range-checked; mktime() normalises them.
 */
static void parseISO8601(char *text, time_t *isotime) {
    struct tm tmstruct;
    const char *c = text;
    int year = 0;
    int month = 0;
    int days = 0;
    int hours = 0;
    int minutes = 0;
    int seconds = 0;
    int consumed = 0;

    *isotime = 0;

    /* Start from a fully defined struct tm and let mktime() decide
       whether daylight saving time applies. */
    memset(&tmstruct, 0, sizeof tmstruct);
    tmstruct.tm_isdst = -1;

    /* NOTE: the extended format has to be tried first, because otherwise
       the separating '-' would be read by sscanf as the sign of the next
       number. %n records how many characters were really used, so the
       cursor never moves past the end of a short field. */
    if (sscanf(c, "%4d-%2d-%2d%n", &year, &month, &days, &consumed) == 3) {
        c += consumed;
    } else {
        consumed = 0;
        if (sscanf(c, "%4d%2d%2d%n", &year, &month, &days, &consumed) == 3) {
            c += consumed;
        } else {
            invaliddate();
        }
    }

    tmstruct.tm_year = year - 1900;
    tmstruct.tm_mon = month - 1;
    tmstruct.tm_mday = days;

    if (*c == '\0') {
        /* Date only: midnight. */
        *isotime = mktime(&tmstruct);
        return;
    }

    if (*c != 'T') {
        invaliddate();
    }

    /* Time-of-day part. */
    c++;
    consumed = 0;
    if (sscanf(c, "%2d%2d%n", &hours, &minutes, &consumed) == 2) {
        c += consumed;
    } else {
        consumed = 0;
        if (sscanf(c, "%2d:%2d%n", &hours, &minutes, &consumed) == 2) {
            c += consumed;
        } else {
            invaliddate();
        }
    }

    if (*c == ':') {
        c++;
    }

    if (*c != '\0') {
        consumed = 0;
        if (sscanf(c, "%2d%n", &seconds, &consumed) == 1) {
            c += consumed;
        } else {
            invaliddate();
        }
        if (*c != '\0') { /* something left? */
            invaliddate();
        }
    }

    tmstruct.tm_hour = hours;
    tmstruct.tm_min = minutes;
    tmstruct.tm_sec = seconds;
    *isotime = mktime(&tmstruct);
}
/*
 * Read the next chunk (up to MAX_LINE-1 characters) of one log and refresh
 * its merge key.
 *
 *   coll      collection that owns the log
 *   log_pos   index of the log inside coll->logs
 *   date_pos  0-based index of the space-delimited date field
 *
 * When the stream yields nothing more, the log is flagged done and
 * coll->num_done is incremented. When the line has no usable date field a
 * diagnostic is printed and the previous cur_date is kept, so the line is
 * ordered like the line before it from the same file. A date that is present
 * but malformed still terminates the program inside parseISO8601().
 */
static void set_date(logcollection *coll, int log_pos, int date_pos) {
    logfile *log = &coll->logs[log_pos];
    char buf[DATE_SIZE];
    time_t date;
    char *out;

    out = fgets(log->cur_position, MAX_LINE, log->infile);
    if (out == NULL) {
        coll->num_done++;
        log->done = 1;
        return;
    }

    if (find_date(out, buf, DATE_SIZE, ' ', date_pos) < 0) {
        fprintf(stderr, "Unable to find the date field\n");
        /* buf holds no date here; parsing it would read garbage. */
        return;
    }

    parseISO8601(buf, &date);
    log->cur_date = date;
}
/*
 * Open every input log and load its first line.
 *
 *   coll       collection to initialise
 *   filenames  num_files NUL-terminated paths stored back to back, each in
 *              a slot of MAX_FILE_LEN bytes; the storage must outlive coll
 *              because each log keeps a pointer into it
 *   num_files  number of paths
 *   date_pos   0-based index of the date field, passed on to set_date()
 *
 * Returns 0 on success. Returns -1 if num_files is negative, memory cannot
 * be allocated, or a file cannot be opened; in that case every stream opened
 * so far is closed, the array is released and coll is left empty.
 */
static int logcollection_init(logcollection *coll, char *filenames, int num_files, int date_pos) {
    int i;

    coll->logs = NULL;
    coll->num_logs = 0;
    coll->num_done = 0;

    if (num_files < 0) {
        return -1;
    }
    if (num_files > 0) {
        /* calloc leaves cur_position as an empty string for empty files. */
        coll->logs = calloc((size_t)num_files, sizeof *coll->logs);
        if (coll->logs == NULL) {
            fprintf(stderr, "Unable to allocate memory for %d log files\n", num_files);
            return -1;
        }
    }
    coll->num_logs = num_files;

    for (i = 0; i < num_files; ++i) {
        char *filename = filenames + ((size_t)i * MAX_FILE_LEN);
        logfile *log = &coll->logs[i];

        log->infile = fopen(filename, "r");
        if (log->infile == NULL) {
            fprintf(stderr, "Unable to open infile: %s, %s\n", filename, strerror(errno));
            goto fail;
        }
        log->name = filename;
        log->done = 0;
        log->cur_date = 0;
        set_date(coll, i, date_pos);
    }
    return 0;

fail:
    /* Undo the work done for entries 0 .. i-1. */
    while (i-- > 0) {
        fclose(coll->logs[i].infile);
    }
    free(coll->logs);
    coll->logs = NULL;
    coll->num_logs = 0;
    coll->num_done = 0;
    return -1;
}
/*
 * Close every input stream and release the log array.
 *
 * All streams are closed and the array is freed even if one fclose() fails,
 * so nothing is leaked on error.
 *
 * Returns 0 if every stream closed cleanly, -1 if at least one fclose()
 * reported an error.
 */
static int logcollection_free(logcollection *coll) {
    int status = 0;
    int i;

    for (i = 0; i < coll->num_logs; ++i) {
        FILE *stream = coll->logs[i].infile;

        if (stream != NULL && fclose(stream) != 0) {
            status = -1;
        }
        coll->logs[i].infile = NULL;
    }

    free(coll->logs);
    coll->logs = NULL;
    coll->num_logs = 0;
    return status;
}
/*
 * Write the lines of all logs to outfile, ordered by their dates.
 *
 * On every round the log chosen by find_next_log() contributes its current
 * line; lines longer than the MAX_LINE-1 character read buffer are copied
 * through chunk by chunk before the log advances to its next line.
 *
 *   coll      initialised collection
 *   date_pos  0-based index of the date field, passed on to set_date()
 *   outfile   stream opened for writing
 *
 * Returns 0 on success, -1 on a read or write error, a NULL outfile, or an
 * inconsistent collection (no log selectable although some are unfinished).
 */
int merge_logs(logcollection *coll, int date_pos, FILE *outfile) {
    if (outfile == NULL) {
        errno = EINVAL;
        return -1;
    }

    while (coll->num_done < coll->num_logs) {
        int next_log = find_next_log(coll);
        logfile *cur_log;
        size_t len;

        if (next_log < 0 || next_log >= coll->num_logs) {
            errno = EINVAL;
            return -1;
        }
        cur_log = &coll->logs[next_log];

        len = strlen(cur_log->cur_position);
        if (fwrite(cur_log->cur_position, 1, len, outfile) != len) {
            fprintf(stderr, "Error writing to output file: %s\n", strerror(errno));
            return -1;
        }

        /* A chunk that fills the buffer without ending in '\n' is only the
           beginning of a longer line: keep copying until the line ends. */
        while (len == MAX_LINE - 1 && cur_log->cur_position[len - 1] != '\n') {
            if (fgets(cur_log->cur_position, MAX_LINE, cur_log->infile) == NULL) {
                if (ferror(cur_log->infile)) {
                    fprintf(stderr, "Error reading from log file: %s\n", strerror(errno));
                    return -1;
                }
                /* End of file: the long line was the unterminated last line. */
                break;
            }
            len = strlen(cur_log->cur_position);
            if (fwrite(cur_log->cur_position, 1, len, outfile) != len) {
                fprintf(stderr, "Error writing to output file: %s\n", strerror(errno));
                return -1;
            }
        }

        set_date(coll, next_log, date_pos);
    }
    return 0;
}
/* Print the usage summary to stdout. */
static void display_help() {
    static const char usage[] =
        "USAGE: nginx_log_merger [OPTIONS]\r\n\r\n"
        "OPTIONS:\r\n"
        " -i Logfile to merge (may specify multiple -i args)\r\n"
        " -o Merged output file\r\n"
        " -d (optional) 0 based index position of the ISO8601 date. Used as the comparator. Default: 3\r\n";

    fputs(usage, stdout);
}
static int parse_opts(struct opt *op, int argc, char **argv) { | |
int ch; | |
int in_spec, out_spec = 0; | |
static struct option longopts[] = { | |
{ "infile", required_argument, 0, 'i' }, | |
{ "outfile", required_argument, 0, 'o' }, | |
{ "date-pos", required_argument, 0, 'd' }, | |
{ "help", no_argument, 0, 'h' }, | |
{ 0, 0, 0, 0 } | |
}; | |
/* Default date position to the 3rd index, 0 based */ | |
op->date_pos = 3; | |
while ((ch = getopt_long(argc, argv, "i:o:d:", longopts, NULL)) != -1) { | |
switch (ch) { | |
case 'i': | |
if (op->num_infiles > MAX_FILE_COUNT+1) { | |
fprintf(stderr, "You have exceeded the maximum allowed files of: %d\n", MAX_FILE_COUNT); | |
return -1; | |
} | |
strncpy(op->infile_names[op->num_infiles], optarg, strlen(optarg)); | |
op->num_infiles++; | |
in_spec = 1; | |
break; | |
case 'o': | |
strncpy(op->outfile_name, optarg, strlen(optarg)); | |
out_spec = 1; | |
break; | |
case 'd': | |
op->date_pos = atoi(optarg); | |
break; | |
case 'h': | |
default: | |
return -1; | |
} | |
} | |
if (!in_spec && !out_spec) { | |
return -1; | |
} | |
return 0; | |
} | |
int main(int argc, char **argv) { | |
struct opt op; | |
logcollection coll; | |
int res; | |
int date_pos = 3; | |
FILE *outfile; | |
res = parse_opts(&op, argc, argv); | |
if (res < 0) { | |
display_help(); | |
exit(-1); | |
} | |
outfile = fopen(op.outfile_name, "w+"); | |
if (outfile == NULL) { | |
fprintf(stderr, "Unable to open outfile\n"); | |
} | |
res = logcollection_init(&coll, *op.infile_names, op.num_infiles, date_pos); | |
if (res) { | |
fprintf(stderr, "Unable to initialize log collection\n"); | |
exit(-1); | |
} | |
printf("Merging logs...\n"); | |
res = merge_logs(&coll, date_pos, outfile); | |
if (res < 0) { | |
fprintf(stderr, "Log merge failed: %s\n", strerror(errno)); | |
exit(-1); | |
} | |
printf("Done!\n"); | |
res = logcollection_free(&coll); | |
if (res) { | |
fprintf(stderr, "Unable to free log collection\n"); | |
exit(-1); | |
} | |
fclose(outfile); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment