Created
September 19, 2015 20:22
-
-
Save leverich/8079867fd73b478e7e0a to your computer and use it in GitHub Desktop.
Hacky tool to join timestamped logs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
## assumptions... | |
### 1. time samples within some correlation coefficient are candidates | |
### for a matching. this correlation coefficient should be longer than | |
### the longest sample period. | |
### 2. the nearest sample (rounded up or down) shall be the matching | |
### sample. should some samples be collected at a faster rate than | |
### others, the slower samples will be matched multiple times. | |
### 3. big jumps or backwards jumps in time samples denote a new log | |
## each stream is an array so that time is binary searachable | |
# @streams = ( [ { time => secs.usecs, text => log string } ], | |
# ) | |
$correlation_period = 1; ## output sample rate after correlation | |
$correlation_distance = undef; | |
$start_time = undef; | |
$stop_time = undef; | |
$zero = undef; | |
use Time::HiRes qw/clock_gettime/; | |
use Getopt::Long; | |
Getopt::Long::Configure("bundling"); | |
$result = GetOptions("start|s=f" => \$start_time, | |
"stop|t=f" => \$stop_time, | |
"period|p=f" => \$correlation_period, | |
"zero|z" => \$zero); | |
@streams = (); | |
$current_stream = 0; | |
$current_time = undef; | |
$last_time = undef; | |
$current_entry = undef; | |
## input logs, separate into streams and collect multi-line log | |
## messages. cut-off times should probably be applied at this point to | |
## avoid further processing... | |
#print clock_gettime, "\n"; | |
outer: while (<>) { | |
$line = $_; | |
# chomp $line; | |
# print "$line"; | |
if (/^(\d+\.\d+)/) { ## new time | |
$check_time = $1; | |
if ($start_time - $check_time > 3600) { | |
for (0..600) { $x = <> or last outer; } | |
next outer; | |
} | |
if ($check_time - $stop_time > 3600) { | |
for (0..600) { $x = <> or last outer; } | |
next outer; | |
} | |
$last_time = $current_time; | |
$current_time = $1; | |
$line =~ s/^$1\s+//; | |
if (($last_time - $current_time) > 10) { # $current_time < $last_time) { ## backwards!! | |
# print "Starting new... $current_time $last_time\n"; | |
$current_stream++; | |
} | |
if ($current_time && $current_time != $last_time) { ## new unique time.. | |
$current_entry = { "time" => $current_time }; | |
push @{$streams[$current_stream]}, $current_entry; | |
} | |
} | |
if (($start_time - $current_time) > 3600) { | |
} | |
$current_entry->{"text"} .= $line; | |
# print $current_entry->{"time"} . " x " . $current_entry->{"text"}; | |
chomp $line; | |
} | |
#print clock_gettime, "\n"; | |
if (!$start_time) { ## no start time specified... find earliest entry | |
for my $stream (@streams) { | |
if (!$start_time || $stream->[0]{"time"} < $start_time) { | |
$start_time = $stream->[0]{"time"}; | |
} | |
} | |
} | |
if (!$stop_time) { ## no stop time specified... find latest entry | |
for my $stream (@streams) { | |
if (!$stop_time || $stream->[scalar @{$stream} - 1]{"time"} > $stop_time) { | |
$stop_time = $stream->[scalar @{$stream} - 1]{"time"}; | |
} | |
} | |
} | |
@stream_pointers = (); | |
$current_time = $start_time; | |
map { $stream_pointers[$_] = 0 } 0 .. $#streams; | |
while ($current_time < $stop_time && grep { $_ eq 1 } map { $stream_pointers[$_] < scalar @{$streams[$_]} } 0 .. $#streams) { | |
@correlations = (); | |
## advance each log to >= $current_time | |
# print "OK starting...\n"; | |
# print "start: $current_time " . join(" ", @stream_pointers) . "\n"; | |
for my $n (0..$#streams) { | |
while (($stream_pointers[$n] < scalar @{$streams[$n]} - 1) && ($streams[$n][$stream_pointers[$n]]{"time"} < $current_time)) { | |
$stream_pointers[$n]++; | |
# print $n . " " . $streams[$n][$stream_pointers[$n]]{"time"} . " " . $current_time . " " . $stream_pointers[$n] . " " . (scalar @{$streams[$n]}) . "\n"; | |
} | |
} | |
## this entry is either 1) last entry or 2) >= current_time | |
## previous entry is either 1) non-existant or 2) < current_time | |
## 1, 2 => prev < this < current_time => this | |
## 1, 1 => current_time <= this => this | |
## 2, 2 => prev < current_time <= this => depends | |
## 2, 1 => current_time <= this => this | |
for my $n (0..$#streams) { | |
$prev = 0; | |
# $fail = 0; | |
if ($stream_pointers[$n] == scalar @{$streams[$n]} - 1) { ## this is the last entry, its going to be the best correlation | |
} elsif ($stream_pointers[$n] == 0) { ## this is the first entry, its going to be the best correlation | |
} else { ## determine which is best based on time distance | |
$thistime = $streams[$n][$stream_pointers[$n]]{"time"}; | |
$prevtime = $streams[$n][$stream_pointers[$n]-1]{"time"}; | |
# if (abs($current_time - $prevtime) > $correlation_distance && | |
# abs($current_time - $thistime) > $correlation_distance) { | |
# $fail = 1; | |
# } | |
if (abs($current_time - $prevtime) < abs($current_time - $thistime)) { | |
$prev = 1; | |
# prev | |
# } else { | |
# this | |
} | |
} | |
# if ($fail) { | |
# $correlations[$n] = -1; | |
# } elsif ($prev) { | |
if ($prev) { | |
$correlations[$n] = $stream_pointers[$n]-1; | |
} else { | |
$correlations[$n] = $stream_pointers[$n]; | |
} | |
} | |
## is this what we want to output? | |
$print_time = $zero ? ($current_time - $start_time) : $current_time; | |
print $print_time . ": " . join(" | ", map { join(" ", split("\n", $streams[$_][$correlations[$_]]{"text"})) } 0..$#streams) . "\n"; | |
$current_time += $correlation_period; | |
## RULE: to avoid livelock, at least one stream must advance every iteration | |
# map { $stream_pointers[$_]++ } 0 .. $#stream_pointers; | |
} | |
use Data::Dumper; | |
$Data::Dumper::Terse = 1; | |
$Data::Dumper::Indent = 1; | |
#print Dumper(\@streams); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment