Last active
January 1, 2016 04:09
-
-
Save bonobo78/8089753 to your computer and use it in GitHub Desktop.
Creates a json file ready for _bulk ES API
tested with strawberry-perl-5.18.1.1-64bit on window$
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
## Creates a json file ready for _bulk ES API
## tested with strawberry-perl-5.18.1.1-64bit
## 2013-12-22 Author : FRN
use strict;
use warnings;
use JSON;               ## brings encode_json
use Tie::Array::CSV;    ## exposes a csv file as an array of rows
use Tie::IxHash;        ## brings ordered hashes
use Getopt::Long;       ## brings GetOptions facilities

## Defaults; each one can be overridden by the command-line option of the
## same name.
my $separator = " ";             ## field separator handed to the CSV reader
my $show_help = 0;               ## set by --help / -h
my $index     = "test_index";    ## value of "_index" in every bulk action line
my $type      = "test_type";     ## value of "_type" in every bulk action line

## Option names are given without leading dashes, as Getopt::Long documents
## them. An unknown or malformed option makes GetOptions return false, in
## which case the usage text is shown.
GetOptions(
    "separator|s=s" => \$separator,
    "help|h!"       => \$show_help,
    "index=s"       => \$index,
    "type=s"        => \$type,
) or show_help();
show_help() if $show_help;

## die if file name is not specified: the first remaining argument is the
## csv file to convert
my ($csvfile) = @ARGV;
unless (defined $csvfile) {
    die "input file name is required";
}
## Expose the csv file as an array of rows; each row is itself an array of
## field values.
tie my @file, 'Tie::Array::CSV', $csvfile, sep_char => $separator;

## first line must contain fields name
my $header = \@{$file[0]};

## The bulk action line is the same for every document, so it is built once.
## It goes through the JSON encoder so that quotes or backslashes in the
## index/type names are escaped and the line is always valid JSON.
## "canonical" keeps the key order stable ("_index" before "_type").
my $action = JSON->new->canonical->encode(
    { index => { _index => $index, _type => $type } }
);

## iterate from the second row (offset 1) to skip the header;
## $#file is the index of the last row
for my $rownum (1 .. $#file) {
    my $row = \@{$file[$rownum]};    ## getting the row

    tie my %r, "Tie::IxHash";        ## ordered hash: keeps the column order
    @r{@$header} = @$row;            ## hash slice: each value goes to its column name
    $r{'message'} = "";              ## placeholder, filled in after the loop

    ## Walk every field to apply small transformations and to collect the
    ## values that make up the "message" field.
    my @message_parts;
    while ( my ($key, $value) = each %r ) {
        ## "message" is the aggregate itself; only that exact key is
        ## skipped, not every column whose name merely contains "message"
        next if $key eq 'message';

        ## short rows leave trailing columns undefined; empty fields add
        ## nothing. A literal "0" is a real value and is kept.
        ## TODO: better field number control
        next unless defined $value && length $value;

        ## special case : reduce '[field name=value]' to 'value'.
        ## \Q..\E keeps regex metacharacters in the column name literal.
        $r{$key} = $1 if $value =~ /\[\Q$key\E=(.+)\]/;

        ## the message keeps the value as it was read from the file
        push @message_parts, $value;
    }

    ## field message contains every other value, space separated, with no
    ## leading space
    $r{'message'} = join " ", @message_parts;

    ## timestamp from date and time fields; a missing column counts as an
    ## empty string so no "uninitialized" warning is raised
    ## TODO: try to catch from any field without knowing it
    ## TODO: add eventtreatedTime
    my $date_time = join " ", $r{'date'} // "", $r{'time'} // "";
    if ( $date_time =~ m{(\d{4})/(\d{2})/(\d{2})\s+(\d{2}:\d{2}:\d{2})} ) {
        $r{'@timestamp'} = $1 . "-" . $2 . "-" . $3 . "T" . $4;
    }

    ## output bulk method: action line first, then the document line
    ## TODO: a lot to be more versatile
    print $action, "\n";
    print encode_json(\%r), "\n";

    ## testing pretty output
    #print JSON->new->utf8->pretty->encode(\%r)."\n";
}
## Print the command-line usage summary on standard output and terminate
## the script. Takes no arguments and never returns: it always exits with
## status 1, whether it was reached through --help or through an
## option-parsing failure.
sub show_help {
    print "Usage: $0 [--separator=?] [--index=<index>] [--type=<type>] [--help] <file>\n";
    exit(1);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment