Created
January 13, 2016 17:08
-
-
Save ag4ve/6a3b2b5313a367cc3356 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use warnings; | |
use Data::Dumper; | |
use Text::CSV; | |
# Shared CSV parser/writer used for every file read and for the output file.
my $csv = Text::CSV->new({binary => 1, always_quote => 1})
    or die "Can not use CSV: " . Text::CSV->error_diag();

# Lookup file: a header is expected and hash is expected to be the first field
my $lookup = $ARGV[0] or die "Must specify a lookup csv.\n";
my $fileout = $ARGV[1] // 'out.csv';

# Result files: score is expected as the first field and hash as the second.
# A header is expected.
# If globbing - do not include the lookup filename, and do not include the
# output file either: a previous run's output has a score in the first field
# and the hash in the second, so it would otherwise be counted as a voter.
# NOTE(review): names are compared as plain strings, so a lookup/output given
# with a directory prefix (e.g. "./lookup.csv") will not match the glob result.
my @resultsin = (scalar(@ARGV) > 2
    ? @ARGV[2 .. $#ARGV]
    : grep { $_ ne $lookup and $_ ne $fileout } glob "*.csv");

my ($lookdatain, $header) = get_data($lookup);

# Add headers: the average goes in front, the per-voter scores at the end
unshift(@$header, "Score");
push(@$header, "Individual scores");

# Put lookup data into a hash: hash => [remaining lookup fields]
my $lookdata = {};
foreach my $line (@$lookdatain) {
    my $hash = shift(@$line);
    # A row without a first field cannot be keyed; skip it
    next if not defined $hash;
    $lookdata->{$hash} = $line;
}
# Get scores
# $data maps hash => [score0, score1, ... scoreN], with one score appended for
# every usable row found in the result files.
my $data = {};
foreach my $resultin (@resultsin) {
    my ($resultdata) = get_data($resultin, 1);
    $resultdata //= [];

    # If no row has a purely numeric first field the file did not parse as the
    # expected CSV; fall back to the line-based reader.
    if (not grep {defined($_->[0]) and $_->[0] =~ /^[0-9\.-]+$/} @$resultdata) {
        $resultdata = not_csv($resultin) // [];
    }

    foreach my $row (@$resultdata) {
        my ($score, $hash) = @{$row}[0, 1];
        next if not defined $score or not defined $hash;

        # The header row is still present here (read with the no-header flag),
        # so require the whole field to be a decimal number; an unanchored
        # match would accept any text that merely contains a digit.
        next if $score !~ /^-?(?:[0-9]+\.?[0-9]*|\.[0-9]+)$/;

        # Push the absolute value of a number (drop the leading sign only)
        push @{$data->{$hash}}, ($score =~ s/^-//r);
    }
}
# Generate a hash of hash key => score average
my $order = {};
foreach my $hash (keys %$data) {
    my @scores = @{$data->{$hash}};

    # Number of voters for individual. This must be the element count:
    # $#array is the last index (count - 1), which skews every average and
    # divides by zero when there is exactly one voter.
    my $resultnum = scalar(@scores);

    # Get the sum of all scores
    my $sum = 0;
    foreach my $score (@scores) {
        # Max score of 10
        $sum += ($score > 10 ? 10 : $score);
    }

    # Generate a lookup with average score (0 when there are no scores)
    $order->{$hash} = ($resultnum ? $sum / $resultnum : 0);
}
# Write result file
# One row per scored hash, highest average first:
#   average, hash, lookup fields..., individual scores...
{
    $csv->eol("\r\n");
    open (my $fh, '>', $fileout)
        or die "Can not write [" . $fileout . "] $!";

    # Write header
    $csv->print($fh, $header);

    # The header holds "Score", the hash column and "Individual scores" in
    # addition to the identification columns, so this is how many blank
    # fields keep the score columns aligned when a hash has no lookup entry.
    my $infowidth = scalar(@$header) - 3;
    $infowidth = 0 if $infowidth < 0;

    # Ties are broken by hash so the output order is reproducible
    my @ranked = sort { $order->{$b} <=> $order->{$a} or $a cmp $b } keys %$order;

    # Write output data
    foreach my $hash (@ranked) {
        my $info = $lookdata->{$hash};
        if (not defined $info) {
            # Dereferencing a missing entry would die under strict refs
            warn "No lookup entry for [$hash]\n";
            $info = [('') x $infowidth];
        }
        $csv->print($fh, [
            $order->{$hash},        # average score
            $hash,                  # hash
            @$info,                 # identification information
            @{$data->{$hash}},      # individual voters' scores
        ]);
    }

    # Buffered write errors only surface at close
    close $fh
        or die "Can not close [" . $fileout . "] $!";
}
# Fallback reader for result files that did not parse as the expected CSV.
#
# Reads $file line by line, removes every double quote, splits on commas and
# keeps only lines whose first column is made of digits, '.' and '-' and whose
# second column is exactly 22 characters long once everything outside
# [a-zA-Z0-9+/] has been stripped.
# NOTE(review): 22 characters of that alphabet looks like an unpadded base64
# digest - confirm against the producer of these files.
#
# Returns an array ref of [score, hash] pairs; the array is empty (never
# undef) when no line qualifies, so callers can dereference it directly.
# Dies if the file cannot be opened.
sub not_csv {
    my ($file) = @_;

    open (my $fh, '<', $file)
        or die "Can not open [$file] $!";

    my @pairs;
    while (my $line = <$fh>) {
        chomp $line;
        my @cols = split(',', ($line =~ s/"//gr));

        # Need a score and a hash, and the score must look numeric
        next if (scalar(@cols) < 2 or
            $cols[0] !~ /^[0-9\.-]+$/);

        # Drop stray characters (whitespace, CR, ...) before length-checking
        $cols[1] =~ s/[^a-zA-Z0-9+\/]//g;
        next if (length($cols[1]) != 22);

        push @pairs, [@cols[0,1]];
    }
    close $fh;

    return \@pairs;
}
# Parse every row of $file with the shared $csv object.
#
# Arguments:
#   $file     - path of the file to read
#   $noheader - when true, the first row is left in the data and no header
#               is split off
#
# Returns the list ($rows, $header): $rows is what getline_all produced and
# $header is the first row, or undef when $noheader is set.
# Dies if the file cannot be opened.
sub get_data {
    my ($file, $noheader) = @_;

    open (my $in, '<', $file)
        or die "Can not open [$file] $!";
    my $records = $csv->getline_all($in);
    # Stopping before end of file means a parse problem; report it
    $csv->error_diag() unless $csv->eof;
    close $in;

    my $first_row = $noheader ? undef : shift @$records;
    return ($records, $first_row);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment