process postgres aligned output files
this files are created by using \a to enable aligned output and \o filename
\a \o output.csv select * from my_table;
*.csv | |
*.sql |
process postgres aligned output files
this files are created by using \a to enable aligned output and \o filename
\a \o output.csv select * from my_table;
#!/usr/bin/perl | |
use strict; | |
use Data::Dumper; | |
$Data::Dumper::Indent= 1; | |
use Util::Matrix; | |
my $fnm1= shift (@ARGV); | |
my $fnm2= shift (@ARGV); | |
my $tbl1= pga::read_table ($fnm1); | |
$tbl1->mk_idx(0); | |
my $tbl2= pga::read_table ($fnm2); | |
$tbl2->mk_idx(0); | |
my $tbl_x= pga::read_table ('sj_dump.csv'); | |
my $tbl_y= $tbl_x->filter(2, 1251); | |
my $idx_y= $tbl_y->mk_idx(0); | |
my $missing= pga::diff ($tbl1, $tbl2); | |
my @column_names= @{$tbl1->{'columns'}}; | |
# print "columns: ", Dumper (\@column_names); | |
# print "missing: ", Dumper ($missing); | |
push (@column_names, 'hit'); | |
my $cn_hit= $#column_names; | |
my @out_rows; | |
foreach my $row (@$missing) | |
{ | |
# print "row: ", Dumper ($row); | |
$row->[$cn_hit]= ''; | |
if (exists ($idx_y->{$row->[4]})) | |
{ | |
$row->[$cn_hit]= 'name'; | |
push (@out_rows, $row); | |
} | |
} | |
# Util::Matrix::print(\@column_names, $missing); | |
Util::Matrix::print(\@column_names, \@out_rows); | |
exit (0); | |
sub x1 | |
{ | |
my $fnm1= 'sj_current.csv'; | |
my $fnm2= 'sj_dump.csv'; | |
my $tbl1= pga::read_table ($fnm1); | |
$tbl1->mk_idx(0); | |
# print "tbl1: ", Dumper ($tbl1); | |
my $tbl2= pga::read_table ($fnm2); | |
$tbl2->mk_idx(0); | |
my $missing= pga::diff ($tbl1, $tbl2); | |
# print "columns: ", Dumper ($tbl1->{'columns'}); | |
# print "missing: ", Dumper ($missing); | |
check_1251 ($missing); | |
write_inserts ('SchlagwortJelinek', $tbl1->{'columns'}, $missing); | |
} | |
sub write_inserts | |
{ | |
my $tbl_name= shift; | |
my $col_names= shift; | |
my $rows= shift; | |
my $fnm_sql= 'insert_missing.sql'; | |
open (SQL, '>:utf8', $fnm_sql) or die; | |
my @col_names= @$col_names; | |
my $col_count= @col_names; | |
my $cnt= 0; | |
foreach my $row (@$rows) | |
{ | |
my @col; | |
my @val; | |
for (my $i= 0; $i < $col_count; $i++) | |
{ | |
if (defined ($row->[$i]) && $row->[$i] ne '') | |
{ | |
push (@col, $col_names[$i]); | |
push (@val, $row->[$i]); | |
} | |
} | |
print SQL "INSERT INTO \"$tbl_name\" (", | |
join (',', map { $_ } @col), | |
") VALUES (", | |
join (',', map { "'".$_."'" } @val),");\n"; | |
$cnt++; | |
} | |
close (SQL); | |
print "written $cnt rows to $fnm_sql\n"; | |
$cnt; | |
} | |
=head2 check_1251 | |
find out if either column 0 or column 2 have the value 1251; | |
complain otherwise | |
=cut | |
sub check_1251 | |
{ | |
my $rows= shift; | |
foreach my $row (@$rows) | |
{ | |
next if ($row->[0] == 1251 || $row->[2] == 1251); | |
print "check row: ", Dumper($row); | |
} | |
} | |
package pga; | |
# postgres aligned output | |
sub mk_idx | |
{ | |
my $tbl= shift; | |
my $col= shift; | |
my $idx; | |
foreach my $row (@{$tbl->{rows}}) | |
{ | |
# print "row: ", main::Dumper ($row); | |
my $v= $row->[$col]; | |
push (@{$idx->{$v}}, $row); | |
} | |
$tbl->{index}= $idx; | |
} | |
=head1 filter($table, $column_number, $value) | |
return a new table consisting only of those rows from $table that contain the given $value in given $column_number. | |
=cut | |
sub filter | |
{ | |
my $tbl= shift; | |
my $col_num= shift; | |
my $val= shift; | |
my @out_rows; | |
foreach my $row (@{$tbl->{rows}}) | |
{ | |
push (@out_rows, $row) if ($row->[$col_num] == $val); | |
} | |
my $res= { columns => $tbl->{columns}, rows => \@out_rows }; | |
bless ($res); | |
$res; | |
} | |
=head2 diff ($tbl1, $tbl2) | |
look for items from $tbl2 which are not present in $tbl1 | |
=cut | |
sub diff | |
{ | |
my $tbl1= shift; | |
my $tbl2= shift; | |
my $idx1= $tbl1->{index}; | |
my $idx2= $tbl2->{index}; | |
my @missing; | |
ROW2: foreach my $id (sort { $a <=> $b } keys %$idx2) | |
{ | |
if (exists ($idx1->{$id})) | |
{ | |
# TODO: check if both rows are identical or report otherwise; | |
next ROW2; | |
} | |
push (@missing, @{$idx2->{$id}}); | |
} | |
(wantarray) ? @missing : \@missing; | |
} | |
sub read_table | |
{ | |
my $fnm= shift; | |
unless (open (FI, '<:utf8', $fnm)) | |
{ | |
print "cant read $fnm\n"; | |
return undef; | |
} | |
my @lines= <FI>; | |
close (FI); | |
# TODO: check for empty files or other stuff... | |
chop (@lines); | |
my $columns= shift (@lines); | |
my $rows= pop (@lines); | |
my @columns= split(/\|/, $columns); | |
# print "columns: ", main::Dumper(\@columns); | |
print "rows=[$rows]\n"; # TODO: check if $rows is | |
push (@lines, $rows) unless ($rows =~ m#^\(\d+ rows\)$#); | |
my @rows; | |
my $res= | |
{ | |
'columns' => \@columns, | |
'rows' => \@rows, | |
}; | |
bless ($res); | |
foreach my $line (@lines) | |
{ | |
my @l= split (/\|/, $line); | |
push (@rows, \@l); | |
} | |
$res; | |
} |