Skip to content

Instantly share code, notes, and snippets.

@paveljurca
Last active August 29, 2015 14:18
Show Gist options
  • Save paveljurca/1ee355752c2b45e0a1bd to your computer and use it in GitHub Desktop.
Save paveljurca/1ee355752c2b45e0a1bd to your computer and use it in GitHub Desktop.
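Extract the CSV lines whose value in a specific column matches one of a given list of values (only the header and the matching lines are written to the output)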
#!/usr/bin/perl
use v5.10.1;
use strict;
use warnings;
use Text::CSV;
#use open qw(:std :utf8);
# ---- user settings ----

# Header name of the column whose values are inspected.
my $col_look_up = 'id';

# Values to search for in that column; rows whose value equals one of
# these are the rows copied to the output file.
my @filter_out = qw(2434 3622 4185 5627 27145);

# CSV file to read (its first row must be the header).
my $file_in = 'src/airports1.csv';

# CSV file to create; the script refuses to run if it already exists.
my $file_out = 'airports1.csv';
# do NOT change
#========= MAIN =========
# Copy the header row of $file_in, plus every data row whose $col_look_up
# value is listed in @filter_out, into a newly created $file_out.
#
# Dies when:
#   - $file_out already exists (never overwrites),
#   - the CSV parser cannot be constructed,
#   - $file_in cannot be opened or has no header row,
#   - the header has no $col_look_up column,
#   - $file_in contains a line that cannot be parsed,
#   - $file_out cannot be opened, written or closed.
#
# The output file is only created after the input has been read and
# validated completely, so a failed run does not leave an empty or
# truncated $file_out behind.
die qq("$file_out" already exists\n) if -e $file_out;

my $csv = Text::CSV->new({ binary => 1, eol => $/ })
    or die 'cannot create CSV parser: ' . Text::CSV->error_diag() . "\n";

open(my $fh_in, '<:encoding(utf8)', $file_in)
    or die qq(no "$file_in" file\n);

# first row holds the column names
my $fields = $csv->getline($fh_in)
    or die qq("$file_in" has no readable header row\n);
die qq("$file_in" does not have the '$col_look_up' column)
    unless grep { $_ eq $col_look_up } @{ $fields };
$csv->column_names($fields);

# Lookup table of the searched values: exact string comparison, replacing
# the experimental/deprecated smartmatch operator (~~).
my %is_wanted = map { $_ => 1 } @filter_out;

# Read the input row by row and remember only the matching rows, instead
# of loading the whole file into memory.
my @matching_rows;
while (my $row = $csv->getline_hr($fh_in)) {
    my $value = $row->{$col_look_up};
    # a row shorter than the header yields undef here; it can never match
    next unless defined $value && exists $is_wanted{$value};
    push @matching_rows, $row;
}

# getline_hr returns false both at end of file and on a parse error;
# tell the two apart so broken input is not silently truncated.
$csv->eof
    or die qq(cannot parse "$file_in": ) . $csv->error_diag() . "\n";
close($fh_in);

open(my $fh_out, '>:encoding(utf8)', $file_out)
    or die qq($!\n);

$csv->print($fh_out, $fields)
    or die qq(cannot write to "$file_out": $!\n);
for my $row (@matching_rows) {
    # @{$row}{...} is a hash slice: values in header order
    $csv->print($fh_out, [ @{$row}{ @{ $fields } } ])
        or die qq(cannot write to "$file_out": $!\n);
}

# buffered write errors only surface when the handle is closed
close($fh_out)
    or die qq(cannot close "$file_out": $!\n);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment