Created
September 23, 2011 20:33
-
-
Save TimToady/1238385 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!./perl6 | |
grammar CSV::File { | |
token TOP { ^ <line>* <empty_line>? [ $ || { say $¢.pos,' ', $¢.orig.substr($¢.pos,20) } <!> ] } | |
token line { <value> ** ',' \n } | |
token value { | |
| <pure_text> | |
| \h* \" <quoted_contents> \" \h* | |
} | |
token quoted_contents { [<-["]> | '""' ]* } | |
token pure_text { <!before \h*'"'> <-[,\n]>* } | |
token empty_line { \h* \n } | |
} | |
class CSV { | |
has $!trim; | |
has $!strict; | |
has $!skip-header; | |
has $!output; | |
my $trim-default = False; | |
my $strict-default = 'default'; | |
my $skip-header-default = False; | |
my $output-default = 'arrays'; | |
sub extract_text($m, :$trim) { | |
my $text = ($m<quoted_contents> // $m).subst('""', '"', :global); | |
return $trim ?? $text.trim !! ~$text; | |
} | |
method parse($input, :$trim is copy, :$strict is copy, | |
:$skip-header is copy, :$output is copy) { | |
if self.defined { | |
$trim //= $!trim // $trim-default; | |
$strict //= $!strict // $strict-default; | |
$skip-header //= $!skip-header // $skip-header-default; | |
$output //= $!output // $output-default | |
} | |
else { | |
$trim //= $trim-default; | |
$strict //= $strict-default; | |
$skip-header //= $skip-header-default; | |
$output //= $output-default; | |
} | |
CSV::File.parse($input) | |
or die "Sorry, cannot parse"; | |
my @lines = $<line>.list; | |
my @values = map { | |
[map { extract_text($_, :$trim) }, .<value>.list] | |
}, @lines; | |
if $strict eq 'default' { | |
$strict = $output.lc ne 'arrays'; | |
} | |
if $strict { | |
my $expected-columns = @values[0].elems; | |
for ^@values -> $line { | |
if (my $c = +@values[$line]) > $expected-columns { | |
die "Too many columns ($c, expected $expected-columns) " | |
~ "on line $line"; | |
} | |
elsif $c < $expected-columns { | |
die "Too few columns ($c, expected $expected-columns) " | |
~ "on line $line"; | |
} | |
} | |
} | |
if $output.lc eq 'hashes' { | |
my @header = @values.shift.list; | |
@values = map -> @line { | |
my %hash = map {; @header[$_] => @line[$_] }, | |
^(+@line min +@header); | |
\%hash | |
}, @values; | |
} | |
elsif $output.lc eq 'arrays' { | |
if $skip-header { | |
@values.shift; | |
} | |
} | |
else { | |
my $type = $output; | |
my @header = @values.shift.list; | |
@values = map -> @line { | |
my %attrs = map {; @header[$_] => @line[$_] }, | |
^(+@line min +@header); | |
$type.new( |%attrs ); | |
}, @values; | |
} | |
return @values; | |
} | |
method parse-file($filename, *%_) { | |
return self.parse( slurp($filename).subst("\r",''), |%_ ); | |
} | |
} | |
my $file = 'export.csv'; | |
my @csv = CSV.parse-file: $file, :output<hashes>; | |
@csv[0].perl.say; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment