Created
April 14, 2016 08:00
-
-
Save phochste/feb5b2933a556b860e2c1562fcef118d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Usage: | |
# marc_map('852a',holding.$append) | |
# holding(-logfile => '/mnt/scratch/lludss/holding_error.txt') | |
# | |
package Catmandu::Fix::holding; | |
use strict; | |
use Catmandu::Util qw(:io is_string); | |
use POSIX qw(strftime); | |
use Data::Dumper; | |
use Parse::RecDescent; | |
use Moo; | |
# Optional path of the log file. logme() appends to it and is a no-op when
# this attribute is undefined.
has logfile => ( is => 'ro' ); | |
# Parse::RecDescent parser for holding statements. Lazy with 'builder => 1',
# so Moo calls _build_parser the first time the attribute is read.
has parser => ( is => 'rw' , lazy => 1 , builder => 1); | |
# Constructor argument filter. The fix is configured with a '-logfile'
# option, whereas the attribute is named 'logfile'. Only that single option
# is forwarded to the wrapped BUILDARGS; any other argument is dropped.
around BUILDARGS => sub {
    my $orig  = shift;
    my $class = shift;
    my %args  = @_;

    return $class->$orig(logfile => $args{'-logfile'});
};
{
    # Grammar text from the __DATA__ section, cached for the whole process.
    # The DATA handle can only be read once: after the first read it sits at
    # end-of-file, so without this cache every fix instance after the first
    # would compile an empty grammar and end up with an unusable parser.
    my $grammar_text;

    # Builder for the lazy 'parser' attribute.
    #
    # Returns a Parse::RecDescent parser compiled from the grammar stored
    # after __DATA__. Dies when the grammar cannot be compiled, instead of
    # returning undef and failing later on the first method call.
    sub _build_parser {
        my ($self) = @_;

        if (!defined $grammar_text) {
            local $/;    # slurp the whole DATA section in one read
            my $text = <DATA>;
            $grammar_text = defined $text ? $text : '';
        }

        my $parser = Parse::RecDescent->new($grammar_text);
        die "Catmandu::Fix::holding: unable to compile the holding grammar\n"
            unless defined $parser;

        return $parser;
    }
}
# Fix entry point. A record whose 'holding' field is missing or false is
# returned untouched; any other record is handed to default_holding() and
# the result of that call is returned.
sub fix {
    my $self   = shift;
    my $record = shift;

    if (!$record->{holding}) {
        return $record;
    }

    my $updated = $self->default_holding($record);
    return $updated;
}
# Turn the free-text holding statements of a record into year information.
#
# Input (read from $data):
#   holding - array ref of holding statements, or a single plain string
#   _id     - record identifier, only used in the log message
# Output (written to $data, which is also the return value):
#   holding     - every held year, newest first, separated by spaces
#   holding_txt - "Print available for <year>" when there is exactly one year
#                 and every statement starts with '#', otherwise
#                 "Print available from <ranges>"; " (current)" is appended
#                 when at least one statement does not start with '#'
# A message is sent to logme() when no year could be extracted at all.
sub default_holding {
    my ($self, $data) = @_;

    my $identifier = $data->{_id} // '';
    my $curryear   = (localtime)[5] + 1900;

    # A single statement may arrive as a plain string instead of an array
    # ref; dereferencing that would die under 'strict refs', so treat it as a
    # one-element list. Undefined entries are handled as empty statements.
    my $raw        = $data->{holding};
    my @statements = map { defined $_ ? $_ : '' } (ref($raw) ? @$raw : ($raw));

    # All statements are parsed as one ';'-separated string. A phrase
    # starting with "laatste" is replaced with the current year first.
    my $holding = join(";", @statements);
    $holding =~ s{laatste\s*\d*\s*\S+}{$curryear}g;

    # The holding counts as running ("lopend") as soon as one statement does
    # not start with the '#' marker.
    my $is_lopend = scalar(grep { $_ !~ /^\s*#/ } @statements) ? 1 : 0;

    my $res    = $self->parser->startrule($holding);
    my @years  = _holding_expand_years($res, $curryear);
    my @ranges = _holding_collapse_years(@years);

    # @years is ascending and unique, so reversing it gives newest-first.
    my $years = join(" ", reverse @years);
    my $range = join("; ", @ranges);

    $self->logme("$identifier : failed to interpret '$holding'")
        unless is_string($range);

    $data->{holding} = $years;

    my $preposition = (@years == 1 && !$is_lopend) ? 'for' : 'from';
    $data->{holding_txt}
        = length($range) ? "Print available $preposition $range" : '';

    # NOTE(review): with no parsed years and a running holding this yields
    # " (current)" with a leading space; kept as-is to preserve the output.
    $data->{holding_txt} .= " (current)" if $is_lopend;

    return $data;
}

# Expand the parser result (an array ref of [start], [start, end] or
# [start, 'NOW'] entries) into a sorted list of unique years. 'NOW' stands
# for $curryear. Anything that is not a non-empty array ref is skipped, and a
# range whose end lies before its start contributes no years.
sub _holding_expand_years {
    my ($parsed, $curryear) = @_;

    return () unless ref($parsed) eq 'ARRAY';

    my %seen;
    for my $span (@$parsed) {
        next unless ref($span) eq 'ARRAY' && @$span;

        my ($first, $last) = @$span;
        $last = $first    unless defined $last;
        $last = $curryear if $last eq 'NOW';

        $seen{$_} = 1 for $first .. $last;
    }

    return sort { $a <=> $b } keys %seen;
}

# Collapse an ascending list of unique years into range strings: runs of
# consecutive years become "start-end", isolated years stay as "year".
sub _holding_collapse_years {
    my @years = @_;

    my @ranges;
    my ($run_start, $previous);

    for my $year (@years) {
        if (defined $previous && $year - $previous > 1) {
            # Gap found: close the current run and start a new one.
            push @ranges,
                $run_start == $previous ? "$run_start" : "$run_start-$previous";
            $run_start = $year;
        }
        $run_start = $year unless defined $run_start;
        $previous  = $year;
    }

    if (defined $run_start) {
        push @ranges,
            $run_start == $previous ? "$run_start" : "$run_start-$previous";
    }

    return @ranges;
}
# Append one line to the configured log file, made of the current local
# time, the process id in square brackets and the given message.
# Returns undef without touching the file system when no logfile is set.
sub logme {
    my $self    = shift;
    my $message = shift;

    my $path = $self->logfile;
    return undef if !defined $path;

    my $out   = io($path, mode => 'a');
    my $stamp = localtime;
    $out->print("$stamp [$$] : $message\n");
    return $out->close();
}
1; | |
__DATA__ | |
# Parse::RecDescent grammar for a ';'-separated list of holding statements.
# startrule returns an array ref holding the result of every item.
startrule: item(s /;/) | |
{ $return = $item[1]; } | |
# item returns [start, end] for a closed range "A-B", [start] for a single
# holding or for "A-" followed by text that is not a holding, and
# [start, 'NOW'] for an open-ended "A-" (default_holding replaces 'NOW' with
# the current year). The final alternative resynchronises on the next ';'
# when nothing else matches.
item: holding '-' holding junk(?) | |
{ $return = [ $item[1], $item[3] ]; } | |
| |
holding '-' junk | |
{ $return = [ $item[1] ]; } | |
| |
holding '-' | |
{ $return = [ $item[1] , 'NOW' ]; } | |
| |
holding | |
{ $return = [ $item[1] ]; } | |
| |
<resync:[^;]*> | |
# junk is any run of characters up to the next ';'.
junk: /[^;]+/ | |
# holding returns only the publication year. The optional stop marker,
# remark, volume, second year and issue are matched but discarded.
holding: stop(?) remark(?) volume(?) '(' publication_year except_or_range_year(?) ')' issue(?) | |
{ $return = $item{publication_year} } | |
| |
stop(?) remark(?) publication_year except_or_range_year(?) issue(?) | |
{ $return = $item{publication_year} } | |
# Terminals: publication_year is a four-digit year starting with 16 to 20;
# stop is the '#' marker; remark is text ending in ':'.
volume: /[^#(;]+/ | |
issue: /[\w\.]+([-\/,:][\w\.]+)*/ | |
stop: /#/ | |
except_or_range_year: /[-\/]\s*\d+/ | |
publication_year: /(16|17|18|19|20)\d{2}/ | |
remark: /[^#:]+/ ':' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment