Skip to content

Instantly share code, notes, and snippets.

@paveljurca
Last active August 29, 2015 14:14
Show Gist options
  • Save paveljurca/1ecb3439cfca31784432 to your computer and use it in GitHub Desktop.
Save paveljurca/1ecb3439cfca31784432 to your computer and use it in GitHub Desktop.
Právě půjčeno | Městská knihovna v Praze /* SKRIPT PRO UCHOVÁNÍ SI PŘEHLEDU O KNIŽNÍCH VÝPŮJČKÁCH */
#!/usr/bin/env perl
use strict;
use warnings;
use open qw(:std :utf8);
use utf8;
#################################
# EXAMPLE #
# http://jurcapavel.cz/books.pl #
#################################
# SOURCE => https://www.mlp.cz/cz/moje-knihovna/moje-knihy/prave-pujceno/
# Input is always the saved "Právě půjčeno" page (see SOURCE above); it is read
# through the null filehandle <>. The blanks around the name are harmless:
# the two-argument open that <> performs strips leading/trailing whitespace.
@ARGV = q[ Právě půjčeno | Městská knihovna v Praze.html ];

# Key used for a title whose author cell could not be found.
my $UNKNOWN_AUTHOR = '?';

# "Surname, F." => array ref of titles, in the order they appear in the input.
my %books;

LINE: while (my $line = <>) {
    # A title cell looks like: ...col1">Title<...
    next LINE unless $line =~ /col1">([^<]+)/;
    my $book = $1;
    $book =~ s/\s+\z//;    # drop the line ending if the cell is not closed on this line
    next LINE if $book eq '';

    #table heading row
    next LINE if $book eq "Název knihy";

    # The author cell is expected on the line right after the title.
    my $author_line = <>;
    my ($surname, $forename);
    if (defined $author_line) {
        ($surname, $forename) = $author_line =~ />([^,]+,\s)(.)/;
    }

    # key => $books{'Čapek, K.'}
    # ($surname still carries the ", " separator captured by the pattern)
    my $author = $UNKNOWN_AUTHOR;
    if (defined $forename) {
        $author = "\u\L$surname\U$forename.";
    }
    else {
        warn "No author found for '$book'\n";
    }
    push @{ $books{$author} }, $book;

    # At end of input we must stop here: evaluating <> again after it has
    # returned EOF would start a new ARGV pass and block reading STDIN.
    last LINE unless defined $author_line;

    # The line we consumed was not an author cell; re-examine it as a
    # potential title line instead of silently dropping it.
    if (!defined $forename) {
        $line = $author_line;
        redo LINE;
    }
}

# One "Author: Title" line per loan, grouped by author, authors sorted by key.
for my $author (sort keys %books) {
    print "$author: $_\n" for @{ $books{$author} };
}
#!/usr/bin/env perl
use strict;
use warnings;
use 5.010;
use utf8;
# http://search.mlp.cz/?action=cdqc&espQCId=tov-ctenar-seznamy&callback=myCallbackFunction&phase=open&lbparam=open&name=NAZEV_SEZNAMU
# w3m search.mlp.cz.html -dump -cols 200 > input.txt
# Turn the fixed-width text dump in input.txt into books.txt, one
# "Surname, F.: Title" record per loan.
my $input_file  = 'input.txt';
my $output_file = 'books.txt';

open(my $in, '<:encoding(UTF-8)', $input_file)
    or die "Cannot read $input_file: $!";

# Author string exactly as found in the dump => array ref of titles.
my %books;
while (my $row = <$in>) {
    chomp $row;

    # Columns are fixed width: 53 characters of title, then 59 of author.
    # Rows that are too short must be skipped explicitly; after a failed
    # match $1/$2 would still hold the PREVIOUS row's values and that record
    # would be duplicated.
    next unless $row =~ m/^(.{53})(.{59})/;
    my ($title, $author) = ($1, $2);

    # Strip the column padding.
    s/^\s+|\s+$//g for $title, $author;
    next if $title eq '';

    push @{ $books{$author} }, $title;
}
close($in);

open(my $out, '>:encoding(UTF-8)', $output_file)
    or die "Cannot write $output_file: $!";

# NOTE(review): authors are ordered by the raw string from the dump, not by
# the "Surname, F." label that is printed -- confirm this is the intended order.
foreach my $author (sort keys %books) {
    # The last word is treated as the surname; every word before it is
    # reduced to its first character followed by a dot.
    my @names    = split ' ', $author;
    my $surname  = pop @names;
    my $initials = join '', map { substr($_, 0, 1) . '.' } @names;

    my $label;
    if (!defined $surname) {
        $label = '?';                       # author column was blank
    }
    elsif ($initials eq '') {
        $label = $surname;                  # single-word author
    }
    else {
        $label = "$surname, $initials";
    }

    # say() adds its own newline after the explicit "\n", so every record is
    # followed by a blank line (kept as in the original output format).
    say {$out} "$label: $_\n" foreach @{ $books{$author} };
}

# Buffered write errors only surface when the handle is closed.
close($out) or die "Cannot finish writing $output_file: $!";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment