Created
August 11, 2016 22:45
-
-
Save langthom/8ac09efc279fddeada867b0a477a75e3 to your computer and use it in GitHub Desktop.
simple crawler for getting xkcd comics
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# A simple crawler for getting the nice comics from xkcd.com
# (c) Thomas Lang, 2016
#
# Yes, this is damn non-performant, but if it works, it ain't stupid.
use warnings; | |
use strict; | |
use LWP::Simple; | |
use XML::Twig; | |
# No error handling! | |
# | |
# Command-line arguments:
#   minIdx  = minimum image number, e.g. 1615
#   maxIdx  = maximum image number, e.g. 1618
#   verbose = optional indicator if prints should be made;
#             "y" (for yes) prints progress, anything else (or nothing)
#             keeps the script silent
my ($minIdx, $maxIdx, $verbose) = @ARGV;

# Both bounds are required and must be plain non-negative integers: they are
# fed to the range operator and interpolated into URLs and file names.
for my $bound ($minIdx, $maxIdx) {
    die "Usage: $0 minIdx maxIdx [verbose]\n"
        unless defined $bound && $bound =~ /\A\d+\z/;
}

# Default the flag so that later `$verbose eq "y"` tests never compare undef
# (which would emit an "uninitialized value" warning under `use warnings`).
$verbose = '' unless defined $verbose;
# Gets a single image, quite ineffective.
#
# Parameters:
#   $idx - comic number; used to build the page URL http://www.xkcd.com/$idx/
#          and the name of a temporary file ./$idx in the current directory.
#
# Steps: store the comic page in the temporary file, reduce that file to its
# 64th line (the line this script expects to hold the comic's <img> element),
# let XML::Twig read the 'src' attribute from it, and store the image in the
# current directory under the last path component of that URL.
#
# Prints progress messages when the file-level $verbose flag equals "y".
#
# Returns the local file name the image was stored under.
#
# Dies with a descriptive message if a download fails, a file cannot be
# read or written, the page has no 64th line, or no image URL is found.
# The temporary file is removed in every case.
sub getImg {
    my $idx = shift;
    my $uri = "http://www.xkcd.com/$idx/";
    my $fil = "./$idx";

    # $verbose is undef when the third command-line argument is omitted;
    # guard the comparison so it does not warn.
    my $chatty = defined $verbose && $verbose eq "y";

    my $img;
    my $ok = eval {
        # getstore returns the HTTP status code; LWP::Simple also exports
        # is_success() for checking it.
        my $rc = getstore($uri, $fil);
        is_success($rc) or die "Cannot fetch '$uri': HTTP status $rc\n";

        # Leave only line 64 in the file
        open(my $in, '<', $fil) or die "Cannot read '$fil': $!\n";
        my @lines = <$in>;
        close $in;
        my $line = $lines[63];
        defined $line or die "Page '$uri' has no line 64\n";

        open(my $out, '>', $fil) or die "Cannot write '$fil': $!\n";
        print {$out} $line;
        close $out or die "Cannot write '$fil': $!\n";

        # get image url
        my $i;
        XML::Twig->new(
            twig_roots => {
                '/img' => sub {
                    my $val = $_->att('src');
                    return unless defined $val;
                    # Strip a leading '//' (optionally preceded by a scheme)
                    # only at the start, so the "http://" prefix added below
                    # always yields a well-formed URL.
                    $val =~ s{\A(?:https?:)?//}{};
                    $i = $val;
                },
            },
        )->parsefile($fil);
        defined $i or die "No image URL found on line 64 of '$uri'\n";

        # Store under the last path component only, so an unexpected path in
        # the src attribute cannot point the download outside this directory.
        ($img = $i) =~ s{.*/}{};
        $img ne '' or die "Image URL '$i' has no file name\n";

        print "Getting image '$img' ... " if $chatty;
        $rc = getstore("http://$i", $img);
        unless (is_success($rc)) {
            print "failed.\n" if $chatty;
            die "Cannot fetch 'http://$i': HTTP status $rc\n";
        }
        print "done.\n" if $chatty;
        1;
    };
    my $err = $@;    # save before anything else can clobber it

    unlink $fil;     # Remove temporary gotten file, even after a failure

    die($err || "getImg failed for comic $idx\n") unless $ok;
    return $img;
}
# Fetch every comic in the inclusive range minIdx .. maxIdx, one at a time.
for my $comic ($minIdx .. $maxIdx) {
    getImg($comic);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment