Skip to content

Instantly share code, notes, and snippets.

@seungwon0
Created June 30, 2012 04:48
Show Gist options
  • Save seungwon0/3022317 to your computer and use it in GitHub Desktop.
Save seungwon0/3022317 to your computer and use it in GitHub Desktop.
downloads Miss Korea 2012 Photos
#!/usr/bin/env perl
#
# miss_korea_2012.pl - downloads Miss Korea 2012 Photos
#
# Downloads Miss Korea 2012 photos from http://misskorea.hankooki.com.
#
# Seungwon Jeong <[email protected]>
#
# Copyright (C) 2012 by Seungwon Jeong
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see
# <http://www.gnu.org/licenses/>.
use perl5i::2;
use IO::All;
use Term::ProgressBar::Simple;
# Matches an image reference of the form "/2012/<digits>.jpg" or
# "/2012/<digits>B.jpg" and captures the bare file name as "file_name".
my $image_pattern = qr{/ 2012 / (?<file_name> \d+ B? [.] jpg )}x;

# Download settings, keyed by category name.  The key doubles as the name of
# the local output directory and as the value substituted for %CATEGORY.
#
#   url_template   - page URL containing the %INDEX_ID (and, for the photo
#                    galleries, %CATEGORY) placeholders
#   index_id_list  - index ids of the pages to visit
#   image_base_url - prefix prepended to every matched image file name
my %category = do {
    my $site_url  = 'http://misskorea.hankooki.com/';
    my $photo_url = 'http://photo.hankooki.com/misskorea/';

    # The three photo galleries share one page template; only the profile
    # pages use a different script and query string.
    my $gallery_template
        = $site_url . 'photo_view.php?indexid=%INDEX_ID&gubun=%CATEGORY';
    my $profile_template
        = $site_url
        . 'profile_view.php?year_miss=2012&miss_num=1&indexid=%INDEX_ID';

    # "final" and "camp" images live under the same remote directory.
    my $group_photo_url = $photo_url . 'misskorea_group_photo/2012/';

    (
        final => {
            url_template   => $gallery_template,
            index_id_list  => [ 2800 .. 2917 ],
            image_base_url => $group_photo_url,
        },
        camp => {
            url_template   => $gallery_template,
            index_id_list  => [ 2595 .. 2799 ],
            image_base_url => $group_photo_url,
        },
        area => {
            url_template   => $gallery_template,
            index_id_list  => [ 792 .. 836 ],
            image_base_url => $photo_url . 'misskorea_area_photo/2012/',
        },
        profile => {
            url_template   => $profile_template,
            index_id_list  => [ 2194 .. 2249 ],
            image_base_url => $photo_url . 'personal_photo/2012/',
        },
    );
};
# Download every image of every category into a local directory named after
# the category.  For each index id the category's page URL is built from its
# template, the page is fetched, and every image file name matched by
# $image_pattern is fetched from the category's image base URL.
#
# Categories are visited in sorted order so that successive runs behave
# identically (plain "keys" order is randomized by perl).
for my $category ( sort keys %category ) {
    my $settings       = $category{$category};
    my @index_id_list  = @{ $settings->{index_id_list} };
    my $image_base_url = $settings->{image_base_url};

    # Make sure the output directory exists before anything is written.
    io($category)->mkpath();

    my $progress = Term::ProgressBar::Simple->new(
        {   count => scalar @index_id_list,
            name  => $category,
        }
    );

    # File names already fetched for this category.  The same image can be
    # referenced more than once; fetching it again would only rewrite the
    # same local file.
    my %is_downloaded;

    for my $index_id (@index_id_list) {
        my $url = $settings->{url_template};
        $url =~ s/%INDEX_ID/$index_id/;
        $url =~ s/%CATEGORY/$category/;    # no-op for templates without it

        # IO::All's overloaded "<" reads the content behind $url into
        # $document.
        my $document < io $url;

        while ( $document =~ /$image_pattern/g ) {
            my $file_name = $LAST_PAREN_MATCH{file_name};
            next if $is_downloaded{$file_name}++;

            $progress->message("Downloading '$file_name'...");

            # IO::All's overloaded ">" copies the remote image into
            # <category>/<file_name>.
            io( $image_base_url . $file_name )
                > io->catfile( $category, $file_name );
        }

        # Advance the bar only once this page has been fully processed, so
        # it never reports work that has not happened yet.
        $progress++;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment