Skip to content

Instantly share code, notes, and snippets.

@t-kashima
Created June 16, 2012 17:09
Show Gist options
  • Select an option

  • Save t-kashima/2941970 to your computer and use it in GitHub Desktop.

Select an option

Save t-kashima/2941970 to your computer and use it in GitHub Desktop.
the photo apart 用のプログラム
#!/usr/bin/env perl
use strict;
use warnings;
use LWP::UserAgent;
use XML::Simple;
use Image::MetaData::JPEG;
use URI;
use Time::Local;
use MongoDB;
use Data::Dumper;
# Endpoint queried by get_items for the image feed.
my $baseurl = 'http://f.hatena.ne.jp/userlist';

# Scratch file that check_date_photo_from_url downloads each photo into.
my $temp_file = "temp.jpg";

# NOTE(review): host and credentials are hard-coded in the source; consider
# moving them to configuration outside the repository.
my %mongo_options = (
    host     => 'mongodb://ds029827.mongolab.com:29827',
    username => 'mysql',
    password => 'around',
    db_name  => 'hatena',
);
my $connection = MongoDB::Connection->new(%mongo_options);
my $database   = $connection->hatena;
my $collection = $database->hatenafotos;

# Leftover debugging snippet, kept disabled:
# my $col = $collection->find_one({time => '0859'});
# print $col->{time};

# Shared HTTP client used for the feed requests.
my $ua = LWP::UserAgent->new();
# Fetch one page of the image RSS feed and return its entries.
#
# Arguments:
#   $page - page number, sent as the "page" query parameter.
#
# Returns the list of parsed <item> entries (as produced by XML::Simple).
# Returns an empty list when the HTTP request fails, the body cannot be
# parsed, or the page contains no <item> elements.
sub get_items {
    my ($page) = @_;

    my $url = URI->new($baseurl);
    $url->query_form(
        mode => 'rss',
        type => 'image',
        page => $page,
    );

    my $res = $ua->get($url);
    return unless $res->is_success;

    # ForceArray keeps a page with exactly one <item> as a one-element array;
    # without it XML::Simple collapses the element into a bare hash ref and
    # the array dereference below dies under strict.
    my $hash = eval {
        XML::Simple->new->XMLin($res->content, ForceArray => ['item']);
    };
    if (!defined $hash) {
        warn "could not parse feed page $page: $@";
        return;
    }

    # A page past the end of the feed has no <item> key at all.
    return unless ref $hash eq 'HASH' && ref $hash->{item} eq 'ARRAY';

    return @{ $hash->{item} };
}
# Download a JPEG and extract its EXIF DateTimeOriginal timestamp.
#
# Arguments:
#   (first) - URL of the photo; only URLs ending in "jpg" are processed.
#
# Returns ($date, $time, $unix_time) on success:
#   $date      - year, month and day digits concatenated as found in EXIF
#   $time      - hour and minute digits concatenated
#   $unix_time - epoch seconds computed by timelocal (local time zone)
# Returns an empty list (undef in scalar context) when the URL is not a
# JPEG, the download fails, no EXIF date is present, or the date is invalid.
#
# Side effect: overwrites the file named by $temp_file.
sub check_date_photo_from_url {
    my $url = URI->new(shift);
    return unless $url =~ /jpg$/;

    # List-form system bypasses the shell, so metacharacters in a URL taken
    # from the remote feed cannot be executed as commands.
    my $status = system('wget', '-nv', '-O', $temp_file, '--', "$url");
    return unless $status == 0;

    my $image = Image::MetaData::JPEG->new($temp_file);
    return unless $image;
    my $segment = $image->retrieve_app1_Exif_segment(0);
    return unless $segment;
    my $data = $segment->get_Exif_data('SUBIFD_DATA', 'TEXTUAL');
    return unless $data && $data->{DateTimeOriginal};

    my $stamp = $data->{DateTimeOriginal}->[0];
    return unless defined $stamp;

    # Accept both "YYYY:MM:DD hh:mm:ss" and "YYYY/MM/DD hh:mm:ss".
    my ($year, $month, $day, $hour, $min, $sec) = $stamp =~ m{
        \A \s*
        (\d+) [:/] (\d+) [:/] (\d+)
        \s+
        (\d+) : (\d+) (?: : (\d+) )?
    }x or return;
    $sec = 0 unless defined $sec;

    return if $month <= 0 || $month > 12;

    # timelocal croaks on out-of-range fields (e.g. an all-zero EXIF date);
    # treat that as "no usable date" instead of aborting the whole run.
    my $unix_time = eval {
        timelocal($sec, $min, $hour, $day, $month - 1, $year);
    };
    return unless defined $unix_time;

    my $date = $year . $month . $day;
    my $time = $hour . $min;
    return ($date, $time, $unix_time);
}
# Look up a photo URL in the collection.
#
# Arguments:
#   $url - photo URL to search for.
#
# Returns the result of find_one for { url => $url }; the caller treats a
# true value as "already stored".
sub is_find_from_url {
    my ($url) = @_;
    my %query = (url => $url);
    return $collection->find_one(\%query);
}
# Store one record in the collection.
#
# Arguments:
#   $data - hash ref describing the photo.
#
# Returns whatever the collection's insert call returns; the caller prints
# it as the new record's id.
sub insert_data {
    my ($data) = @_;
    return $collection->insert($data);
}
# Walk the feed page by page and store every photo that carries an EXIF
# date. Crawling stops at the first photo already present in the collection,
# or when a page yields no items.
PAGE:
for (my $page = 1; ; $page++) {
    # Drop undef/non-hash entries so a failed fetch counts as an empty page,
    # while a page holding exactly one item is still processed.
    my @items = grep { ref $_ eq 'HASH' } get_items($page);
    last PAGE unless @items;

    ITEM:
    for my $item (@items) {
        print "page:" . $page . "\n";

        my $photo_url = $item->{"hatena:imageurl"};
        # Skip missing values, and non-scalar values in case the parser
        # returned a reference for an empty element.
        next ITEM unless defined $photo_url && !ref $photo_url && length $photo_url;
        print $photo_url . "\n";

        # Leave both loops as soon as a known photo is reached.
        last PAGE if is_find_from_url($photo_url);

        my ($date, $time, $unix_time) = check_date_photo_from_url($photo_url);
        # All three values are needed for the record.
        next ITEM unless defined $date && defined $time && defined $unix_time;
        print $date . " " . $time . "\n";

        my $data = {
            time      => $time,
            date      => $date,
            url       => $photo_url,
            unix_time => $unix_time,
        };
        my $id = insert_data($data);
        print $id . "\n";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment