Created
June 16, 2012 17:09
-
-
Save t-kashima/2941970 to your computer and use it in GitHub Desktop.
the photo apart 用のプログラム
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env perl | |
| use strict; | |
| use warnings; | |
| use LWP::UserAgent; | |
| use XML::Simple; | |
| use Image::MetaData::JPEG; | |
| use URI; | |
| use Time::Local; | |
| use MongoDB; | |
| use Data::Dumper; | |
# ---------------------------------------------------------------------------
# Configuration and shared handles
# ---------------------------------------------------------------------------

# Endpoint queried (with mode/type/page parameters) for the image RSS feed.
my $baseurl = 'http://f.hatena.ne.jp/userlist';

# Scratch file each photo is downloaded to before its EXIF data is read.
# It is overwritten on every download.
my $temp_file = "temp.jpg";

# MongoDB connection settings. Each one may be overridden through the
# environment so that credentials do not have to live in the source; the
# original values remain as defaults so existing setups keep working.
my $mongo_host = defined $ENV{HATENAFOTO_MONGO_HOST}
    ? $ENV{HATENAFOTO_MONGO_HOST}
    : 'mongodb://ds029827.mongolab.com:29827';
my $mongo_user = defined $ENV{HATENAFOTO_MONGO_USER}
    ? $ENV{HATENAFOTO_MONGO_USER}
    : 'mysql';
my $mongo_pass = defined $ENV{HATENAFOTO_MONGO_PASSWORD}
    ? $ENV{HATENAFOTO_MONGO_PASSWORD}
    : 'around';

my $connection = MongoDB::Connection->new(
    host     => $mongo_host,
    username => $mongo_user,
    password => $mongo_pass,
    db_name  => 'hatena',
);

# Handles for the "hatena" database and its "hatenafotos" collection, where
# one document per photo is stored.
my $database   = $connection->hatena;
my $collection = $database->hatenafotos;

# my $col = $collection->find_one({time => '0859'});
# print $col->{time};

# HTTP client shared by all feed requests.
my $ua = LWP::UserAgent->new;
# Fetch one page of the image RSS feed and return its entries.
#
# Arguments:
#   $page - page number, sent as the feed's "page" query parameter.
#
# Returns:
#   The list of <item> entries on that page, each a hash reference as
#   produced by XML::Simple. An empty list is returned when the HTTP
#   request fails, the response cannot be parsed, or the page has no items.
sub get_items {
    my ($page) = @_;

    my $url = URI->new($baseurl);
    $url->query_form(
        mode => 'rss',
        type => 'image',
        page => $page,
    );

    my $res = $ua->get($url);
    return unless $res->is_success;

    # ForceArray keeps a page holding exactly one <item> as a one-element
    # array (XML::Simple would otherwise collapse it to a bare hash), and
    # KeyAttr => [] prevents the items from being folded into a hash keyed
    # by a name/key/id value. The eval turns a malformed feed into an empty
    # result instead of aborting the whole run.
    my $hash = eval {
        XML::Simple->new->XMLin(
            $res->content,
            ForceArray => ['item'],
            KeyAttr    => [],
        );
    };
    if ($@) {
        warn "get_items: could not parse feed page $page: $@";
        return;
    }

    return unless ref($hash) eq 'HASH' && ref($hash->{item}) eq 'ARRAY';
    return @{ $hash->{item} };
}
# Split an EXIF timestamp such as "2012:06:16 17:09:00" (or with "/" as the
# date separator) into the values stored for a photo.
#
# Arguments:
#   $stamp - the raw DateTimeOriginal string.
#
# Returns:
#   ($date, $time, $unix_time) where $date is year.month.day concatenated,
#   $time is hour.minute concatenated, and $unix_time is the local-time
#   epoch value. Returns an empty list when the string is not a usable
#   timestamp.
sub _parse_exif_datetime {
    my ($stamp) = @_;
    return unless defined $stamp;

    my ($year, $month, $day, $hour, $min, $sec) = $stamp =~ m{
        \A \s*
        (\d{4}) [:/] (\d{1,2}) [:/] (\d{1,2})       # date
        \s+
        (\d{1,2}) : (\d{1,2}) (?: : (\d{1,2}) )?    # time, seconds optional
    }x or return;

    # Cameras with an unset clock commonly write an all-zero date.
    return if $month <= 0 || $month > 12;
    $sec = 0 unless defined $sec;

    # timelocal croaks on out-of-range fields (day 0, hour 25, ...); one bad
    # photo must not abort the whole crawl.
    my $unix_time
        = eval { timelocal($sec, $min, $hour, $day, $month - 1, $year) };
    return unless defined $unix_time;

    return ($year . $month . $day, $hour . $min, $unix_time);
}

# Download a JPEG and extract the moment it was taken from its EXIF data.
#
# Arguments:
#   (first) - URL of the photo; only URLs ending in "jpg" are processed.
#
# Returns:
#   ($date, $time, $unix_time) as described for _parse_exif_datetime, or an
#   empty list when the URL is not a JPEG, the download fails, or the file
#   carries no usable EXIF DateTimeOriginal value.
#
# Side effects:
#   Overwrites $temp_file with the downloaded image (requires wget).
sub check_date_photo_from_url {
    my $url = URI->new(shift);
    return unless $url =~ /jpg$/;

    # List-form system() bypasses the shell, so characters in the
    # feed-supplied URL cannot be interpreted as shell syntax; "--" stops a
    # URL that starts with "-" from being read as a wget option.
    my $status = system('wget', '-nv', '-O', $temp_file, '--', "$url");
    return if $status != 0;

    my $image = Image::MetaData::JPEG->new($temp_file);
    return unless $image;

    my $segment = $image->retrieve_app1_Exif_segment(0);
    return unless $segment;

    my $data = $segment->get_Exif_data('SUBIFD_DATA', 'TEXTUAL');
    return unless ref($data) eq 'HASH'
        && ref($data->{DateTimeOriginal}) eq 'ARRAY';

    return _parse_exif_datetime($data->{DateTimeOriginal}->[0]);
}
# Look up a stored photo document by its URL.
#
# Arguments:
#   $url - photo URL to search for in the "url" field.
#
# Returns whatever $collection->find_one yields for that query; the caller
# uses it as a truth value to tell whether the photo is already stored.
sub is_find_from_url {
    my ($url) = @_;
    my %query = (url => $url);
    return $collection->find_one(\%query);
}
# Store one photo document in the collection.
#
# Arguments:
#   $data - hash reference describing the photo.
#
# Returns the result of $collection->insert, which the caller prints as the
# new document's id.
sub insert_data {
    my ($data) = @_;
    return $collection->insert($data);
}
# ---------------------------------------------------------------------------
# Main crawl
#
# Walk the feed page by page, storing the capture date/time of every JPEG
# that carries EXIF data. The crawl stops at the first photo that is already
# in the collection, or when a page yields no entries.
# ---------------------------------------------------------------------------
my $is_find = 0;
for (my $page = 1; ; $page++) {
    # Keep only well-formed entries: a failed fetch may come back as a lone
    # undef, which must end the crawl rather than be treated as an item. A
    # page holding a single entry is still processed.
    my @items = grep { ref($_) eq 'HASH' } get_items($page);
    last unless @items;

    for my $item (@items) {
        print "page:" . $page . "\n";

        # Skip entries without a plain-string image URL before printing it.
        my $photo_url = $item->{"hatena:imageurl"};
        next unless $photo_url && !ref $photo_url;
        print $photo_url . "\n";

        # Reaching a photo that is already stored ends the crawl.
        if (is_find_from_url($photo_url)) {
            $is_find = 1;
            last;
        }

        my ($date, $time, $unix_time) = check_date_photo_from_url($photo_url);
        next unless defined $date && defined $time && defined $unix_time;
        print $date . " " . $time . "\n";

        my $data = {
            time      => $time,
            date      => $date,
            url       => $photo_url,
            unix_time => $unix_time,
        };
        my $id = insert_data($data);
        print $id . "\n";
    }

    last if $is_find == 1;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment