t-kashima · June 16, 2012 17:09
diff --git a/photo.pl b/photo.pl
 #!/usr/bin/env perl

 use strict;
 use warnings;
 use LWP::UserAgent;
 use XML::Simple;
 use Image::MetaData::JPEG;
 use URI;
 use Time::Local;
 use MongoDB;
 use Data::Dumper;

 # Feed endpoint that is queried page by page, and the scratch file each
 # photo is downloaded into.
 my $baseurl   = 'http://f.hatena.ne.jp/userlist';
 my $temp_file = "temp.jpg";

 # MongoDB handles: connection -> "hatena" database -> "hatenafotos" collection.
 # NOTE(review): the credentials are hard-coded in source — consider moving
 # them to configuration or the environment.
 my $connection = MongoDB::Connection->new(
     host     => 'mongodb://ds029827.mongolab.com:29827',
     username => 'mysql',
     password => 'around',
     db_name  => 'hatena',
 );
 my $database   = $connection->hatena;
 my $collection = $database->hatenafotos;

 # Leftover debugging lookup, kept for reference:
 # my $col = $collection->find_one({time => '0859'});
 # print $col->{time};

 # HTTP client used for the feed requests.
 my $ua = LWP::UserAgent->new;

 # Fetch one page of the photo RSS feed and return its items.
 #
 # Parameters:
 #   $page - page number sent as the "page" query parameter.
 # Returns:
 #   The list of entries found under the "item" key of the parsed feed, or
 #   an empty list when the request fails or the feed has no items.
 sub get_items {
     my ($page) = @_;

     my $url = URI->new($baseurl);
     $url->query_form(
         mode => 'rss',
         type => 'image',
         page => $page,
     );

     my $res = $ua->get($url);
     # A bare return gives the caller an empty list; "return undef" would hand
     # back a one-element list containing undef.
     return unless $res->is_success;

     my $hash  = XML::Simple->new->XMLin($res->content);
     my $items = $hash->{item};
     return unless defined $items;

     # XML::Simple folds a lone <item> into a single reference instead of an
     # array reference, so accept both shapes.
     return ref($items) eq 'ARRAY' ? @{$items} : ($items);
 }

 # Download a photo and read the capture time from its EXIF data.
 #
 # Parameters:
 #   first argument - photo URL; anything not ending in "jpg" is rejected.
 # Returns:
 #   ($date, $time, $unix_time): $date is the zero-padded YYYYMMDD string,
 #   $time the zero-padded HHMM string and $unix_time the value computed by
 #   timelocal(). Returns an empty list (all three undef after a list
 #   assignment) when the URL is rejected, wget fails, or no usable
 #   DateTimeOriginal value can be read.
 sub check_date_photo_from_url {
     my $url = URI->new(shift);
     return unless $url =~ /jpg$/;

     # List-form system() runs wget without a shell, so shell metacharacters
     # in a URL taken from the feed are never interpreted as commands.
     my $status = system('wget', '-nv', "$url", '-O', $temp_file);
     # Skip the photo rather than parse a failed or partial download.
     return unless $status == 0;

     my $image = Image::MetaData::JPEG->new($temp_file);
     return unless $image;
     my $segment = $image->retrieve_app1_Exif_segment(0);
     return unless $segment;
     my $data = $segment->get_Exif_data('SUBIFD_DATA', 'TEXTUAL');
     return unless $data && $data->{DateTimeOriginal};

     my $raw = $data->{DateTimeOriginal}->[0];
     return unless defined $raw;
     # Drop trailing whitespace/NUL padding so the last field stays numeric.
     $raw =~ s/[\s\0]+\z//;

     my ($date_temp, $time_temp) = split /\s/, $raw;
     return unless defined $date_temp && defined $time_temp;

     # The date part is separated by either "/" or ":".
     my $date_separator = $date_temp =~ m{/} ? qr{/} : qr{:};
     my ($year, $month, $day) = split $date_separator, $date_temp;
     my ($hour, $min, $sec)   = split /:/, $time_temp;
     $sec = 0 unless defined $sec;

     # Reject missing or non-numeric fields before doing arithmetic on them.
     for my $field ($year, $month, $day, $hour, $min, $sec) {
         return unless defined $field && $field =~ /\A[0-9]+\z/;
     }
     return if $month <= 0 || $month > 12;

     # timelocal() croaks on out-of-range values (e.g. day 0); treat that as
     # "no usable date" instead of aborting the whole crawl.
     my $unix_time = eval { timelocal($sec, $min, $hour, $day, $month - 1, $year) };
     return unless defined $unix_time;

     # Zero-pad so unpadded inputs such as 2012/6/16 still yield YYYYMMDD.
     my $date = sprintf('%04d%02d%02d', $year, $month, $day);
     my $time = sprintf('%02d%02d', $hour, $min);
     return ($date, $time, $unix_time);
 }

 # Look up a stored record whose "url" field equals the given photo URL.
 # Returns whatever the collection's find_one() returns for that query.
 sub is_find_from_url {
     my ($photo_url) = @_;
     my %query = (url => $photo_url);
     return $collection->find_one(\%query);
 }

 # Store one record (a hash reference) in the collection.
 # Returns whatever the collection's insert() returns.
 sub insert_data {
     my ($record) = @_;
     return $collection->insert($record);
 }

 # Crawl the feed one page at a time and store each photo's capture time.
 # The crawl stops when a page yields no items, or as soon as a photo URL is
 # found that already has a record in the collection.
 my $is_find = 0;
 for (my $page = 1; ; $page++) {
     # Keep only hash-reference items: a failed fetch (which may surface as a
     # lone undef) then ends the crawl, while a page holding exactly one real
     # item is still processed.
     my @items = grep { ref($_) eq 'HASH' } get_items($page);
     last unless @items;

     for my $item (@items) {
         print "page:" . $page . "\n";
         my $photo_url = $item->{"hatena:imageurl"};
         # Check before printing so a missing URL does not trigger an
         # "uninitialized value" warning; also skip non-scalar values.
         next if !$photo_url || ref $photo_url;
         print $photo_url . "\n";

         if (is_find_from_url($photo_url)) {
             $is_find = 1;
             last;
         }

         my ($date, $time, $unix_time) = check_date_photo_from_url($photo_url);
         next unless (defined $date && defined $time && defined $unix_time);
         print $date . " " . $time . "\n";

         my $data = {
             time      => $time,
             date      => $date,
             url       => $photo_url,
             unix_time => $unix_time,
         };
         my $id = insert_data($data);
         print $id . "\n";
     }

     # Must be the variable $is_find: the bareword "is_find" does not compile
     # under "use strict".
     last if ($is_find == 1);
 }
	#!/usr/bin/env perl

	use strict;
	use warnings;
	use LWP::UserAgent;
	use XML::Simple;
	use Image::MetaData::JPEG;
	use URI;
	use Time::Local;
	use MongoDB;
	use Data::Dumper;

	# Feed endpoint that is queried page by page, and the scratch file each
	# photo is downloaded into.
	my $baseurl   = 'http://f.hatena.ne.jp/userlist';
	my $temp_file = "temp.jpg";

	# MongoDB handles: connection -> "hatena" database -> "hatenafotos" collection.
	# NOTE(review): the credentials are hard-coded in source — consider moving
	# them to configuration or the environment.
	my $connection = MongoDB::Connection->new(
	    host     => 'mongodb://ds029827.mongolab.com:29827',
	    username => 'mysql',
	    password => 'around',
	    db_name  => 'hatena',
	);
	my $database   = $connection->hatena;
	my $collection = $database->hatenafotos;

	# Leftover debugging lookup, kept for reference:
	# my $col = $collection->find_one({time => '0859'});
	# print $col->{time};

	# HTTP client used for the feed requests.
	my $ua = LWP::UserAgent->new;

	# Fetch one page of the photo RSS feed and return its items.
	#
	# Parameters:
	#   $page - page number sent as the "page" query parameter.
	# Returns:
	#   The list of entries found under the "item" key of the parsed feed, or
	#   an empty list when the request fails or the feed has no items.
	sub get_items {
	    my ($page) = @_;

	    my $url = URI->new($baseurl);
	    $url->query_form(
	        mode => 'rss',
	        type => 'image',
	        page => $page,
	    );

	    my $res = $ua->get($url);
	    # A bare return gives the caller an empty list; "return undef" would hand
	    # back a one-element list containing undef.
	    return unless $res->is_success;

	    my $hash  = XML::Simple->new->XMLin($res->content);
	    my $items = $hash->{item};
	    return unless defined $items;

	    # XML::Simple folds a lone <item> into a single reference instead of an
	    # array reference, so accept both shapes.
	    return ref($items) eq 'ARRAY' ? @{$items} : ($items);
	}

	# Download a photo and read the capture time from its EXIF data.
	#
	# Parameters:
	#   first argument - photo URL; anything not ending in "jpg" is rejected.
	# Returns:
	#   ($date, $time, $unix_time): $date is the zero-padded YYYYMMDD string,
	#   $time the zero-padded HHMM string and $unix_time the value computed by
	#   timelocal(). Returns an empty list (all three undef after a list
	#   assignment) when the URL is rejected, wget fails, or no usable
	#   DateTimeOriginal value can be read.
	sub check_date_photo_from_url {
	    my $url = URI->new(shift);
	    return unless $url =~ /jpg$/;

	    # List-form system() runs wget without a shell, so shell metacharacters
	    # in a URL taken from the feed are never interpreted as commands.
	    my $status = system('wget', '-nv', "$url", '-O', $temp_file);
	    # Skip the photo rather than parse a failed or partial download.
	    return unless $status == 0;

	    my $image = Image::MetaData::JPEG->new($temp_file);
	    return unless $image;
	    my $segment = $image->retrieve_app1_Exif_segment(0);
	    return unless $segment;
	    my $data = $segment->get_Exif_data('SUBIFD_DATA', 'TEXTUAL');
	    return unless $data && $data->{DateTimeOriginal};

	    my $raw = $data->{DateTimeOriginal}->[0];
	    return unless defined $raw;
	    # Drop trailing whitespace/NUL padding so the last field stays numeric.
	    $raw =~ s/[\s\0]+\z//;

	    my ($date_temp, $time_temp) = split /\s/, $raw;
	    return unless defined $date_temp && defined $time_temp;

	    # The date part is separated by either "/" or ":".
	    my $date_separator = $date_temp =~ m{/} ? qr{/} : qr{:};
	    my ($year, $month, $day) = split $date_separator, $date_temp;
	    my ($hour, $min, $sec)   = split /:/, $time_temp;
	    $sec = 0 unless defined $sec;

	    # Reject missing or non-numeric fields before doing arithmetic on them.
	    for my $field ($year, $month, $day, $hour, $min, $sec) {
	        return unless defined $field && $field =~ /\A[0-9]+\z/;
	    }
	    # The original line had "\|\|" here (markup-escaped pipes), which is not
	    # valid Perl; the intended operator is "||".
	    return if $month <= 0 || $month > 12;

	    # timelocal() croaks on out-of-range values (e.g. day 0); treat that as
	    # "no usable date" instead of aborting the whole crawl.
	    my $unix_time = eval { timelocal($sec, $min, $hour, $day, $month - 1, $year) };
	    return unless defined $unix_time;

	    # Zero-pad so unpadded inputs such as 2012/6/16 still yield YYYYMMDD.
	    my $date = sprintf('%04d%02d%02d', $year, $month, $day);
	    my $time = sprintf('%02d%02d', $hour, $min);
	    return ($date, $time, $unix_time);
	}

	# Look up a stored record whose "url" field equals the given photo URL.
	# Returns whatever the collection's find_one() returns for that query.
	sub is_find_from_url {
	    my ($photo_url) = @_;
	    my %query = (url => $photo_url);
	    return $collection->find_one(\%query);
	}

	# Store one record (a hash reference) in the collection.
	# Returns whatever the collection's insert() returns.
	sub insert_data {
	    my ($record) = @_;
	    return $collection->insert($record);
	}

	# Crawl the feed one page at a time and store each photo's capture time.
	# The crawl stops when a page yields no items, or as soon as a photo URL is
	# found that already has a record in the collection.
	my $is_find = 0;
	for (my $page = 1; ; $page++) {
	    # Keep only hash-reference items: a failed fetch (which may surface as a
	    # lone undef) then ends the crawl, while a page holding exactly one real
	    # item is still processed.
	    my @items = grep { ref($_) eq 'HASH' } get_items($page);
	    last unless @items;

	    for my $item (@items) {
	        print "page:" . $page . "\n";
	        my $photo_url = $item->{"hatena:imageurl"};
	        # Check before printing so a missing URL does not trigger an
	        # "uninitialized value" warning; also skip non-scalar values.
	        next if !$photo_url || ref $photo_url;
	        print $photo_url . "\n";

	        if (is_find_from_url($photo_url)) {
	            $is_find = 1;
	            last;
	        }

	        my ($date, $time, $unix_time) = check_date_photo_from_url($photo_url);
	        # The original condition used "\|\|" (markup-escaped pipes), which is
	        # not valid Perl; require all three values to be present instead.
	        next unless (defined $date && defined $time && defined $unix_time);
	        print $date . " " . $time . "\n";

	        my $data = {
	            time      => $time,
	            date      => $date,
	            url       => $photo_url,
	            unix_time => $unix_time,
	        };
	        my $id = insert_data($data);
	        print $id . "\n";
	    }

	    # Must be the variable $is_find: the bareword "is_find" does not compile
	    # under "use strict".
	    last if ($is_find == 1);
	}
No results found