Skip to content

Instantly share code, notes, and snippets.

@keiya
Created February 29, 2012 10:59
Show Gist options
  • Select an option

  • Save keiya/1939988 to your computer and use it in GitHub Desktop.

Select an option

Save keiya/1939988 to your computer and use it in GitHub Desktop.
CSV of AKB48 members
#!/usr/bin/perl
#
# by Keiya Chinen
use strict;
use Web::Scraper;
use URI;
use Data::Dumper;
use utf8;
# Disable output buffering so progress and rows appear as soon as printed.
$| = 1;

# Web::Scraper hands back decoded character strings and the target page is
# Japanese; without an output layer Perl emits "Wide character in print"
# and the byte encoding of the output depends on the data.
binmode STDOUT, ':encoding(UTF-8)';

my $url = 'http://www.google.com/intl/ja/+/project48/';

# scrapeuri() returns a flat key/value list: id => { id, name, type }.
my %res = scrapeuri($url);

# Emit one "id,name,type" row per member.  Keys are sorted because hash key
# order is unspecified, which would make the row order differ between runs.
foreach my $id (sort keys %res) {
    my @fields = map {
        my $field = defined $_ ? $_ : '';

        # Quote a field only when it contains a CSV metacharacter, so
        # ordinary values are printed exactly as before.
        if ($field =~ /[",\r\n]/) {
            $field =~ s/"/""/g;
            $field = qq{"$field"};
        }
        $field;
    } @{ $res{$id} }{qw(id name type)};

    print join(',', @fields), "\n";
}
# Scrape the member list from the given page.
#
# Arguments:
#   $_[0] - URL of the page to fetch.
#
# Returns a flat key/value list (assign it to a hash) that maps each
# data-gplusid value to a hash ref with the keys "id", "name" and "type".
#
# The three attribute lists are collected by separate selectors and paired
# up by position, so a warning is issued when their lengths differ.
sub scrapeuri {
    my ($link) = @_;

    my $scraper = scraper {
        process "/html/body/div[5]/ul/li",   'id[]'   => '@data-gplusid';
        process "/html/body/div[5]/ul/li/a", 'name[]' => '@data-g-label';
        process "/html/body/div[5]/ul/li/a", 'type[]' => '@data-g-event';
    };

    # Progress goes to STDERR so that STDOUT carries nothing but CSV rows.
    print STDERR "scraping: $link\n";

    my $res = $scraper->scrape(URI->new($link));

    # Guard against a key being absent from the result (for example when a
    # selector matched nothing); dereferencing undef would die under strict.
    my @ids   = @{ $res->{id}   || [] };
    my @names = @{ $res->{name} || [] };
    my @types = @{ $res->{type} || [] };

    if (@names != @ids || @types != @ids) {
        warn sprintf(
            "scrapeuri: list lengths differ (id=%d, name=%d, type=%d); "
                . "rows may be misaligned\n",
            scalar @ids, scalar @names, scalar @types,
        );
    }

    my %member_for;
    for my $i (0 .. $#ids) {
        my $id = $ids[$i];

        # An <li> without data-gplusid has no key to be stored under; skip
        # it rather than filing it under the empty string.
        next unless defined $id;

        $member_for{$id} = {
            id   => $id,
            name => $names[$i],
            type => $types[$i],
        };
    }
    return %member_for;
}
# Percent-encode a string for use in a URL query component
# (application/x-www-form-urlencoded style).
#
# Arguments:
#   $str - text to encode; character strings are converted to UTF-8 first.
#
# Returns the encoded string: unreserved characters (A-Z a-z 0-9 _ . ~ -)
# are kept, a space becomes "+", and every other byte becomes %XX with
# upper-case hex digits.  An undefined argument is returned unchanged.
sub url_encode {
    my $str = shift;
    return $str unless defined $str;

    # Encode to UTF-8 bytes first so that every character matched below is
    # a single byte; $str is a private copy, so the caller is unaffected.
    utf8::encode($str);

    $str =~ s/([^A-Za-z0-9_.~ -])/sprintf('%%%02X', ord $1)/eg;
    $str =~ tr/ /+/;
    return $str;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment