Skip to content

Instantly share code, notes, and snippets.

@hitsujixgit
Created July 9, 2014 01:40
Show Gist options
  • Save hitsujixgit/ad452791446e60ce538e to your computer and use it in GitHub Desktop.
Save hitsujixgit/ad452791446e60ce538e to your computer and use it in GitHub Desktop.
横浜市統計ポータルサイトの年齢別人口データのうち、総人口と0〜5歳各年齢の人口を入手してJSONファイルに書き出します。
#! /opt/local/bin/perl
use strict;
use warnings;
use 5.012;
use utf8;
use Encode;
use HTML::TableExtract;
use JSON qw( decode_json encode_json );
use LWP::Simple;
use LWP::UserAgent;
# Base URL of the directory that holds the population-by-age pages.
my $url_dir = "http://www.city.yokohama.lg.jp/ex/stat/jinko/age/new/";
# Page keys; each one is fetched as "<key>-j.html" under $url_dir.
# NOTE(review): the keys other than "age" look like Yokohama ward names and
# "age" is presumably the city-wide page -- confirm against the site.
my @file_keys = qw(age tsurumi kanagawa nishi naka minami konan hodogaya asahi isogo kanazawa kohoku midori aoba tsuzuki totsuka sakae izumi seya);
# Shared user agent; get_content_by_url() reads this file-level lexical.
my $browser = LWP::UserAgent->new(keep_alive=>3, timeout=>30);
# Name of the JSON file written in the current working directory.
my $output_file = "yokohama_stat.json";
my @stat;
foreach my $key (@file_keys) {
    # Build the URL of the page for this key.
    my $url = $url_dir . $key . "-j.html";
    print $url, "\n";
    # Download the HTML source.
    my $content = get_content_by_url($url);
    # Pull only the wanted population figures out of the HTML.
    my $res = get_stat_from_html($content);
    # Append the result for this page to the output list.
    push(@stat, {'key' => $key, 'stat' => $res, });
    # Wait one second between requests to be polite to the web site.
    sleep(1);
}
# encode_json returns UTF-8 encoded bytes, so the handle needs no encoding layer.
my $json = encode_json(\@stat);
# Three-argument open with a lexical handle; report the OS error on failure.
open(my $out, '>', $output_file)
    or die "Cannot open json file '$output_file': $!";
print {$out} $json
    or die "Cannot write json file '$output_file': $!";
# Buffered write errors only surface at close, so check it as well.
close($out)
    or die "Cannot close json file '$output_file': $!";
# Fetch a page with the shared $browser user agent and return its body
# decoded from Shift_JIS into a Perl character string.
#
# Parameters:
#   $url - absolute URL of the page to download.
# Returns:
#   The decoded page body (non-empty string).
# Dies:
#   When the HTTP request is not successful, or when the body is empty.
sub get_content_by_url {
    my $url = shift;
    my $response = $browser->get( $url );
    # Report the URL and the HTTP status line so that the failing page can be
    # identified when one of many requests goes wrong.
    $response->is_success
        or die "Get html from web failed: $url (" . $response->status_line . ")";
    my $content = decode('Shift_JIS', $response->content);
    # An empty page carries no table to extract; treat it as a failure
    # explicitly instead of relying on the truthiness of the decoded string.
    die "Decode html source code failed: empty body from $url"
        unless defined $content and length $content;
    return $content;
}
# Extract the total population and the population of ages 0 to 5 from the
# HTML of one statistics page.
#
# Parameters:
#   $content - decoded HTML source (character string).
# Returns:
#   Hash reference keyed by "TTL" and "0" .. "5"; values are the population
#   counts as numbers. The hash is empty when no table with the expected
#   headers is found.
# Dies:
#   When a selected row has a population cell that is not a plain
#   non-negative integer (thousands separators are allowed).
# Side effects:
#   Prints every row of the matched table to STDOUT, UTF-8 encoded.
sub get_stat_from_html {
    my $content = shift;
    my $te = HTML::TableExtract->new(headers => [qw(年齢(歳) 総数)]);
    $te->parse($content) or die "Parse file by htmlextract failed.";
    # Only the first table matching the headers is used.
    my $ts = ($te->tables)[0];
    my %nums;
    return \%nums unless defined $ts;

    # Row label (after normalization) => key used in the result hash.
    my %key_for = ('総数' => 'TTL', map { ( $_ => "$_" ) } 0 .. 5);

    foreach my $row ($ts->rows) {
        # Empty cells may be undef; print them as empty strings.
        print encode('utf-8', join(": ", map { $_ // '' } @$row)), "\n";

        # Labels may be written with full-width digits, so normalize the
        # label before looking it up; rows with other labels are skipped.
        my $age = $key_for{ _normalize_cell($row->[0]) };
        next unless defined $age;

        # Drop thousands separators, then validate strictly. A count of 0 is
        # valid, so the numeric value itself is never used as a truth test.
        my $digits = _normalize_cell($row->[1]);
        $digits =~ tr/,//d;
        $digits =~ /\A[0-9]+\z/ or die "The population number invalid.";

        # Store as a number so that it is serialized as a JSON number.
        $nums{$age} = $digits + 0;
    }
    return \%nums;
}

# Trim surrounding whitespace and map full-width digits and commas to ASCII.
sub _normalize_cell {
    my $text = shift // '';
    $text =~ s/\A\s+|\s+\z//g;
    $text =~ tr/０-９，/0-9,/;
    return $text;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment