Skip to content

Instantly share code, notes, and snippets.

@hitsujixgit
Created July 7, 2014 01:13
Show Gist options
  • Save hitsujixgit/088c1d13ba9bd6802119 to your computer and use it in GitHub Desktop.
Save hitsujixgit/088c1d13ba9bd6802119 to your computer and use it in GitHub Desktop.
Read population statistics by age from a saved Yokohama city web page.
#! /opt/local/bin/perl
use strict;
use warnings;
use 5.012;
use Encode;
use HTML::TableExtract;
use utf8;
# Extract the total population for ages 0-5 from a saved, Shift_JIS-encoded
# HTML page ("test.html" in the current directory) and print the result.
#
# Steps:
#   1. Read the file and decode it from Shift_JIS into Perl characters.
#   2. Locate the table whose header row contains the age and total columns.
#   3. For every row whose first cell is an age between 0 and 5, parse the
#      population figure in the second cell.
#   4. Print every extracted row (for inspection) and then the age => count
#      summary, ordered by age.

# Normalize one table cell for comparison: undef becomes '', full-width
# digits are folded to ASCII digits, and surrounding whitespace is removed.
sub _normalize_cell {
    my ($text) = @_;
    $text //= '';
    $text =~ tr/0-9/0-9/;
    $text =~ s/\A\s+|\s+\z//g;
    return $text;
}

my $filepath = "test.html";

# Three-argument open with a lexical handle; report the OS error on failure.
open(my $fh, '<', $filepath)
    or die "Open a html file failed: $filepath: $!";
my $bytes = do { local $/; <$fh> };    # slurp the whole file as bytes
close($fh);
my $content = decode('Shift_JIS', $bytes // '');

# HTML::TableExtract treats plain header strings as regular expressions, so
# literal parentheses must not be passed unescaped. Precompiled patterns are
# used instead; they accept both ASCII and full-width parentheses.
my $te = HTML::TableExtract->new(
    headers => [ qr/年齢\s*[((]歳[))]/, qr/総数/ ],
);
$te->parse($content) or die "Parse file by htmlextract failed.";

# Only the first matching table is used.
my $ts = ($te->tables)[0];
my %nums;
if (defined $ts) {
    ROW:
    for my $row ($ts->rows) {
        my @cells = map { $_ // '' } @$row;

        # Echo every extracted row so the raw table can be inspected.
        print encode('utf-8', join(": ", @cells)), "\n";

        # Keep only rows whose heading is an age from 0 to 5. The original
        # author notes that the page writes these headings with full-width
        # digits; after normalization both widths are accepted.
        my $age = _normalize_cell($cells[0]);
        next ROW unless $age =~ /\A[0-5]\z/;

        # Strip thousands separators (ASCII or full-width comma) and make
        # sure that what remains is a plain non-negative integer. A count of
        # 0 is valid and must not be rejected.
        my $count = _normalize_cell($cells[1]);
        $count =~ tr/,,//d;
        $count =~ /\A[0-9]+\z/
            or die "The population number invalid. (age $age: '"
                 . encode('utf-8', $cells[1]) . "')";

        # Record the result.
        $nums{$age} = $count + 0;
    }
}
else {
    warn "No table with the expected headers was found in $filepath.\n";
}

# Print the summary in ascending age order.
print "Age 0-5\n";
for my $age (sort { $a <=> $b } keys %nums) {
    print $age, " ", $nums{$age}, "\n";
}
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment