Created
May 31, 2014 00:52
-
-
Save memememomo/92d50d340083cd2d0a0b to your computer and use it in GitHub Desktop.
Mojo::UserAgentがスクレイピングツールとして便利 ref: http://qiita.com/uchiko/items/2d925c23fa04b696fc7a
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ cpanm Mojolicious IO::Socket::SSL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ MOJO_INACTIVITY_TIMEOUT=10000 perl scrape.pl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use strict;
use warnings;
use utf8;
use Mojo::UserAgent;

# Download a file with Mojo::UserAgent and store it on disk.

# Create the user agent; follow at most 5 redirects.
my $ua = Mojo::UserAgent->new->max_redirects(5);

# Perform a blocking GET request; the result is a transaction object.
my $tx = $ua->get('latest.mojolicio.us');

# In current Mojolicious $tx->error returns undef on success, or a hash
# reference with a "message" key and, for HTTP 4xx/5xx responses, a "code"
# key. (The older $tx->success / list-returning $tx->error API is gone.)
if (my $err = $tx->error) {
    my ($message, $code) = @{$err}{qw(message code)};
    print $code
        ? "$code response: $message\n"
        : "Connection error: $message\n";
}
else {
    $tx->res        # Mojo::Message::Response
        ->content   # Mojo::Content::Single
        ->asset     # Mojo::Asset::File
        ->move_to('/home/uchiko/mojo.tar.gz');    # save the body to a file
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract text with a CSS selector.
# (Calling dom() with a selector returns a Mojo::Collection of the matching
# elements.)
# https://metacpan.org/pod/Mojo::Collection
# map('text') calls ->text on every element and yields a collection of
# strings; append ->join("\n") if a single string is wanted.
my $texts = $res->dom('h2 > a')->map('text');

# Extract the text of one specific element.
# at() returns the first match or undef, so guard before calling ->text.
my $title_node = $res->dom->at('html > head > title');
my $title      = $title_node ? $title_node->text : undef;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract a single value from the JSON body using a JSON Pointer.
my $title = $res->json('/results/0/title');

# Alternatively, decode the whole body into a Perl data structure first and
# index into it. A distinct variable name avoids redeclaring $title in the
# same scope.
my $hash            = $res->json;
my $title_from_hash = $hash->{results}[0]{title};
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use strict;
use warnings;
use utf8;
use Mojo::UserAgent;

# Send a GET request with a custom User-Agent, Referer and cookie, then print
# the response body.

# Create the user agent.
my $ua = Mojo::UserAgent->new;

# Follow at most 5 redirects.
$ua->max_redirects(5);

# Build (but do not yet send) the GET transaction so the request can be
# customized first.
my $tx = $ua->build_tx(GET => 'http://www.perl.com/');

# Set the User-Agent header.
$tx->req->headers->user_agent('user-agent');

# Set the Referer header. The generic header() accessor is used so this does
# not depend on the referrer/referer shortcut spelling, which has varied
# between Mojolicious releases.
$tx->req->headers->header(Referer => 'http://www.perl.com/');

# Attach a cookie to the request.
$tx->req->cookies({name => 'foo', value => 'bar'});

# Send the request (blocking).
$tx = $ua->start($tx);

# In current Mojolicious $tx->error returns undef on success, or a hash
# reference with a "message" key and, for HTTP 4xx/5xx responses, a "code"
# key. (The older $tx->success / list-returning $tx->error API is gone.)
if (my $err = $tx->error) {
    my ($message, $code) = @{$err}{qw(message code)};
    print $code
        ? "$code response: $message\n"
        : "Connection error: $message\n";
}
else {
    # $tx->res is a Mojo::Message::Response.
    print $tx->res->body;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use strict;
use warnings;
use utf8;
use Mojo::UserAgent;

# Submit a login form via POST and print the response body.

# Form field values.
my %params = (
    login_id       => 'hogehoge',
    login_password => 'fugafuga',
);

# Create the user agent.
my $ua = Mojo::UserAgent->new;

# Follow at most 5 redirects.
$ua->max_redirects(5);

# Build (but do not yet send) the POST transaction; the "form" generator
# encodes %params as the request body.
my $tx = $ua->build_tx(
    POST => 'http://hogehogehugahuga.com/login',
    form => \%params,
);

# Set the Referer header. The generic header() accessor is used so this does
# not depend on the referrer/referer shortcut spelling, which has varied
# between Mojolicious releases.
$tx->req->headers->header(Referer => 'http://hogehogehugahuga.com/');

# Send the request (blocking).
$tx = $ua->start($tx);

# In current Mojolicious $tx->error returns undef on success, or a hash
# reference with a "message" key and, for HTTP 4xx/5xx responses, a "code"
# key. (The older $tx->success / list-returning $tx->error API is gone.)
if (my $err = $tx->error) {
    my ($message, $code) = @{$err}{qw(message code)};
    print $code
        ? "$code response: $message\n"
        : "Connection error: $message\n";
}
else {
    # $tx->res is a Mojo::Message::Response.
    print $tx->res->body;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment