Created
May 29, 2011 09:09
-
-
Save norry-gogo/997598 to your computer and use it in GitHub Desktop.
Twitter API -> Read it later API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use 5.010; | |
use strict; | |
use warnings; | |
use Net::Twitter; | |
use URI::Find; | |
use Web::Scraper; | |
use LWP::UserAgent; | |
use YAML; | |
use Scalar::Util 'blessed'; | |
use Encode; | |
# Location of the YAML configuration.  The same file is rewritten at the end
# of the run with the updated since_id of every list.
my $config_uri = 'path/to/yaml';
my $config     = YAML::LoadFile($config_uri);

### Sample configuration file
#
# ---
# lists:
#   - list_id: 5157981
#     list_name: more-twitter-accounts
#     since_id: *****************   # e.g. the latest status id of the list
#     user: twitter
#   - list_id: 4497778
#     list_name: perl-people
#     since_id: *****************
#     user: perlfoundation
# read_it_later:
#   apikey: *********************
#   password: ********
#   username: ********

# Twitter client with the REST and Lists API traits.
my $nt = Net::Twitter->new(
    traits => [ 'API::REST', 'API::Lists' ],
);

# Endpoint used to add a page; its query string is rebuilt for every link.
my $read_it_later = URI->new('https://readitlaterlist.com/v2/add');

# Shared HTTP client for expanding short links and calling Read It Later.
my $ua = LWP::UserAgent->new;
# Main loop: for every configured Twitter list, walk its statuses page by
# page (oldest first within a page), expand each link found in a status, and
# add the resulting page to Read It Later.  The highest status id seen is
# stored back into the list's since_id, and the whole configuration is written
# back to $config_uri once all lists are processed.
for my $list ( @{ $config->{lists} } ) {
    my $page           = 1;
    my $start_since_id = $list->{since_id};
    my $new_since_id   = $start_since_id;

    LOOP_PAGE:
    while (1) {
        my ($statuses, $success) = get_list_statuses($list, $page);

        if (!$success) {
            # A page could not be fetched: keep the old since_id so the next
            # run starts over from the same position, and stop paging.
            # $statuses may be undef here, so it must not be dereferenced.
            $new_since_id = $start_since_id;
            last LOOP_PAGE;
        }
        last LOOP_PAGE unless $statuses && @{$statuses};

        for my $status ( reverse @{$statuses} ) {
            for my $uri ( find_uris_from($status->{text}) ) {
                my $expand_uri = expand_uri($uri);
                next unless $expand_uri;

                my $html_title = get_html_title($expand_uri);
                next unless $html_title;

                # No trailing newline here: the title is sent as a query
                # parameter, not printed.
                $read_it_later->query_form(
                    apikey   => $config->{read_it_later}{apikey},
                    username => $config->{read_it_later}{username},
                    password => $config->{read_it_later}{password},
                    url      => $expand_uri,
                    title    => sprintf(
                        '[TW]%s@%s / %s',
                        $list->{list_name},
                        $status->{user}{screen_name},
                        $html_title,
                    ),
                );

                # NOTE(review): the add call is issued as a HEAD request with
                # the credentials in the query string -- confirm against the
                # Read It Later API documentation.
                my $res = eval { $ua->head("$read_it_later") };
                next if $@ || !$res;

                if ( $res->is_success ) {
                    printf "[TW]%s\@%s / %s (%s)\n",
                        $list->{list_name},
                        $status->{user}{screen_name},
                        encode('utf-8', $html_title),
                        $expand_uri;
                }
                else {
                    # Report the failure without echoing the request URI,
                    # which contains the account credentials.
                    warn sprintf "Read It Later add failed for %s: %s\n",
                        $expand_uri, $res->status_line;
                }
            }

            $new_since_id = $status->{id} if $new_since_id < $status->{id};
        }

        $page++;
    }

    $list->{since_id} = $new_since_id;
}

# Persist the updated since_id values.
YAML::DumpFile($config_uri, $config);
# Fetch one page of statuses for a configured Twitter list.
#
# Arguments:
#   $list - hash ref describing the list; the keys user, list_id and
#           since_id are read.
#   $page - page number to request (200 statuses are requested per page).
#
# Returns the two-element list ($statuses, $success):
#   $statuses - the value returned by $nt->list_statuses, or an empty array
#               ref when the request failed, so callers can always
#               dereference it.
#   $success  - 1 when the request succeeded, undef when a
#               Net::Twitter::Error was caught.
#
# Dies: any exception that is not a Net::Twitter::Error is rethrown.
sub get_list_statuses {
    my ($list, $page) = @_;

    my $statuses;
    my $success = 1;

    # "eval { ...; 1 } or do { ... }" detects failure from eval's own return
    # value instead of relying on the truthiness of $@.
    eval {
        $statuses = $nt->list_statuses({
            user     => $list->{user},
            list_id  => $list->{list_id},
            per_page => 200,
            page     => $page,
            since_id => $list->{since_id},
        });
        1;
    } or do {
        # Copy $@ first: later calls may overwrite it.
        my $err = $@ || 'unknown error';
        die $err unless blessed($err) && $err->isa('Net::Twitter::Error');

        # Twitter API errors are not fatal, but they should be visible.
        warn sprintf "list_statuses failed for list %s (page %s): %s\n",
            $list->{list_id}, $page, "$err";
        $statuses = [];
        $success  = undef;
    };

    return ($statuses // [], $success);
}
# Extract every URI that URI::Find recognises in a piece of text.
#
# Arguments:
#   $text - the string to scan (a private copy is scanned and left unchanged,
#           because the callback returns each match as it was found).
#
# Returns the original, unnormalised URI strings in the order they were found.
sub find_uris_from {
    my ($text) = @_;

    # The finder is built only once; its callback appends to this persistent
    # buffer, which is emptied at the start of every call.
    state $collected = [];
    state $finder    = URI::Find->new(sub {
        my (undef, $as_written) = @_;
        push @{$collected}, $as_written;
        return $as_written;
    });

    @{$collected} = ();
    $finder->find(\$text);

    return @{$collected};
}
# Resolve a (possibly shortened) URI by issuing a HEAD request with the
# shared user agent.
#
# Arguments:
#   $uri - the URI to request.
#
# Returns the URI of the request attached to the final response when that
# response is successful; returns nothing (undef in scalar context) otherwise.
sub expand_uri {
    my ($short_uri) = @_;

    my $response = $ua->head($short_uri);

    if ( $response->is_success ) {
        return $response->request->uri;
    }

    return;
}
# Scrape the text of the <title> element of the page at the given URI.
#
# Arguments:
#   $uri - address of the page to scrape.
#
# Returns the title text, the placeholder "-- No title --" when the scraped
# title is missing or false, or nothing when scraping raised an exception.
sub get_html_title {
    my ($page_uri) = @_;

    # Built once and reused for every page.
    state $title_scraper = scraper {
        process 'title', 'title' => 'TEXT';
    };

    my $scraped = eval { $title_scraper->scrape( URI->new($page_uri) ) };
    return if $@;

    return $scraped->{title} || "-- No title --";
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment