Created
September 7, 2009 22:10
-
-
Save semifor/182575 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# top-followed-by-friends | |
# | |
# Prints a list of the users most followed by your friends, but not you. | |
# | |
# usage: top-followed-by-friends [twitter-screen_name-or-id] | |
# | |
# Throttles Twitter API calls so you don't get rate limited. | |
# Uses Cache::FileCache for storage. Set $cache_dir to something more | |
# permanent than /tmp if you want to keep the cache alive longer. | |
# | |
# Copyright (c) 2009 Marc Mims | |
# | |
# This is free software, licensed under the terms of the GNU Public License, version 2. | |
# | |
use warnings; | |
use strict; | |
use feature ':5.10'; | |
use Cache::FileCache; | |
use Net::Twitter; | |
#----------------------------------- config ---------------------------------- | |
# How much of your twitter rate_limit do you want this script to eat? | |
# 0.2 = 20% | |
my $max_bandwidth = 0.5; | |
# seconds to wait before retrying after a twitter error | |
my $delay_on_error = 10; | |
# how many users to print | |
my $TOP = 25; | |
# Where does the FileCache live? | |
my $cache_dir = '/home/mjm/tmp/perl/cache'; | |
# How long does it live? | |
my $cache_expires_in = "5 days"; | |
#----------------------------------------------------------------------------- | |
my $friend_cache = Cache::FileCache->new({ | |
namespace => 'friends', | |
($cache_dir ? (cache_root => $cache_dir) : ()), | |
default_expires_in => $cache_expires_in, | |
}); | |
$friend_cache->purge; | |
my $nt = Net::Twitter->new(legacy => 0, netrc => 1, ssl => 1); | |
my $my_id = shift @ARGV || $nt->username; | |
{ | |
# monkey patch Net::Twitter to provide rate limit features | |
$nt->ua->add_handler(response_done => sub { | |
my $res = shift; | |
@{$nt}{qw/rate_count rate_reset rate_limit/} = | |
map $res->header($_), qw/ | |
X-RateLimit-Remaining | |
X-RateLimit-Reset | |
X-RateLimit-Limit | |
/; | |
}); | |
my %method = map { my $method = $_; ($method => sub { | |
my $self = shift; | |
$self->ensure_rate; | |
return $self->{$method}; | |
}) | |
} qw/rate_limit rate_count rate_reset/; | |
$method{ensure_rate} = sub { | |
my $self = shift; | |
unless ( defined $self->{rate_count} ) { | |
my $r = $self->rate_limit_status; | |
@{$nt}{qw/rate_count rate_reset rate_limit/} = | |
@{$r}{qw/remaining_hits reset_time_in_seconds hourly_limit/}; | |
} | |
}; | |
$method{rate_percent} = sub { | |
my $self = shift; | |
$self->ensure_rate; | |
my $full_rate = $nt->{rate_limit} / 3600; | |
my $current_rate = eval { $nt->{rate_count} / ($nt->{rate_reset} - time) }; | |
return $current_rate / $full_rate; | |
}; | |
$method{until_rate} = sub { | |
my ($self, $target_rate) = @_; | |
$self->ensure_rate; | |
my $s = $nt->{rate_reset} - time - 3600 * $nt->{rate_count} / $target_rate / $nt->{rate_limit}; | |
$s > 0 ? $s : 0; | |
}; | |
my $package = ref $nt; | |
no strict 'refs'; | |
while ( my($name, $code) = each %method ) { | |
*{"$package\::$name"} = $code; | |
} | |
} | |
sub uniq { | |
my %seen; | |
grep { !$seen{$_}++ } @_; | |
} | |
my $me = $nt->show_user($my_id); | |
my @friends = @{ $friend_cache->get($me->{id}) || [] }; | |
unless ( @friends ) { | |
@friends = uniq @{$nt->friends_ids({ user_id => $me->{id} })}; | |
$friend_cache->set($me->{id} => [ @friends ]); | |
} | |
FRIEND: | |
for my $friend ( @friends ) { | |
next FRIEND if $friend_cache->get($friend); | |
my $until = eval { $nt->until_rate($max_bandwidth) }; | |
$until //= 10; | |
warn "waiting for $until seconds...\n"; | |
sleep $until if $until; | |
my $friends_of = eval { $nt->friends_ids({ id => $friend }) } || do { | |
next $friend if $@ && $@ =~ /404/; | |
warn "$@; waiting for $delay_on_error seconds...\n"; | |
sleep $delay_on_error; | |
redo FRIEND; | |
}; | |
$friend_cache->set($friend => [ uniq @$friends_of ]); | |
} | |
my %exclude = map { $_ => 1 } $me->{id}, @friends; | |
my %edge_count; | |
++$edge_count{$_} for grep { !$exclude{$_} } map { @{$friend_cache->get($_)} } $friend_cache->get_keys; | |
my $user_cache = Cache::FileCache->new({ | |
namespace => 'users', | |
($cache_dir ? (cache_root => $cache_dir) : ()), | |
default_expires_in => $cache_expires_in, | |
}); | |
$user_cache->purge; | |
my %top_users = map { $_ => $edge_count{$_} } | |
(sort { $edge_count{$b} <=> $edge_count{$a} } keys %edge_count) | |
[0 .. ($TOP - 1)]; | |
USER: | |
for my $user_id ( keys %top_users ) { | |
next USER if $user_cache->get($user_id); | |
my $until = eval { $nt->until_rate($max_bandwidth) }; | |
$until //= 10; # wait on error | |
if ( $until ) { | |
warn "wating $until seconds...\n"; | |
sleep $until; | |
redo USER; | |
} | |
my $user = eval { $nt->show_user({ user_id => $user_id }) } || do { | |
next USER if $@ && $@ =~ /404/; # It's twitter, anyting can happen | |
warn "waiting $delay_on_error seconds to retry\n"; | |
sleep($delay_on_error); | |
redo USER; | |
}; | |
$user_cache->set($user_id => $user); | |
} | |
for my $user_id ( sort { $top_users{$b} <=> $top_users{$a} } keys %top_users ) { | |
my $user = $user_cache->get($user_id) || next; | |
say "$user->{name} [$edge_count{$user_id}]: http://twitter.com/$user->{screen_name}"; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment