Skip to content

Instantly share code, notes, and snippets.

@semifor
Created September 7, 2009 22:10
Show Gist options
  • Save semifor/182575 to your computer and use it in GitHub Desktop.
Save semifor/182575 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
# top-followed-by-friends
#
# Prints a list of the users most followed by your friends, but not you.
#
# usage: top-followed-by-friends [twitter-screen_name-or-id]
#
# Throttles Twitter API calls so you don't get rate limited.
# Uses Cache::FileCache for storage. Set $cache_dir to something more
# permanent than /tmp if you want to keep the cache alive longer.
#
# Copyright (c) 2009 Marc Mims
#
# This is free software, licensed under the terms of the GNU Public License, version 2.
#
use warnings;
use strict;
use feature ':5.10';
use Cache::FileCache;
use Net::Twitter;
#----------------------------------- config ----------------------------------
# How much of your twitter rate_limit do you want this script to eat?
# 0.2 = 20%
my $max_bandwidth = 0.5;
# seconds to wait before retrying after a twitter error
my $delay_on_error = 10;
# how many users to print
my $TOP = 25;
# Where does the FileCache live?
my $cache_dir = '/home/mjm/tmp/perl/cache';
# How long does it live?
my $cache_expires_in = "5 days";
#-----------------------------------------------------------------------------
my $friend_cache = Cache::FileCache->new({
namespace => 'friends',
($cache_dir ? (cache_root => $cache_dir) : ()),
default_expires_in => $cache_expires_in,
});
$friend_cache->purge;
my $nt = Net::Twitter->new(legacy => 0, netrc => 1, ssl => 1);
my $my_id = shift @ARGV || $nt->username;
{
# monkey patch Net::Twitter to provide rate limit features
$nt->ua->add_handler(response_done => sub {
my $res = shift;
@{$nt}{qw/rate_count rate_reset rate_limit/} =
map $res->header($_), qw/
X-RateLimit-Remaining
X-RateLimit-Reset
X-RateLimit-Limit
/;
});
my %method = map { my $method = $_; ($method => sub {
my $self = shift;
$self->ensure_rate;
return $self->{$method};
})
} qw/rate_limit rate_count rate_reset/;
$method{ensure_rate} = sub {
my $self = shift;
unless ( defined $self->{rate_count} ) {
my $r = $self->rate_limit_status;
@{$nt}{qw/rate_count rate_reset rate_limit/} =
@{$r}{qw/remaining_hits reset_time_in_seconds hourly_limit/};
}
};
$method{rate_percent} = sub {
my $self = shift;
$self->ensure_rate;
my $full_rate = $nt->{rate_limit} / 3600;
my $current_rate = eval { $nt->{rate_count} / ($nt->{rate_reset} - time) };
return $current_rate / $full_rate;
};
$method{until_rate} = sub {
my ($self, $target_rate) = @_;
$self->ensure_rate;
my $s = $nt->{rate_reset} - time - 3600 * $nt->{rate_count} / $target_rate / $nt->{rate_limit};
$s > 0 ? $s : 0;
};
my $package = ref $nt;
no strict 'refs';
while ( my($name, $code) = each %method ) {
*{"$package\::$name"} = $code;
}
}
sub uniq {
my %seen;
grep { !$seen{$_}++ } @_;
}
my $me = $nt->show_user($my_id);
my @friends = @{ $friend_cache->get($me->{id}) || [] };
unless ( @friends ) {
@friends = uniq @{$nt->friends_ids({ user_id => $me->{id} })};
$friend_cache->set($me->{id} => [ @friends ]);
}
FRIEND:
for my $friend ( @friends ) {
next FRIEND if $friend_cache->get($friend);
my $until = eval { $nt->until_rate($max_bandwidth) };
$until //= 10;
warn "waiting for $until seconds...\n";
sleep $until if $until;
my $friends_of = eval { $nt->friends_ids({ id => $friend }) } || do {
next $friend if $@ && $@ =~ /404/;
warn "$@; waiting for $delay_on_error seconds...\n";
sleep $delay_on_error;
redo FRIEND;
};
$friend_cache->set($friend => [ uniq @$friends_of ]);
}
my %exclude = map { $_ => 1 } $me->{id}, @friends;
my %edge_count;
++$edge_count{$_} for grep { !$exclude{$_} } map { @{$friend_cache->get($_)} } $friend_cache->get_keys;
my $user_cache = Cache::FileCache->new({
namespace => 'users',
($cache_dir ? (cache_root => $cache_dir) : ()),
default_expires_in => $cache_expires_in,
});
$user_cache->purge;
my %top_users = map { $_ => $edge_count{$_} }
(sort { $edge_count{$b} <=> $edge_count{$a} } keys %edge_count)
[0 .. ($TOP - 1)];
USER:
for my $user_id ( keys %top_users ) {
next USER if $user_cache->get($user_id);
my $until = eval { $nt->until_rate($max_bandwidth) };
$until //= 10; # wait on error
if ( $until ) {
warn "wating $until seconds...\n";
sleep $until;
redo USER;
}
my $user = eval { $nt->show_user({ user_id => $user_id }) } || do {
next USER if $@ && $@ =~ /404/; # It's twitter, anyting can happen
warn "waiting $delay_on_error seconds to retry\n";
sleep($delay_on_error);
redo USER;
};
$user_cache->set($user_id => $user);
}
for my $user_id ( sort { $top_users{$b} <=> $top_users{$a} } keys %top_users ) {
my $user = $user_cache->get($user_id) || next;
say "$user->{name} [$edge_count{$user_id}]: http://twitter.com/$user->{screen_name}";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment