Last active
February 8, 2018 16:57
-
-
Save a3f/75bd3510692cd183ec3857df82caca33 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[API]
access_token=ADD_YOUR_TOKEN_HERE
access_token_secret=ADD_YOUR_TOKEN_HERE
api_key=ADD_YOUR_KEY_HERE
api_secret=ADD_YOUR_KEY_HERE
[SEARCH]
lastid=
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -CS
# SPDX-License-Identifier: Artistic-1.0-Perl
#
# Pages through Twitter search results for the query "lang:ar" and prints the
# tokens of each tweet that consist solely of characters from the Arabic
# block to STDOUT, one per line.  Progress messages go to the terminal (or
# STDERR) so that STDOUT can be redirected to a file.
#
# Usage: script [max_id]
#   max_id  optional tweet id to resume from; defaults to SEARCH/lastid in
#           ~/.twitter, which the script rewrites when it finishes.
use strict;
use warnings;
use 5.010;
use Net::Twitter;
use Config::Tiny;
use File::HomeDir;

# Running totals shared with the signal handlers and the harvesting loop.
my $tweets    = 0;
my $words     = 0;
my $interrupt = 0;
my %count;

# Status output goes to the controlling terminal when there is one; fall back
# to STDERR otherwise.  A glob reference (rather than the bareword STDERR) is
# required for this to work under "use strict".
my $tty;
if ( !open $tty, '+<', '/dev/tty' ) {
    $tty = \*STDERR;
}

# Print the running totals on SIGINFO / SIGUSR1.  %SIG only has keys for the
# signals this platform provides, so checking first avoids a "No such signal"
# warning where SIGINFO does not exist.
my $report_progress = sub {
    print $tty "tweets: $tweets\twords: $words\tunique:", (scalar keys %count), "\n";
};
for my $signal (qw(INFO USR1)) {
    $SIG{$signal} = $report_progress if exists $SIG{$signal};
}

# Ctrl-C throws an exception; when it is raised inside the harvesting eval
# further down, the script still gets to save its position before exiting.
$SIG{INT} = sub {
    print $tty "Exiting..\n";
    die "Interrupted\n";
};

use utf8;
use open ':std', ':encoding(UTF-8)';

# API credentials and the last processed tweet id live in ~/.twitter.
my $config_file = File::HomeDir->my_home . "/.twitter";
die "$config_file is missing\n" if not -e $config_file;
my $config = Config::Tiny->read( $config_file, 'utf8' )
    or die "Cannot read $config_file: " . Config::Tiny->errstr . "\n";

my $nt = Net::Twitter->new(
    ssl                 => 1,
    traits              => [qw/API::RESTv1_1/],
    consumer_key        => $config->{API}{api_key},
    consumer_secret     => $config->{API}{api_secret},
    access_token        => $config->{API}{access_token},
    access_token_secret => $config->{API}{access_token_secret},
    useragent_args      => { timeout => 10 },
);

# A command-line id takes precedence over the one saved by the previous run.
my $maxid = $ARGV[0] || $config->{SEARCH}{lastid};
print $tty "Starting with maxid=", ( $maxid // '' ), "\n";

# First page of results.  The API parameter is spelled "max_id"; a key named
# "maxid" is not a search parameter, so the resume point would be ignored.
my %query = ( q => 'lang:ar', count => 100 );
$query{max_id} = $maxid if $maxid;
my $r = $nt->search( \%query );
# Walk backwards through the search results, one page per iteration, until
# the results are exhausted or an exception (network error, Ctrl-C) ends the
# eval.  Whatever happens, the position reached is saved afterwards.
my $finished = eval {
    while (!$interrupt) {
        # max_id is inclusive, so the tweet we stopped at comes back as part
        # of the next page.  Drop it; if nothing else is left there are no
        # older tweets and the loop must end instead of spinning forever.
        my $boundary = defined $maxid ? "$maxid" : '';
        my @fresh = grep { $_->{id} ne $boundary } @{ $r->{statuses} || [] };
        last if !@fresh;

        foreach my $e (@fresh) {
            # split ' ' breaks on any run of whitespace, so words separated
            # by newlines or tabs are kept and no empty tokens are produced.
            foreach my $str (split ' ', $e->{text}) {
                # Skip tokens containing any character outside the Arabic block.
                next if $str =~ /\p{^InArabic}/;
                #$count{$str}++;
                $words++;
                print "$str\n";
            }
            $tweets++;
            $maxid = $e->{id};
        }
        print $tty "$tweets tweets with $words words\n";
        $r = $nt->search({ q => 'lang:ar', count => 100, max_id => $maxid });
    }
    1;
};
if ( !$finished ) {
    # Report why harvesting stopped instead of discarding the error.
    my $reason = "$@";
    $reason = "unknown error" if $reason eq '';
    chomp $reason;
    print $tty "Stopped: $reason\n";
}

# Remember where we got to so the next run can resume from there.  The file
# was read as UTF-8, so it is written back the same way.
$config->{SEARCH}{lastid} = $maxid // '';
$config->write( $config_file, 'utf8' )
    or print $tty "Could not save $config_file: ", Config::Tiny->errstr, "\n";
print $tty "tweets: $tweets\twords: $words\tunique:", (scalar keys %count), "\tlastid:", ( $maxid // '' ), "\n";
__END__
# Nothing below the __END__ marker is compiled or executed.  This is a parked
# word-frequency report: it lists every Arabic-only word seen more than once,
# most frequent first.  It relies on %count, which is only populated when the
# "$count{$str}++" line in the main loop is enabled.
my @by_frequency = sort { $count{$b} <=> $count{$a} } keys %count;
for my $word (@by_frequency) {
    # The list is sorted by descending count, so the first singleton ends it.
    last if $count{$word} == 1;
    if ( $word !~ /\p{^InArabic}/ ) {
        printf "%s: %s\n", $count{$word}, $word;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment