Created
December 6, 2016 02:41
-
-
Save NewMexicoKid/789a3ccce6803c5a76fc39ad25ff733d to your computer and use it in GitHub Desktop.
Quick perl script to retrieve word count data for a list of NaNoWriMo uids
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
# | |
# Create a simple wordcount history graph using the wcapi | |
# T. Yao, 11/08/2005 | |
# | |
## HISTORY | |
# If the xml file is older than one hour, refresh it. Otherwise, don't do anything. | |
# 24 October 2012 - tyao - Updated to use the new alphabetical uids | |
# 2013-10-27 - tyao - updated to 2013 | |
# 2015-10-23 - tyao - updated to 2015 | |
# | |
#OLD use LWP::Simple; ## Note: LWP::Simple no longer works on the nano site; you have to use WWW:Mechanize | |
# | |
use MIME::Base64; | |
use WWW::Mechanize; | |
use FileHandle; | |
use Date::Calc qw(Delta_YMDHMS Today_and_Now); | |
my $mech; | |
$mech = WWW::Mechanize->new( autocheck => 0, stack_depth => 0 ); | |
$mech->get( "http://nanowrimo.org/sign_in" ); | |
#---------------------------------------------------------------------- | |
# Set the password, username (mechname) and the file with uids | |
#---------------------------------------------------------------------- | |
my $password = 'Fill this in with your NaNo Password'; | |
my $mechname = 'Fill this in with your NaNo Username'; | |
my $file = 'This is your filename that has the uids, one per line'; | |
#---------------------------------------------------------------------- | |
$mech->submit_form( | |
form_number => 1, | |
fields => { 'user_session[password]' => $password, | |
'user_session[name]' => $mechname, | |
'commit' => 'Login' }, | |
); | |
warn "Warning: difficulty logging in\n" unless ($mech->success); | |
my $uid; | |
my $XML; | |
my $XMLDIR = '.'; ## Wherever you want to put the retrieved XML files | |
print "Reading $file\n"; | |
my $fh = new FileHandle "$file","r"; | |
my @uidlist = (); | |
if (defined $fh) { | |
while (<$fh>) { | |
chomp; | |
push @uidlist,$_; | |
} | |
undef $fh; | |
} | |
my ($year,$month,$day, $hour,$min,$sec) = Today_and_Now(); | |
foreach $uid (@uidlist) { | |
my $outputfile = "${uid}_wchistory.xml"; | |
if (-f "$XMLDIR/$outputfile") { | |
my $mtime = (stat("$XMLDIR/$outputfile"))[9]; | |
# my ($year2,$month2,$day2, $hour2,$min2,$sec2) | |
my ($sec2,$min2,$hour2,$mday2,$mon2,$year2,$wday2,$yday2,$isdst2)= localtime($mtime); | |
my $month2 = $mon2 + 1; | |
$year2 = 1900 + $year2; | |
my $day2 = $mday2; | |
my ($y,$m,$d,$h,$mn,$s) = Delta_YMDHMS($year2,$month2,$day2,$hour2,$min2,$sec2,$year,$month,$day, $hour,$min,$sec); | |
# print "DEBUG: y=$y m=$m d=$d h=$h mn=$mn s=$s\n"; | |
# print "DEBUG: year=$year month=$month day=$day hour=$hour min=$min sec=$sec\n"; | |
# print "DEBUG: year=$year2 month=$month2 day=$day2 hour=$hour2 min=$min2 sec=$sec2\n"; | |
my $hours = $s/3600 + $mn/60 + $h + $d*24 + $m*365/12*24 + $y*365*24; | |
if ($hours > 1 or $hours < 0) { | |
print "Updating uid=$uid (hours=$hours)\n"; | |
$mech->get("http://nanowrimo.org/wordcount_api/wchistory/$uid"); | |
warn "Can't get wchistory for $uid: ", $mech->response->status_line unless $mech->success; | |
my $XML = $mech->content(); | |
writefile($XML,$uid,$outputfile) if ($XML); | |
} else { | |
print "Skipping update for uid=$uid (hours=$hours)\n"; | |
} | |
} else { | |
print "Creating uid=$uid\n"; | |
$mech->get("http://nanowrimo.org/wordcount_api/wchistory/$uid"); | |
warn "Can't get wchistory for $uid: ", $mech->response->status_line unless $mech->success; | |
my $XML = $mech->content(); | |
writefile($XML,$uid,$outputfile) if ($XML); | |
} | |
} | |
### Site stats | |
my ($year,$month,$day, $hour,$min,$sec) = Today_and_Now(); | |
my $outputfile = "wcstats_wchistory.xml"; | |
if (-f "$XMLDIR/$outputfile") { | |
my $mtime = (stat("$XMLDIR/$outputfile"))[9]; | |
# my ($year2,$month2,$day2, $hour2,$min2,$sec2) | |
my ($sec2,$min2,$hour2,$mday2,$mon2,$year2,$wday2,$yday2,$isdst2)= localtime($mtime); | |
my $month2 = $mon2 + 1; | |
$year2 = 1900 + $year2; | |
my $day2 = $mday2; | |
my ($y,$m,$d,$h,$mn,$s) = Delta_YMDHMS($year2,$month2,$day2,$hour2,$min2,$sec2,$year,$month,$day, $hour,$min,$sec); | |
my $hours = $s/3600 + $mn/60 + $h + $d*24 + $m*365/12*24 + $y*365*24; | |
if ($hours > 1 or $hours < 0) { | |
print "Updating wcstats\n"; | |
$mech->get("http://nanowrimo.org/wordcount_api/wcstats"); | |
die "Can't get wchistory for site: ", $mech->response->status_line unless $mech->success; | |
my $XML = $mech->content(); | |
writestatsfile($XML,$outputfile); | |
} else { | |
print "Skipping update for wcstats (hours=$hours)\n"; | |
} | |
} else { | |
print "Creating wcstats\n"; | |
$mech->get("http://nanowrimo.org/wordcount_api/wcstats"); | |
die "Can't get wchistory for site: ", $mech->response->status_line unless $mech->success; | |
my $XML = $mech->content(); | |
writestatsfile($XML,$outputfile); | |
} | |
exit 0; | |
sub writestatsfile { | |
my $XML = shift; | |
my $outputfile = shift; | |
my $fh = new FileHandle "$XMLDIR/$outputfile","w"; | |
if (defined $fh) { | |
print $fh $XML; | |
undef $fh; | |
} else { | |
die "Unable to write to $XMLDIR/$outputfile\n"; | |
} | |
#### Extract data | |
my %data = (); | |
my $min_total = 0; | |
my $max_total = 0; | |
my $wcavg_total = 0; | |
my $wcstd_total = 0; | |
my $gh = new FileHandle "$XMLDIR/wcstats.dat","w"; | |
if (defined $gh) { | |
while ($XML =~ m{<wcentry>(.*?)</wcentry>}sg) { | |
my $STUFF = $1; | |
my ($wcdate,$min,$max,$wcavg,$wcstd) = extract_wcstats($STUFF); | |
$min_total += $min; | |
$max_total += $max; | |
$wcavg_total += $wcavg; | |
$wcstd_total += $wcstd; | |
print $gh qq{$wcdate\t$wcavg_total\t$min_total\t$max_total\t$wcstd_total\n}; | |
} | |
undef $gh; | |
} else { | |
die "Unable to create ${XMLDIR}/wcstats.dat\n"; | |
} | |
} | |
sub writefile { | |
my $XML = shift; | |
my $new_uid = shift; | |
my $outputfile = shift; | |
my $fh = new FileHandle "$XMLDIR/$outputfile","w"; | |
if (defined $fh) { | |
print $fh $XML; | |
undef $fh; | |
} else { | |
die "Unable to write to $XMLDIR/$outputfile\n"; | |
} | |
#### Extract data | |
my %data = (); | |
my $uid = extract_field($XML,'uid'); | |
my $uname = extract_field($XML,'uname'); | |
my $user_wordcount = extract_field($XML,'user_wordcount'); | |
my $wctotal = 0; | |
while ($XML =~ m{<wcentry>(.*?)</wcentry>}sg) { | |
my $ENTRY = $1; | |
my ($wc,$wcdate) = extract_entry($ENTRY); | |
$wctotal += $wc; | |
$data{$wcdate} = $wctotal; | |
} | |
#### Create data file per uid processed | |
my $gh = new FileHandle "$XMLDIR/${new_uid}_data.dat","w"; | |
if (defined $gh) { | |
print $gh qq{# name: $uname | |
# wordcount: $user_wordcount\n}; | |
foreach my $date (sort keys %data) { | |
print $gh qq{$date\t$data{$date}\n}; | |
#DEBUG print qq{DEBUG: $date\t$data{$date}\n}; | |
} | |
undef $gh; | |
} else { | |
die "Unable to create ${XMLDIR}/${new_uid}_data.dat\n"; | |
} | |
return; | |
} | |
sub extract_entry { | |
my $STUFF = shift; | |
my $wc; | |
my $wcdate; | |
if ($STUFF =~ m{<wc>(.*?)</wc>}s) { | |
$wc = $1; | |
} else { | |
$wc = ''; | |
} | |
if ($STUFF =~ m{<wcdate>(.*?)</wcdate>}s) { | |
$wcdate = $1; | |
} else { | |
$wcdate = ''; | |
} | |
return ($wc,$wcdate); | |
} | |
sub extract_wcstats { | |
my $STUFF = shift; | |
my $wcdate; | |
my $min; | |
my $max; | |
my $wcavg; | |
my $wcstd; | |
if ($STUFF =~ m{<wcdate>(.*?)</wcdate>}s) { | |
$wcdate = $1; | |
} else { | |
$wcdate = ''; | |
} | |
if ($STUFF =~ m{<min>(.*?)</min>}s) { | |
$min = $1; | |
} else { | |
$min = ''; | |
} | |
if ($STUFF =~ m{<max>(.*?)</max>}s) { | |
$max = $1; | |
} else { | |
$max = ''; | |
} | |
if ($STUFF =~ m{<average>(.*?)</average>}s) { | |
$wcavg = $1; | |
} else { | |
$wcavg = ''; | |
} | |
if ($STUFF =~ m{<stddev>(.*?)</stddev>}s) { | |
$wcstd = $1; | |
} else { | |
$wcstd = ''; | |
} | |
return ($wcdate,$min,$max,$wcavg,$wcstd); | |
} | |
sub extract_field { | |
my $STUFF = shift; | |
my $field = shift; | |
my $value; | |
if ($STUFF =~ m{<$field>(.*?)</$field>}s) { | |
$value = $1; | |
} else { | |
$value = ''; | |
} | |
return ($value); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment