NewMexicoKid · December 6, 2016 02:41
diff --git a/retrieve_data.pl b/retrieve_data.pl
 #!/usr/bin/perl -w 
 #
 # Create a simple wordcount history graph using the wcapi
 # T. Yao, 11/08/2005
 #
 ## HISTORY
 # If the xml file is older than one hour, refresh it. Otherwise, don't do anything.
 # 24 October 2012 - tyao - Updated to use the new alphabetical uids
 # 2013-10-27 - tyao - updated to 2013
 # 2015-10-23 - tyao - updated to 2015
 #
 #OLD use LWP::Simple; ## Note: LWP::Simple no longer works on the nano site; you have to use WWW:Mechanize
 # 
 use MIME::Base64;
 use WWW::Mechanize;
 use FileHandle;
 use Date::Calc qw(Delta_YMDHMS Today_and_Now);

 my $mech;
 $mech = WWW::Mechanize->new( autocheck => 0, stack_depth => 0 );
 $mech->get( "http://nanowrimo.org/sign_in" );
 #----------------------------------------------------------------------
 # Set the password, username (mechname) and the file with uids
 #----------------------------------------------------------------------
 my $password = 'Fill this in with your NaNo Password';
 my $mechname = 'Fill this in with your NaNo Username';
 my $file = 'This is your filename that has the uids, one per line';
 #----------------------------------------------------------------------
 $mech->submit_form(
     form_number => 1,
     fields      => { 'user_session[password]' => $password,
                      'user_session[name]' => $mechname,
                      'commit' => 'Login' },
 );
 warn "Warning: difficulty logging in\n" unless ($mech->success);

 my $uid;
 my $XML;
 my $XMLDIR = '.'; ## Wherever you want to put the retrieved XML files
 print "Reading $file\n";
 my $fh = new FileHandle "$file","r";
 my @uidlist = ();
 if (defined $fh) {
   while (<$fh>) {
      chomp;
      push @uidlist,$_;
   }
   undef $fh;
 }
 my ($year,$month,$day, $hour,$min,$sec) = Today_and_Now();
 foreach $uid (@uidlist) {
   my $outputfile = "${uid}_wchistory.xml";
   if (-f "$XMLDIR/$outputfile") {
      my $mtime = (stat("$XMLDIR/$outputfile"))[9];
      # my ($year2,$month2,$day2, $hour2,$min2,$sec2) 
      my ($sec2,$min2,$hour2,$mday2,$mon2,$year2,$wday2,$yday2,$isdst2)= localtime($mtime);
      my $month2 = $mon2 + 1;
      $year2 = 1900 + $year2;
      my $day2 = $mday2;

      my ($y,$m,$d,$h,$mn,$s) = Delta_YMDHMS($year2,$month2,$day2,$hour2,$min2,$sec2,$year,$month,$day, $hour,$min,$sec);
      # print "DEBUG: y=$y m=$m d=$d h=$h mn=$mn s=$s\n";
      # print "DEBUG: year=$year month=$month day=$day hour=$hour min=$min sec=$sec\n";
      # print "DEBUG: year=$year2 month=$month2 day=$day2 hour=$hour2 min=$min2 sec=$sec2\n";
      my $hours = $s/3600 + $mn/60 + $h + $d*24 + $m*365/12*24 + $y*365*24;
      if ($hours > 1 or $hours < 0) {
         print "Updating uid=$uid (hours=$hours)\n";
         $mech->get("http://nanowrimo.org/wordcount_api/wchistory/$uid");
         warn "Can't get wchistory for $uid: ", $mech->response->status_line unless $mech->success;
         my $XML = $mech->content();
         writefile($XML,$uid,$outputfile) if ($XML);
      } else {
         print "Skipping update for uid=$uid (hours=$hours)\n";
      }
   } else {
      print "Creating uid=$uid\n";
      $mech->get("http://nanowrimo.org/wordcount_api/wchistory/$uid");
      warn "Can't get wchistory for $uid: ", $mech->response->status_line unless $mech->success;
      my $XML = $mech->content();
      writefile($XML,$uid,$outputfile) if ($XML);
   }
 }

 ### Site stats
 my ($year,$month,$day, $hour,$min,$sec) = Today_and_Now();
 my $outputfile = "wcstats_wchistory.xml";
 if (-f "$XMLDIR/$outputfile") {
   my $mtime = (stat("$XMLDIR/$outputfile"))[9];
   # my ($year2,$month2,$day2, $hour2,$min2,$sec2) 
   my ($sec2,$min2,$hour2,$mday2,$mon2,$year2,$wday2,$yday2,$isdst2)= localtime($mtime);
   my $month2 = $mon2 + 1;
   $year2 = 1900 + $year2;
   my $day2 = $mday2;

   my ($y,$m,$d,$h,$mn,$s) = Delta_YMDHMS($year2,$month2,$day2,$hour2,$min2,$sec2,$year,$month,$day, $hour,$min,$sec);
   my $hours = $s/3600 + $mn/60 + $h + $d*24 + $m*365/12*24 + $y*365*24;
   if ($hours > 1 or $hours < 0) {
      print "Updating wcstats\n";
      $mech->get("http://nanowrimo.org/wordcount_api/wcstats");
      die "Can't get wchistory for site: ", $mech->response->status_line unless $mech->success;
      my $XML = $mech->content();
      writestatsfile($XML,$outputfile);
   } else {
      print "Skipping update for wcstats (hours=$hours)\n";
   }
 } else {
   print "Creating wcstats\n";
   $mech->get("http://nanowrimo.org/wordcount_api/wcstats");
   die "Can't get wchistory for site: ", $mech->response->status_line unless $mech->success;
   my $XML = $mech->content();
   writestatsfile($XML,$outputfile);
 }

 exit 0;

 sub writestatsfile {
   my $XML = shift;
   my $outputfile = shift;
   my $fh = new FileHandle "$XMLDIR/$outputfile","w";
   if (defined $fh) {
      print $fh $XML;
      undef $fh;
   } else {
      die "Unable to write to $XMLDIR/$outputfile\n";
   }
   #### Extract data
   my %data = ();
   my $min_total = 0;
   my $max_total = 0;
   my $wcavg_total = 0;
   my $wcstd_total = 0;
   my $gh = new FileHandle "$XMLDIR/wcstats.dat","w";
   if (defined $gh) {
      while ($XML =~ m{<wcentry>(.*?)</wcentry>}sg) {
         my $STUFF = $1;
         my ($wcdate,$min,$max,$wcavg,$wcstd) = extract_wcstats($STUFF);
         $min_total += $min;
         $max_total += $max;
         $wcavg_total += $wcavg;
         $wcstd_total += $wcstd;
         print $gh qq{$wcdate\t$wcavg_total\t$min_total\t$max_total\t$wcstd_total\n};
      }
      undef $gh;
   } else {
      die "Unable to create ${XMLDIR}/wcstats.dat\n";
   }
 }

 sub writefile {
   my $XML = shift;
   my $new_uid = shift;
   my $outputfile = shift;
   my $fh = new FileHandle "$XMLDIR/$outputfile","w";
   if (defined $fh) {
      print $fh $XML;
      undef $fh;
   } else {
      die "Unable to write to $XMLDIR/$outputfile\n";
   }
   #### Extract data
   my %data = ();
   my $uid = extract_field($XML,'uid');
   my $uname = extract_field($XML,'uname');
   my $user_wordcount = extract_field($XML,'user_wordcount');
   my $wctotal = 0;
   while ($XML =~ m{<wcentry>(.*?)</wcentry>}sg) {
      my $ENTRY = $1;
      my ($wc,$wcdate) = extract_entry($ENTRY);
      $wctotal += $wc;
      $data{$wcdate} = $wctotal;
   }

   #### Create data file per uid processed
   my $gh = new FileHandle "$XMLDIR/${new_uid}_data.dat","w";
   if (defined $gh) {
      print $gh qq{# name: $uname
 # wordcount: $user_wordcount\n};
      foreach my $date (sort keys %data) {
         print $gh qq{$date\t$data{$date}\n};
         #DEBUG print qq{DEBUG: $date\t$data{$date}\n};
      }
      undef $gh;
   } else {
      die "Unable to create ${XMLDIR}/${new_uid}_data.dat\n";
   }
   return;
 }

 sub extract_entry {
   my $STUFF = shift;
   my $wc;
   my $wcdate;
   if ($STUFF =~ m{<wc>(.*?)</wc>}s) {
      $wc = $1;
   } else {
      $wc = '';
   }
   if ($STUFF =~ m{<wcdate>(.*?)</wcdate>}s) {
      $wcdate = $1;
   } else {
      $wcdate = '';
   }
   return ($wc,$wcdate);
 }

 sub extract_wcstats {
   my $STUFF = shift;
   my $wcdate;
   my $min;
   my $max;
   my $wcavg;
   my $wcstd;
   if ($STUFF =~ m{<wcdate>(.*?)</wcdate>}s) {
      $wcdate = $1;
   } else {
      $wcdate = '';
   }
   if ($STUFF =~ m{<min>(.*?)</min>}s) {
      $min = $1;
   } else {
      $min = '';
   }
   if ($STUFF =~ m{<max>(.*?)</max>}s) {
      $max = $1;
   } else {
      $max = '';
   }
   if ($STUFF =~ m{<average>(.*?)</average>}s) {
      $wcavg = $1;
   } else {
      $wcavg = '';
   }
   if ($STUFF =~ m{<stddev>(.*?)</stddev>}s) {
      $wcstd = $1;
   } else {
      $wcstd = '';
   }
   return ($wcdate,$min,$max,$wcavg,$wcstd);
 }


 sub extract_field {
   my $STUFF = shift;
   my $field = shift;
   my $value;
   if ($STUFF =~ m{<$field>(.*?)</$field>}s) {
      $value = $1;
   } else {
      $value = '';
   }
   return ($value);
 }
	#!/usr/bin/perl -w
	#
	# Create a simple wordcount history graph using the wcapi
	# T. Yao, 11/08/2005
	#
	## HISTORY
	# If the xml file is older than one hour, refresh it. Otherwise, don't do anything.
	# 24 October 2012 - tyao - Updated to use the new alphabetical uids
	# 2013-10-27 - tyao - updated to 2013
	# 2015-10-23 - tyao - updated to 2015
	#
	#OLD use LWP::Simple; ## Note: LWP::Simple no longer works on the nano site; you have to use WWW:Mechanize
	#
	use MIME::Base64;
	use WWW::Mechanize;
	use FileHandle;
	use Date::Calc qw(Delta_YMDHMS Today_and_Now);

	my $mech;
	$mech = WWW::Mechanize->new( autocheck => 0, stack_depth => 0 );
	$mech->get( "http://nanowrimo.org/sign_in" );
	#----------------------------------------------------------------------
	# Set the password, username (mechname) and the file with uids
	#----------------------------------------------------------------------
	my $password = 'Fill this in with your NaNo Password';
	my $mechname = 'Fill this in with your NaNo Username';
	my $file = 'This is your filename that has the uids, one per line';
	#----------------------------------------------------------------------
	$mech->submit_form(
	form_number => 1,
	fields => { 'user_session[password]' => $password,
	'user_session[name]' => $mechname,
	'commit' => 'Login' },
	);
	warn "Warning: difficulty logging in\n" unless ($mech->success);

	my $uid;
	my $XML;
	my $XMLDIR = '.'; ## Wherever you want to put the retrieved XML files
	print "Reading $file\n";
	my $fh = new FileHandle "$file","r";
	my @uidlist = ();
	if (defined $fh) {
	while (<$fh>) {
	chomp;
	push @uidlist,$_;
	}
	undef $fh;
	}
	my ($year,$month,$day, $hour,$min,$sec) = Today_and_Now();
	foreach $uid (@uidlist) {
	my $outputfile = "${uid}_wchistory.xml";
	if (-f "$XMLDIR/$outputfile") {
	my $mtime = (stat("$XMLDIR/$outputfile"))[9];
	# my ($year2,$month2,$day2, $hour2,$min2,$sec2)
	my ($sec2,$min2,$hour2,$mday2,$mon2,$year2,$wday2,$yday2,$isdst2)= localtime($mtime);
	my $month2 = $mon2 + 1;
	$year2 = 1900 + $year2;
	my $day2 = $mday2;

	my ($y,$m,$d,$h,$mn,$s) = Delta_YMDHMS($year2,$month2,$day2,$hour2,$min2,$sec2,$year,$month,$day, $hour,$min,$sec);
	# print "DEBUG: y=$y m=$m d=$d h=$h mn=$mn s=$s\n";
	# print "DEBUG: year=$year month=$month day=$day hour=$hour min=$min sec=$sec\n";
	# print "DEBUG: year=$year2 month=$month2 day=$day2 hour=$hour2 min=$min2 sec=$sec2\n";
	my $hours = $s/3600 + $mn/60 + $h + $d24 + $m365/1224 + $y365*24;
	if ($hours > 1 or $hours < 0) {
	print "Updating uid=$uid (hours=$hours)\n";
	$mech->get("http://nanowrimo.org/wordcount_api/wchistory/$uid");
	warn "Can't get wchistory for $uid: ", $mech->response->status_line unless $mech->success;
	my $XML = $mech->content();
	writefile($XML,$uid,$outputfile) if ($XML);
	} else {
	print "Skipping update for uid=$uid (hours=$hours)\n";
	}
	} else {
	print "Creating uid=$uid\n";
	$mech->get("http://nanowrimo.org/wordcount_api/wchistory/$uid");
	warn "Can't get wchistory for $uid: ", $mech->response->status_line unless $mech->success;
	my $XML = $mech->content();
	writefile($XML,$uid,$outputfile) if ($XML);
	}
	}

	### Site stats
	my ($year,$month,$day, $hour,$min,$sec) = Today_and_Now();
	my $outputfile = "wcstats_wchistory.xml";
	if (-f "$XMLDIR/$outputfile") {
	my $mtime = (stat("$XMLDIR/$outputfile"))[9];
	# my ($year2,$month2,$day2, $hour2,$min2,$sec2)
	my ($sec2,$min2,$hour2,$mday2,$mon2,$year2,$wday2,$yday2,$isdst2)= localtime($mtime);
	my $month2 = $mon2 + 1;
	$year2 = 1900 + $year2;
	my $day2 = $mday2;

	my ($y,$m,$d,$h,$mn,$s) = Delta_YMDHMS($year2,$month2,$day2,$hour2,$min2,$sec2,$year,$month,$day, $hour,$min,$sec);
	my $hours = $s/3600 + $mn/60 + $h + $d24 + $m365/1224 + $y365*24;
	if ($hours > 1 or $hours < 0) {
	print "Updating wcstats\n";
	$mech->get("http://nanowrimo.org/wordcount_api/wcstats");
	die "Can't get wchistory for site: ", $mech->response->status_line unless $mech->success;
	my $XML = $mech->content();
	writestatsfile($XML,$outputfile);
	} else {
	print "Skipping update for wcstats (hours=$hours)\n";
	}
	} else {
	print "Creating wcstats\n";
	$mech->get("http://nanowrimo.org/wordcount_api/wcstats");
	die "Can't get wchistory for site: ", $mech->response->status_line unless $mech->success;
	my $XML = $mech->content();
	writestatsfile($XML,$outputfile);
	}

	exit 0;

	sub writestatsfile {
	my $XML = shift;
	my $outputfile = shift;
	my $fh = new FileHandle "$XMLDIR/$outputfile","w";
	if (defined $fh) {
	print $fh $XML;
	undef $fh;
	} else {
	die "Unable to write to $XMLDIR/$outputfile\n";
	}
	#### Extract data
	my %data = ();
	my $min_total = 0;
	my $max_total = 0;
	my $wcavg_total = 0;
	my $wcstd_total = 0;
	my $gh = new FileHandle "$XMLDIR/wcstats.dat","w";
	if (defined $gh) {
	while ($XML =~ m{<wcentry>(.*?)</wcentry>}sg) {
	my $STUFF = $1;
	my ($wcdate,$min,$max,$wcavg,$wcstd) = extract_wcstats($STUFF);
	$min_total += $min;
	$max_total += $max;
	$wcavg_total += $wcavg;
	$wcstd_total += $wcstd;
	print $gh qq{$wcdate\t$wcavg_total\t$min_total\t$max_total\t$wcstd_total\n};
	}
	undef $gh;
	} else {
	die "Unable to create ${XMLDIR}/wcstats.dat\n";
	}
	}

	sub writefile {
	my $XML = shift;
	my $new_uid = shift;
	my $outputfile = shift;
	my $fh = new FileHandle "$XMLDIR/$outputfile","w";
	if (defined $fh) {
	print $fh $XML;
	undef $fh;
	} else {
	die "Unable to write to $XMLDIR/$outputfile\n";
	}
	#### Extract data
	my %data = ();
	my $uid = extract_field($XML,'uid');
	my $uname = extract_field($XML,'uname');
	my $user_wordcount = extract_field($XML,'user_wordcount');
	my $wctotal = 0;
	while ($XML =~ m{<wcentry>(.*?)</wcentry>}sg) {
	my $ENTRY = $1;
	my ($wc,$wcdate) = extract_entry($ENTRY);
	$wctotal += $wc;
	$data{$wcdate} = $wctotal;
	}

	#### Create data file per uid processed
	my $gh = new FileHandle "$XMLDIR/${new_uid}_data.dat","w";
	if (defined $gh) {
	print $gh qq{# name: $uname
	# wordcount: $user_wordcount\n};
	foreach my $date (sort keys %data) {
	print $gh qq{$date\t$data{$date}\n};
	#DEBUG print qq{DEBUG: $date\t$data{$date}\n};
	}
	undef $gh;
	} else {
	die "Unable to create ${XMLDIR}/${new_uid}_data.dat\n";
	}
	return;
	}

	sub extract_entry {
	my $STUFF = shift;
	my $wc;
	my $wcdate;
	if ($STUFF =~ m{<wc>(.*?)</wc>}s) {
	$wc = $1;
	} else {
	$wc = '';
	}
	if ($STUFF =~ m{<wcdate>(.*?)</wcdate>}s) {
	$wcdate = $1;
	} else {
	$wcdate = '';
	}
	return ($wc,$wcdate);
	}

	sub extract_wcstats {
	my $STUFF = shift;
	my $wcdate;
	my $min;
	my $max;
	my $wcavg;
	my $wcstd;
	if ($STUFF =~ m{<wcdate>(.*?)</wcdate>}s) {
	$wcdate = $1;
	} else {
	$wcdate = '';
	}
	if ($STUFF =~ m{<min>(.*?)</min>}s) {
	$min = $1;
	} else {
	$min = '';
	}
	if ($STUFF =~ m{<max>(.*?)</max>}s) {
	$max = $1;
	} else {
	$max = '';
	}
	if ($STUFF =~ m{<average>(.*?)</average>}s) {
	$wcavg = $1;
	} else {
	$wcavg = '';
	}
	if ($STUFF =~ m{<stddev>(.*?)</stddev>}s) {
	$wcstd = $1;
	} else {
	$wcstd = '';
	}
	return ($wcdate,$min,$max,$wcavg,$wcstd);
	}


	sub extract_field {
	my $STUFF = shift;
	my $field = shift;
	my $value;
	if ($STUFF =~ m{<$field>(.*?)</$field>}s) {
	$value = $1;
	} else {
	$value = '';
	}
	return ($value);
	}