Skip to content

Instantly share code, notes, and snippets.

@DamianZaremba
Created August 22, 2011 15:55
Show Gist options
  • Save DamianZaremba/1162735 to your computer and use it in GitHub Desktop.
Save DamianZaremba/1162735 to your computer and use it in GitHub Desktop.
Cluenet graphviz mapping script C=
#!/usr/bin/env perl
# We want to know if stuff is going to explode in our face
use warnings;
use strict;
# Awesome logging
use Log::Log4perl;
# LWP is an awesome HTTP client
use LWP::UserAgent;
use HTTP::Request;
# For flock
use Fcntl qw(:flock);
# For processing the mediawiki data
use XML::Simple;
# Good for debugging
use Data::Dumper;
=head1 NAME
update.pl - A script to rebuild the cluemap
=head1 OVERVIEW
This script does the following (in order):
1) Clears existing graphviz files
2) Gets a list of all pages in the ClueMap namespace
3) Downloads every page returned from step 2
4) Calls graphviz to output the map
The script should be run by cron.
It uses flock to ensure it doesn't run twice.
=head1 AUTHOR
Damian Zaremba <[email protected]>
=head1 CHANGE LOG
* v0.1 - 16 Aug 2011
- Initial version
=head1 LICENSE
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
=head1 CONFIG
Hash of our config values
=head2 Required options
wiki_url - Base URL of the wiki (with an ending slash/file)
dump_dir - Directory to stick everything in
=cut
# Script-wide settings, read by the subs below through the file-scoped $config.
my $config = {
    # Wiki API endpoint; get_wiki_result appends "?format=xml&<request>" to it.
    wiki_url => "http://cluenet.org/cluewiki/api.php",
    # Directory that holds the lock file, the downloaded .gv files and the
    # generated images.
    dump_dir => "/home/damian/public_html/cluemap",
};

# Declared with "our" so the version is a real package variable
# ($main::VERSION) rather than a lexical hidden from version tooling.
# It is also sent as part of the HTTP User-Agent by get_wiki_result.
our $VERSION = "0.1";

# Stuff we need everywhere
# $logger is the Log::Log4perl logger; it is assigned in run() before any
# other sub is called.
our($logger);
=head1 METHODS
=head2 run

Sets up everything and kicks off the process: initialises the logger, makes
sure the dump directory exists, takes an exclusive non-blocking lock on
C<rebuild.flock>, clears the old graphviz files, downloads every submap,
rebuilds the images and finally releases the lock.

=head3 Arguments

Takes no arguments.

=head3 Returns

Returns nothing. Exits with status 1 if the logger cannot be created and
with status 4 if the dump directory cannot be created, the lock file cannot
be opened, or another run already holds the lock.

=cut

sub run {
    # Setup the logger object
    Log::Log4perl->easy_init();
    $logger = Log::Log4perl->get_logger();

    # Error if we couldn't initialize the logger object
    if ( !defined $logger ) {
        print "!!! Could not init logger !!!\n";
        exit(1);
    }

    my $dump_dir  = $config->{'dump_dir'};
    my $lock_path = $dump_dir . '/rebuild.flock';

    # The lock file lives inside the dump dir, so the directory has to exist
    # before we try to open the lock file. Only create it when it is missing.
    if ( !-d $dump_dir ) {
        $logger->info("Creating " . $dump_dir);
        if ( !mkdir($dump_dir) ) {
            $logger->fatal("Cannot create " . $dump_dir . ": $!");
            exit(4);
        }
    }

    # Try and get a fh on the lock file
    my $lock_fh;
    if ( !open($lock_fh, '>', $lock_path) ) {
        $logger->fatal("Cannot open file handler on " . $lock_path . ": $!");
        exit(4);
    }

    # Try and get a lock on the lock file fh. LOCK_NB makes flock fail
    # straight away when another process holds the lock; without it the call
    # would block and the "already running" branch could never be reached.
    if ( !flock($lock_fh, LOCK_EX | LOCK_NB) ) {
        $logger->fatal("Process appears to be running already");
        exit(4);
    }

    # Clear the current maps
    $logger->info("Starting clear_cluemaps");
    clear_cluemaps();

    # Get the submaps
    $logger->info("Starting get_submaps");
    my $submaps = get_submaps();

    # Download the submaps
    for my $submap ( @{ $submaps } ) {
        $logger->info("Starting download_submap for " . $submap);
        download_submap($submap);
    }

    # Rebuild the map images
    $logger->info("Starting rebuild_cluemaps");
    rebuild_cluemaps();

    # Unlock and close the lock FH. A failure here is only logged: the lock
    # is dropped anyway once the handle is closed.
    if ( !flock($lock_fh, LOCK_UN) ) {
        $logger->error("Could not unlock rebuild.flock file: $!");
    }
    close($lock_fh);

    # NOTE(review): removing the lock file while other processes may already
    # have it open can let two runs overlap; kept because the original
    # script cleans the file up after itself.
    unlink($lock_path);

    $logger->info("Done!");
    return;
}
=head2 get_wiki_result

Makes a call to the wiki API and parses the XML that comes back.

=head3 Arguments

request - Query string to send (everything after C<format=xml&>). The caller
is responsible for escaping any values it puts in there.

=head3 Returns

Hashref of the parsed XML response. An empty hashref is returned (and an
error is logged) when the HTTP request fails, the body cannot be decoded, or
the XML cannot be parsed into a hash.

=cut

sub get_wiki_result {
    my $request = shift;
    my $return_data = {};

    # user_agent to make the request with
    my $user_agent = LWP::UserAgent->new(
        timeout => 5,
        agent   => "ClueMapRbuild/v" . $VERSION,
    );

    # URL to request
    my $url = $config->{"wiki_url"} . "?format=xml&" . $request;

    # Make the request and store the response object
    my $response = $user_agent->request( HTTP::Request->new( GET => $url ) );

    # Check if we didn't get a successful response back
    if ( !$response->is_success ) {
        $logger->error("Could not get " . $url . ", server returned: " . $response->status_line);
        return $return_data;
    }
    $logger->debug("Got the data for " . $url);

    # XMLin(undef) would go looking for a default XML file on disk instead
    # of failing, so refuse to parse a body that could not be decoded.
    my $content = $response->decoded_content;
    if ( !defined $content ) {
        $logger->error("Could not process the api data: response body could not be decoded");
        return $return_data;
    }

    # Try and load the returned XML. The block returns 1 on success so a
    # failure is detected from eval's own result rather than from $@ alone.
    my $parsed;
    my $parsed_ok = eval {
        $parsed = XML::Simple->new->XMLin($content);
        1;
    };

    if ( !$parsed_ok ) {
        # If the XML was bad then error
        $logger->error("Could not process the api data: " . ( $@ || 'unknown error' ));
    }
    elsif ( ref($parsed) eq 'HASH' ) {
        $return_data = $parsed;
    }
    else {
        # Callers dereference the result as a hash, so never hand them
        # anything else.
        $logger->error("Could not process the api data: unexpected document structure");
    }

    return $return_data;
}
=head2 clear_cluemaps

Removes every graphviz source file (names ending in C<.gv>) from the dump
dir. Other files in the directory are left alone.

=head3 Arguments

Takes no arguments

=head3 Returns

Returns nothing. Problems opening the directory or removing a file are
logged and otherwise ignored.

=cut

sub clear_cluemaps {
    my $dump_dir = $config->{'dump_dir'};

    my $dh;
    if ( !opendir($dh, $dump_dir) ) {
        $logger->error("Could not open " . $dump_dir . " for reading: $!");
        return;
    }

    # Collect the names first so the directory handle can be released with
    # closedir (plain close does not work on directory handles) before any
    # entry is removed.
    my @graph_files = grep { /\.gv$/ } readdir($dh);
    closedir($dh);

    for my $file (@graph_files) {
        my $path = $dump_dir . '/' . $file;
        if ( !unlink($path) ) {
            $logger->error("Could not remove " . $path . ": $!");
        }
    }

    return;
}
=head2 get_submaps

Gets a list of the submaps specified on the wiki by asking the API for all
pages in namespace 118 (at most 500, the C<aplimit> sent with the request).

=head3 Arguments

Takes no arguments.

=head3 Returns

Returns an arrayref of page titles, without duplicates and in the order the
wiki returned them. The arrayref is empty when the wiki call failed or
returned no pages.

=cut

sub get_submaps {
    my $maps = [];
    my $api_request = "action=query&list=allpages&apnamespace=118&aplimit=500";
    my $api_data = get_wiki_result($api_request);

    # Walk down to query -> allpages -> p without assuming any level exists;
    # get_wiki_result hands back an empty hashref when the call failed.
    my $pages = $api_data;
    for my $key (qw(query allpages p)) {
        $pages = ref($pages) eq 'HASH' ? $pages->{$key} : undef;
    }
    return $maps if !defined $pages;

    # With XML::Simple's default options a single <p> element arrives as one
    # hashref while several arrive as an arrayref, so normalise to a list.
    my @page_list = ref($pages) eq 'ARRAY' ? @{$pages} : ($pages);

    # Exact-match de-duplication. The title is used as a hash key rather
    # than as a regex, so characters such as "." or "(" in a title are
    # harmless.
    my %seen;
    for my $page (@page_list) {
        next if ref($page) ne 'HASH' || !defined $page->{'title'};
        my $title = $page->{'title'};
        next if $seen{$title}++;
        push @{$maps}, $title;
    }

    return $maps;
}
=head2 download_submap

Downloads a submap's content from the wiki to disk. The file name is the
page title with the C<ClueMap:> prefix and a trailing C<.gv> removed, every
dot, space and slash turned into an underscore, lower-cased, plus C<.gv>.
The content is appended to that file inside the dump dir.

=head3 Arguments

map - Map name to download (as returned from get_submaps)

=head3 Returns

Returns nothing. A missing revision or a file that cannot be written is
logged and the map is skipped.

=cut

sub download_submap {
    my $map = shift;

    # Percent-encode the title (as UTF-8 bytes) so characters such as "&",
    # "#", "+" or "%" cannot corrupt the query string.
    my $encoded_title = $map;
    utf8::encode($encoded_title);
    $encoded_title =~ s/([^A-Za-z0-9_.~-])/sprintf('%%%02X', ord($1))/ge;

    my $api_request = "action=query&prop=revisions&rvprop=content&titles=" . $encoded_title;
    my $api_data = get_wiki_result($api_request);

    # Walk down to the page and its revision without assuming any level
    # exists or is a hash.
    my $page = $api_data;
    for my $key (qw(query pages page)) {
        $page = ref($page) eq 'HASH' ? $page->{$key} : undef;
    }
    my $revisions = ref($page)      eq 'HASH' ? $page->{'revisions'} : undef;
    my $revision  = ref($revisions) eq 'HASH' ? $revisions->{'rev'}  : undef;

    # The revision is normally a hash carrying the text under "content".
    # XML::Simple collapses an element that has only text into a plain
    # string, so accept that form as well.
    my $map_content;
    if ( ref($revision) eq 'HASH' ) {
        $map_content = $revision->{'content'};
    }
    elsif ( defined $revision && !ref($revision) ) {
        $map_content = $revision;
    }

    if ( ref($page) ne 'HASH' || !defined $page->{'title'} || !defined $map_content ) {
        $logger->error("No revisions found for " . $map);
        return;
    }

    # Turn the page title into a safe, flat file name.
    my $name = $page->{'title'};
    $name =~ s/^ClueMap://;
    $name =~ s/\.gv$//;
    $name =~ s/[\. \/]/_/g;
    my $path = $config->{'dump_dir'} . '/' . lc( $name ) . ".gv";

    # Append mode is kept from the original script: two titles that
    # normalise to the same file name end up concatenated. The encoding
    # layer writes non-ASCII text consistently as UTF-8.
    my $fh;
    if ( !open($fh, '>>:encoding(UTF-8)', $path) ) {
        $logger->error("Could not open " . $path . " for writing: $!");
        return;
    }
    print {$fh} $map_content;

    # Buffered write errors only show up at close time.
    if ( !close($fh) ) {
        $logger->error("Could not write " . $path . ": $!");
    }

    return;
}
=head2 rebuild_cluemaps

Calls graphviz and rebuilds the cluemap images. C<cluenet.gv> in the dump
dir is first run through C<cpp> to produce C<cluenet-compiled.gv>, which is
then rendered by C<dot> to C<cluenet.svg> and C<cluenet.png>.

=head3 Arguments

Takes no arguments.

=head3 Returns

Returns nothing. Nothing is rendered when C<cluenet.gv> is missing or the
pre-processor fails; a failure to render one image format is logged and the
other format is still attempted.

=cut

sub rebuild_cluemaps {
    my $path  = $config->{'dump_dir'} . '/cluenet.gv';
    my $cpath = $config->{'dump_dir'} . '/cluenet-compiled.gv';

    if ( !-f $path ) {
        $logger->fatal("cluenet.gv appears to be missing!");
        return;
    }

    $logger->info("Running the pre-processor");
    my $cpp_failure = _run_command('cpp', $path, '-o', $cpath);
    if ( defined $cpp_failure ) {
        $logger->fatal("Pre-processor failed! (" . $cpp_failure . ")");
        return;
    }

    # Render each output format from the pre-processed file.
    for my $format (qw(svg png)) {
        $logger->info("Dumping " . uc($format) . " image");
        my $opath = $config->{'dump_dir'} . '/cluenet.' . $format;
        my $dot_failure = _run_command('dot', $cpath, '-T' . $format, '-o', $opath);
        if ( defined $dot_failure ) {
            $logger->error("Could not create " . $format . ": " . $dot_failure);
        }
    }

    return;
}

# Runs an external program with the given argument list, bypassing the shell
# so that paths never need quoting. Returns undef on success, otherwise a
# short description of what went wrong.
sub _run_command {
    my @command = @_;

    # The block form keeps the shell out even for a one-element list.
    system { $command[0] } @command;

    return if $? == 0;
    return "failed to execute: $!" if $? == -1;
    return sprintf('killed by signal %d', $? & 127) if $? & 127;
    return sprintf('exited with status %d', $? >> 8);
}
# Run! Entry point: start the rebuild as soon as the script is executed.
run();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment