Skip to content

Instantly share code, notes, and snippets.

@luelista
Created May 27, 2011 21:36
Show Gist options
  • Save luelista/996234 to your computer and use it in GitHub Desktop.
Save luelista/996234 to your computer and use it in GitHub Desktop.
Wikipedia trivia: if you take any article, click on the first link in the article text not in parentheses or italics, and then repeat, you will eventually end up at "Philosophy".
#!/usr/bin/perl
use strict;
use warnings;
use Switch;
use LWP::Simple qw($ua);
# Override the User-Agent header sent by LWP::Simple's shared $ua object,
# because Wikipedia.org blocks the default LWP::Simple User-Agent string.
$ua->agent("WikiBot/0.1");
# Name of the article the crawl starts from: the first command-line argument
# (undef when the script is run without arguments).
my $startWord = $ARGV[0];
# getFirstLink($pageName)
#
# Downloads the current wikitext of the Wikipedia article $pageName through
# Special:Export and returns the title of the first wiki link in the article
# text that is not inside parentheses, a {{template}}/{| table |} or another
# [[link]] (e.g. an image caption).
#
# Argument: name of a Wikipedia article.
# Returns:  name of another Wikipedia article (section anchor removed), or 0
#           after printing an error message when the page could not be
#           fetched, has no <text> element, or contains no usable link.
sub getFirstLink {
    my $pageName = shift;

    my $url = "http://en.wikipedia.org/w/index.php?title=Special:Export&pages="
            . _encodePageName($pageName)
            . "&offset=1&limit=1&action=submit";
    my $str = LWP::Simple::get($url);

    # get() returns undef on any HTTP failure, so test for that before matching.
    if (defined $str && $str =~ m/<text[^>]*>(.*)<\/text>/s) {
        my $content = $1;
        my $link    = _firstLinkInWikitext($content);
        return $link if defined $link;
    }

    print "ERROR on '$pageName': unable to find any links on this page, or the keyword does not exist!\n\n";
    return 0;
}

# Percent-encodes a page title so that it can be embedded in a URL query
# string without spaces, '&', '#' etc. altering the request.
sub _encodePageName {
    my ($title) = @_;
    $title = '' unless defined $title;
    # Character strings are turned into UTF-8 bytes first so that every byte
    # can be escaped individually.
    utf8::encode($title) if utf8::is_utf8($title);
    $title =~ s/([^A-Za-z0-9_.~-])/sprintf('%%%02X', ord($1))/ge;
    return $title;
}

# Scans wikitext (as found inside the <text> element of an XML export) and
# returns the target of the first top-level [[link]], or undef if there is
# none. Nesting depth of (), {} and [[ ]] is tracked so that links inside
# parentheses, boxes and image captions are skipped.
sub _firstLinkInWikitext {
    my ($content) = @_;
    my ($braces, $brackets, $parens) = (0, 0, 0);

    while ($content =~ m/(\[\[|\]\]|\(|\)|\{|\})/g) {
        my $token = $1;

        # Closing tokens never push a counter below zero; otherwise a single
        # stray ')' or '}' would hide every link that follows it.
        if    ($token eq '(')  { $parens++; }
        elsif ($token eq ')')  { $parens--   if $parens   > 0; }
        elsif ($token eq '{')  { $braces++; }
        elsif ($token eq '}')  { $braces--   if $braces   > 0; }
        elsif ($token eq ']]') { $brackets-- if $brackets > 0; }
        else {
            # $token is '[[' and pos($content) points just behind it.
            if ($parens == 0 && $braces == 0 && $brackets == 0) {
                my $pos = pos($content);

                # Namespace prefixes are matched regardless of capitalisation.
                if (substr($content, $pos, 6) !~ m/\A(?:File|Image):/i) {
                    # \G pins the match to this very link; /c keeps pos()
                    # intact on failure so the outer scan simply continues.
                    pos($content) = $pos;
                    if ($content =~ m/\G([^\[\]\|]+)(?:\|[^\]]*)?\]\]/gc) {
                        my $target = $1;

                        # The export is XML, so undo its entity escaping
                        # ('&amp;' last, to avoid double decoding).
                        $target =~ s/&lt;/</g;
                        $target =~ s/&gt;/>/g;
                        $target =~ s/&quot;/"/g;
                        $target =~ s/&amp;/&/g;

                        # A section anchor is not part of the article name.
                        $target =~ s/#.*\z//s;
                        $target =~ s/\A\s+//;
                        $target =~ s/\s+\z//;

                        return $target if length $target;

                        # Same-page link such as [[#Section]]: the complete
                        # link has been consumed, carry on behind it.
                        next;
                    }
                }
            }
            $brackets++;
        }
    }

    return;
}
# Main crawl: starting at $startWord, repeatedly follow the first link of the
# current article and print every article visited. The crawl stops when
# getFirstLink() reports a failure (false value) or when an article shows up
# a second time, i.e. the chain has entered a loop.
die "Usage: $0 <wikipedia article name>\n"
    unless defined $startWord && length $startWord;

my $myWord = $startWord;
my @words  = ();    # articles in the order they were visited
my %seen;           # same articles as hash keys, for constant-time loop detection
while ($myWord) {
    # filling word list for loop detection
    push @words, $myWord;
    $seen{$myWord} = 1;
    # printing word
    print "--> $myWord\n";
    $myWord = getFirstLink($myWord);
    # getFirstLink() already printed an error; do not compare a failed result.
    last unless $myWord;
    if ($seen{$myWord}) {
        # printing last word and exiting when loop was detected
        print "--> $myWord\n";
        print "##### loop detected!\n";
        push @words, $myWord;
        last;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment