OzTamir · August 29, 2015 14:08 · OzTamir · Nov 4, 2014
diff --git a/wikigame.py b/wikigame.py
 import urllib2
 import sys

 # Used for formating wiki links
 wiki_endpoint = "http://en.wikipedia.org/wiki/"
 # Count how many links were visited on the way
 counter = 0
 # Content HTML
 c_html = '<div id="mw-content-text" lang="en" dir="ltr" class="mw-content-ltr">'

 def remove_parentheses(data):
 	''' Return the first article link of a paragraph that is not inside parentheses.
 	`data` is the HTML of a single paragraph. It is split on '<a href="', so every
 	piece after the first one starts with a link target. The value returned is the
 	whole piece of the chosen link (target first, e.g. '/wiki/Title" title=...').
 	A link is accepted only if:
 	  * the text in front of it has as many '(' as ')',
 	  * its target starts with '/wiki/', and
 	  * its target does not contain 'File:'.
 	Raises IndexError if the paragraph has no such link. '''
 	pieces = data.split('<a href="')
 	# Number of '(' still waiting for their ')' in the text read so far
 	depth = pieces[0].count('(') - pieces[0].count(')')
 	for piece in pieces[1:]:
 		# Look only at the link target, not at the text that follows it
 		target = piece.split('"', 1)[0]
 		if depth == 0 and target.startswith('/wiki/') and 'File:' not in target:
 			return piece
 		# Parentheses opened or closed in this piece affect the links after it
 		depth += piece.count('(') - piece.count(')')
 	# Same exception type as a failed "[...][0]" lookup, with a readable message
 	raise IndexError('No article link found outside parentheses')


 def get_next(url):
 	''' Get the title of the first article link in the page at `url`.
 	The page is downloaded, everything up to the opening tag of the article
 	body (c_html) is dropped, leading tables are skipped, and the first
 	paragraph that contains a link and starts with plain text, '<b', '<i' or
 	'<a' is handed to remove_parentheses().
 	Returns the title part of the link target: the text after '/wiki/',
 	without a '#section' suffix and with percent-escapes decoded, so that
 	next_url() can encode it again without doubling the escapes.
 	Raises IndexError if the article body, a suitable paragraph or a suitable
 	link is not found. Errors raised by urllib2.urlopen() are not caught here. '''
 	# Get the HTML, closing the connection even if reading fails
 	response = urllib2.urlopen(url)
 	try:
 		data = response.read()
 	finally:
 		response.close()
 	# Keep only what follows the opening tag of the article body
 	data = data.partition(c_html)[2]
 	if not data:
 		raise IndexError('Article content not found in {0}'.format(url))
 	# Skip past the first closing table tag when a taxonomy infobox is present,
 	# keeping ALL of the text after it (not just the part up to the next table)
 	if '<table class="infobox biota"' in data:
 		data = data.partition('</table>')[2]
 	# Drop any table the remaining text starts with
 	while data.startswith('<table'):
 		data = data.partition('</table>')[2]
 	# Find the first paragraph with a link in it that starts like running text
 	paragraph = ''
 	for chunk in data.split('<p>')[1:]:
 		candidate = chunk.split('</p>')[0]
 		if '<a href' not in candidate:
 			continue
 		if candidate[:2] in ('<b', '<i', '<a') or candidate[0] != '<':
 			paragraph = candidate
 			break
 	if not paragraph:
 		raise IndexError('No paragraph with a link found in {0}'.format(url))
 	# Get the next article's title: the target always starts with '/wiki/', and
 	# cutting that prefix (instead of splitting on '/') keeps titles like 'AC/DC'
 	target = remove_parentheses(paragraph).split('"')[0]
 	title = target[len('/wiki/'):].split('#')[0]
 	return urllib2.unquote(title)

 def next_url(title):
 	''' Build the address of the article called `title`.
 	The title is percent-encoded, every encoded space ('%20') is turned into
 	an underscore, and the result is appended to wiki_endpoint. '''
 	encoded = urllib2.quote(title)
 	page = encoded.replace('%20', '_')
 	return wiki_endpoint + page

 def nextPage(name):
 	''' Follow first links from the article `name` until "Philosophy" is reached.
 	Each article fetched adds one to the global `counter`, and the title of
 	every article reached is printed. Returns None.
 	Raises RuntimeError if an article comes up a second time, because the
 	chain would then repeat forever. Errors from next_url() and get_next()
 	are passed on to the caller. '''
 	global counter
 	# Titles already fetched, stored with underscores so both spellings match
 	seen = set()
 	# A loop instead of recursion, so long chains cannot hit the recursion limit
 	while name != "Philosophy":
 		key = name.replace(' ', '_')
 		if key in seen:
 			raise RuntimeError('Loop detected at "{0}".'.format(name.replace('_', ' ')))
 		seen.add(key)
 		counter += 1
 		# Format the title to a URL, then get the title of the next article
 		name = get_next(next_url(name))
 		# Log to the user
 		print("{0} reached.".format(name.replace('_', ' ')))


 def main():
 	''' Get the first article's title from the command line and run the walk for it.
 	All command line arguments are joined with spaces to form the title. If
 	no title was given, a usage line is printed and nothing is fetched.
 	Ctrl-C and any other Exception raised during the walk are reported on
 	standard output instead of being raised. Returns None. '''
 	title = ' '.join(sys.argv[1:])
 	# Without a title there is no article to start from
 	if not title.strip():
 		print('Usage: {0} <article title>'.format(sys.argv[0]))
 		return
 	try:
 		nextPage(title)
 		print('Number of articles visited: %s' % str(counter))
 	except KeyboardInterrupt:
 		print('User has stopped.')
 	except Exception as e:
 		print('ERROR: {0}'.format(str(e)))

 if __name__ == '__main__':
 	main()
	import urllib2
	import sys

	# Used for formating wiki links
	wiki_endpoint = "http://en.wikipedia.org/wiki/"
	# Count how many links were visited on the way
	counter = 0
	# Content HTML
	c_html = '<div id="mw-content-text" lang="en" dir="ltr" class="mw-content-ltr">'

	def remove_parentheses(data):
		''' Return the first article link of a paragraph that is not inside parentheses.
		`data` is the HTML of a single paragraph. It is split on '<a href="', so every
		piece after the first one starts with a link target. The value returned is the
		whole piece of the chosen link (target first, e.g. '/wiki/Title" title=...').
		A link is accepted only if:
		  * the text in front of it has as many '(' as ')',
		  * its target starts with '/wiki/', and
		  * its target does not contain 'File:'.
		Raises IndexError if the paragraph has no such link. '''
		pieces = data.split('<a href="')
		# Number of '(' still waiting for their ')' in the text read so far
		depth = pieces[0].count('(') - pieces[0].count(')')
		for piece in pieces[1:]:
			# Look only at the link target, not at the text that follows it
			target = piece.split('"', 1)[0]
			if depth == 0 and target.startswith('/wiki/') and 'File:' not in target:
				return piece
			# Parentheses opened or closed in this piece affect the links after it
			depth += piece.count('(') - piece.count(')')
		# Same exception type as a failed "[...][0]" lookup, with a readable message
		raise IndexError('No article link found outside parentheses')


	def get_next(url):
		''' Get the title of the first article link in the page at `url`.
		The page is downloaded, everything up to the opening tag of the article
		body (c_html) is dropped, leading tables are skipped, and the first
		paragraph that contains a link and starts with plain text, '<b', '<i' or
		'<a' is handed to remove_parentheses().
		Returns the title part of the link target: the text after '/wiki/',
		without a '#section' suffix and with percent-escapes decoded, so that
		next_url() can encode it again without doubling the escapes.
		Raises IndexError if the article body, a suitable paragraph or a suitable
		link is not found. Errors raised by urllib2.urlopen() are not caught here. '''
		# Get the HTML, closing the connection even if reading fails
		response = urllib2.urlopen(url)
		try:
			data = response.read()
		finally:
			response.close()
		# Keep only what follows the opening tag of the article body
		data = data.partition(c_html)[2]
		if not data:
			raise IndexError('Article content not found in {0}'.format(url))
		# Skip past the first closing table tag when a taxonomy infobox is present,
		# keeping ALL of the text after it (not just the part up to the next table)
		if '<table class="infobox biota"' in data:
			data = data.partition('</table>')[2]
		# Drop any table the remaining text starts with
		while data.startswith('<table'):
			data = data.partition('</table>')[2]
		# Find the first paragraph with a link in it that starts like running text
		paragraph = ''
		for chunk in data.split('<p>')[1:]:
			candidate = chunk.split('</p>')[0]
			if '<a href' not in candidate:
				continue
			if candidate[:2] in ('<b', '<i', '<a') or candidate[0] != '<':
				paragraph = candidate
				break
		if not paragraph:
			raise IndexError('No paragraph with a link found in {0}'.format(url))
		# Get the next article's title: the target always starts with '/wiki/', and
		# cutting that prefix (instead of splitting on '/') keeps titles like 'AC/DC'
		target = remove_parentheses(paragraph).split('"')[0]
		title = target[len('/wiki/'):].split('#')[0]
		return urllib2.unquote(title)

	def next_url(title):
		''' Build the address of the article called `title`.
		The title is percent-encoded, every encoded space ('%20') is turned into
		an underscore, and the result is appended to wiki_endpoint. '''
		encoded = urllib2.quote(title)
		page = encoded.replace('%20', '_')
		return wiki_endpoint + page

	def nextPage(name):
		''' Follow first links from the article `name` until "Philosophy" is reached.
		Each article fetched adds one to the global `counter`, and the title of
		every article reached is printed. Returns None.
		Raises RuntimeError if an article comes up a second time, because the
		chain would then repeat forever. Errors from next_url() and get_next()
		are passed on to the caller. '''
		global counter
		# Titles already fetched, stored with underscores so both spellings match
		seen = set()
		# A loop instead of recursion, so long chains cannot hit the recursion limit
		while name != "Philosophy":
			key = name.replace(' ', '_')
			if key in seen:
				raise RuntimeError('Loop detected at "{0}".'.format(name.replace('_', ' ')))
			seen.add(key)
			counter += 1
			# Format the title to a URL, then get the title of the next article
			name = get_next(next_url(name))
			# Log to the user
			print("{0} reached.".format(name.replace('_', ' ')))


	def main():
		''' Get the first article's title from the command line and run the walk for it.
		All command line arguments are joined with spaces to form the title. If
		no title was given, a usage line is printed and nothing is fetched.
		Ctrl-C and any other Exception raised during the walk are reported on
		standard output instead of being raised. Returns None. '''
		title = ' '.join(sys.argv[1:])
		# Without a title there is no article to start from
		if not title.strip():
			print('Usage: {0} <article title>'.format(sys.argv[0]))
			return
		try:
			nextPage(title)
			print('Number of articles visited: %s' % str(counter))
		except KeyboardInterrupt:
			print('User has stopped.')
		except Exception as e:
			print('ERROR: {0}'.format(str(e)))

	if __name__ == '__main__':
		main()
No results found