w0ng · October 7, 2014 05:14
diff --git a/wiki.py b/wiki.py
 #!/usr/bin/env python

 import urllib2 
 import json
 import re
 # 'Pretty print'. Outputs stuff nicer than normal print. Good for lists
 from pprint import pprint

 # Fetch the wikitext of Wikipedia's "List of national capitals in
 # alphabetical order" page through the MediaWiki "parse" API (JSON output).
 url = 'https://en.wikipedia.org/w/api.php?action=parse&page=List_of_national_capitals_in_alphabetical_order&prop=wikitext&format=json'
 response = urllib2.urlopen(url)
 try:
     # Bind the decoded document to its own name so the `json` module is not
     # shadowed and stays usable after this point.
     payload = json.load(response)
 finally:
     # Release the HTTP connection even if decoding fails.
     response.close()
 data = payload['parse']
 # Page title as reported by the API; not used further in this script.
 title = data['title']
 text = data['wikitext']['*']
 text_list = text.split('\n')

 # We want lines that begin with "| [["
 # For example: "| [[Warsaw]]..."
 relevant_lines = [line for line in text_list if re.match(r'^\|\ \[\[', line)]

 # Build a dictionary mapping country -> city.
 # City is the text inside the first [[...]] of each line.
 # Country is the text inside the first {{...}} of each line.
 country_regex = r'{{(.*?)}}'
 city_regex = r'\[\[(.*?)\]\]'

 capital = {}
 for line in relevant_lines:
     country_match = re.search(country_regex, line)
     city_match = re.search(city_regex, line)
     # Skip lines missing either part. Reusing the value left over from a
     # previous line would pair a city with the wrong country (or fail with
     # NameError when the very first line lacks a match).
     if not (country_match and city_match):
         continue
     country = country_match.group(1)
     city = city_match.group(1)
     # Empty captures such as "{{}}" or "[[]]" carry no usable name.
     if country and city:
         capital[country] = city

 # Print the pairs ordered by country. sorted() materializes the pairs, so
 # items() gives the same result here as iteritems().
 for country_name, city_name in sorted(capital.items()):
     print(country_name + ": " + city_name)
	#!/usr/bin/env python

	import urllib2
	import json
	import re
	# 'Pretty print'. Outputs stuff nicer than normal print. Good for lists
	from pprint import pprint

	# Fetch the wikitext of Wikipedia's "List of national capitals in
	# alphabetical order" page through the MediaWiki "parse" API (JSON output).
	url = 'https://en.wikipedia.org/w/api.php?action=parse&page=List_of_national_capitals_in_alphabetical_order&prop=wikitext&format=json'
	response = urllib2.urlopen(url)
	try:
	    # Bind the decoded document to its own name so the `json` module is not
	    # shadowed and stays usable after this point.
	    payload = json.load(response)
	finally:
	    # Release the HTTP connection even if decoding fails.
	    response.close()
	data = payload['parse']
	# Page title as reported by the API; not used further in this script.
	title = data['title']
	text = data['wikitext']['*']
	text_list = text.split('\n')

	# We want lines that begin with "| [["
	# For example: "| [[Warsaw]]..."
	relevant_lines = [line for line in text_list if re.match(r'^\|\ \[\[', line)]

	# Build a dictionary mapping country -> city.
	# City is the text inside the first [[...]] of each line.
	# Country is the text inside the first {{...}} of each line.
	country_regex = r'{{(.*?)}}'
	city_regex = r'\[\[(.*?)\]\]'

	capital = {}
	for line in relevant_lines:
	    country_match = re.search(country_regex, line)
	    city_match = re.search(city_regex, line)
	    # Skip lines missing either part. Reusing the value left over from a
	    # previous line would pair a city with the wrong country (or fail with
	    # NameError when the very first line lacks a match).
	    if not (country_match and city_match):
	        continue
	    country = country_match.group(1)
	    city = city_match.group(1)
	    # Empty captures such as "{{}}" or "[[]]" carry no usable name.
	    if country and city:
	        capital[country] = city

	# Print the pairs ordered by country. sorted() materializes the pairs, so
	# items() gives the same result here as iteritems().
	for country_name, city_name in sorted(capital.items()):
	    print(country_name + ": " + city_name)
No results found