import requests, json, csv, urllib, argparse
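## Note: this is Python 2 code (print statements, byte-string CSV writing,
## urllib.quote_plus at the top level of urllib).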
## This is what allows the user to pass the initial Wikipedia category as an
## argument, such as '--c "History of the United States"'.
parser = argparse.ArgumentParser()
parser.add_argument('--c', dest='cat', metavar='CAT', action='store',
                    required=True)
args = parser.parse_args()
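## Example invocation (assuming the script is saved as wiki_cat_views.py):
##   python wiki_cat_views.py --c "History of the United States"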
## The script will create two CSVs: one with the articles and their page
## views, and another holding a running list of subcategories, so that the
## script can keep working down the list and take each new category in turn.
## Here, the names of the CSVs are generated from the initial category given
## by the user, and a set is created, seeded with that category, to ensure
## duplicates are not added.
category_list = set()
category = 'Category:' + str(args.cat)
category_list.add(category)
catscsv = str(args.cat).replace(' ', '_') + ' - cats.csv'
viewscsv = str(args.cat).replace(' ', '_') + ' - views.csv'
with open(catscsv, 'w') as log:
    writelog = csv.writer(log, delimiter='\t', quoting=csv.QUOTE_ALL)
    writelog.writerow((category.encode('utf-8'),))
## Re-read the category CSV on every pass, adding any new categories to the
## running list; the x-th row is the category to query next.
x = 0
while x < len(category_list):
    with open(catscsv, 'r') as log:
        readlog = list(csv.reader(log, delimiter='\t', quoting=csv.QUOTE_ALL))
        category = readlog[x][0]
        for row in readlog:
            if row[0] not in category_list:
                category_list.add(row[0])
    x = x + 1
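    ## A single categorymembers query returns at most 500 results (the
    ## cmlimit maximum for ordinary clients). Categories with more members
    ## would need cmcontinue paging, which this script does not attempt.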
    geturl = ('https://en.wikipedia.org/w/api.php?action=query&list=categorymembers'
              '&cmtitle=' + urllib.quote(category) + '&cmlimit=500&format=json')
    print geturl
    parsed = json.loads(requests.get(geturl).text)
    for member in parsed['query']['categorymembers']:
        ns = member['ns']
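        ## ns 14 is MediaWiki's Category: namespace; members in any other
        ## namespace are treated as articles whose views should be logged.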
        if ns != 14:
            print member['title']
            title = member['title'].replace(' ', '_')
            print urllib.quote_plus(title.encode('utf-8'))
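            ## The Wikimedia REST pageviews endpoint takes the path segments
            ## /{project}/{access}/{agent}/{article}/{granularity}/{start}/{end};
            ## here: daily views by ordinary users (agent 'user', i.e. no
            ## bots/spiders) on en.wikipedia over the hard-coded range
            ## 20151208-20151215.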
            viewurl = ('https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/'
                       'en.wikipedia/all-access/user/' + urllib.quote_plus(title.encode('utf-8')) +
                       '/daily/20151208/20151215')
            viewed = json.loads(requests.get(viewurl).text)
            try:
                ## Sum whatever daily counts the API returned for the range.
                total_views = sum(item['views'] for item in viewed['items'])
            except KeyError:
                ## No 'items' key in the response: no pageview data exists.
                total_views = 'not found'
            print total_views
            with open(viewscsv, 'a') as log:
                writelog = csv.writer(log, delimiter='\t', quoting=csv.QUOTE_ALL)
                writelog.writerow((title.encode('utf-8'), total_views))
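        ## Subcategories (ns 14) are appended to the category CSV and the set,
        ## so a later pass of the outer loop will crawl them in turn.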
        if ns == 14:
            with open(catscsv, 'a') as log:
                writelog = csv.writer(log, delimiter='\t', quoting=csv.QUOTE_ALL)
                if member['title'].encode('utf-8') not in category_list:
                    writelog.writerow((member['title'].encode('utf-8'),))
                    category_list.add(member['title'].encode('utf-8'))
            print 'CAT ' + member['title']