boraseoksoon · November 19, 2020 13:54
diff --git a/StackOverflow.py b/StackOverflow.py
 #Stack Overflow scraper script

 #imports necessary modules

 from urllib2 import urlopen
 from BeautifulSoup import BeautifulSoup
 import time

 username = raw_input("Username: ")

 #defines url, retrieves it and turns it into a Beautiful Soup object
 # string interpolation doesn't work at present since user no is hardcoded


 url = 'http://stackoverflow.com/users/43089/%s' % username

 html = urlopen(url).read()
 soup = BeautifulSoup(html)

 # fetches user details table

 user_details = soup.find('table', {'class': 'user-details'})

 # defines username

 name = soup.h1.text

 # returns css tag containing join date.
 # trying to use .replace to cut out dross returns "NoneType" error...

 temp_date = user_details.find(text='member for').findNext('td').find('span',{'class' : 'cool'})

 # this removes the timestamp from the css tag and pops out the time of day

 birth = temp_date['title'].split().pop(0)

 # last seen data - temp var used to deliver result

 temp_seen = user_details.find(text='seen').findNext('td').find('span',{'class' : 'cool'})

 seen = temp_seen.span['title'].split().pop(0)

 # structured as time data 
 tm_seen = time.strptime(seen, "%Y-%m-%d")
 tm_birth = time.strptime(birth, "%Y-%m-%d")
 t1 = time.mktime(tm_seen)
 t2 = time.mktime(tm_birth)
 lifetime = (t1-t2)/86400


 # user location
 location = user_details.find(text='location').findNext('td').string.strip()


 # age

 age = user_details.find(text='age').findNext('td').string.strip()

 # reputation

 reputation = soup.find('span', {'class': 'summarycount'}).text

 # questions

 questions = soup.find('span', {'class': 'summarycount ar'}).text

 # answers

 answers = soup.find('div', {'class': 'summarycount ar'}).text

 # votes

 votesCast = soup.find('table', {'class': 'votes-cast-stats'})

 upVotes = int(votesCast.td.text)

 downVotes = int(votesCast.contents[5].contents[1].text)

 totalVotes =  upVotes + downVotes

 # Some custom metrics

 percentNice = (100/float(totalVotes))* upVotes

 percentMean = (100/float(totalVotes))* downVotes

 print 'name, birth, lastSeen, lifetime, age, location, reputation, questions, answers, totalVotes, upVotes, downVotes, percentNice, percentMean'
 print name,',',birth,',',seen,',',lifetime,',',age,',',location,',',reputation,',',questions,',',answers,',',totalVotes,',',upVotes,',',downVotes,',',percentNice,',',percentMean
	#Stack Overflow scraper script

	#imports necessary modules

	from urllib2 import urlopen
	from BeautifulSoup import BeautifulSoup
	import time

	username = raw_input("Username: ")

	#defines url, retrieves it and turns it into a Beautiful Soup object
	# string interpolation doesn't work at present since user no is hardcoded


	url = 'http://stackoverflow.com/users/43089/%s' % username

	html = urlopen(url).read()
	soup = BeautifulSoup(html)

	# fetches user details table

	user_details = soup.find('table', {'class': 'user-details'})

	# defines username

	name = soup.h1.text

	# returns css tag containing join date.
	# trying to use .replace to cut out dross returns "NoneType" error...

	temp_date = user_details.find(text='member for').findNext('td').find('span',{'class' : 'cool'})

	# this removes the timestamp from the css tag and pops out the time of day

	birth = temp_date['title'].split().pop(0)

	# last seen data - temp var used to deliver result

	temp_seen = user_details.find(text='seen').findNext('td').find('span',{'class' : 'cool'})

	seen = temp_seen.span['title'].split().pop(0)

	# structured as time data
	tm_seen = time.strptime(seen, "%Y-%m-%d")
	tm_birth = time.strptime(birth, "%Y-%m-%d")
	t1 = time.mktime(tm_seen)
	t2 = time.mktime(tm_birth)
	lifetime = (t1-t2)/86400


	# user location
	location = user_details.find(text='location').findNext('td').string.strip()


	# age

	age = user_details.find(text='age').findNext('td').string.strip()

	# reputation

	reputation = soup.find('span', {'class': 'summarycount'}).text

	# questions

	questions = soup.find('span', {'class': 'summarycount ar'}).text

	# answers

	answers = soup.find('div', {'class': 'summarycount ar'}).text

	# votes

	votesCast = soup.find('table', {'class': 'votes-cast-stats'})

	upVotes = int(votesCast.td.text)

	downVotes = int(votesCast.contents[5].contents[1].text)

	totalVotes = upVotes + downVotes

	# Some custom metrics

	percentNice = (100/float(totalVotes))* upVotes

	percentMean = (100/float(totalVotes))* downVotes

	print 'name, birth, lastSeen, lifetime, age, location, reputation, questions, answers, totalVotes, upVotes, downVotes, percentNice, percentMean'
	print name,',',birth,',',seen,',',lifetime,',',age,',',location,',',reputation,',',questions,',',answers,',',totalVotes,',',upVotes,',',downVotes,',',percentNice,',',percentMean
No results found