JosephShering · September 8, 2014 21:21
diff --git a/scrape.py b/scrape.py
 import requests, json
 from bs4 import BeautifulSoup
 # from pymongo import MongoClient

 from pprint import PrettyPrinter

 # Shared pretty-printer used to echo each scraped drink.
 pprint 		= PrettyPrinter(indent=3)
 # The MongoDB client is disabled (its import and construction are commented
 # out), so the handles derived from it must be disabled as well: leaving them
 # live raised NameError on `client` as soon as the module was loaded.
 # client 		= MongoClient('localhost', 27017)
 # bartender 	= client.bartender
 # drinks 		= bartender.drinks

 def scrape():
 	"""Scrape Esquire's full drink list and write it to ``drinks.json``.

 	Fetches the listing page, then one detail page per listed drink, and
 	builds one dict per drink with the keys ``name``, ``ingredients``,
 	``instructions``, ``description`` and ``image_url``. Each dict is
 	pretty-printed as it is built; the complete list is then dumped as JSON
 	to ``drinks.json`` (relative to the current working directory).

 	Raises whatever ``requests`` raises on a timeout or connection failure,
 	and on an HTTP error status for the listing page.
 	"""
 	HOST = 'http://www.esquire.com'
 	# requests waits indefinitely unless a timeout is supplied.
 	TIMEOUT = 30
 	# Name the parser explicitly; otherwise Beautiful Soup picks whichever
 	# parser happens to be installed, so output can differ between machines.
 	PARSER = 'html.parser'

 	listing = requests.get(HOST + '/drinks/drinks-full-list/', timeout=TIMEOUT)
 	# Fail loudly instead of overwriting drinks.json with an empty list
 	# parsed out of an error page.
 	listing.raise_for_status()
 	soup = BeautifulSoup(listing.text, PARSER)

 	results = soup.find_all(class_='result')
 	results_alt = soup.find_all(class_='result_alt')
 	scraped = []

 	for res in (results + results_alt):
 		# The drink name and detail link live in the entry's <h2><a>.
 		result_content = res.find(class_='result_content')
 		heading = result_content.h2 if result_content is not None else None
 		link = heading.a if heading is not None else None
 		if link is None or not link.get('href'):
 			# Not a usable drink entry: there is no detail page to follow.
 			continue

 		drink = {
 			'name' : link.string,
 			'ingredients' : [],
 			'instructions' : '',
 			'description' : '',
 			'image_url' : ''
 		}

 		# Grab the image url for later linking. Direct children are walked
 		# explicitly because a selector that starts with a combinator
 		# ('> a > img') is not accepted by the soupsieve-based select() in
 		# newer Beautiful Soup versions. Entries without an image keep ''.
 		for anchor in res.find_all('a', recursive=False):
 			img = anchor.find('img', recursive=False)
 			if img is not None and img.get('src'):
 				drink['image_url'] = HOST + img['src']
 				break

 		# Make request to grab the detail page
 		description_page = requests.get(HOST + link['href'], timeout=TIMEOUT)
 		desc_soup = BeautifulSoup(description_page.text, PARSER)

 		# Each paragraph is prefixed with a newline, as in the stored format.
 		drink['instructions'] = ''.join(
 			"\n" + para.text
 			for para in desc_soup.select('#instruction_container p')
 		)
 		drink['description'] = ''.join(
 			"\n" + para.text
 			for para in desc_soup.select('#drink_commentary > p')
 		)

 		# Ingredients are listed on the listing page itself.
 		drink['ingredients'] = [
 			item.text for item in result_content.select('ul > li')
 		]

 		pprint.pprint(drink)
 		scraped.append(drink)

 	# Put all those drinks into a file
 	with open('drinks.json', 'w') as f:
 		json.dump(scraped, f)

 # Run the scraper only when this file is executed directly as a script.
 if __name__ == "__main__":
 	scrape()
	import requests, json
	from bs4 import BeautifulSoup
	# from pymongo import MongoClient

	from pprint import PrettyPrinter

	# Shared pretty-printer used to echo each scraped drink.
	pprint = PrettyPrinter(indent=3)
	# The MongoDB client is disabled (its import and construction are commented
	# out), so the handles derived from it must be disabled as well: leaving
	# them live raised NameError on `client` as soon as the module was loaded.
	# client = MongoClient('localhost', 27017)
	# bartender = client.bartender
	# drinks = bartender.drinks

	def scrape():
		"""Scrape Esquire's full drink list and write it to ``drinks.json``.

		Fetches the listing page, then one detail page per listed drink, and
		builds one dict per drink with the keys ``name``, ``ingredients``,
		``instructions``, ``description`` and ``image_url``. Each dict is
		pretty-printed as it is built; the complete list is then dumped as
		JSON to ``drinks.json`` (relative to the current working directory).

		Raises whatever ``requests`` raises on a timeout or connection
		failure, and on an HTTP error status for the listing page.
		"""
		HOST = 'http://www.esquire.com'
		# requests waits indefinitely unless a timeout is supplied.
		TIMEOUT = 30
		# Name the parser explicitly; otherwise Beautiful Soup picks whichever
		# parser happens to be installed, so output can differ between machines.
		PARSER = 'html.parser'

		listing = requests.get(HOST + '/drinks/drinks-full-list/', timeout=TIMEOUT)
		# Fail loudly instead of overwriting drinks.json with an empty list
		# parsed out of an error page.
		listing.raise_for_status()
		soup = BeautifulSoup(listing.text, PARSER)

		results = soup.find_all(class_='result')
		results_alt = soup.find_all(class_='result_alt')
		scraped = []

		for res in (results + results_alt):
			# The drink name and detail link live in the entry's <h2><a>.
			result_content = res.find(class_='result_content')
			heading = result_content.h2 if result_content is not None else None
			link = heading.a if heading is not None else None
			if link is None or not link.get('href'):
				# Not a usable drink entry: there is no detail page to follow.
				continue

			drink = {
				'name' : link.string,
				'ingredients' : [],
				'instructions' : '',
				'description' : '',
				'image_url' : ''
			}

			# Grab the image url for later linking. Direct children are walked
			# explicitly because a selector that starts with a combinator
			# ('> a > img') is not accepted by the soupsieve-based select() in
			# newer Beautiful Soup versions. Entries without an image keep ''.
			for anchor in res.find_all('a', recursive=False):
				img = anchor.find('img', recursive=False)
				if img is not None and img.get('src'):
					drink['image_url'] = HOST + img['src']
					break

			# Make request to grab the detail page
			description_page = requests.get(HOST + link['href'], timeout=TIMEOUT)
			desc_soup = BeautifulSoup(description_page.text, PARSER)

			# Each paragraph is prefixed with a newline, as in the stored format.
			drink['instructions'] = ''.join(
				"\n" + para.text
				for para in desc_soup.select('#instruction_container p')
			)
			drink['description'] = ''.join(
				"\n" + para.text
				for para in desc_soup.select('#drink_commentary > p')
			)

			# Ingredients are listed on the listing page itself.
			drink['ingredients'] = [
				item.text for item in result_content.select('ul > li')
			]

			pprint.pprint(drink)
			scraped.append(drink)

		# Put all those drinks into a file
		with open('drinks.json', 'w') as f:
			json.dump(scraped, f)

	# Run the scraper only when this file is executed directly as a script.
	if __name__ == '__main__':
		scrape()