Skip to content

Instantly share code, notes, and snippets.

@tdhopper
Created October 12, 2012 01:55
Show Gist options
  • Save tdhopper/3876912 to your computer and use it in GitHub Desktop.
Save tdhopper/3876912 to your computer and use it in GitHub Desktop.
AllRecipes to Markdown
# Python script to scrape a recipe from AllRecipes.com into a Markdown-ish file. #
# Takes an AllRecipes.com URL as an argument. #
# I connected this to Alfred App so I simply have to paste a URL into Alfred.
import subprocess
import urllib2
from BeautifulSoup import BeautifulSoup
import re, string, sys, os
recipeDirection = os.path.expanduser("~/Dropbox/Text Notes/")
from HTMLParser import HTMLParser
class MLStripper(HTMLParser):
    """HTML parser that throws away markup and keeps only the text data.

    Feed it markup with ``feed()`` and read the accumulated text back
    with ``get_data()``.
    """

    def __init__(self):
        # Delegate to the base-class initializer instead of calling
        # reset() by hand: the base __init__ performs the reset itself and
        # may set up additional parser state that feed() relies on.
        HTMLParser.__init__(self)
        # Text fragments in the order the parser reported them.
        self.fed = []

    def handle_data(self, d):
        """Record one run of text reported by the parser."""
        self.fed.append(d)

    def get_data(self):
        """Return every recorded text fragment joined into one string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text content of *html* with all tags removed.

    The markup is run through a fresh MLStripper, which collects only the
    character data it is handed, and the collected pieces are returned as
    a single string.
    """
    stripper = MLStripper()
    stripper.feed(html)
    text_only = stripper.get_data()
    return text_only
# Tag-stripping code above (MLStripper / strip_tags) adapted from http://stackoverflow.com/a/925630/982745
# ---------------------------------------------------------------------------
# Script body: download the recipe page named on the command line and write
# it as a Markdown-ish text file into recipeDirection.
#
# Output layout (one file per recipe, named after the recipe title):
#   <url>
#   <servings>
#   <prep / cook / ready-in times>
#   Ingredients:  "* item" bullet list
#   Directions:   "1. step" numbered list
# ---------------------------------------------------------------------------
if len(sys.argv) <= 1:
    # Keep the message on stdout so a wrapper (e.g. Alfred) still sees it,
    # but leave through sys.exit() with a non-zero status: the bare exit()
    # helper is meant for the interactive interpreter and reported success.
    sys.stdout.write("Please include address as argument.\n")
    sys.exit(1)

url = sys.argv[1]
soup = BeautifulSoup(urllib2.urlopen(url).read())

title_tag = soup.find("h1", {"id": "itemTitle"})
if title_tag is None:
    # Without a title there is no sensible file name; previously this
    # silently produced a file called "None.txt".
    sys.stdout.write("Could not find a recipe title at " + url + "\n")
    sys.exit(1)

# Collapse stray whitespace/newlines and replace path separators so the
# title is always usable as a single file name.
recipe_name = ' '.join(strip_tags(str(title_tag)).split())
recipe_name = recipe_name.replace("/", "-").replace(os.sep, "-")
title = os.path.join(recipeDirection, recipe_name + ".txt")

# Write straight to the file instead of rebinding sys.stdout; the context
# manager guarantees the file is flushed and closed even if parsing fails.
with open(title, 'w') as out:
    # Source URL, followed by a blank line.
    out.write(url + "\n\n")

    # Number of servings (skipped when the page has no such element).
    servings_box = soup.find("div", {"class": "servings"})
    servings = None
    if servings_box is not None:
        servings = servings_box.find("span", {"id": "lblYield"})
    if servings is not None:
        out.write(strip_tags(str(servings)) + "\n\n")

    # Prep / cook / ready-in times, one per line with friendlier labels.
    times_box = soup.find("div", {"id": "divRecipeTimesContainer"})
    if times_box is not None:
        for item in times_box.findAll("li"):
            text = strip_tags(str(item).replace("\r", ""))
            text = ' '.join(text.split())
            text = text.replace("PREP", "Prep Time:")
            text = text.replace("COOK", "Cook Time:")
            text = text.replace("READY IN", "Ready in:")
            out.write(text + "\n")

    # Ingredients as a bullet list.
    out.write("\nIngredients:\n\n")
    ingredient_list = soup.find("ul", {"class": "ingredient-wrap"})
    if ingredient_list is not None:
        for item in ingredient_list.findAll("li"):
            out.write("* " + ' '.join(strip_tags(str(item)).split()) + "\n")

    # Directions as a numbered list starting at 1.
    out.write("\nDirections:\n\n")
    directions_box = soup.find("div", {"class": "directions"})
    steps = None
    if directions_box is not None:
        steps = directions_box.find("ol")
    if steps is not None:
        for number, step in enumerate(steps.findAll("li"), 1):
            out.write(str(number) + ". " + strip_tags(str(step)).strip() + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment