# Source: GitHub gist blenderdeluxe/eb759fa3172a7211231348e368b5b8db
# (author @blenderdeluxe, created May 19, 2021 20:43).
import json
import os
import urllib.request

from bs4 import BeautifulSoup as soup
# Download one Cookpad recipe page, extract its title, ingredient list and
# step texts, and write them as JSON to result/<slugified-title>.json.
myurl = 'https://cookpad.com/cl/recetas/15028629-postre-de-huesillos?ref=guest_feed'
# The request carries an explicit browser-like User-Agent header.
req = urllib.request.Request(
    myurl,
    data=None,
    headers={
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
    }
)
# The context manager closes the connection even when read() raises.
with urllib.request.urlopen(req) as uClient:
    pageHtml = uClient.read()
pageSoup = soup(pageHtml, "html.parser")

# Collected output lists (ingredients and instructions).
ingredientes = []
instrucciones = []

# Title: get_text() still works when <h1> contains nested tags, a case in
# which .string is None and .strip() would raise AttributeError.
recipeTitle = pageSoup.h1.get_text().strip()

# Ingredients: only entries that carry a non-empty quantity are kept.
mainIngridient = pageSoup.find('section', {"id": "ingredients"})
ingredientList = mainIngridient.find('div', {"class": "ingredient-list"})
ingredients = ingredientList.find_all('li', {"class": "ingredient"})
for ingredient in ingredients:
    ingWrap = ingredient.find('div', {"itemprop": "ingredients"})
    if ingWrap is None:
        # No ingredient wrapper means there is nothing to record.
        continue
    qtyTag = ingWrap.find('bdi', {"class": "ingredient__quantity"})
    # A missing quantity tag is treated like an empty quantity (skipped)
    # rather than crashing on None.
    Qty = qtyTag.text.strip() if qtyTag is not None else ''
    if Qty:
        ingredientes.append(ingWrap.text.strip())

# Steps: one text entry per <li class="step">.
mainStep = pageSoup.find('section', {"id": "steps"})
listStep = mainStep.find_all('li', {"class": "step"})
for step in listStep:
    stepBody = step.find('div', {"itemprop": "recipeInstructions"})
    if stepBody is None:
        continue
    textStep = stepBody.text.strip()
    instrucciones.append(textStep)

# Build the output file name from the title. Path separators are replaced
# so a title containing "/" or "\" cannot point the write at another path.
slugTitle = recipeTitle.lower().replace(' ', '-')
slugTitle = slugTitle.replace('/', '-').replace('\\', '-')
filename = slugTitle + '.json'
# Create the output directory on first run instead of failing in open().
os.makedirs('result', exist_ok=True)
path = os.path.join('result', filename)

dataJson = {"title": recipeTitle, "ingredientes": ingredientes, "instrucciones": instrucciones}
# ensure_ascii=False writes accented characters verbatim, so the file is
# opened as UTF-8 explicitly rather than with the locale's default encoding.
with open(path, "w", encoding="utf-8") as writeJson:
    json.dump(dataJson, writeJson, ensure_ascii=False)
# End of gist (the GitHub page footer prompting sign-in to comment is not part of the script).