Skip to content

Instantly share code, notes, and snippets.

@tverlaan
Created April 3, 2024 10:42
Show Gist options
  • Save tverlaan/5b8cf9aff50d56090c942a729723dad4 to your computer and use it in GitHub Desktop.
Save tverlaan/5b8cf9aff50d56090c942a729723dad4 to your computer and use it in GitHub Desktop.
Mealie can't scrape ah.nl when it runs inside Docker, although scraping works from other environments. This script is a workaround: it scrapes a recipe from ah.nl locally and calls the Mealie API to store it.
import requests
import uuid
from recipe_scrapers import scrape_html
# --- Configuration: fill these in before running -------------------------
MEALIE_TOKEN = ""
MEALIE_URL = ""
MEALIE_TAG_ID = ""
MEALIE_HEADERS = {"Authorization": "Bearer " + MEALIE_TOKEN}
# Request headers sent to ah.nl: Dutch language plus a desktop Firefox
# user agent string.
AH_HEADERS = {"Accept-Language": "nl", "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:124.0) Gecko/20100101 Firefox/124.0"}
# Seconds to wait on any single HTTP call; without a timeout `requests`
# blocks indefinitely when a server stops responding.
REQUEST_TIMEOUT = 30

# Fail early with a readable message instead of an opaque "invalid URL"
# error from requests once the first Mealie call is reached.
if not (MEALIE_URL and MEALIE_TOKEN):
    raise SystemExit("Set MEALIE_URL and MEALIE_TOKEN before running this script.")

url = "https://www.ah.nl/allerhande/recept/R-R178212/turkse-linzensoep"

# --- Step 1: download and parse the recipe page --------------------------
page = requests.get(url, headers=AH_HEADERS, timeout=REQUEST_TIMEOUT)
# Do not hand an error page to the scraper.
page.raise_for_status()
html = page.content
recipe = scrape_html(html=html, org_url=url)

# --- Step 2: build the Mealie payload ------------------------------------
# Each ingredient is stored as a free-text note with a fresh reference id.
mealie_ingredients = [
    {"note": ingr, "referenceId": str(uuid.uuid4())}
    for ingr in recipe.ingredients()
]
mealie_instructions = [
    {"text": instruction, "ingredientReferences": []}
    for instruction in recipe.instructions_list()
]
mealie_recipe = {
    "tags": [{"id": MEALIE_TAG_ID, "name": "ah.nl", "slug": "ah-nl"}],
    "description": recipe.description(),
    "recipeIngredient": mealie_ingredients,
    "recipeInstructions": mealie_instructions,
    "totalTime": recipe.total_time(),
    "recipeYield": recipe.yields(),
    "nutrition": recipe.nutrients(),
}

# --- Step 3: create the recipe, then fill in its details -----------------
create = requests.post(
    MEALIE_URL + "/api/recipes",
    json={"name": recipe.title()},
    headers=MEALIE_HEADERS,
    timeout=REQUEST_TIMEOUT,
)
create.raise_for_status()
# The create call is expected to answer with the new slug as a JSON string
# (e.g. "turkse-linzensoep"); decode it as JSON rather than slicing off the
# surrounding quotes so escaped characters are handled correctly.
slug = create.json()
if not isinstance(slug, str) or not slug:
    raise SystemExit("Unexpected response from Mealie when creating the recipe: " + create.text)

update = requests.patch(
    MEALIE_URL + "/api/recipes/" + slug,
    json=mealie_recipe,
    headers=MEALIE_HEADERS,
    timeout=REQUEST_TIMEOUT,
)
update.raise_for_status()

# The image is optional: report a failure but still print the recipe link,
# because the recipe itself has already been stored at this point.
image = requests.post(
    MEALIE_URL + "/api/recipes/" + slug + "/image",
    json={"url": recipe.image()},
    headers=MEALIE_HEADERS,
    timeout=REQUEST_TIMEOUT,
)
if not image.ok:
    print("warning: image upload failed with HTTP status " + str(image.status_code))

print(MEALIE_URL + "/g/home/r/" + slug)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment