Skip to content

Instantly share code, notes, and snippets.

@mbarnes
Last active July 20, 2023 13:41
Show Gist options
  • Save mbarnes/3939c6daf6349d4c9a5f9ecc99ea28e1 to your computer and use it in GitHub Desktop.
Save mbarnes/3939c6daf6349d4c9a5f9ecc99ea28e1 to your computer and use it in GitHub Desktop.
AllRecipes.com saved recipe collections
#!/usr/bin/python3
#
# Retrieve saved recipe collections from AllRecipes.com.
#
# As of 2022, AllRecipes.com has become almost unusably slow and
# has dropped the capability to search among saved recipes. This
# script allows me to cache collections of recipe links offline.
#
# To avoid interactive prompts, either set environment variables
# ALLRECIPES_USERNAME and ALLRECIPES_PASSWORD or add credentials
# to your ~/.netrc file:
#
# machine allrecipes.com
# login <ALLRECIPES_USERNAME>
# password <ALLRECIPES_PASSWORD>
#
import collections
import functools
import getpass
import html.parser
import http
import http.client
import json
import netrc
import operator
import os
import urllib
import urllib.parse

# 3rd-party modules
import requests
# HTTP debug output is off (0); set to 1 to show HTTP requests and responses
http.client.HTTPConnection.debuglevel = 0
class FormExtractor(html.parser.HTMLParser):
    """Collect the method, action and input fields of an HTML form.

    After feeding an HTML document to the parser, ``method`` and
    ``action`` hold the attributes of the matching ``<form>`` tag and
    ``data`` maps the name of each ``<input>`` inside it to its
    ``value`` attribute (``None`` when the input has no value).

    If ``form_id`` is given, only a form whose ``id`` attribute equals
    it is examined.  Otherwise every form in the document is examined;
    their inputs are merged into ``data`` and the last form seen
    determines ``method`` and ``action``.

    When no matching form is found, ``method`` and ``action`` stay
    ``None`` and ``data`` stays empty.
    """

    def __init__(self, form_id=None, convert_charrefs=True):
        # HTMLParser.__init__() calls reset(), which initialises the
        # extraction state below; form_id is deliberately set afterwards
        # so that reset() never clears it.
        super().__init__(convert_charrefs=convert_charrefs)
        self.form_id = form_id

    def reset(self):
        """Discard any extracted form data along with the parser state."""
        self.__in_form = False
        self.method = None
        self.action = None
        self.data = {}
        super().reset()

    def handle_starttag(self, tag, attrs):
        """Record a matching ``<form>`` tag and the inputs inside it."""
        attrs = dict(attrs)
        if tag == 'form':
            if not self.form_id or attrs.get('id') == self.form_id:
                self.__in_form = True
                # A form without a method attribute submits with GET.
                self.method = attrs.get('method') or 'get'
                # A missing action is left as None rather than raising.
                self.action = attrs.get('action')
        elif tag == 'input' and self.__in_form:
            # Inputs without a name (e.g. a bare submit button) are not
            # part of the submitted form data, so skip them instead of
            # failing with a KeyError.
            name = attrs.get('name')
            if name:
                self.data[name] = attrs.get('value')

    def handle_endtag(self, tag):
        """Stop collecting inputs once the form is closed."""
        if tag == 'form':
            self.__in_form = False
def allrecipes_session_login(method):
    """Decorate a session method so that the session logs in first.

    The wrapper checks ``session.hash_id``; when it is not set, it
    fetches the login page, fills the ``kc-form-login`` form with
    ``session.username`` and ``session.password``, submits it, and
    stores the value of the ``hid`` cookie (domain ``.allrecipes.com``)
    in ``session.hash_id``.  It then calls the wrapped method.

    Raises:
        RuntimeError: if the login form cannot be found on the login
            page, or if no ``hid`` cookie is set after submitting it.
        Whatever ``raise_for_status()`` raises for a failed HTTP
            request.
    """
    @functools.wraps(method)
    def inner(session, *args, **kwargs):
        # Log in to AllRecipes.com on first call
        if not session.hash_id:
            response = session.get('/authentication/login')
            response.raise_for_status()
            form = FormExtractor('kc-form-login')
            form.feed(response.content.decode('utf-8'))
            if not form.method or not form.action:
                # Without this check the request below would be sent
                # with a None method/URL and fail obscurely.
                raise RuntimeError(
                    "AllRecipes.com login form 'kc-form-login' not found "
                    "or incomplete")
            form.data['username'] = session.username
            form.data['password'] = session.password
            response = session.request(form.method, form.action, data=form.data)
            response.raise_for_status()
            session.hash_id = session.cookies.get('hid', domain='.allrecipes.com')
            if not session.hash_id:
                # The API requests need the hash id; fail here rather
                # than sending them with a null id.
                raise RuntimeError(
                    "AllRecipes.com login failed: no 'hid' cookie received")
        return method(session, *args, **kwargs)
    return inner
class AllRecipesSession(requests.Session):
    """AllRecipes.com REST API session

    URLs passed to ``request()`` (and therefore ``get()``, ``post()``,
    ...) are resolved against ``base_url``, so callers may pass plain
    paths.  Credentials are collected when the session is created; the
    login itself happens on the first call of a method decorated with
    ``allrecipes_session_login``.
    """

    base_url = 'https://www.allrecipes.com'

    def __init__(self, base_url=None):
        """Create a session, optionally overriding the class ``base_url``."""
        if base_url:
            self.base_url = base_url
        super().__init__()
        self.__get_credentials()
        # Filled in from the 'hid' cookie by allrecipes_session_login.
        self.hash_id = None

    def __get_credentials(self):
        """Set ``username`` and ``password``.

        Sources are tried in order: the ALLRECIPES_USERNAME and
        ALLRECIPES_PASSWORD environment variables, the 'allrecipes.com'
        entry of ~/.netrc, and finally an interactive prompt.
        """
        self.username = os.environ.get('ALLRECIPES_USERNAME')
        self.password = os.environ.get('ALLRECIPES_PASSWORD')
        if not (self.username and self.password):
            try:
                if auth := netrc.netrc().authenticators('allrecipes.com'):
                    # authenticators() yields (login, account, password)
                    self.username, _, self.password = auth
            except FileNotFoundError:
                # No ~/.netrc file; fall through to the prompt.
                pass
        if not (self.username and self.password):
            print('AllRecipes.com Email Sign In')
            self.username = input('Email Address: ').strip()
            self.password = getpass.getpass('Password: ').strip()

    def request(self, method, url, *args, **kwargs):
        """Send the request after generating the complete URL"""
        url = self.create_url(url)
        return super().request(method, url, *args, **kwargs)

    def create_url(self, url):
        """Create the URL based off this partial path"""
        return urllib.parse.urljoin(self.base_url, url)

    def _iter_pages(self, url, body):
        """POST ``body`` to ``url`` once per result page, yielding each page.

        Each request carries ``{'collation': {'page': N}}`` in addition
        to the entries of ``body`` (which is not modified), starting at
        page 1.  The next page number is taken from the response's
        ``collation`` object and iteration stops when that object no
        longer reports ``hasNextPage``.

        Yields the decoded JSON object of every response.
        """
        page = 1
        while page is not None:
            response = self.post(url, json={**body, 'collation': {'page': page}})
            response.raise_for_status()
            data = response.json()
            collation = data['collation']
            yield data
            page = collation['nextPage'] if collation.get('hasNextPage') else None

    @allrecipes_session_login
    def get_bookmark_collections(self):
        """Return all bookmark collections of the logged-in user.

        The result maps ``'bookmarks/<_type>_<cms_id>'`` to the ``udf``
        object of each collection, with both parts of the key taken
        from that ``udf`` object.
        """
        body = {
            'excludePrivate': False,
            'userHashId': self.hash_id
        }
        found = {}
        for data in self._iter_pages('/user-proxy/getbookmarkcollectionslite', body):
            for item in data['collections']:
                udf = item['udf']
                key = 'bookmarks/' + udf['_type'] + '_' + udf['cms_id']
                found[key] = udf
        return found

    @allrecipes_session_login
    def get_bookmarks(self):
        """Generate every bookmark of the logged-in user, page by page."""
        body = {
            'brand': 'alrcom',
            'hashId': self.hash_id
        }
        for data in self._iter_pages('/user-proxy/getbookmarks', body):
            yield from data['bookmarks']
def main():
    """Print the saved recipes, grouped by collection, as JSON on stdout.

    The output is an object mapping each collection name to a list of
    ``{"name": ..., "url": ...}`` entries.  Collections are sorted by
    name; bookmarks that belong to no known collection are listed last
    under "Uncategorized".  Collections without bookmarks are omitted,
    and collections sharing a name are merged into one list.
    """
    with AllRecipesSession() as session:
        uncategorized = {'name': 'Uncategorized'}
        bookmark_collections = session.get_bookmark_collections()
        for bookmark in session.get_bookmarks():
            categorized = False
            for reference in bookmark['udf'].get('collections') or ():
                cid = reference['id']
                if cid in bookmark_collections:
                    categorized = True
                    bookmark_collections[cid].setdefault('bookmarks', []).append(bookmark)
            if not categorized:
                uncategorized.setdefault('bookmarks', []).append(bookmark)

    ordered = sorted(bookmark_collections.values(), key=operator.itemgetter('name'))
    ordered.append(uncategorized)

    # Plain dicts keep insertion order, so the JSON output is ordered
    # without needing OrderedDict.
    output = {}
    for bookmark_collection in ordered:
        bookmark_list = [
            {
                'name': bookmark['blockContent']['headline'],
                'url': bookmark['blockContent']['url']['absoluteUrl'],
            }
            for bookmark in bookmark_collection.get('bookmarks', [])
        ]
        if bookmark_list:
            # Extend rather than assign so that two collections with the
            # same name (or a user collection called "Uncategorized")
            # do not overwrite each other.
            output.setdefault(bookmark_collection['name'], []).extend(bookmark_list)
    print(json.dumps(output, indent=2))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment