Last active
July 20, 2023 13:41
-
-
Save mbarnes/3939c6daf6349d4c9a5f9ecc99ea28e1 to your computer and use it in GitHub Desktop.
AllRecipes.com saved recipe collections
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
#
# Retrieve saved recipe collections from AllRecipes.com.
#
# As of 2022, AllRecipes.com has become almost unusably slow and
# has dropped the capability to search among saved recipes. This
# script allows me to cache collections of recipe links offline.
#
# To avoid interactive prompts, either set environment variables
# ALLRECIPES_USERNAME and ALLRECIPES_PASSWORD or add credentials
# to your ~/.netrc file:
#
#   machine allrecipes.com
#     login <ALLRECIPES_USERNAME>
#     password <ALLRECIPES_PASSWORD>
#
import collections
import functools
import getpass
import html.parser
import http
import http.client
import json
import netrc
import operator
import os
import urllib
import urllib.parse

# 3rd-party modules
import requests
# HTTP wire tracing for http.client: 0 keeps it off; set to 1 to
# show HTTP requests and responses.
http.client.HTTPConnection.debuglevel = 0
class FormExtractor(html.parser.HTMLParser):
    """Collect the method, action and input fields of an HTML form.

    Feed a document with ``feed()``; afterwards ``method``, ``action`` and
    ``data`` describe the selected form.  ``method`` stays ``None`` when no
    matching form was seen.

    Args:
        form_id: ``id`` attribute of the form to extract.  When falsy, every
            form in the document matches: later forms overwrite ``method``
            and ``action`` and add their inputs to ``data``.
        convert_charrefs: Passed through to ``HTMLParser``.
    """

    def __init__(self, form_id=None, convert_charrefs=True):
        super().__init__(convert_charrefs=convert_charrefs)
        self.form_id = form_id

    def reset(self):
        """Clear the extracted form state along with the parser state."""
        self.__in_form = False
        self.method = None
        self.action = None
        self.data = {}
        super().reset()

    def handle_starttag(self, tag, attrs):
        """Record a matching ``<form>`` and the named ``<input>``s inside it."""
        attrs = dict(attrs)
        if tag == 'form':
            if not self.form_id or attrs.get('id') == self.form_id:
                self.__in_form = True
                # A form without a method attribute submits via GET in HTML.
                self.method = attrs.get('method') or 'get'
                # A missing action is reported as None rather than raising.
                self.action = attrs.get('action')
        elif tag == 'input' and self.__in_form:
            # Inputs without a name (e.g. plain submit buttons) are never
            # submitted, so skip them instead of raising KeyError.
            name = attrs.get('name')
            if name:
                self.data[name] = attrs.get('value')

    def handle_endtag(self, tag):
        """Stop collecting inputs once the current form closes."""
        if tag == 'form':
            self.__in_form = False
def allrecipes_session_login(method):
    """Decorate a session method so the session logs in before it runs.

    The wrapper treats a falsy ``session.hash_id`` as "not logged in yet".
    In that case it fetches ``/authentication/login``, fills the
    ``kc-form-login`` form with ``session.username`` and
    ``session.password``, submits it, and stores the ``hid`` cookie as
    ``session.hash_id`` before calling the wrapped method.

    Raises:
        RuntimeError: If the login page contains no ``kc-form-login`` form.
        Exception: Whatever ``raise_for_status()`` raises for a failed
            HTTP response.
    """
    @functools.wraps(method)  # keep the wrapped method's name and docstring
    def inner(session, *args, **kwargs):
        if not session.hash_id:
            response = session.get('/authentication/login')
            response.raise_for_status()
            form = FormExtractor('kc-form-login')
            form.feed(response.content.decode('utf-8'))
            if form.method is None:
                # Without this check the request below would be issued with
                # method=None and fail with an unrelated-looking error.
                raise RuntimeError(
                    "AllRecipes.com login page has no 'kc-form-login' form")
            form.data['username'] = session.username
            form.data['password'] = session.password
            response = session.request(form.method, form.action, data=form.data)
            response.raise_for_status()
            session.hash_id = session.cookies.get('hid', domain='.allrecipes.com')
        return method(session, *args, **kwargs)
    return inner
class AllRecipesSession(requests.Session):
    """AllRecipes.com REST API session.

    Request URLs are resolved against ``base_url``.  Credentials are
    collected when the session is constructed; ``hash_id`` starts as
    ``None`` and is filled in by the ``allrecipes_session_login`` decorator
    applied to the bookmark methods.
    """

    base_url = 'https://www.allrecipes.com'

    def __init__(self, base_url=None):
        """Create the session, optionally overriding the class ``base_url``."""
        if base_url:
            self.base_url = base_url
        super().__init__()
        self.__get_credentials()
        self.hash_id = None

    def __get_credentials(self):
        """Set ``username`` and ``password``.

        Sources are tried in order: the ALLRECIPES_USERNAME and
        ALLRECIPES_PASSWORD environment variables, the ``allrecipes.com``
        entry of ~/.netrc, and finally an interactive prompt.
        """
        self.username = os.environ.get('ALLRECIPES_USERNAME')
        self.password = os.environ.get('ALLRECIPES_PASSWORD')
        if not (self.username and self.password):
            try:
                if auth := netrc.netrc().authenticators('allrecipes.com'):
                    self.username, _, self.password = auth
            except FileNotFoundError:
                # No ~/.netrc file; fall through to the prompt.
                pass
        if not (self.username and self.password):
            print('AllRecipes.com Email Sign In')
            self.username = input('Email Address: ').strip()
            self.password = getpass.getpass('Password: ').strip()

    def request(self, method, url, *args, **kwargs):
        """Send the request after generating the complete URL."""
        url = self.create_url(url)
        return super().request(method, url, *args, **kwargs)

    def create_url(self, url):
        """Create the URL based off this partial path."""
        return urllib.parse.urljoin(self.base_url, url)

    def _post_pages(self, url, body):
        """POST ``body`` to ``url`` once per result page, yielding each reply.

        Every request carries ``collation.page``, starting at 1.  Paging
        continues for as long as the ``collation`` object of the previous
        reply has a truthy ``hasNextPage``, using its ``nextPage`` value.
        ``body`` itself is not modified.
        """
        collation = {'hasNextPage': True, 'nextPage': 1}
        while collation.get('hasNextPage'):
            payload = dict(body, collation={'page': collation['nextPage']})
            response = self.post(url, json=payload)
            response.raise_for_status()
            data = response.json()
            collation = data['collation']
            yield data

    @allrecipes_session_login
    def get_bookmark_collections(self):
        """Return the bookmark collections as a dict.

        Keys have the form ``bookmarks/<_type>_<cms_id>``; values are the
        ``udf`` objects of the returned collections.
        """
        body = {
            'excludePrivate': False,
            'userHashId': self.hash_id,
        }
        found = {}
        for data in self._post_pages('/user-proxy/getbookmarkcollectionslite', body):
            for item in data['collections']:
                udf = item['udf']
                key = 'bookmarks/' + udf['_type'] + '_' + udf['cms_id']
                found[key] = udf
        return found

    @allrecipes_session_login
    def get_bookmarks(self):
        """Yield every bookmark, requesting further pages on demand."""
        body = {
            'brand': 'alrcom',
            'hashId': self.hash_id,
        }
        for data in self._post_pages('/user-proxy/getbookmarks', body):
            yield from data['bookmarks']
def _group_bookmarks(bookmark_collections, bookmarks):
    """Append each bookmark to the 'bookmarks' list of every known collection
    it references; return the bookmarks that reference no known collection."""
    uncategorized = []
    for bookmark in bookmarks:
        categorized = False
        # Treat a missing or empty 'collections' entry as "no references".
        for reference in bookmark['udf'].get('collections') or ():
            collection = bookmark_collections.get(reference['id'])
            if collection is not None:
                categorized = True
                collection.setdefault('bookmarks', []).append(bookmark)
        if not categorized:
            uncategorized.append(bookmark)
    return uncategorized


def _build_output(bookmark_collections):
    """Map each collection name to its non-empty list of name/url entries."""
    output = {}
    for bookmark_collection in bookmark_collections:
        entries = [
            {
                'name': bookmark['blockContent']['headline'],
                'url': bookmark['blockContent']['url']['absoluteUrl'],
            }
            for bookmark in bookmark_collection.get('bookmarks', [])
        ]
        if entries:
            # Merge rather than assign: two collections may share a name, and
            # a user collection may itself be called 'Uncategorized'.
            output.setdefault(bookmark_collection['name'], []).extend(entries)
    return output


def main():
    """Print the saved recipes as JSON, grouped by collection name.

    Collections are sorted by name; bookmarks that belong to no known
    collection are listed last under 'Uncategorized'.  Collections without
    bookmarks are omitted.
    """
    with AllRecipesSession() as session:
        bookmark_collections = session.get_bookmark_collections()
        uncategorized = _group_bookmarks(
            bookmark_collections, session.get_bookmarks())
    ordered = sorted(bookmark_collections.values(),
                     key=operator.itemgetter('name'))
    ordered.append({'name': 'Uncategorized', 'bookmarks': uncategorized})
    print(json.dumps(_build_output(ordered), indent=2))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment