Last active
June 12, 2024 12:30
-
-
Save jonathanballs/dd3e8f8ddd79225030b768602d21d7ef to your computer and use it in GitHub Desktop.
Pulls the latest crossword clues from The Times. Writes a clue + link to crossword to a file so that I can display it in my vim startup
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> | |
<plist version="1.0"> | |
<!-- launchd job that runs fetchcrosswords.py with /usr/bin/python3. -->
<dict> | |
<key>Label</key> | |
<string>crossword.fetch.at.midnight</string> | |
<key>UserName</key> | |
<string>jonathanballs</string> | |
<key>ProgramArguments</key> | |
<array> | |
<string>/usr/bin/python3</string> | |
<string>/Users/jonathanballs/.config/nvim/fetchcrosswords.py</string> | |
</array> | |
<key>RunAtLoad</key> | |
<true/> <!-- ◄ Also run once when the job is loaded (see launchd.plist(5)) --> | |
<key>StartCalendarInterval</key><!-- ◄ Start job at specified times --> | |
<dict> | |
<key>Hour</key> | |
<integer>0</integer> <!-- ◄ Hour 0 + Minute 15 below = start at 00:15 (12:15 AM) --> | |
<key>Minute</key> | |
<integer>15</integer> | |
</dict> | |
</dict> | |
</plist> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetches the latest crossword clues from the times | |
# For use in my neovim config | |
import datetime | |
import urllib.parse | |
import re | |
import html | |
import os | |
import json | |
import requests | |
from bs4 import BeautifulSoup | |
# Title fragments used to decide which crossword to take, most preferred first.
# Not all crosswords are available every day.
PRECEDENCE = ["O Tempora", "Quick Cryptic", "Times Cryptic No", "Concise No"]

# The value of acs_tnl can be found in a cookie for thetimes.co.uk.
# It is read from a .secrets.json file that sits next to this script.
secrets_filename = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), "./.secrets.json"
)
# Use a context manager so the file handle is closed as soon as it is parsed,
# and pin the encoding so parsing does not depend on the process locale.
with open(secrets_filename, "r", encoding="utf-8") as secrets_file:
    ACS_TNL = json.load(secrets_file)["acs_tnl"]
def crossword_urls_for_date(crossword_date, timeout=30):
    """Locate the preferred crossword published on ``crossword_date``.

    The puzzle list page is requested for that single day (using the
    ``acs_tnl`` cookie), every listed puzzle is collected, and the first one
    whose title contains an entry of ``PRECEDENCE`` (checked in precedence
    order, case-insensitively) is selected. The selected puzzle's print link
    is then requested without following redirects; the last path segment of
    the redirect target is replaced with ``data.json`` to obtain the data URL.

    Args:
        crossword_date: A ``datetime.date`` (anything with ``strftime``).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A ``(crossword_data_url, crossword_web_url)`` tuple.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status from the puzzle list page.
        LookupError: If no listed puzzle matches ``PRECEDENCE`` or the print
            link does not answer with a redirect.
    """
    cookies = {"acs_tnl": ACS_TNL}
    date_string = urllib.parse.quote(crossword_date.strftime("%d/%m/%Y"), safe="")
    url = (
        "https://www.thetimes.co.uk/puzzleclub/crosswordclub/puzzles-list"
        "?filter%5Bpuzzle_type%5D="
        f"&filter%5Bpublish_at%5D%5Bfrom%5D={date_string}"
        f"&filter%5Bpublish_at%5D%5Bto%5D={date_string}"
    )
    # Without a timeout a stalled connection would block the scheduled job
    # forever, so every request is bounded.
    resp = requests.get(url, cookies=cookies, timeout=timeout)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    todays_puzzles = []
    for entry in soup.find_all("div", {"class": "puzzle-link"}):
        title_tag = entry.find("h3", {"class": "puzzle-list-puzzle-title"})
        link_tag = entry.find("a", {"class": "puzzle-title-link"})
        print_paragraph = entry.find("p", {"class": "PuzzleItem--print-link"})
        print_tag = print_paragraph.find("a") if print_paragraph else None
        # Skip entries with incomplete markup instead of letting one odd
        # entry abort the whole lookup.
        if title_tag is None or link_tag is None or print_tag is None:
            continue
        if not link_tag.get("href") or not print_tag.get("href"):
            continue
        todays_puzzles.append(
            {
                "title": title_tag.text,
                "link": link_tag["href"],
                "print_link": print_tag["href"],
            }
        )

    # Precedence order wins over page order: the outer loop walks PRECEDENCE.
    puzzle = next(
        (
            candidate
            for puzzle_type in PRECEDENCE
            for candidate in todays_puzzles
            if puzzle_type.lower() in candidate["title"].lower()
        ),
        None,
    )
    if puzzle is None:
        raise LookupError(
            f"No crossword matching {PRECEDENCE} found for {crossword_date}"
        )

    # The redirect target itself is what we want, so do not follow it.
    resp = requests.get(
        puzzle["print_link"],
        cookies=cookies,
        allow_redirects=False,
        timeout=timeout,
    )
    redirect_url = resp.headers.get("Location")
    if not redirect_url:
        raise LookupError(
            f"Print link for {puzzle['title']!r} did not redirect "
            f"(HTTP {resp.status_code})"
        )
    crossword_data_url = redirect_url.rsplit("/", 1)[0] + "/data.json"
    return crossword_data_url, puzzle["link"]
def fetch_crossword(crossword_data_url, timeout=30):
    """Download a crossword's data.json and return its clues as strings.

    Each clue is formatted as ``"<number><d> - <clue text> (<length>)"``,
    where ``<d>`` is the lower-cased first character of the direction title
    and HTML entities in the clue text are unescaped.

    Args:
        crossword_data_url: URL of the crossword's ``data.json`` document.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of formatted clue strings, in the order they appear in the data.

    Raises:
        requests.RequestException: On network failure, timeout, HTTP error
            status, or a body that is not valid JSON.
        KeyError: If the JSON document lacks one of the expected keys.
    """
    # Bound the request so a stalled connection cannot hang the script.
    resp = requests.get(crossword_data_url, timeout=timeout)
    resp.raise_for_status()
    copy = resp.json()["data"]["copy"]

    clues = []
    for direction in copy["clues"]:
        # Slice rather than index so an empty title yields "" instead of raising.
        direction_letter = direction["title"][:1].lower()
        for clue in direction["clues"]:
            clue_text = html.unescape(clue["clue"])
            clues.append(
                f'{clue["number"]}{direction_letter} - {clue_text} ({clue["length"]})'
            )
    return clues
# Script body: write a link to today's crossword and its shortest clue to
# crossword.txt next to this script. On any failure, write a fallback link
# and message instead so the reader of the file always has something to show.
output_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), "crossword.txt"
)
try:
    today = datetime.date.today()
    crossword_data_url, crossword_web_url = crossword_urls_for_date(today)
    clues = fetch_crossword(crossword_data_url)
    # min() raises ValueError on an empty clue list, which lands in the
    # fallback branch below.
    shortest_clue = min(clues, key=len)
    # Clues can contain non-ASCII characters after HTML unescaping, so the
    # encoding is pinned rather than left to the process locale.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(f"{crossword_web_url}\n")
        f.write(shortest_clue)
except Exception as e:
    # Top-level boundary: report the error, then leave a fallback file behind.
    print(e)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(
            "https://www.thetimes.co.uk/puzzleclub/crosswordclub/\nAlas your script is broken..."
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I use the script above to link to the latest Times crossword automatically in my Vim startup screen. Building it required reverse engineering a certain amount of The Times website, so I'm sharing it here to save other people some time if they so wish.
If you want something similar, it is actually possible to find the data publicly at e.g. https://feeds.thetimes.co.uk/puzzles/crossword/20240612/55137/data.json. However, you will need to know the ids of the crossword, which I wasn't able to work out reliably.