Skip to content

Instantly share code, notes, and snippets.

@hay
Created November 24, 2022 09:37
Show Gist options
  • Save hay/de8f14e787fedfe504417e4b3e8a2613 to your computer and use it in GitHub Desktop.
Save hay/de8f14e787fedfe504417e4b3e8a2613 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
from bs4 import BeautifulSoup
from dataknead import Knead
from pathlib import Path
import requests
import sys
import urllib.request
# Revue API token. main() hands it to Builder, and it ends up in the
# "Authorization: Token ..." request header. Replace the placeholder with a
# real token before running the script.
API_KEY = "your-api-key-here"
class Builder:
    """Download Revue newsletter issues and split them into local files.

    ``get_issues`` fetches the issue list from the API and stores the raw
    response in ``data/issues.json``. ``parse_issues`` then writes each
    issue's HTML to ``issues/<id>.html`` and an index without the HTML to
    ``data/index.json``.
    """

    API_ENDPOINT = "https://www.getrevue.co/api"

    # Seconds to wait for the API; without a timeout requests blocks forever
    # on an unresponsive server.
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key: str) -> None:
        """Store the API token used to authenticate requests.

        Args:
            api_key: Token sent in the ``Authorization`` header.
        """
        # Use the token supplied by the caller, not the module-level global
        self.api_key = api_key
        self.issues = None

    def get_issues(self) -> None:
        """Fetch all issues from the API and keep them in ``self.issues``.

        The decoded response is also written to ``data/issues.json``.

        Raises:
            Exception: If the status code is not 200, the JSON body is empty
                or not a list, or the first issue has no ``html`` key.
        """
        url = f"{self.API_ENDPOINT}/v2/issues"
        req = requests.get(
            url,
            headers={"Authorization": f"Token {self.api_key}"},
            timeout=self.REQUEST_TIMEOUT,
        )

        if req.status_code != 200:
            raise Exception(f"Invalid status code: {req.status_code}")

        data = req.json()

        # Check if data is valid
        if not data:
            raise Exception("No data in JSON")

        # A non-list body (for example a JSON object) cannot be indexed with
        # data[0] below, so reject it with a readable message instead.
        if not isinstance(data, list):
            raise Exception("Valid JSON, but not a list of issues")

        # Also check for the html property
        if "html" not in data[0]:
            raise Exception("Valid JSON, but no html attribute")

        # Make sure the output directory exists before writing into it
        Path("data").mkdir(parents=True, exist_ok=True)
        Knead(data).write("data/issues.json")
        self.issues = data

    def parse_issues(self) -> None:
        """Create an index and split out the html to separate files.

        Every issue's HTML is written to ``issues/<id>.html`` with its images
        swapped for a placeholder. The ``html`` entry of each issue in
        ``self.issues`` is then set to ``None`` (the list is modified in
        place) and the result is written to ``data/index.json``.

        Raises:
            Exception: If no issues have been loaded yet.
        """
        if self.issues is None:
            raise Exception("No issues loaded, call get_issues() first")

        # Make sure the output directories exist before writing into them
        Path("issues").mkdir(parents=True, exist_ok=True)
        Path("data").mkdir(parents=True, exist_ok=True)

        for issue in self.issues:
            issue_id = issue["id"]
            path = f"issues/{issue_id}.html"
            print(f"Parsing {issue_id}")

            html = self._replace_images(issue["html"])

            # Write issue html; explicit UTF-8 so non-ASCII text does not
            # depend on the platform's default encoding.
            with open(path, "w", encoding="utf-8") as f:
                f.write(html)

            print(f"Wrote {path}")

            # The index only needs the metadata, the html lives in its own file
            issue["html"] = None

        Knead(self.issues).write("data/index.json", indent=4)

    @staticmethod
    def _replace_images(html: str) -> str:
        """Return prettified ``html`` with all images replaced by a placeholder."""
        soup = BeautifulSoup(html, "lxml")

        # Replace all images with the 'image-missing' image
        for img in soup.select("img"):
            img["src"] = "img/image-missing.svg"
            img["class"] = "image-missing"
            # Dutch for "This image is not available"
            img["alt"] = "Deze afbeelding is niet beschikbaar"

        return soup.prettify()
def main():
    """Run the export: fetch the issues, then split them into files."""
    revue = Builder(API_KEY)

    # Each step is announced on stdout right before it runs
    steps = (
        ("Getting all issues", revue.get_issues),
        ("Now parsing", revue.parse_issues),
    )

    for announcement, step in steps:
        print(announcement)
        step()


# Only run the export when executed as a script, not when imported
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment