Skip to content

Instantly share code, notes, and snippets.

@jarrodnorwell
Created January 22, 2024 14:05
Show Gist options
  • Save jarrodnorwell/f67868b3fcb1a9fde6efda91249c87aa to your computer and use it in GitHub Desktop.
Save jarrodnorwell/f67868b3fcb1a9fde6efda91249c87aa to your computer and use it in GitHub Desktop.
Wodwell Data Scraper w/o BS4
from .wodwell import Wodwell
if __name__ == "__main__":
    # Example usage of the Wodwell client. Every call below performs live
    # HTTP requests against wodwell.com.
    wodwell = Wodwell()

    # Fetch a single page of the listing. The keyword is ``sort_by``, which
    # is the parameter name declared by ``Wodwell.get_wods``.
    wodwell.get_wods(page=1, sort_by="popular")

    # Walks every page of the listing, one request per page.
    wodwell.get_all_wods(sort_by="popular")  # best not use this

    # example usage
    for wod in wodwell.get_wods(page=1, sort_by="popular"):
        wodwell.get_wod(wod_id=wod["id"])
from json import loads
from requests import Session
from typing import Any, Dict, List
class Wodwell:
    """Small client for the JSON endpoints exposed under wodwell.com/wp-json.

    All requests go through one shared ``requests.Session``. The HTTP status
    code is not checked: whatever body the server returns is parsed as JSON.
    """

    _API_ROOT = "https://wodwell.com/wp-json"
    # Seconds to wait for the server before giving up, so a stalled
    # connection cannot block the caller forever.
    _TIMEOUT = 30

    def __init__(self) -> None:
        """Create the HTTP session and set the headers sent on every request."""
        self.session = Session()
        # update() keeps the session's default, case-insensitive header
        # mapping instead of replacing it with a plain dict.
        self.session.headers.update(
            {
                "Content-Type": "application/json",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
            }
        )

    def _get_json(self, url: str, params: Any = None) -> Any:
        """GET *url* (with optional query *params*) and return the parsed JSON body."""
        response = self.session.get(url, params=params, timeout=self._TIMEOUT)
        text = response.text
        # json.loads rejects text that starts with a byte-order mark, so
        # drop a leading one if present.
        if text.startswith("\ufeff"):
            text = text[1:]
        return loads(text)

    def _get_listing(self, page: int, sort_by: str) -> Dict[str, Any]:
        """Return the raw listing payload for one page of results."""
        # Passing the query as params lets the session URL-encode the values.
        return self._get_json(
            f"{self._API_ROOT}/wodwell/v2/wods/",
            params={"sort": sort_by, "paged": page},
        )

    def get_wods(self, page: int = 1, sort_by: str = "popular") -> List[Dict[str, Any]]:
        """Return the ``"wods"`` list of a single listing page.

        Args:
            page: Page number, sent as the ``paged`` query parameter.
            sort_by: Sort order, sent as the ``sort`` query parameter.

        Raises:
            KeyError: If the response has no ``"wods"`` key.
        """
        return self._get_listing(page, sort_by)["wods"]

    def get_all_wods(self, sort_by: str = "popular") -> List[Dict[str, Any]]:
        """Return the ``"wods"`` entries of every listing page, concatenated.

        Pages are requested in order starting at 1. Paging stops as soon as
        the number of collected entries reaches the ``"count"`` reported by
        the first page, or when a page comes back empty, whichever happens
        first. One request is made per page.

        Args:
            sort_by: Sort order, sent as the ``sort`` query parameter.

        Raises:
            KeyError: If a response lacks the ``"count"`` or ``"wods"`` key.
        """
        first_page = self._get_listing(1, sort_by)
        # The first page carries both the total and the first batch, so no
        # separate request is needed just to read the count.
        total = first_page["count"]
        wods = list(first_page["wods"])

        page = 1
        while len(wods) < total:
            page += 1
            batch = self._get_listing(page, sort_by)["wods"]
            if not batch:
                # The server ran out of entries before the advertised count
                # was reached; stop instead of requesting empty pages forever.
                break
            wods.extend(batch)
        return wods

    def get_wod(self, wod_id: int) -> Dict[str, Any]:
        """Return the parsed JSON document for the entry with id *wod_id*."""
        return self._get_json(f"{self._API_ROOT}/wp/v2/wods/{wod_id}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment