Created
January 22, 2024 14:05
-
-
Save jarrodnorwell/f67868b3fcb1a9fde6efda91249c87aa to your computer and use it in GitHub Desktop.
Wodwell Data Scraper w/o BS4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from .wodwell import Wodwell | |
if __name__ == "__main__":
    wodwell = Wodwell()

    # Fetch a single page of the WOD listing.
    wodwell.get_wods(page=1, sort_by="popular")

    # Fetches every page of the listing, one request per page -- best not
    # to use this.
    wodwell.get_all_wods(sort_by="popular")

    # Example usage: fetch the full record for each WOD on the first page.
    for wod in wodwell.get_wods(page=1, sort_by="popular"):
        wodwell.get_wod(wod_id=wod["id"])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from json import loads | |
from requests import Session | |
from typing import Any, Dict, List | |
class Wodwell:
    """Small client for the wodwell.com JSON endpoints.

    Every request goes through one shared ``Session`` so the headers set in
    ``__init__`` are sent with each call.
    """

    # Listing endpoint (paged, sortable) and single-record endpoint.
    _WODS_URL = "https://wodwell.com/wp-json/wodwell/v2/wods/"
    _WOD_URL = "https://wodwell.com/wp-json/wp/v2/wods/"

    def __init__(self) -> None:
        """Create the HTTP session and set the headers sent with each request."""
        self.session = Session()
        # Plain assignment replaces the session's header mapping instead of
        # merging into it, so only these two headers are configured here.
        self.session.headers = {
            "Content-Type": "application/json",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
        }

    def _get_json(self, url: str) -> Any:
        """GET ``url`` and return the response body parsed as JSON."""
        response = self.session.get(url)
        # Round-tripping through "utf-8-sig" drops a leading byte-order mark
        # if the body has one; json.loads refuses text that starts with a BOM.
        return loads(response.text.encode().decode("utf-8-sig"))

    def get_wods(self, page: int = 1, sort_by: str = "popular") -> List[Dict[str, Any]]:
        """Return the ``"wods"`` entry of one page of the listing.

        Args:
            page: Page number sent as the ``paged`` query parameter.
            sort_by: Value sent as the ``sort`` query parameter.

        Raises:
            KeyError: If the response JSON has no ``"wods"`` key.
        """
        return self._get_json(f"{self._WODS_URL}?sort={sort_by}&paged={page}")["wods"]

    def get_all_wods(self, sort_by: str = "popular") -> List[Dict[str, Any]]:
        """Return the WODs from every page of the listing, in page order.

        The first page supplies both the reported ``"count"`` and the first
        batch of items; further pages are requested until that many items
        have been collected or a page comes back empty.

        Args:
            sort_by: Value sent as the ``sort`` query parameter.

        Raises:
            KeyError: If a response lacks the ``"count"`` or ``"wods"`` key.
        """
        first_page = self._get_json(f"{self._WODS_URL}?sort={sort_by}&paged=1")
        count = first_page["count"]  # presumably an int total -- confirm against the API
        wods = list(first_page["wods"])

        page = 2
        # Strict "<": with "<=" the loop kept requesting pages after every
        # item had already been collected and never terminated.
        while len(wods) < count:
            batch = self.get_wods(page=page, sort_by=sort_by)
            if not batch:
                # The server ran out of items before reaching the reported
                # count; stop rather than request empty pages forever.
                break
            wods += batch
            page += 1
        return wods

    def get_wod(self, wod_id: int) -> Dict[str, Any]:
        """Return the parsed JSON record for the WOD with id ``wod_id``."""
        return self._get_json(f"{self._WOD_URL}{wod_id}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment