Last active
December 5, 2023 17:30
-
-
Save CatherineH/6b9e9edd7bf8a29564e5d0f62e22d24a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import re | |
import os | |
from time import sleep | |
import urllib.parse | |
from pandas import read_csv | |
from selenium.webdriver.common.by import By | |
from selenium import webdriver | |
# Matches text that starts with an hour count such as "12 Hours", "12½ Hours"
# or "½ Hours". Group 1 holds the whole-hour digits and is None for a bare "½".
_HOURS_PATTERN = re.compile(r"(?:([0-9]+)½?|½) Hours")


def name_to_time(game_name):
    """Look up a game on howlongtobeat.com and return its hour count.

    Opens the site's search page for ``game_name`` in a Chrome WebDriver
    session and scans every ``<div>`` for text beginning with
    ``"<number> Hours"``. The first match is taken as the answer.

    Args:
        game_name: Title of the game to search for; it is URL-quoted before
            being placed in the query string.

    Returns:
        The whole number of hours as an ``int`` (a trailing "½" is dropped,
        so "12½ Hours" gives 12 and a bare "½ Hours" gives 0), or ``None``
        when no div on the page matches.
    """
    query = urllib.parse.quote(game_name)
    query_url = f"https://howlongtobeat.com/?q={query}"
    driver = webdriver.Chrome()
    try:
        driver.get(query_url)
        for div in driver.find_elements(By.TAG_NAME, "div"):
            match = _HOURS_PATTERN.match(div.text)
            if match:
                # assume the first result is correct
                return int(match.group(1) or 0)
    finally:
        # Always shut the browser down; otherwise every lookup leaves a
        # Chrome/chromedriver process running.
        driver.quit()
    print(f"could not find: {game_name}")
    return None
if __name__ == "__main__":
    # games list is a csv with a "Name" column. I generate mine by loading my games libraries into https://playnite.link/,
    # hiding the ones I've completed or don't want to play, and then use the "library exporter advanced" Add-on:
    # https://playnite.link/addons.html#LibraryExporter_54bf64c6-c453-4cbc-92f8-4960b56f930e
    games_list = read_csv("games_list.csv")
    # Element-wise comparison on the "Hidden" column: keep rows whose value equals False.
    games_list = games_list[games_list["Hidden"] == False]  # noqa: E712
    cache_filename = "cached_data.pkl"
    cached_data = {}
    if os.path.exists(cache_filename):
        # NOTE: unpickling can execute arbitrary code, so this must only ever
        # read a cache file that this script wrote itself.
        with open(cache_filename, "rb") as cache_file:
            cached_data = pickle.load(cache_file)
    print(cached_data)
    for game_name in sorted(games_list["Name"]):
        if game_name in cached_data:
            # skip this, we've already read it
            continue
        print(game_name)
        time_to_beat = name_to_time(game_name)
        # Compare against None explicitly so a genuine 0-hour result is still
        # cached instead of being looked up again on every run.
        if time_to_beat is not None:
            cached_data[game_name] = time_to_beat
            # Persist after every successful lookup so an interrupted run
            # resumes where it left off; the with-block closes the file.
            with open(cache_filename, "wb") as cache_file:
                pickle.dump(cached_data, cache_file)
        sleep(60)  # time to beat throttles after about 5 page loads without a break
    print(f"total play time: {sum(cached_data.values())} hours")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment