Created
May 16, 2024 05:53
-
-
Save fry69/181fea0bd462f25a3379abf27475f9e8 to your computer and use it in GitHub Desktop.
Extract created_at timestamps for OpenRouter models from their webpage
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import subprocess

from bs4 import BeautifulSoup

# Print SQL INSERT statements that pair each "base_model_slug" found in a
# locally saved OpenRouter model page with the "created_at" value found at the
# same position.
#
# Any OpenRouter model page should do the trick, e.g.
#   curl https://openrouter.ai/models/google/gemini-flash-1.5 > or.html
#
# The `prettier` CLI must be installed: the extracted JavaScript is run
# through it so that the regular expressions below can search the text.
# NOTE(review): presumably prettier's --single-quote re-quoting removes the
# backslash escapes around the embedded double quotes -- confirm.


def _sql_quote(value):
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so that scraped text cannot terminate
    the literal early.
    """
    return "'" + value.replace("'", "''") + "'"


# Read the saved page. The encoding is pinned so decoding does not depend on
# the platform default (assumes the page was saved as UTF-8).
with open("or.html", "r", encoding="utf-8") as f:
    html_content = f.read()

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")
lines = soup.prettify().split("\n")

# Keep only the lines that begin a self.__next_f.push( call.
stripped_lines = (line.lstrip() for line in lines)
matched_lines = "".join(
    f"{line}\n" for line in stripped_lines if line.startswith("self.__next_f.push(")
)

# Format the code using prettier. check=True turns a prettier failure into an
# exception instead of silently yielding empty output (and no statements).
formatted_lines = subprocess.run(
    [
        "prettier",
        "--parser",
        "babel",
        "--print-width",
        "100",
        "--single-quote",
        "--trailing-comma",
        "es5",
    ],
    input=matched_lines,
    text=True,
    stdout=subprocess.PIPE,
    check=True,
).stdout

created_at_matches = re.findall(r'"created_at":"([^"]*)"', formatted_lines)
base_model_slug_matches = re.findall(r'"base_model_slug":"([^"]*)"', formatted_lines)

# Pair the two match lists by position. zip() stops at the shorter list, which
# is equivalent to padding the shorter one with None and skipping those pairs.
# NOTE(review): positional pairing assumes every record carries both keys; a
# record with only one of them would shift all later pairs -- verify.
unique_pairs = set()
for created_at, base_model_slug in zip(created_at_matches, base_model_slug_matches):
    # Skip pairs where either captured value is empty.
    if not (created_at and base_model_slug):
        continue
    pair = (created_at, base_model_slug)
    # Print each distinct pair once, in first-seen order.
    if pair in unique_pairs:
        continue
    unique_pairs.add(pair)
    print(
        "INSERT INTO added_models (timestamp, id) VALUES "
        f"({_sql_quote(created_at)}, {_sql_quote(base_model_slug)});"
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment