Skip to content

Instantly share code, notes, and snippets.

@fry69
Created May 16, 2024 05:53
Show Gist options
  • Save fry69/181fea0bd462f25a3379abf27475f9e8 to your computer and use it in GitHub Desktop.
Save fry69/181fea0bd462f25a3379abf27475f9e8 to your computer and use it in GitHub Desktop.
Extract created_at timestamps for OpenRouter models from their webpage
from bs4 import BeautifulSoup
import subprocess
import re
# Any OpenRouter model page should do the trick, e.g.
# curl https://openrouter.ai/models/google/gemini-flash-1.5 > or.html
# This script requires prettier (https://prettier.io) to be installed: the embedded
# JavaScript is reformatted with it so the regular expressions below can match it
def collect_push_calls(html_content: str) -> str:
    """Return the ``self.__next_f.push(`` lines of the page, one per line.

    The HTML is parsed and re-serialized with ``prettify()``; every resulting
    line that, after stripping leading whitespace, starts with
    ``self.__next_f.push(`` is kept and terminated with a newline.
    """
    soup = BeautifulSoup(html_content, "html.parser")
    kept = []
    for line in soup.prettify().split("\n"):
        stripped = line.lstrip()
        if stripped.startswith("self.__next_f.push("):
            kept.append(stripped + "\n")
    return "".join(kept)


def format_with_prettier(js_source: str) -> str:
    """Pipe *js_source* through the ``prettier`` CLI and return its stdout.

    Raises:
        FileNotFoundError: if the ``prettier`` executable cannot be found.
        subprocess.CalledProcessError: if prettier exits with a non-zero
            status (previously this was ignored and produced empty output).
    """
    # NOTE(review): --single-quote presumably makes prettier re-quote the
    # payload strings so that embedded double quotes lose their backslash
    # escapes, which is what lets the patterns in extract_model_pairs()
    # match -- confirm against a real page.
    completed = subprocess.run(
        [
            "prettier",
            "--parser",
            "babel",
            "--print-width",
            "100",
            "--single-quote",
            "--trailing-comma",
            "es5",
        ],
        input=js_source,
        text=True,
        encoding="utf-8",  # do not depend on the platform's locale encoding
        stdout=subprocess.PIPE,
        check=True,  # fail loudly instead of silently printing nothing
    )
    return completed.stdout


def extract_model_pairs(text: str) -> list:
    """Return unique ``(created_at, base_model_slug)`` pairs found in *text*.

    Pairs are returned in order of first appearance. A pair is dropped when
    either value is empty, and positions where only one of the two fields has
    a match are ignored.
    """
    created_at_matches = re.findall(r'"created_at":"([^"]*)"', text)
    base_model_slug_matches = re.findall(r'"base_model_slug":"([^"]*)"', text)
    # NOTE(review): the two patterns are matched independently and paired by
    # position, so this assumes every record carries both fields in the same
    # order -- confirm against the page payload.
    seen = set()
    pairs = []
    # zip() stops at the shorter list; the surplus entries of the longer list
    # have no partner and therefore can never form a pair.
    for created_at, slug in zip(created_at_matches, base_model_slug_matches):
        if not (created_at and slug):
            continue
        pair = (created_at, slug)
        if pair in seen:
            continue
        seen.add(pair)
        pairs.append(pair)
    return pairs


def build_insert_statement(timestamp: str, model_id: str) -> str:
    """Return the ``INSERT INTO added_models`` statement for one model.

    Single quotes inside the values are doubled so that scraped text cannot
    terminate the SQL string literal early.
    """
    safe_timestamp = timestamp.replace("'", "''")
    safe_model_id = model_id.replace("'", "''")
    return (
        "INSERT INTO added_models (timestamp, id) "
        f"VALUES ('{safe_timestamp}', '{safe_model_id}');"
    )


def main() -> None:
    """Read ``or.html`` and print one INSERT statement per unique model."""
    # Open the local HTML file and read its contents
    with open("or.html", "r", encoding="utf-8") as f:
        html_content = f.read()
    formatted_lines = format_with_prettier(collect_push_calls(html_content))
    for created_at, slug in extract_model_pairs(formatted_lines):
        print(build_insert_statement(created_at, slug))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment