fry69 · May 16, 2024 05:53
diff --git a/extract.py b/extract.py
 from bs4 import BeautifulSoup
 import subprocess
 import re

 # Any OpenRouter model page should do the trick, e.g.
 # curl https://openrouter.ai/models/google/gemini-flash-1.5 > or.html

 # Due to skill issues this script needs prettier installed to format the code
 # so Python can correctly search it with RegEx

 # Open the local HTML file and read its contents.
 # The encoding is explicit so decoding does not depend on the platform locale.
 with open("or.html", "r", encoding="utf-8") as f:
    html_content = f.read()

 # Parse the HTML content using BeautifulSoup
 soup = BeautifulSoup(html_content, "html.parser")

 lines = soup.prettify().split("\n")

 # Keep only the lines that start a `self.__next_f.push(` call, stripped of
 # their leading whitespace, one per line.
 matched_lines = "".join(
    stripped + "\n"
    for stripped in (line.lstrip() for line in lines)
    if stripped.startswith("self.__next_f.push(")
 )

 # Format the code using prettier.
 # NOTE(review): presumably `--single-quote` re-quotes the pushed string
 # literals so the embedded JSON's double quotes come out unescaped, which is
 # what lets the regexes below match -- confirm against real prettier output.
 # check=True makes a failing prettier run raise CalledProcessError instead of
 # silently producing empty output (and therefore no INSERT statements).
 formatted_lines = subprocess.run(
    [
        "prettier",
        "--parser",
        "babel",
        "--print-width",
        "100",
        "--single-quote",
        "--trailing-comma",
        "es5",
    ],
    input=matched_lines,
    text=True,
    encoding="utf-8",
    stdout=subprocess.PIPE,
    check=True,
 ).stdout

 created_at_matches = re.findall(r'"created_at":"([^"]*)"', formatted_lines)
 base_model_slug_matches = re.findall(r'"base_model_slug":"([^"]*)"', formatted_lines)


 def sql_quote(value):
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so the emitted statement stays valid.
    """
    return "'" + value.replace("'", "''") + "'"


 # Set of (created_at, base_model_slug) pairs that were already printed
 unique_pairs = set()

 # The n-th created_at is paired with the n-th base_model_slug; zip() stops at
 # the shorter list, so surplus matches on either side are ignored.
 # NOTE(review): this assumes both keys occur once per record, in the same
 # order -- verify against the page data.
 for created_at, base_model_slug in zip(created_at_matches, base_model_slug_matches):
    # Skip pairs where either captured value is empty
    if not (created_at and base_model_slug):
        continue
    pair = (created_at, base_model_slug)
    if pair in unique_pairs:
        continue
    unique_pairs.add(pair)
    print(
        "INSERT INTO added_models (timestamp, id) VALUES "
        f"({sql_quote(created_at)}, {sql_quote(base_model_slug)});"
    )
	from bs4 import BeautifulSoup
	import subprocess
	import re

	# Any OpenRouter model page should do the trick, e.g.
	# curl https://openrouter.ai/models/google/gemini-flash-1.5 > or.html

	# Due to skill issues this script needs prettier installed to format the code
	# so Python can correctly search it with RegEx

	# Open the local HTML file and read its contents.
	# The encoding is explicit so decoding does not depend on the platform locale.
	with open("or.html", "r", encoding="utf-8") as f:
	    html_content = f.read()

	# Parse the HTML content using BeautifulSoup
	soup = BeautifulSoup(html_content, "html.parser")

	lines = soup.prettify().split("\n")

	# Keep only the lines that start a `self.__next_f.push(` call, stripped of
	# their leading whitespace, one per line.
	matched_lines = "".join(
	    stripped + "\n"
	    for stripped in (line.lstrip() for line in lines)
	    if stripped.startswith("self.__next_f.push(")
	)

	# Format the code using prettier.
	# NOTE(review): presumably `--single-quote` re-quotes the pushed string
	# literals so the embedded JSON's double quotes come out unescaped, which is
	# what lets the regexes below match -- confirm against real prettier output.
	# check=True makes a failing prettier run raise CalledProcessError instead of
	# silently producing empty output (and therefore no INSERT statements).
	formatted_lines = subprocess.run(
	    [
	        "prettier",
	        "--parser",
	        "babel",
	        "--print-width",
	        "100",
	        "--single-quote",
	        "--trailing-comma",
	        "es5",
	    ],
	    input=matched_lines,
	    text=True,
	    encoding="utf-8",
	    stdout=subprocess.PIPE,
	    check=True,
	).stdout

	created_at_matches = re.findall(r'"created_at":"([^"]*)"', formatted_lines)
	base_model_slug_matches = re.findall(r'"base_model_slug":"([^"]*)"', formatted_lines)


	def sql_quote(value):
	    """Return *value* as a single-quoted SQL string literal.

	    Embedded single quotes are doubled so the emitted statement stays valid.
	    """
	    return "'" + value.replace("'", "''") + "'"


	# Set of (created_at, base_model_slug) pairs that were already printed
	unique_pairs = set()

	# The n-th created_at is paired with the n-th base_model_slug; zip() stops at
	# the shorter list, so surplus matches on either side are ignored.
	# NOTE(review): this assumes both keys occur once per record, in the same
	# order -- verify against the page data.
	for created_at, base_model_slug in zip(created_at_matches, base_model_slug_matches):
	    # Skip pairs where either captured value is empty
	    if not (created_at and base_model_slug):
	        continue
	    pair = (created_at, base_model_slug)
	    if pair in unique_pairs:
	        continue
	    unique_pairs.add(pair)
	    print(
	        "INSERT INTO added_models (timestamp, id) VALUES "
	        f"({sql_quote(created_at)}, {sql_quote(base_model_slug)});"
	    )