Created
March 13, 2024 08:09
-
-
Save metal3d/448da715534baf686c1fffc685483296 to your computer and use it in GitHub Desktop.
Generate French products in JSON with AI, using LM Studio or OpenAI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generate a list of products in JSON format using AI | |
# Author: Patrice Ferlet <[email protected]> | |
# License: MIT | |
# | |
# This script uses the OpenAI API to generate a list of products in JSON format. You may use
# LM Studio to serve a preferred model locally. Use the API tab to serve the model.
import argparse | |
import glob | |
import json | |
import logging | |
import os | |
from openai import OpenAI | |
from rich.logging import RichHandler | |
NUM_PRODUCTS_PER_REQUEST = 5  # The number of products you want to generate | |
NUM_PRODUCTS = 50  # The total number of products you want to generate | |
MODEL_NAME = "local-model"  # The model name you want to use, if you use LM Studio, it is not used | |
API_KEY = "not-needed"  # The API key is not needed if you use LM Studio | |
API_URL = (
    "http://localhost:1234/v1"  # The URL of the API, if you use OpenAI, set it to None | |
)
# Route all log records through rich so console output is colored/formatted.
logging.basicConfig(
    level=logging.INFO,
    format="%(message)s",
    datefmt="[%X]",
    handlers=[RichHandler()],
)
# Point to the local server (module-level client reused by every request).
CLIENT = OpenAI(base_url=API_URL, api_key=API_KEY)
# Example schema shown to the model so it answers with a parseable JSON
# array of product objects ("{...}" stands for "more items like this").
# Fix: the array was previously closed with "}" instead of "]", giving the
# model a malformed example to imitate.
SCHEMA = """
```json
[
  {
    "name": "string",
    "description": "string",
    "price": 0.0,
    "categories": ["string"]
  }, {...}
]
```
"""
# Names of every product generated so far, used to ask the model for
# new, non-duplicate products on follow-up requests.
KNOWN_PRODUCTS = []
INIT_HISTORY = [
    # the system role is used to set the language and the role of the model. We explicitly
    # tell that the model must respond in French - change the message to your target language
    {
        "role": "system",
        "content": (
            "Tu es un développeur français et ne répond qu'en français. "
            "Tu peux proposer des solutions à des problèmes de programmation "
            "et générer du contenu technique."
        ),
    },
    # here we ask the model to generate a list of products in JSON format,
    # with the given schema
    {
        "role": "user",
        "content": (
            "J'ai besoin que tu me génères, en JSON, une liste de "
            f"{NUM_PRODUCTS_PER_REQUEST} produits avec name, "
            "description, price et categories. Le nom doit être pertinent et unique, "
            "la description claire, et de 2 à 5 catégories. "
            "Le contenu doit être en français. "
            "Retourne un tableau d'objets JSON. "
            "Le schema doit être conforme à : " + SCHEMA
        ),
    },
]
# This is a prompt to add to the conversation to continue the
# generation: we say that we already have some products, so
# the model will continue from this point. It avoids sending the entire history.
APPEND_PROMPT = """
J'ai déjà ces produits :
"""
# Shown on stdout when previous products_*.json files are found in the
# working directory, asking whether to wipe them or continue from them.
FOUND_PRODUCTS_PROMPT = """Found products files in the current directory.
We can continue from here. The script will load the existing products and ask
the model to generate more products.
A new file will be created with the new products.
Or you can remove the existing files and start from scratch.
Do you want to remove them and start from scratch? (y/N): """
def load_existing_products():
    """Return the names of products already saved in products_*.json files."""
    names = []
    for path in glob.glob("products_*.json"):
        with open(path, "r", encoding="utf-8") as handle:
            names.extend(item["name"] for item in json.load(handle))
    return names
def list_of_products():
    """Build the continuation prompt: a markdown bullet list of known product names.

    Returns the APPEND_PROMPT header followed by one "- name" line per
    product in KNOWN_PRODUCTS.
    """
    # Fix: a newline is required between the header sentence and the first
    # bullet — the original glued the first product name onto the prompt
    # text ("...produits :- Name").
    bullets = "\n".join(f"- {name}" for name in KNOWN_PRODUCTS)
    return APPEND_PROMPT.strip() + "\n" + bullets
def create_completion(continuation=False):
    """Send the conversation to the model and return the raw completion.

    When product files already exist on disk (or *continuation* is set),
    a follow-up user message listing the known product names is appended
    so the model does not repeat itself.
    """
    # Copy so the module-level INIT_HISTORY is never mutated.
    messages = INIT_HISTORY.copy()
    should_continue = continuation or bool(glob.glob("products_*.json"))
    if should_continue:
        messages.append({"role": "user", "content": list_of_products()})
    logging.info("History: %s", messages)
    return CLIENT.chat.completions.create(
        model=MODEL_NAME,  # this field is currently unused if you use LM Studio
        messages=messages,  # pyright: ignore
        temperature=0.7,
        stream=False,
    )
def extract_json_from_markdown(content):
    """Extract JSON from a fenced ``` block in the model response.

    Returns the parsed object, or None when no parseable JSON is found.
    (Fixes a stray "" literal concatenated to the original docstring.)
    """
    fence_start = content.find("```")
    fence_end = content.rfind("```")
    # Require two distinct fences; the original sliced garbage indices when
    # no fence was present and relied on the JSON parser to fail.
    if fence_start == -1 or fence_end <= fence_start:
        logging.info("JSON content not found as Markdown")
        return None
    json_content = content[fence_start + 3 : fence_end]
    # Models often label the fence as ```json — drop that language tag.
    if json_content.startswith("json"):
        json_content = json_content[4:]
    try:
        return json.loads(json_content)
    except json.JSONDecodeError:
        logging.info("JSON content not found as Markdown")
        return None
def extract_json_from_content(content):
    """Parse the whole response as JSON, wrapping it in [] when needed.

    Returns the parsed list, or None when the content is not valid JSON.
    """
    content = content.strip()
    # The model sometimes returns bare objects ("{...}, {...}") without the
    # surrounding array brackets — add them before parsing.
    if not content.startswith("["):
        content = f"[{content}]"
    try:
        return json.loads(content)
    # Narrowed from "except Exception": only a parse failure is expected here.
    except json.JSONDecodeError:
        logging.info("JSON content not found as full response")
        return None
def extract_json_content(content):
    """Parse the model response and record the new product names.

    Tries a plain-JSON parse first, then falls back to a markdown fence.
    On success, appends every product name to KNOWN_PRODUCTS and returns
    the parsed list; returns None when nothing could be parsed.
    """
    stripped = content.strip()
    parsed = extract_json_from_content(stripped)
    if parsed is None:
        parsed = extract_json_from_markdown(stripped)
    if parsed is None:
        logging.error("Error parsing JSON content, no content")
        return None
    # Remember the names so follow-up prompts can exclude duplicates.
    for product in parsed:
        KNOWN_PRODUCTS.append(product["name"])
    return parsed
def save_json_content(content):
    """Write *content* to the next products_NNNN.json file.

    Does nothing (with a warning) when *content* is None. The file index
    is derived from how many product files already exist in the directory.
    """
    if content is None:
        logging.warning("No content to save")
        return
    index = len(glob.glob("products_*.json")) + 1
    filename = f"products_{index:04}.json"
    with open(filename, "w", encoding="utf-8") as json_file:
        # ensure_ascii=False keeps the French accents readable in the file
        # instead of \uXXXX escape sequences.
        json.dump(content, json_file, indent=2, ensure_ascii=False)
def main():
    """Parse CLI flags, then request products from the model in batches.

    Side effects: may delete or create products_*.json files in the
    current directory, prompts on stdin when previous files exist.
    """
    # Flags override the module-level defaults; KNOWN_PRODUCTS may be
    # rebound when continuing from existing files.
    global NUM_PRODUCTS_PER_REQUEST, NUM_PRODUCTS, KNOWN_PRODUCTS  # pylint: disable=global-statement
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-r",
        "--per-request",
        type=int,
        default=NUM_PRODUCTS_PER_REQUEST,
        help=(
            "The number of products you want to generate per request, "
            f"default is {NUM_PRODUCTS_PER_REQUEST}. "
            "(not guaranteed, the model is sometimes a bit cheeky and decides to do what he wants)"
        ),
    )
    arg_parser.add_argument(
        "-p",
        "--num-products",
        type=int,
        default=NUM_PRODUCTS,
        # Fix: the original help text was split mid-sentence
        # ("...you want to, default is N. generate...").
        help=(
            f"The total number of products you want to generate, default is {NUM_PRODUCTS}. "
            "(one more time, the model is a bit cheeky)"
        ),
    )
    args = arg_parser.parse_args()
    NUM_PRODUCTS_PER_REQUEST = args.per_request
    NUM_PRODUCTS = args.num_products
    # Offer to wipe previous runs, otherwise reload them so the model continues.
    existing_files = glob.glob("products_*.json")
    if existing_files:
        logging.warning("Existing files: %s", existing_files)
        if input(FOUND_PRODUCTS_PROMPT).lower() == "y":
            for file in existing_files:
                os.remove(file)
        else:
            KNOWN_PRODUCTS = load_existing_products()
    # Initial request produces the first batch of products.
    completion = create_completion(continuation=False)
    content = str(completion.choices[0].message.content)
    logging.info("Response: %s", content)
    json_content = extract_json_content(content)
    logging.info("Extracted JSON: %s", json_content)
    save_json_content(json_content)
    # Fix: the initial batch already produced NUM_PRODUCTS_PER_REQUEST
    # products; the original looped NUM_PRODUCTS // NUM_PRODUCTS_PER_REQUEST
    # more times and overshot the requested total by one batch.
    remaining = max(NUM_PRODUCTS - NUM_PRODUCTS_PER_REQUEST, 0)
    for _ in range(remaining // NUM_PRODUCTS_PER_REQUEST):
        completion = create_completion(
            continuation=True
        )  # continuation request to generate more products
        content = str(completion.choices[0].message.content)
        json_content = extract_json_content(content)
        logging.info("Extracted JSON: %s", json_content)
        save_json_content(json_content)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pipfile for the product-generation script: installs the OpenAI client
# and rich (console logging) from PyPI, targeting Python 3.12.
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
openai = "*"  # OpenAI-compatible API client (also works against LM Studio)
rich = "*"  # provides RichHandler for formatted logging output
[dev-packages]
[requires]
python_version = "3.12"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment