gk3 · May 31, 2024 23:01
diff --git a/icon_categorizer.py b/icon_categorizer.py
 # Importing necessary libraries
 # To install the dependencies, use the following commands:
 # pip install openai
 # pip install pandas

 import json  # Built-in library, no installation required
 import openai  # For interacting with OpenAI's API
 import pandas as pd  # For data manipulation and analysis

 # Shared API client used by the helper functions below. Per the note on the
 # next line, the key is expected to come from OPENAI_API_KEY unless set here.
 client = openai.OpenAI()
 # client.api_key = ""  # uncomment and add a key if you don't have OPENAI_API_KEY set in your environment

 # Optional set of seed categories to influence category generation. The
 # processing loop adds every new category the model proposes to this set.
 categories = {"Weather", "Connectivity"}

 # Load icons.txt, a comma-separated list of icon names. Strip surrounding
 # whitespace (including the file's trailing newline) and drop empty entries
 # so the names are clean when they reach the prompts and the CSV output.
 with open("icons.txt", "r") as file:
    icons = [name.strip() for name in file.read().split(",") if name.strip()]
 print(icons)


 def get_icon_category(icon_name):
    """Ask the chat model to assign a category to ``icon_name``.

    The system prompt lists the current contents of the module-level
    ``categories`` set and tells the model to either pick one of them or
    propose a new, not-too-specific category.

    Args:
        icon_name: Name of the icon; interpolated into the user message.

    Returns:
        The value stored under the ``"category"`` key of the model's JSON reply.

    Raises:
        json.JSONDecodeError: If the reply content is not valid JSON.
        KeyError: If the reply has no ``"category"`` key.
    """
    # Sort before joining: iteration order of a set of strings can differ
    # between interpreter runs, which would make the prompt (and therefore
    # the temperature=0 output) non-reproducible.
    known_categories = ", ".join(sorted(categories))
    system_prompt = """
 You are an expert icon categorizer. You are given an icon name and you need to categorize it. You can categorize it as one of the following categories: {categories}

 If no category is suitable, create a new category using the guidelines below:
 - It should be a category that is related to the icon.
 - It should be a category that is not already in the categories list.
 - It should be a category that is not too specific.

 Respond with a JSON object with the following format: {{"category": "<category>"}}
 """.format(categories=known_categories).strip()

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            # add few shot examples here for steering the model
            # {"role": "user", "content": "icon: Tree"},
            # {"role": "assistant", "content": "{\"category\": \"PLANT\"}"},
            # {"role": "user", "content": "icon: Car"},
            # {"role": "assistant", "content": "{\"category\": \"TRANSPORTATION\"}"},
            # {"role": "user", "content": "icon: Phone"},
            # {"role": "assistant", "content": "{\"category\": \"COMMUNICATION\"}"},
            {"role": "user", "content": f"icon: {icon_name}"},
        ],
        temperature=0,
        response_format={"type": "json_object"},
    )
    # Parse the JSON reply into its own name instead of rebinding `response`.
    payload = json.loads(response.choices[0].message.content)
    return payload["category"]


 def get_icon_synonyms(icon_name):
    """Ask the chat model for a list of keywords describing ``icon_name``.

    Args:
        icon_name: Name of the icon; interpolated into the user message.

    Returns:
        The value stored under the ``"keywords"`` key of the model's JSON reply.
    """
    instructions = """
 You are an expert writer. You are given an icon name and you need to create a list of keywords for it. Keywords are words that are related to the icon, and can be synonyms.

 Respond with a JSON object with the following format: {"keywords": ["<keyword>", "<keyword>", ...]}
 """.strip()

    conversation = [
        {"role": "system", "content": instructions},
        # add few shot examples here for steering the model
        # {"role": "user", "content": "icon: Tree"},
        # {"role": "assistant", "content": "{\"keywords\": [\"tree\", \"plant\"]}"},
        # {"role": "user", "content": "icon: Car"},
        # {"role": "assistant", "content": "{\"keywords\": [\"automobile\", \"transportation\"]}"},
        # {"role": "user", "content": "icon: Phone"},
        # {"role": "assistant", "content": "{\"keywords\": [\"telephone\", \"communication\"]}"},
        {"role": "user", "content": f"icon: {icon_name}"},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        temperature=0,
        response_format={"type": "json_object"},
    )

    # The reply is requested as a JSON object; decode it and pick the list.
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)["keywords"]


 limit = 10  # maximum number of icons to process
 # limit = len(icons)  # uncomment to process all icons

 # One dict per processed icon; becomes one CSV row each.
 data = []
 for icon in icons[:limit]:
    print(f"Processing icon: {icon}")

    category = get_icon_category(icon)
    print(f"Identified category for {icon}: {category}")

    synonyms = get_icon_synonyms(icon)
    print(f"Generated synonyms for {icon}: {synonyms}")

    # Remember categories the model invented so later prompts list them too.
    already_known = category in categories
    if not already_known:
        print(f"Adding new category to the set: {category}")
        categories.add(category)

    # Keywords are flattened into a single comma-separated cell.
    row = {
        "icon_name": icon,
        "category": category,
        "keywords": ", ".join(synonyms),
    }
    data.append(row)

 # Build the table from the collected rows and export it without the index.
 df = pd.DataFrame(data)
 df.to_csv("icon_categories.csv", index=False)
	# Importing necessary libraries
	# To install the dependencies, use the following commands:
	# pip install openai
	# pip install pandas

	import json # Built-in library, no installation required
	import openai # For interacting with OpenAI's API
	import pandas as pd # For data manipulation and analysis

	# Shared API client used by the helper functions below. Per the note on the
	# next line, the key is expected to come from OPENAI_API_KEY unless set here.
	client = openai.OpenAI()
	# client.api_key = ""  # uncomment and add a key if you don't have OPENAI_API_KEY set in your environment

	# Optional set of seed categories to influence category generation. The
	# processing loop adds every new category the model proposes to this set.
	categories = {"Weather", "Connectivity"}

	# Load icons.txt, a comma-separated list of icon names. Strip surrounding
	# whitespace (including the file's trailing newline) and drop empty entries
	# so the names are clean when they reach the prompts and the CSV output.
	with open("icons.txt", "r") as file:
		icons = [name.strip() for name in file.read().split(",") if name.strip()]
	print(icons)


	def get_icon_category(icon_name):
		"""Ask the chat model to assign a category to ``icon_name``.

		The system prompt lists the current contents of the module-level
		``categories`` set and tells the model to either pick one of them or
		propose a new, not-too-specific category.

		Args:
			icon_name: Name of the icon; interpolated into the user message.

		Returns:
			The value stored under the ``"category"`` key of the model's JSON reply.

		Raises:
			json.JSONDecodeError: If the reply content is not valid JSON.
			KeyError: If the reply has no ``"category"`` key.
		"""
		# Sort before joining: iteration order of a set of strings can differ
		# between interpreter runs, which would make the prompt (and therefore
		# the temperature=0 output) non-reproducible.
		known_categories = ", ".join(sorted(categories))
		system_prompt = """
	You are an expert icon categorizer. You are given an icon name and you need to categorize it. You can categorize it as one of the following categories: {categories}

	If no category is suitable, create a new category using the guidelines below:
	- It should be a category that is related to the icon.
	- It should be a category that is not already in the categories list.
	- It should be a category that is not too specific.

	Respond with a JSON object with the following format: {{"category": "<category>"}}
	""".format(categories=known_categories).strip()

		response = client.chat.completions.create(
			model="gpt-4o",
			messages=[
				{"role": "system", "content": system_prompt},
				# add few shot examples here for steering the model
				# {"role": "user", "content": "icon: Tree"},
				# {"role": "assistant", "content": "{\"category\": \"PLANT\"}"},
				# {"role": "user", "content": "icon: Car"},
				# {"role": "assistant", "content": "{\"category\": \"TRANSPORTATION\"}"},
				# {"role": "user", "content": "icon: Phone"},
				# {"role": "assistant", "content": "{\"category\": \"COMMUNICATION\"}"},
				{"role": "user", "content": f"icon: {icon_name}"},
			],
			temperature=0,
			response_format={"type": "json_object"},
		)
		# Parse the JSON reply into its own name instead of rebinding `response`.
		payload = json.loads(response.choices[0].message.content)
		return payload["category"]


	def get_icon_synonyms(icon_name):
		"""Ask the chat model for a list of keywords describing ``icon_name``.

		Args:
			icon_name: Name of the icon; interpolated into the user message.

		Returns:
			The value stored under the ``"keywords"`` key of the model's JSON reply.
		"""
		instructions = """
	You are an expert writer. You are given an icon name and you need to create a list of keywords for it. Keywords are words that are related to the icon, and can be synonyms.

	Respond with a JSON object with the following format: {"keywords": ["<keyword>", "<keyword>", ...]}
	""".strip()

		conversation = [
			{"role": "system", "content": instructions},
			# add few shot examples here for steering the model
			# {"role": "user", "content": "icon: Tree"},
			# {"role": "assistant", "content": "{\"keywords\": [\"tree\", \"plant\"]}"},
			# {"role": "user", "content": "icon: Car"},
			# {"role": "assistant", "content": "{\"keywords\": [\"automobile\", \"transportation\"]}"},
			# {"role": "user", "content": "icon: Phone"},
			# {"role": "assistant", "content": "{\"keywords\": [\"telephone\", \"communication\"]}"},
			{"role": "user", "content": f"icon: {icon_name}"},
		]
		completion = client.chat.completions.create(
			model="gpt-4o",
			messages=conversation,
			temperature=0,
			response_format={"type": "json_object"},
		)

		# The reply is requested as a JSON object; decode it and pick the list.
		reply_text = completion.choices[0].message.content
		return json.loads(reply_text)["keywords"]


	limit = 10  # maximum number of icons to process
	# limit = len(icons)  # uncomment to process all icons

	# One dict per processed icon; becomes one CSV row each.
	data = []
	for icon in icons[:limit]:
		print(f"Processing icon: {icon}")

		category = get_icon_category(icon)
		print(f"Identified category for {icon}: {category}")

		synonyms = get_icon_synonyms(icon)
		print(f"Generated synonyms for {icon}: {synonyms}")

		# Remember categories the model invented so later prompts list them too.
		already_known = category in categories
		if not already_known:
			print(f"Adding new category to the set: {category}")
			categories.add(category)

		# Keywords are flattened into a single comma-separated cell.
		row = {
			"icon_name": icon,
			"category": category,
			"keywords": ", ".join(synonyms),
		}
		data.append(row)

	# Build the table from the collected rows and export it without the index.
	df = pd.DataFrame(data)
	df.to_csv("icon_categories.csv", index=False)