Last active
May 31, 2024 23:01
-
-
Save gk3/b746abb94c771a7f5c0e94091453e190 to your computer and use it in GitHub Desktop.
organize a list of icons using an LLM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Importing necessary libraries | |
| # To install the dependencies, use the following commands: | |
| # pip install openai | |
| # pip install pandas | |
| import json # Built-in library, no installation required | |
| import openai # For interacting with OpenAI's API | |
| import pandas as pd # For data manipulation and analysis | |
# Shared OpenAI client used by every request in this script.
client = openai.OpenAI()
# client.api_key = ""  # uncomment and add a key if you don't have OPENAI_API_KEY set in your environment

# Optional set of seed categories to influence category generation.
# The processing loop further down adds newly returned categories to this set.
categories = {"Weather", "Connectivity"}

# Load icons.txt and split by comma. Whitespace around each name (including
# the file's trailing newline) is stripped and empty entries are dropped, so
# "a, b,\n" yields ["a", "b"] instead of ["a", " b", "\n"].
with open("icons.txt", "r", encoding="utf-8") as file:
    icons = [name.strip() for name in file.read().split(",") if name.strip()]
print(icons)
def get_icon_category(icon_name):
    """Ask the model to assign a category to a single icon.

    The system prompt lists the current contents of the module-level
    ``categories`` set, read at call time, and tells the model to invent a
    new category when none of them fits.

    Args:
        icon_name: Name of the icon to categorize.

    Returns:
        The value stored under the "category" key of the JSON object the
        model replied with.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        KeyError: If the reply has no "category" key.
    """
    # Sets of strings iterate in an order that can differ between interpreter
    # runs; sorting keeps the prompt identical for the same category set,
    # which is what temperature=0 is aiming for.
    known_categories = ", ".join(sorted(categories))
    system_prompt = """
You are an expert icon categorizer. You are given an icon name and you need to categorize it. You can categorize it as one of the following categories: {categories}
If no category is suitable, create a new category using the guidelines below:
- It should be a category that is related to the icon.
- It should be a category that is not already in the categories list.
- It should be a category that is not too specific.
Respond with a JSON object with the following format: {{"category": "<category>"}}
""".format(categories=known_categories).strip()

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            # add few shot examples here for steering the model
            # {"role": "user", "content": "icon: Tree"},
            # {"role": "assistant", "content": "{\"category\": \"PLANT\"}"},
            # {"role": "user", "content": "icon: Car"},
            # {"role": "assistant", "content": "{\"category\": \"TRANSPORTATION\"}"},
            # {"role": "user", "content": "icon: Phone"},
            # {"role": "assistant", "content": "{\"category\": \"COMMUNICATION\"}"},
            {"role": "user", "content": f"icon: {icon_name}"},
        ],
        temperature=0,
        response_format={"type": "json_object"},
    )

    # The reply body is a JSON document; decode it and pull out the category.
    payload = json.loads(completion.choices[0].message.content)
    return payload["category"]
def get_icon_synonyms(icon_name):
    """Ask the model for keywords (related words and synonyms) for an icon.

    Args:
        icon_name: Name of the icon to describe.

    Returns:
        The value stored under the "keywords" key of the JSON object the
        model replied with.
    """
    instructions = "\n".join((
        "You are an expert writer. You are given an icon name and you need to create a list of keywords for it. Keywords are words that are related to the icon, and can be synonyms.",
        'Respond with a JSON object with the following format: {"keywords": ["<keyword>", "<keyword>", ...]}',
    ))

    conversation = [
        {"role": "system", "content": instructions},
        # add few shot examples here for steering the model
        # {"role": "user", "content": "icon: Tree"},
        # {"role": "assistant", "content": "{\"keywords\": [\"tree\", \"plant\"]}"},
        # {"role": "user", "content": "icon: Car"},
        # {"role": "assistant", "content": "{\"keywords\": [\"automobile\", \"transportation\"]}"},
        # {"role": "user", "content": "icon: Phone"},
        # {"role": "assistant", "content": "{\"keywords\": [\"telephone\", \"communication\"]}"},
        {"role": "user", "content": f"icon: {icon_name}"},
    ]

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        temperature=0,
        response_format={"type": "json_object"},
    )

    # Decode the JSON reply and hand back only the keyword list.
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)["keywords"]
limit = 10  # maximum number of icons to process
# limit = len(icons)  # uncomment to process all icons

data = []
for icon in icons[:limit]:
    print(f"Processing icon: {icon}")

    category = get_icon_category(icon)
    print(f"Identified category for {icon}: {category}")

    synonyms = get_icon_synonyms(icon)
    print(f"Generated synonyms for {icon}: {synonyms}")

    # Grow the shared category set; get_icon_category reads it on each call,
    # so a newly added category is offered for the icons that follow.
    is_new_category = category not in categories
    if is_new_category:
        print(f"Adding new category to the set: {category}")
        categories.add(category)

    # One row per icon; keywords are flattened into a comma-separated string.
    row = dict(
        icon_name=icon,
        category=category,
        keywords=", ".join(synonyms),
    )
    data.append(row)

# Build the table from the collected rows and export it without the index.
df = pd.DataFrame(data)
df.to_csv("icon_categories.csv", index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment