Skip to content

Instantly share code, notes, and snippets.

@kabir0st
Created June 18, 2023 04:18
Show Gist options
  • Save kabir0st/99c8f2b05d3b9094d67880b35d3a9b31 to your computer and use it in GitHub Desktop.
Save kabir0st/99c8f2b05d3b9094d67880b35d3a9b31 to your computer and use it in GitHub Desktop.
Use OpenAI to re-sort genres from a tree
import csv
import json
import os

import openai
# Take the API key from the OPENAI_API_KEY environment variable rather than
# hard-coding it in the source; falls back to an empty string when unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
def map_collected_genres(collected_genres, verified_genres):
    """Ask the chat model to map collected genres onto the verified genres.

    Issues a single ``gpt-3.5-turbo`` chat-completion request. The system
    message embeds ``verified_genres`` and asks for a JSON mapping; the user
    message embeds ``collected_genres``.

    Args:
        collected_genres: Genres to map; interpolated into the user prompt
            using its string representation.
        verified_genres: Candidate genres to map onto; interpolated into the
            system prompt using its string representation.

    Returns:
        The message content of the first choice in the response, returned
        unchanged (it is not parsed as JSON here).
    """
    system_prompt = (
        f"You will map the given genre to it's closest matching genre from given array below and reply in json mapping {verified_genres}"
    )
    user_prompt = f"map this genres {collected_genres}"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
if __name__ == "__main__":
    # Script entry point: gather the genres attached to each book in the CSV,
    # find the ones that are not present in the genre tree, and send those to
    # map_collected_genres() in chunks of 50.
    collected_genres = set()
    book_data = []

    # Flatten the genre tree into one list holding every key followed by the
    # entries of its value.
    # NOTE(review): each value is expected to be a list of genre names --
    # confirm against files_seeder/genres.json.
    with open('files_seeder/genres.json', 'r') as f:
        genres = json.load(f)
    genre_in_trees = []
    for key, value in genres.items():
        genre_in_trees.append(key)
        # extend() grows the list in place instead of re-copying it on every
        # iteration.
        genre_in_trees.extend(value)

    # newline='' lets the csv module handle line endings and embedded
    # newlines itself, as the csv documentation recommends.
    with open('genres_assigned.csv', 'r', newline='') as f:
        csv_reader = csv.reader(f)
        # Skip the first row (presumably a header) without loading the whole
        # file into memory.
        next(csv_reader, None)
        for row in csv_reader:
            # Blank or truncated rows have no barcode/name columns; skip them
            # rather than failing with IndexError.
            if len(row) < 2:
                continue
            book = {
                'barcode': row[0],
                'name': row[1],
                'collected_genres': [],
                # Not filled in anywhere in this script.
                'verified_genres': [],
            }
            # Every column after the first two is treated as a genre cell.
            for genre in row[2:]:
                # Ignore empty or very short cells (2 characters or fewer).
                if len(genre) > 2:
                    collected_genres.add(genre)
                    book['collected_genres'].append(genre)
            book_data.append(book)
    print(len(collected_genres))

    # Only genres missing from the tree need mapping. Sorting makes the chunk
    # contents reproducible between runs (set iteration order is arbitrary).
    known_genres = set(genre_in_trees)
    need_verification = sorted(collected_genres - known_genres)

    chunk_size = 50
    verification_chunks = [
        need_verification[i:i + chunk_size]
        for i in range(0, len(need_verification), chunk_size)
    ]

    # One request per chunk; the raw replies are collected in order.
    # NOTE(review): returned_mapping is not used or saved after this loop.
    returned_mapping = []
    for index, chunk in enumerate(verification_chunks):
        # '\r' keeps the progress counter on a single console line.
        print(f'{index}, {len(verification_chunks)}', end='\r')
        returned_mapping.append(map_collected_genres(chunk, genre_in_trees))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment