Created
June 18, 2023 04:18
-
-
Save kabir0st/99c8f2b05d3b9094d67880b35d3a9b31 to your computer and use it in GitHub Desktop.
Use OpenAI to resort genres from a tree
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import json
import os

import openai
# Take the API key from the OPENAI_API_KEY environment variable so the secret
# never has to be written into this file. The fallback is the same empty
# string the script used before, so behavior is unchanged when the variable
# is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
def map_collected_genres(collected_genres, verified_genres):
    """Ask the chat model to map collected genres onto the verified genres.

    Args:
        collected_genres: Genres to be mapped; interpolated as-is into the
            user message.
        verified_genres: Candidate target genres; interpolated as-is into
            the system message.

    Returns:
        The message content of the first choice in the model's response.
        The prompt asks for a JSON mapping, but the text is returned
        unparsed and unvalidated.
    """
    system_message = {
        "role": "system",
        "content": f"You will map the given genre to it's closest matching genre from given array below and reply in json mapping {verified_genres}",
    }
    user_message = {
        "role": "user",
        "content": f"map this genres {collected_genres}",
    }
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
    )
    return completion.choices[0].message.content
if __name__ == "__main__":
    # Number of unverified genres sent to the model in a single request.
    chunk_size = 50

    collected_genres = set()
    book_data = []

    # Every key of genres.json and every entry of its values counts as an
    # already-known genre (presumably parent genre -> list of sub-genres;
    # verify against the data file).
    with open('files_seeder/genres.json', 'r', encoding='utf-8') as f:
        genres = json.load(f)
    genre_in_trees = []
    for key, value in genres.items():
        genre_in_trees.append(key)
        # extend() grows the list in place instead of re-copying it on
        # every iteration.
        genre_in_trees.extend(value)

    # newline='' is what the csv module expects for file objects, so quoted
    # fields containing line breaks are parsed correctly.
    with open('genres_assigned.csv', 'r', newline='') as f:
        csv_reader = csv.reader(f)
        # Drop the header row without loading the whole file into memory.
        next(csv_reader, None)
        for row in csv_reader:
            if not row:
                # csv.reader yields [] for a blank line; indexing it below
                # would raise IndexError.
                continue
            # Columns from the third onward hold genres; entries of two
            # characters or fewer are ignored.
            book_genres = [genre for genre in row[2:] if len(genre) > 2]
            collected_genres.update(book_genres)
            book_data.append({
                'barcode': row[0],
                'name': row[1],
                'collected_genres': book_genres,
                'verified_genres': [],
            })

    print(len(collected_genres))

    # Only genres absent from the tree need mapping. Sorting makes the
    # batches reproducible between runs (set iteration order is not).
    need_verification = sorted(collected_genres - set(genre_in_trees))
    verification_chunks = [
        need_verification[start:start + chunk_size]
        for start in range(0, len(need_verification), chunk_size)
    ]

    # NOTE(review): returned_mapping collects the raw model replies but is
    # not written out or consumed anywhere in the visible script.
    returned_mapping = []
    for index, chunk in enumerate(verification_chunks):
        # Progress indicator, overwritten in place via the carriage return.
        print(f'{index}, {len(verification_chunks)}', end='\r')
        returned_mapping.append(map_collected_genres(chunk, genre_in_trees))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment