Created
January 7, 2024 18:05
-
-
Save andreluiz1987/0a846517b4f46042c9617a8aba6aa17a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import openai | |
from elasticsearch import Elasticsearch | |
from elasticsearch.helpers import bulk | |
# Elasticsearch client used for the bulk indexing step; it targets a single
# node reached over plain HTTP on localhost:9200.
es = Elasticsearch(
    hosts=[{'host': 'localhost', 'port': 9200, 'scheme': 'http'}],
)

# API key used by the openai module for every request.
# NOTE(review): 'key' looks like a placeholder - supply a real key before running.
openai.api_key = 'key'
# Seed data: one dict per movie with its English "title" and "synopsis".
# Each (title, synopsis) pair below is expanded into its own dict, so the
# entries can be mutated independently later on.
movie_list = [
    {"title": title, "synopsis": synopsis}
    for title, synopsis in (
        (
            "The Shawshank Redemption",
            "Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.",
        ),
        (
            "The Dark Knight",
            "When the Joker wreaks havoc on Gotham, Batman must confront one of the greatest psychological and physical tests of his ability to fight injustice.",
        ),
        (
            "Inception",
            "A thief who enters the dreams of others to steal their secrets faces a new challenge when tasked with planting an idea into someone's mind.",
        ),
        (
            "The Lord of the Rings: The Return of the King",
            "Frodo and Sam continue their journey to Mount Doom to destroy the One Ring, while the rest of the fellowship prepares for a final battle.",
        ),
        (
            "City of God",
            "In the poverty-stricken favelas of Rio de Janeiro, two boys choose different paths: one becomes a photographer, the other a drug dealer.",
        ),
        (
            "The Social Network",
            "The founding and rise of Facebook, highlighting the relationships and betrayals that marked the creation of the social media giant.",
        ),
        (
            "Parasite",
            "A poor family scams their way into working for a wealthy family, but their deception leads to unexpected consequences.",
        ),
        (
            "Eternal Sunshine of the Spotless Mind",
            "After a painful breakup, a man undergoes a medical procedure to erase memories of his former girlfriend.",
        ),
        (
            "No Country for Old Men",
            "A hunter stumbles upon a drug deal gone wrong and takes a suitcase full of money, setting off a violent and suspenseful chase.",
        ),
        (
            "The Grand Budapest Hotel",
            "The adventures of a hotel concierge and his protégé as they become involved in the theft and recovery of a priceless painting.",
        ),
    )
]
def translate_text(text, language, *, model="gpt-3.5-turbo", max_tokens=100):
    """Translate *text* into *language* with the OpenAI chat completions API.

    Args:
        text: The text to translate.
        language: Name of the target language as it should appear in the
            prompt (for example ``"French"``).
        model: Chat model to query. Defaults to the model this script has
            always used.
        max_tokens: Upper bound on the number of generated tokens. The API
            stops generating once this limit is reached, so raise it when
            translating longer texts.

    Returns:
        The translated text with leading and trailing whitespace removed.

    Raises:
        ValueError: If the first choice of the response has no text content.
    """
    prompt = f"Translate the following text to {language}: \n\n {text}"
    response = openai.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
    )
    content = response.choices[0].message.content
    if content is None:
        # The message content is optional in the response; fail with a clear
        # message instead of an AttributeError from calling .strip() on None.
        raise ValueError(
            f"No translation returned for language {language!r}"
        )
    return content.strip()
def format_action(movie):
    """Build the bulk-index action for one translated movie.

    Args:
        movie: Mapping whose ``"title"`` and ``"synopsis"`` values are
            themselves mappings keyed by language code; each must contain
            ``"en"``, ``"es"``, ``"fr"`` and ``"pt-br"``.

    Returns:
        A dict with ``"_index"`` set to ``"idx_movies"`` and a ``"_source"``
        holding only the four language variants of the title and synopsis.

    Raises:
        KeyError: If a field or one of the four language codes is missing.
    """
    language_codes = ("en", "es", "fr", "pt-br")
    # Copy only the known language variants, field by field, so any other
    # keys present on the movie are left out of the indexed document.
    document = {
        field: {code: movie[field][code] for code in language_codes}
        for field in ("title", "synopsis")
    }
    return {"_index": "idx_movies", "_source": document}
if __name__ == '__main__':
    # Language name used in the translation prompt -> key under which the
    # translation is stored on each movie field.
    language_mapping = {
        "Portuguese": "pt-br",
        "Spanish": "es",
        "French": "fr",
    }

    for movie in movie_list:
        # Turn the plain English strings into per-language dicts, then add
        # one translation per target language next to the "en" original.
        movie["title"] = {"en": movie["title"]}
        movie["synopsis"] = {"en": movie["synopsis"]}
        for language, code in language_mapping.items():
            movie["title"][code] = translate_text(movie["title"]["en"], language)
            movie["synopsis"][code] = translate_text(movie["synopsis"]["en"], language)

    actions = [format_action(movie) for movie in movie_list]

    # Without stats_only, bulk() returns the success count together with a
    # list of error items, so report the length of that list rather than
    # printing the list itself.
    success, errors = bulk(es, actions, index="idx_movies", raise_on_error=True)
    print(f"Successfully indexed {success} documents")
    print(f"Failed to index {len(errors)} documents")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment