Last active
January 18, 2023 00:21
-
-
Save ymoslem/30a56b6c0e9ada2313c223038c8b2b14 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ctranslate2
# Replace with your tokenize function and source tokenization model
def tokenize(input_sentences):
    """Split every sentence into whitespace-delimited tokens.

    This is a placeholder tokenizer; swap it for the tokenizer that matches
    the source side of your model.

    Args:
        input_sentences: Iterable of sentence strings.

    Returns:
        A list containing one list of string tokens per input sentence,
        in the same order as the input.
    """
    # split() without an argument collapses runs of whitespace and ignores
    # leading/trailing whitespace, so no empty-string tokens are produced
    # (split(" ") would emit "" for every extra space).
    return [input_sentence.split() for input_sentence in input_sentences]
# Replace with your detokenize function and target tokenization model
def detokenize(outputs):
    """Join every token sequence into a single space-separated string.

    This is a placeholder detokenizer; swap it for the detokenizer that
    matches the target side of your model.

    Args:
        outputs: Iterable of token sequences (each an iterable of strings).

    Returns:
        A list containing one string per token sequence, in the same order
        as the input.
    """
    # str.join accepts any iterable of strings, so no intermediate copy of
    # each token list is needed.
    return [" ".join(output) for output in outputs]
# Modify the path to the CTranslate2 model directory
model_path = "ctranslate2_model"

# Example sentences to translate; replace with your own input.
source_sentences = [
    "how are you?",
    "fine, thanks!",
    "everything is great.",
    "I am happy to know that.",
]

# Load the model; the second argument selects the device.
translator = ctranslate2.Translator(model_path, "cpu")  # "cpu" or "cuda"

# Translate all tokenized sentences in one batch with a beam size of 5.
outputs = translator.translate_batch(tokenize(source_sentences), beam_size=5)

# Take the first hypothesis of each result and turn its tokens back into text.
translations = detokenize([output.hypotheses[0] for output in outputs])

# Print one translation per line.
print(*translations, sep="\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment