Skip to content

Instantly share code, notes, and snippets.

@ymoslem
Created April 20, 2021 12:47
Show Gist options
  • Save ymoslem/576ee5751489d3f9ad11bf60230c0950 to your computer and use it in GitHub Desktop.
Save ymoslem/576ee5751489d3f9ad11bf60230c0950 to your computer and use it in GitHub Desktop.
CTranslate2 MWE
import ctranslate2
def detokenize(result):
    """Join a sequence of token strings into one space-separated sentence.

    Args:
        result: Iterable of token strings (e.g. the "tokens" entry of a
            translation hypothesis).

    Returns:
        The tokens joined by single spaces; an empty string when there
        are no tokens.
    """
    # str.join consumes any iterable directly, so no intermediate list copy
    # is needed.
    return " ".join(result)
def tokenize(input_sentence):
    """Split a sentence into whitespace-delimited tokens.

    Args:
        input_sentence: The raw sentence string.

    Returns:
        A list of non-empty token strings; an empty list when the sentence
        is empty or contains only whitespace.
    """
    # split() with no separator collapses runs of whitespace and ignores
    # leading/trailing whitespace, so no empty-string tokens are produced
    # (split(" ") would yield "" entries for doubled or edge spaces).
    return input_sentence.split()
# Change these variables
model_path = "fren_ctranslate2/"
my_sent = "ce qui a creusé les inégalités préexistantes"
# NOTE(review): "deepended" looks like a typo for "deepened" -- confirm before
# relying on this prefix, since it is forced onto the start of the output.
prefix = "this has deepended"

translator = ctranslate2.Translator(model_path, "cpu")  # "cpu" or "cuda"

# 1) Plain translation: print the best hypothesis for the source sentence.
original_result = translator.translate_batch(
    [tokenize(my_sent)],
    beam_size=5,
)
translation = detokenize(original_result[0][0]["tokens"])
print(translation)

# 2) Prefix-constrained translation: request several alternative hypotheses
#    that begin with the given target prefix.
results = translator.translate_batch(
    [tokenize(my_sent)],
    target_prefix=[tokenize(prefix)],
    num_hypotheses=10,
    return_alternatives=True,
    beam_size=5,
)

# results[0] holds the hypotheses for the single sentence in the batch.
for hypothesis in results[0]:
    print(detokenize(hypothesis["tokens"]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment