Created
April 20, 2021 12:47
-
-
Save ymoslem/576ee5751489d3f9ad11bf60230c0950 to your computer and use it in GitHub Desktop.
CTranslate2 MWE
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ctranslate2 | |
def detokenize(result):
    """Join token strings into a single space-separated string.

    Args:
        result: Iterable of token strings, e.g. the ``"tokens"`` entry of
            a translation hypothesis.

    Returns:
        The tokens joined by single spaces; ``""`` when there are no tokens.
    """
    # str.join accepts any iterable of strings, so no intermediate list
    # needs to be built first.
    return " ".join(result)
def tokenize(input_sentence):
    """Split a sentence into whitespace-delimited tokens.

    Args:
        input_sentence: The text to split.

    Returns:
        A list of non-empty token strings; ``[]`` for an empty or
        whitespace-only sentence.
    """
    # split() with no separator collapses runs of whitespace and ignores
    # leading/trailing whitespace, so no empty-string tokens are produced
    # (split(" ") would yield "" for every extra space).
    return input_sentence.split()
# Change these variables
model_path = "fren_ctranslate2/"  # directory of the converted CTranslate2 model
my_sent = "ce qui a creusé les inégalités préexistantes"
prefix = "this has deepened"  # beginning of the translation to force

translator = ctranslate2.Translator(model_path, "cpu")  # "cpu" or "cuda"

# Tokenize the source once; both translation calls below reuse it.
source_tokens = tokenize(my_sent)

# 1) Plain translation without any target prefix: print the first hypothesis
#    of the first (and only) sentence in the batch.
original_result = translator.translate_batch([source_tokens], beam_size=5)
translation = detokenize(original_result[0][0]["tokens"])
print(translation)

# 2) Translation constrained to start with `prefix`, requesting several
#    alternative hypotheses for the same sentence.
results = translator.translate_batch(
    [source_tokens],
    target_prefix=[tokenize(prefix)],
    num_hypotheses=10,
    return_alternatives=True,
    beam_size=5,
)

# Print every hypothesis returned for the single input sentence.
for hypothesis in results[0]:
    print(detokenize(hypothesis["tokens"]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment