🤗 Huggingface Bert on RedisAI
from transformers import BertForQuestionAnswering
import torch

bert_name = "bert-large-uncased-whole-word-masking-finetuned-squad"

# torchscript=True makes the model return plain tuples, which is required for tracing.
model = BertForQuestionAnswering.from_pretrained(bert_name, torchscript=True)
model.eval()

# Dummy (input_ids, attention_mask, token_type_ids) tensors: only shapes and
# dtype matter for tracing.
inputs = [torch.ones(1, 2, dtype=torch.int64),
          torch.ones(1, 2, dtype=torch.int64),
          torch.ones(1, 2, dtype=torch.int64)]

with torch.no_grad():
    traced_model = torch.jit.trace(model, inputs)

torch.jit.save(traced_model, "traced_bert_qa.pt")
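
Before moving the archive into RedisAI, it's worth a quick sanity check that the traced module reloads and executes. A minimal sketch, assuming any int64 tensors of a matching shape as dummy inputs:

import torch

# Reload the TorchScript archive and run it on dummy inputs.
loaded = torch.jit.load("traced_bert_qa.pt")
dummy = torch.ones(1, 2, dtype=torch.int64)
with torch.no_grad():
    start_scores, end_scores = loaded(dummy, dummy, dummy)
print(start_scores.shape, end_scores.shape)  # both (1, sequence_length)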
import redisai

r = redisai.Client()

model_file = 'traced_bert_qa.pt'
with open(model_file, 'rb') as f:
    model = f.read()

# bert-large serializes to well over 1 GB, while a single Redis bulk string is
# capped at 512 MB by default; these 500 MB chunks are prepared for RedisAI
# versions that accept the model blob in pieces.
chunk_size = 500 * 1024 * 1024
model_chunks = [model[i:i + chunk_size] for i in range(0, len(model), chunk_size)]

r.modelset('bert-qa', 'TORCH', 'CPU', model)
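
A quick smoke test confirms the model is stored and runnable, reusing the dummy shapes from tracing. A sketch using only the client calls above; the smoke_* key names are arbitrary placeholders:

import numpy as np

dummy = np.ones((1, 2), dtype=np.int64)
r.tensorset('smoke_ids', dummy)
r.tensorset('smoke_mask', dummy)
r.tensorset('smoke_types', dummy)
r.modelrun('bert-qa', ['smoke_ids', 'smoke_mask', 'smoke_types'],
           ['smoke_start', 'smoke_end'])
print(r.tensorget('smoke_start').shape)  # expect (1, 2)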
import redisai
from transformers import AutoTokenizer
import numpy as np

tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

r = redisai.Client()

text = r"""
At a very high level, one of the most critical steps in any ML pipeline is called AI serving, a task usually performed by an AI inference engine. The AI inference engine is responsible for the model deployment and performance monitoring steps in the figure above, and represents a whole new world that will eventually determine whether applications can use AI technologies to improve operational efficiencies and solve real business problems.
"""

questions = [
    "What is the most critical step in any ML pipeline?",
    "What is AI serving?",
    "What is an AI inference engine?",
]

for question in questions:
    # Tokenize the (question, context) pair and move the tensors into Redis.
    inputs = tokenizer.encode_plus(question, text, add_special_tokens=True, return_tensors="pt")
    input_ids = inputs['input_ids'].numpy()
    attention_mask = inputs['attention_mask'].numpy()
    token_type_ids = inputs['token_type_ids'].numpy()

    r.tensorset('input_ids', input_ids)
    r.tensorset('attention_mask', attention_mask)
    r.tensorset('token_type_ids', token_type_ids)

    # Run the traced model inside RedisAI and fetch the output logits.
    r.modelrun('bert-qa', ['input_ids', 'attention_mask', 'token_type_ids'],
               ['answer_start_scores', 'answer_end_scores'])

    answer_start_scores = r.tensorget('answer_start_scores')
    answer_end_scores = r.tensorget('answer_end_scores')

    # The answer span runs from the highest-scoring start token to the
    # highest-scoring end token (inclusive, hence the +1).
    answer_start = np.argmax(answer_start_scores)
    answer_end = np.argmax(answer_end_scores) + 1

    input_ids = inputs["input_ids"].tolist()[0]
    output_tokens = tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
    answer = tokenizer.convert_tokens_to_string(output_tokens)

    print(f"Question: {question}")
    print(f"Answer: {answer}\n")