test padding in the middle
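This gist compares OPT generation through the Alpa backend with and without padding placed in the middle of the input: each prompt is split in two, the left half is right-padded and the right half is left-padded, and the concatenated batch is checked against unpadded single-prompt runs.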
"""Use huggingface/transformers interface and Alpa backend for distributed inference."""
from transformers import AutoTokenizer
from opt_serving.model.wrapper import get_model
import numpy as np
import torch
# Load the tokenizer. We have to use the 30B version because
# other versions have some issues. The 30B version works for all OPT models.
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-30b", use_fast=False)
tokenizer.add_bos_token = False
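# Sanity check (added for illustration, not in the original gist): with
# add_bos_token=False the tokenizer should not prepend a BOS id, so
# concatenating two tokenized halves below cannot leave a stray BOS token
# in the middle of a sequence.
assert tokenizer("Paris").input_ids[0] != tokenizer.bos_token_id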
generate_params = {"do_sample": False, "num_beams": 1, "num_return_sequences": 1}
# Load the model twice: batch size 4 for the padded batch and
# batch size 1 for the unpadded reference runs.
model_bs4 = get_model(model_name="alpa/opt-2.7b",
                      path="/home/ubuntu/opt_weights",
                      batch_size=4,
                      **generate_params)
model_bs1 = get_model(model_name="alpa/opt-2.7b",
                      path="/home/ubuntu/opt_weights",
                      batch_size=1,
                      **generate_params)
# Build inputs with padding in the middle: split each prompt in two,
# right-pad the left halves, left-pad the right halves, then concatenate.
prompts = [
    "Paris is the capital city of",
    "Today is a good day and I'd like to",
    "Computer Science studies the area of",
    "University of California Berkeley is a public university",
]
left = []
right = []
for i in range(len(prompts)):
    # Split prompt i after its first i + 1 words; keep the separating
    # space attached to the left half.
    tokens = prompts[i].split(" ")
    left.append(" ".join(tokens[:i+1]) + " ")
    right.append(" ".join(tokens[i+1:]))
tokenizer.padding_side = "right"
left_ids = tokenizer(left, return_tensors="pt", padding="longest").input_ids
tokenizer.padding_side = "left"
right_ids = tokenizer(right, return_tensors="pt", padding="longest").input_ids
input_ids = torch.cat((left_ids, right_ids), dim=-1)
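# Layout illustration (added comment): each row of input_ids is now
#     [left-half tokens][PAD ... PAD][right-half tokens]
# Since the left halves were padded on the right and the right halves on
# the left, all padding sits in the middle of each sequence.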
# Generate with the padded batch
padded_outputs = model_bs4.generate(input_ids=input_ids,
                                    max_length=64,
                                    **generate_params)
padded_outputs = tokenizer.batch_decode(padded_outputs, skip_special_tokens=True)
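# Note (added comment): skip_special_tokens=True drops the pad tokens, so
# the decoded strings read as contiguous text despite the mid-sequence padding.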
# Generate w/o padding, one prompt at a time, as a reference
outputs = []
for i in range(len(prompts)):
    input_ids = tokenizer(prompts[i], return_tensors="pt", padding="longest").input_ids
    out = model_bs1.generate(input_ids=input_ids, max_length=64, **generate_params)
    out = tokenizer.batch_decode(out, skip_special_tokens=True)
    outputs.append(out[0])
# Print the two generations side by side for each prompt
for i in range(len(prompts)):
    print("-" * 100)
    print(" = w/o padding = ")
    print(outputs[i])
    print(" = w/ padding = ")
    print(padded_outputs[i])
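# Expected behavior (added comment): if the backend masks out the pad
# positions correctly, the padded and unpadded generations should match
# for every prompt.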