
@funkytaco
Forked from ahoho/prompt_alpaca_lora.py
Created July 26, 2023 05:09

Revisions

  1. @ahoho revised this gist Apr 25, 2023. 1 changed file with 1 addition and 2 deletions.
    3 changes: 1 addition & 2 deletions prompt_alpaca_lora.py
    @@ -38,7 +38,7 @@ def load_adapted_hf_generation_pipeline(
         if device == "cuda":
             if not is_accelerate_available():
                 raise ValueError("Install `accelerate`")
    -        if load_in_8bit and not is_accelerate_available():
    +        if load_in_8bit and not is_bitsandbytes_available():
                 raise ValueError("Install `bitsandbytes`")

         tokenizer = AutoTokenizer.from_pretrained(base_model_name)

    @@ -96,7 +96,6 @@ def load_adapted_hf_generation_pipeline(
             top_p=top_p,
             **generation_kwargs,
         )
    -    # TODO: add generation config to pipeline
         pipe = pipeline(
             task,
             model=model,
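
    As an aside (not part of the gist), the same transformers.utils helpers that this revision relies on can also be used as a standalone pre-flight check before loading anything. A minimal sketch; check_gpu_deps is a hypothetical helper name, not something the gist defines:

        # Illustrative pre-flight check mirroring the revised guard
        # (check_gpu_deps is a hypothetical helper, not part of the gist).
        from transformers.utils import is_accelerate_available, is_bitsandbytes_available

        def check_gpu_deps(load_in_8bit: bool = True) -> None:
            """Fail fast if the optional GPU / 8-bit dependencies are missing."""
            missing = []
            if not is_accelerate_available():
                missing.append("accelerate")
            if load_in_8bit and not is_bitsandbytes_available():
                missing.append("bitsandbytes")
            if missing:
                raise ValueError(f"Install {', '.join(missing)}")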
  2. @ahoho created this gist Apr 25, 2023.
    120 changes: 120 additions & 0 deletions prompt_alpaca_lora.py
    @@ -0,0 +1,120 @@
    from typing import Optional, Any

    import torch

    from transformers.utils import is_accelerate_available, is_bitsandbytes_available
    from transformers import (
        AutoTokenizer,
        AutoModelForCausalLM,
        GenerationConfig,
        pipeline,
    )

    from peft import PeftModel

    ALPACA_TEMPLATE = (
        "Below is an instruction that describes a task, paired with an input that provides "
        "further context. Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
    )


    def load_adapted_hf_generation_pipeline(
        base_model_name,
        lora_model_name,
        temperature: float = 0,
        top_p: float = 1.,
        max_tokens: int = 50,
        batch_size: int = 16,
        device: str = "cpu",
        load_in_8bit: bool = True,
        generation_kwargs: Optional[dict] = None,
    ):
        """
        Load a huggingface model & adapt with PEFT.
        Borrowed from https://github.com/tloen/alpaca-lora/blob/main/generate.py
        """

        if device == "cuda":
            if not is_accelerate_available():
                raise ValueError("Install `accelerate`")
            if load_in_8bit and not is_accelerate_available():
                raise ValueError("Install `bitsandbytes`")

        tokenizer = AutoTokenizer.from_pretrained(base_model_name)
        task = "text-generation"

        if device == "cuda":
            model = AutoModelForCausalLM.from_pretrained(
                base_model_name,
                load_in_8bit=load_in_8bit,
                torch_dtype=torch.float16,
                device_map="auto",
            )
            model = PeftModel.from_pretrained(
                model,
                lora_model_name,
                torch_dtype=torch.float16,
            )
        elif device == "mps":
            model = AutoModelForCausalLM.from_pretrained(
                base_model_name,
                device_map={"": device},
                torch_dtype=torch.float16,
            )
            model = PeftModel.from_pretrained(
                model,
                lora_model_name,
                device_map={"": device},
                torch_dtype=torch.float16,
            )
        else:
            model = AutoModelForCausalLM.from_pretrained(
                base_model_name, device_map={"": device}, low_cpu_mem_usage=True
            )
            model = PeftModel.from_pretrained(
                model,
                lora_model_name,
                device_map={"": device},
            )

        # unwind broken decapoda-research config
        model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
        model.config.bos_token_id = 1
        model.config.eos_token_id = 2

        if not load_in_8bit:
            model.half()  # seems to fix bugs for some users.

        model.eval()

        generation_kwargs = generation_kwargs if generation_kwargs is not None else {}
        config = GenerationConfig(
            do_sample=True,
            temperature=temperature,
            max_new_tokens=max_tokens,
            top_p=top_p,
            **generation_kwargs,
        )
        # TODO: add generation config to pipeline
        pipe = pipeline(
            task,
            model=model,
            tokenizer=tokenizer,
            batch_size=16,  # TODO: make a parameter
            generation_config=config,
            framework="pt",
        )

        return pipe

    if __name__ == "__main__":
        pipe = load_adapted_hf_generation_pipeline(
            base_model_name="decapoda-research/llama-7b-hf",
            lora_model_name="tloen/alpaca-lora-7b",
        )
        prompt = ALPACA_TEMPLATE.format(
            instruction="Paraphrase the sentence.",
            input="The quick brown fox jumped over the lazy dog.",
        )
        print(pipe(prompt))
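
As a closing illustration (not part of the gist), here is a minimal usage sketch, assuming the file above is saved as prompt_alpaca_lora.py on the import path and the referenced Hub checkpoints are reachable. Sampling parameters passed at call time override the GenerationConfig set when the pipeline was built.

    # Minimal usage sketch (assumptions: prompt_alpaca_lora.py is importable and
    # the decapoda-research / tloen checkpoints can be downloaded).
    from prompt_alpaca_lora import ALPACA_TEMPLATE, load_adapted_hf_generation_pipeline

    pipe = load_adapted_hf_generation_pipeline(
        base_model_name="decapoda-research/llama-7b-hf",
        lora_model_name="tloen/alpaca-lora-7b",
        device="cuda",  # or "mps" / "cpu", matching the branches in the loader
        load_in_8bit=True,
    )

    prompts = [
        ALPACA_TEMPLATE.format(
            instruction="Paraphrase the sentence.",
            input="The quick brown fox jumped over the lazy dog.",
        ),
        ALPACA_TEMPLATE.format(
            instruction="Summarize the sentence.",
            input="The quick brown fox jumped over the lazy dog.",
        ),
    ]

    # A text-generation pipeline returns, per prompt, a list of dicts with "generated_text";
    # per-call sampling arguments override the construction-time GenerationConfig.
    for outputs in pipe(prompts, max_new_tokens=64, do_sample=True, temperature=0.7):
        print(outputs[0]["generated_text"])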