Last active
August 1, 2023 13:36
-
-
Save John-Lin/05e2e9e36c692e8ab9c10b413dfd9cae to your computer and use it in GitHub Desktop.
LangChain with Llama2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Run a local Llama 2 model via LangChain's LlamaCpp wrapper.

Streams the model's tokens to stdout while it answers a small arithmetic
word problem. Swapping ``template_zero_shot`` for ``template_cot`` in the
PromptTemplate demonstrates Chain-of-Thought prompting.
"""
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Chain-of-Thought prompt: nudges the model to reason step by step.
template_cot = """Question: {question}
Answer: Let's work this out in a step by step way to be sure we have the right answer."""

# Zero-shot prompt: forwards the question verbatim.
template_zero_shot = """{question}"""

prompt = PromptTemplate(template=template_zero_shot, input_variables=["question"])

# Callbacks support token-wise streaming (tokens printed as they are generated).
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

n_gpu_layers = 1  # Metal set to 1 is enough.
n_batch = 512  # Should be between 1 and n_ctx; consider the amount of RAM of your Apple Silicon chip.

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="./models/7B/ggml-model-q4_0.bin",
    temperature=0.75,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    f16_kv=True,  # MUST set to True, otherwise you will run into problems after a couple of calls.
    max_tokens=1024,
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager.
)  # type: ignore

llm_chain = LLMChain(prompt=prompt, llm=llm)

question = "I went to the market and bought 10 apples. I gave 2 apples to the neighbor and 2 to the repairman. I then went and bought 5 more apples and ate 1. How many apples did I remain with?"
print(llm_chain.run(question))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
When switching the PromptTemplate's template from `template_zero_shot` to `template_cot`, the CoT (Chain-of-Thought) capability will be demonstrated.