Created
September 26, 2023 17:21
-
-
Save imartinez/4a826c1f7b7738c9bce8f6d3caba8045 to your computer and use it in GitHub Desktop.
FastAPI streaming of a local Llama 2 GGUF LLM using LlamaIndex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from contextlib import asynccontextmanager
from typing import AsyncGenerator
from typing import Iterator

import uvicorn
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
# Registry of loaded models keyed by name: `lifespan` stores the model under
# "llama" and `run_llm` reads it back from here.
llms = {}
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the Llama 2 model before the app serves requests and release it afterwards.

    The model is stored in the module-level ``llms`` dict under the key
    ``"llama"`` so request handlers can look it up. When the application
    shuts down (or startup is aborted after the model was created) the entry
    is removed again so the model object is no longer referenced from here.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    # The weights are referenced by URL rather than by a local file path.
    # NOTE(review): presumably LlamaCPP downloads/caches the file on first
    # use -- confirm against the LlamaCPP documentation.
    llms["llama"] = LlamaCPP(
        model_url="https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_0.gguf",
        temperature=0.1,
        max_new_tokens=256,
        # llama2 has a context window of 4096 tokens, but we set it lower to
        # allow for some wiggle room
        context_window=3900,
        # kwargs to pass to __call__()
        generate_kwargs={},
        # set to at least 1 to use GPU
        model_kwargs={"n_gpu_layers": 1},
        # transform inputs into Llama2 format
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        verbose=True,
    )
    try:
        yield
    finally:
        # Drop only the entry this function created; pop() with a default
        # keeps shutdown from failing if it was already removed.
        llms.pop("llama", None)
# The ASGI application; `lifespan` is registered as its lifespan handler.
app = FastAPI(lifespan=lifespan)
def run_llm(question: str) -> Iterator[str]:
    """Stream the model's completion for *question* as Server-Sent Events.

    This is a plain (synchronous) generator: the stream returned by
    ``stream_complete`` is consumed with a regular ``for`` loop, so the
    return type is an ordinary iterator of strings, not an async generator.

    Args:
        question: Prompt text handed to the model's ``stream_complete``.

    Yields:
        One SSE event per streamed chunk. Every line of the chunk's ``delta``
        becomes its own ``data:`` field and the event is terminated by a
        blank line, so deltas containing newlines survive the transport.

    Raises:
        KeyError: On first iteration, if no model is registered under
            ``"llama"`` in ``llms``.
    """
    llm: LlamaCPP = llms["llama"]
    for response in llm.stream_complete(question):
        delta = response.delta
        if delta is None:
            # Nothing new in this chunk; formatting it would send the
            # literal text "None" to the client.
            continue
        # A raw newline inside a single "data:" field would end the event
        # early, so normalise line endings and emit one field per line.
        lines = delta.replace("\r\n", "\n").replace("\r", "\n").split("\n")
        yield "".join(f"data: {line}\n" for line in lines) + "\n"
@app.get("/")
async def root(question: str) -> StreamingResponse:
    """Handle ``GET /`` by streaming the model's answer to *question*.

    Args:
        question: The prompt to send to the model.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream`` whose
        body is produced by ``run_llm(question)``.
    """
    event_stream = run_llm(question)
    return StreamingResponse(event_stream, media_type="text/event-stream")
if __name__ == "__main__":
    # Script entry point: serve `app` with uvicorn on port 8000.
    # Host "0.0.0.0" listens on all network interfaces, not just localhost.
    uvicorn.run(app, port=8000, host="0.0.0.0")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment