Last active
June 3, 2024 20:22
-
-
Save gmasse/e1f99339e161f4830df6be5d0095349a to your computer and use it in GitHub Desktop.
llama-index starter tutorial with OVHCloud AI Endpoints
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
### Llama-index starter tutorial with OVHcloud AI Endpoints | |
import os | |
import requests | |
import time | |
import logging | |
import sys | |
from llama_index.llms.openai_like import OpenAILike | |
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader | |
from typing import Any, List, Optional | |
from llama_index.core.embeddings import BaseEmbedding | |
from llama_index.core.bridge.pydantic import PrivateAttr | |
#logging.basicConfig(stream=sys.stdout, level=logging.INFO) | |
#logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout)) | |
""" | |
Usage: | |
OVH_AI_ENDPOINTS_ACCESS_TOKEN="your-token" python3 llama-index_starter.py | |
NB: Make sure you are using a valid token; otherwise, document indexing will be slow due to rate limiting.
""" | |
class OVHcloudAIEEmbeddings(BaseEmbedding):
    """llama-index embedding model backed by the OVHcloud AI Endpoints text2vec API.

    Sends raw text to the configured endpoint with a bearer token and returns
    the embedding vector the service responds with. Transparently retries when
    the service answers HTTP 429 (rate limit), honoring the ``RateLimit-Reset``
    header.
    """

    _api_key: str = PrivateAttr()
    _api_base: str = PrivateAttr()

    def __init__(
        self,
        api_key: Optional[str] = None,
        api_base: str = "https://multilingual-e5-base.endpoints.kepler.ai.cloud.ovh.net/api/text2vec",
        **kwargs: Any,
    ) -> None:
        """Create the embedding client.

        Args:
            api_key: OVHcloud AI Endpoints access token. Falls back to the
                ``OVH_AI_ENDPOINTS_ACCESS_TOKEN`` environment variable.
            api_base: Full URL of the text2vec endpoint.
        """
        # Initialize the pydantic model FIRST: assigning PrivateAttr values
        # before super().__init__() can be rejected or clobbered depending on
        # the pydantic version bridged by llama-index.
        super().__init__(**kwargs)
        self._api_key = api_key or os.environ.get("OVH_AI_ENDPOINTS_ACCESS_TOKEN", None)
        self._api_base = api_base

    @classmethod
    def class_name(cls) -> str:
        return "ovhcloud ai endpoints embedding"

    def _generate_embedding(self, text: str) -> List[float]:
        """Generate embeddings from OVHcloud AI Endpoints.

        Args:
            text: An input text sentence or document.

        Returns:
            A list of floats: the embedding for the given text.

        Raises:
            ValueError: On any non-200, non-429 HTTP response.
        """
        headers = {
            "content-type": "text/plain",
            "Authorization": f"Bearer {self._api_key}",
        }
        # Use the session as a context manager so the underlying connection
        # pool is released even when an exception is raised.
        with requests.session() as session:
            while True:
                response = session.post(
                    self._api_base,
                    headers=headers,
                    data=text,
                )
                if response.status_code == 200:
                    return response.json()
                if response.status_code == 429:
                    # Rate limit exceeded: wait until the advertised reset
                    # time, then retry. A missing/zero header means the
                    # window has already passed, so retry immediately.
                    reset_time = int(response.headers.get("RateLimit-Reset", 0))
                    logging.info("Rate limit exceeded. Waiting %d seconds.", reset_time)
                    if reset_time > 0:
                        time.sleep(reset_time)
                    continue
                # Any other non-200 status is a hard failure.
                raise ValueError(
                    f"Request failed with status code {response.status_code}: {response.text}"
                )

    async def _aget_query_embedding(self, query: str) -> List[float]:
        """Async query embedding (delegates to the sync implementation)."""
        return self._get_query_embedding(query)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        """Async text embedding (delegates to the sync implementation)."""
        return self._get_text_embedding(text)

    def _get_text_embedding(self, text: str) -> List[float]:
        """Get text embedding."""
        return self._generate_embedding(text)

    def _get_query_embedding(self, query: str) -> List[float]:
        """Get query embedding."""
        return self._generate_embedding(query)

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Get text embeddings, one request per input text."""
        return [self._generate_embedding(text) for text in texts]
# Wire the custom OVHcloud embedding model and generation limits into the
# llama-index global Settings used by the index and query engine below.
Settings.embed_model = OVHcloudAIEEmbeddings()
Settings.chunk_size = 512  # tokens per chunk when splitting documents for indexing
Settings.context_window = 4096  # model context size assumed by llama-index
Settings.num_output = 256  # max tokens reserved for the LLM's answer
# Mixtral 8x7B served through an OpenAI-compatible endpoint on OVHcloud AI Endpoints;
# reuses the same access token as the embedding model.
Settings.llm = OpenAILike(model="Mixtral-8x7B-Instruct-v0.1", api_base="https://mixtral-8x7b-instruct-v01.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1", api_key=os.environ.get("OVH_AI_ENDPOINTS_ACCESS_TOKEN", None), temperature=0.1, max_tokens=Settings.num_output)
# Expected fixture (the Paul Graham essay from the llama-index repo):
# mkdir data
# curl --output-dir data -O https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt
documents = SimpleDirectoryReader("data").load_data()
# Builds the vector index (embeds every chunk via the OVHcloud endpoint),
# then answers one question with retrieval-augmented generation.
index = VectorStoreIndex.from_documents(documents)
response = index.as_query_engine().query("What did the author do growing up?")
print(response)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment