Last active
May 23, 2023 17:14
-
-
Save zoharbabin/df9dc4d6ce345b80cef6ff134a2c05b8 to your computer and use it in GitHub Desktop.
Sample code to use the LlamaIndex KalturaESearchReader - https://github.com/emptycrown/llama-hub/pull/286
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import sys | |
from llama_index import ( | |
download_loader, | |
GPTVectorStoreIndex, | |
LLMPredictor, | |
ServiceContext | |
) | |
from langchain.llms import OpenAI | |
from KalturaClient.Plugins.Core import KalturaMediaType | |
from KalturaClient.Plugins.ElasticSearch import ( | |
KalturaESearchSortOrder, KalturaESearchEntryOrderByFieldName, | |
KalturaESearchOrderBy, KalturaESearchEntryOrderByItem, KalturaESearchCaptionItem, | |
KalturaESearchEntryItem, KalturaESearchEntryFieldName, KalturaESearchCaptionFieldName, | |
KalturaESearchEntryParams, KalturaESearchCategoryEntryItem, KalturaESearchEntryOperator, | |
KalturaESearchOperatorType, KalturaESearchItemType, KalturaCategoryEntryStatus, KalturaESearchCategoryEntryFieldName | |
) | |
# Kaltura credentials and reader settings.
# These values are passed as keyword arguments to KalturaESearchReader below.
# PARTNER_ID and API_SECRET are placeholders; replace them with real
# account values before running.
PARTNER_ID: int = 0
API_SECRET: str = "xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
USER_ID: str = "LlamaTester"
KS_TYPE: int = 2
KS_EXPIRY: int = 86400  # presumably seconds (24h) -- confirm against the loader
KS_PRIVILEGES: str = "disableentitlement"
KALTURA_API_ENDPOINT: str = "https://cdnapi-ev.kaltura.com/"
REQUEST_TIMEOUT: int = 500
SHOULD_LOG_API_CALLS: bool = True
# How many entries to load (pageSize).
# NOTE(review): this constant is not referenced by the script below, which
# passes a literal 5 to reader.load_data() -- confirm which value is intended.
MAX_ENTRIES: int = 1
CATEGORY_NAME_TO_FILTER: str = "categoryname"  # <-- replace this to your category name
# Route WARNING-and-above log records to stdout.
# basicConfig() attaches a stdout StreamHandler to the root logger when the
# root logger has no handlers yet, so a second StreamHandler is not added
# here: the extra handler made every record print twice.
logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
# Obtain the KalturaESearchReader loader class via llama_index's
# download_loader, using a local custom path and a llama-hub fork URL.
KalturaESearchReader = download_loader(
    loader_class="KalturaESearchReader",
    custom_path="../llama-hub/loader_hub",
    loader_hub_url="https://raw.githubusercontent.com/zoharbabin/llama-hub/main/loader_hub/",
)

# Instantiate the reader with the credentials and settings defined above.
reader = KalturaESearchReader(
    partner_id=PARTNER_ID,
    api_secret=API_SECRET,
    user_id=USER_ID,
    ks_type=KS_TYPE,
    ks_expiry=KS_EXPIRY,
    ks_privileges=KS_PRIVILEGES,
    kaltura_api_endpoint=KALTURA_API_ENDPOINT,
    request_timeout=REQUEST_TIMEOUT,
    should_log_api_calls=SHOULD_LOG_API_CALLS,
)  # type: ignore KalturaESearchReader
# Build the eSearch query: three search items combined with AND, with results
# sorted by last-updated time, newest first.
search_params = KalturaESearchEntryParams()

# Sort the search results in descending order by entry last updated
order_item = KalturaESearchEntryOrderByItem()
order_item.sortField = KalturaESearchEntryOrderByFieldName.UPDATED_AT
order_item.sortOrder = KalturaESearchSortOrder.ORDER_BY_DESC
search_params.orderBy = KalturaESearchOrderBy()
search_params.orderBy.orderItems = [order_item]

# Find only entries that have captions -
caption_item = KalturaESearchCaptionItem()
caption_item.fieldName = KalturaESearchCaptionFieldName.CONTENT
caption_item.itemType = KalturaESearchItemType.EXISTS

# Find only entries that are inside an exact category name -
category_item = KalturaESearchCategoryEntryItem()
category_item.categoryEntryStatus = KalturaCategoryEntryStatus.ACTIVE
category_item.fieldName = KalturaESearchCategoryEntryFieldName.NAME
category_item.addHighlight = False
category_item.itemType = KalturaESearchItemType.EXACT_MATCH
category_item.searchTerm = CATEGORY_NAME_TO_FILTER

# Find only video entries (KalturaMediaType.VIDEO)
entry_item = KalturaESearchEntryItem()
entry_item.fieldName = KalturaESearchEntryFieldName.MEDIA_TYPE
entry_item.addHighlight = False
entry_item.itemType = KalturaESearchItemType.EXACT_MATCH
entry_item.searchTerm = KalturaMediaType.VIDEO

# Create an AND relationship between the search items above; the list order
# (captions, category, media type) matches the order they were defined in.
search_params.searchOperator = KalturaESearchEntryOperator()
search_params.searchOperator.operator = KalturaESearchOperatorType.AND_OP
search_params.searchOperator.searchItems = [caption_item, category_item, entry_item]
# Run the search and load the matching entries as documents.
# NOTE(review): the positional arguments are presumably with_captions=True and
# max_entries=5 -- confirm against the loader's load_data signature. The
# literal 5 also bypasses the MAX_ENTRIES constant defined above.
entry_docs = reader.load_data(search_params, True, 5)
# To use the documents with LangChain instead:
# langchain_documents = [d.to_langchain_format() for d in entry_docs]

# LLM Predictor (text-davinci-003, streaming enabled) + service context
llm_predictor = LLMPredictor(
    llm=OpenAI(temperature=0, model_name="text-davinci-003", streaming=True)
)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

# Build a vector-store index over the loaded entry documents.
index = GPTVectorStoreIndex.from_documents(entry_docs, service_context=service_context)
# Query engine configured for streaming output and the 10 most similar chunks.
query_engine = index.as_query_engine(streaming=True, similarity_top_k=10)

# Ask for a JSON-formatted list of segments. The field explanations use the
# same key names as the JSON template ("keywords", not "keyword") so the
# prompt does not describe a key the template never declares.
request = "the top 5 video segments where the speaker discusses the future of events in education"
prompt = (
    "Provide a json formatted response of the following: " + request +
    ". Your json response should look like so: {startTime: xxx, endTime:zzz, speakerName: zzz, keywords: nnn} ." +
    "startTime represents the time in the video this segment begins. " +
    "endTime represents the time in the video this segment ends and another topic begins (segment can be multiple lines long). " +
    "speakerName represents the primary speaker talking in that segment. " +
    "keywords represents a one-word description of the segment as a title of that segment. "
)
response_stream = query_engine.query(prompt)

# Print the streamed response.
response_stream.print_response_stream()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import sys | |
from llama_index import GPTVectorStoreIndex, download_loader | |
# Kaltura credentials and reader settings.
# These values are passed as keyword arguments to KalturaESearchReader below.
# PARTNER_ID and API_SECRET are placeholders; replace them with real
# account values before running.
PARTNER_ID: int = 0
API_SECRET: str = "xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
USER_ID: str = "LlamaTester"
KS_TYPE: int = 2
KS_EXPIRY: int = 86400  # presumably seconds (24h) -- confirm against the loader
KS_PRIVILEGES: str = "disableentitlement"
KALTURA_API_ENDPOINT: str = "https://cdnapi-ev.kaltura.com/"
REQUEST_TIMEOUT: int = 500
SHOULD_LOG_API_CALLS: bool = True
# How many entries to load (pageSize).
# NOTE(review): not referenced by the script below, which passes
# max_entries=5 to reader.load_data() -- confirm which value is intended.
MAX_ENTRIES: int = 1
# NOTE(review): not referenced by the script below, which passes
# category_ids=None to reader.load_data().
CATEGORY_IDS_TO_FILTER: str = "123,56,6846"  # <-- replace this to your categories
# Route WARNING-and-above log records to stdout.
# basicConfig() attaches a stdout StreamHandler to the root logger when the
# root logger has no handlers yet, so a second StreamHandler is not added
# here: the extra handler made every record print twice.
logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
# Obtain the KalturaESearchReader loader class via llama_index's
# download_loader, using a local custom path and a llama-hub fork URL.
KalturaESearchReader = download_loader(
    loader_class="KalturaESearchReader",
    custom_path="../llama-hub/loader_hub",
    loader_hub_url="https://raw.githubusercontent.com/zoharbabin/llama-hub/main/loader_hub/",
)

# Instantiate the reader with the credentials and settings defined above.
reader = KalturaESearchReader(
    partner_id=PARTNER_ID,
    api_secret=API_SECRET,
    user_id=USER_ID,
    ks_type=KS_TYPE,
    ks_expiry=KS_EXPIRY,
    ks_privileges=KS_PRIVILEGES,
    kaltura_api_endpoint=KALTURA_API_ENDPOINT,
    request_timeout=REQUEST_TIMEOUT,
    should_log_api_calls=SHOULD_LOG_API_CALLS,
)  # type: ignore KalturaESearchReader
# Load up to 5 entries matching the free-text term "education", requesting
# captions and applying no category filter (category_ids=None).
entry_docs = reader.load_data(
    search_operator_and=True,
    free_text="education",
    category_ids=None,
    with_captions=True,
    max_entries=5,
)
# Debug aid: uncomment to inspect the loaded documents.
# from pprint import pprint; pprint(entry_docs)

# Build a vector-store index over the loaded documents and a default query
# engine on top of it.
index = GPTVectorStoreIndex.from_documents(entry_docs)
query_engine = index.as_query_engine()
# Ask for a JSON-formatted list of segments. The field explanations use the
# same key names as the JSON template ("keywords", not "keyword") so the
# prompt does not describe a key the template never declares.
request = "the top 5 video segments where the speaker discusses the future of events in education"
prompt = (
    "Provide a json formatted response of the following: " + request +
    ". Your json response should look like so: {startTime: xxx, endTime:zzz, speakerName: zzz, keywords: nnn} ." +
    "startTime represents the time in the video this segment begins. " +
    "endTime represents the time in the video this segment ends and another topic begins (segment can be multiple lines long). " +
    "speakerName represents the primary speaker talking in that segment. " +
    "keywords represents a one-word description of the segment as a title of that segment. "
)
response = query_engine.query(prompt)
print(response)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment