zoharbabin · May 23, 2023 17:14
diff --git a/kaltura_llama_esearch_object.py b/kaltura_llama_esearch_object.py
 import logging
 import sys
 from llama_index import (
    download_loader,
    GPTVectorStoreIndex,
    LLMPredictor,
    ServiceContext
 )
 from langchain.llms import OpenAI
 from KalturaClient.Plugins.Core import KalturaMediaType
 from KalturaClient.Plugins.ElasticSearch import (
    KalturaESearchSortOrder, KalturaESearchEntryOrderByFieldName, 
    KalturaESearchOrderBy, KalturaESearchEntryOrderByItem, KalturaESearchCaptionItem, 
    KalturaESearchEntryItem, KalturaESearchEntryFieldName, KalturaESearchCaptionFieldName, 
    KalturaESearchEntryParams, KalturaESearchCategoryEntryItem, KalturaESearchEntryOperator, 
    KalturaESearchOperatorType, KalturaESearchItemType, KalturaCategoryEntryStatus, KalturaESearchCategoryEntryFieldName
 )

 # Kaltura credentials and session settings.
 # The values below are placeholders -- fill in your own account details before running.
 PARTNER_ID: int = 0
 API_SECRET: str = "xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
 USER_ID: str = "LlamaTester"
 KS_TYPE: int = 2  # NOTE(review): presumably the admin session type -- confirm against the Kaltura API
 KS_EXPIRY: int = 86400  # NOTE(review): presumably seconds (24 hours) -- confirm
 KS_PRIVILEGES: str = "disableentitlement"
 KALTURA_API_ENDPOINT: str = "https://cdnapi-ev.kaltura.com/"
 REQUEST_TIMEOUT: int = 500
 SHOULD_LOG_API_CALLS: bool = True
 # NOTE(review): MAX_ENTRIES is not read anywhere in this script; the load_data()
 # call passes a literal 5 instead.
 MAX_ENTRIES = 1  # how many entries to load (pageSize)
 CATEGORY_NAME_TO_FILTER: str = "categoryname"  # <-- replace this with your category name

 # Send log records of level WARNING and above to stdout. basicConfig() already
 # attaches a stdout handler to the root logger, so no second StreamHandler is
 # added here -- doing so makes every record print twice.
 logging.basicConfig(stream=sys.stdout, level=logging.WARNING)

 # Resolve the KalturaESearchReader loader class through llama_index's
 # download_loader, pointing it at a local llama-hub path and at the raw
 # loader_hub URL of the zoharbabin fork.
 KalturaESearchReader = download_loader(
     loader_class="KalturaESearchReader",
     custom_path="../llama-hub/loader_hub",
     loader_hub_url="https://raw.githubusercontent.com/zoharbabin/llama-hub/main/loader_hub/",
 )

 # Instantiate the reader from the module-level settings defined above.
 reader = KalturaESearchReader(
     # account identity
     partner_id=PARTNER_ID, api_secret=API_SECRET, user_id=USER_ID,
     # Kaltura session (KS) parameters
     ks_type=KS_TYPE, ks_expiry=KS_EXPIRY, ks_privileges=KS_PRIVILEGES,
     # API transport and logging
     kaltura_api_endpoint=KALTURA_API_ENDPOINT,
     request_timeout=REQUEST_TIMEOUT,
     should_log_api_calls=SHOULD_LOG_API_CALLS,
 )  # type: ignore  # the loader class is only resolved at runtime

 # Build the eSearch request: results ordered by last update (descending) and
 # restricted by three conditions that are combined with AND.
 search_params = KalturaESearchEntryParams()

 # Ordering -- a single order item: entry UPDATED_AT, descending.
 search_params.orderBy = KalturaESearchOrderBy()
 search_params.orderBy.orderItems = [KalturaESearchEntryOrderByItem()]
 search_params.orderBy.orderItems[0].sortOrder = KalturaESearchSortOrder.ORDER_BY_DESC
 search_params.orderBy.orderItems[0].sortField = KalturaESearchEntryOrderByFieldName.UPDATED_AT

 # Condition 1 -- the entry has caption content (EXISTS on the caption CONTENT field).
 caption_item = KalturaESearchCaptionItem()
 caption_item.itemType = KalturaESearchItemType.EXISTS
 caption_item.fieldName = KalturaESearchCaptionFieldName.CONTENT

 # Condition 2 -- the entry is actively assigned to a category whose name
 # exactly matches CATEGORY_NAME_TO_FILTER.
 category_item = KalturaESearchCategoryEntryItem()
 category_item.fieldName = KalturaESearchCategoryEntryFieldName.NAME
 category_item.itemType = KalturaESearchItemType.EXACT_MATCH
 category_item.searchTerm = CATEGORY_NAME_TO_FILTER
 category_item.categoryEntryStatus = KalturaCategoryEntryStatus.ACTIVE
 category_item.addHighlight = False

 # Condition 3 -- the entry's media type is exactly KalturaMediaType.VIDEO.
 entry_item = KalturaESearchEntryItem()
 entry_item.fieldName = KalturaESearchEntryFieldName.MEDIA_TYPE
 entry_item.itemType = KalturaESearchItemType.EXACT_MATCH
 entry_item.searchTerm = KalturaMediaType.VIDEO
 entry_item.addHighlight = False

 # Combine the three conditions under one AND operator, in the same order
 # they were defined.
 search_params.searchOperator = KalturaESearchEntryOperator()
 search_params.searchOperator.operator = KalturaESearchOperatorType.AND_OP
 search_params.searchOperator.searchItems = [caption_item, category_item, entry_item]

 # Run the search and load the matching entries as documents.
 # NOTE(review): the arguments are passed positionally -- confirm that True and 5
 # land on the intended load_data() parameters (presumably with_captions and
 # max_entries).
 entry_docs = reader.load_data(search_params, True, 5)
 # To hand the documents to LangChain instead:
 #   [d.to_langchain_format() for d in entry_docs]

 # LLM predictor (text-davinci-003, temperature 0, streaming) + service context
 llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="text-davinci-003", streaming=True))
 service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

 # Index the loaded documents and create a streaming query engine
 # (similarity_top_k=10).
 index = GPTVectorStoreIndex.from_documents(entry_docs, service_context=service_context)
 query_engine = index.as_query_engine(streaming=True, similarity_top_k=10)

 request = "the top 5 video segments where the speaker discusses the future of events in education"
 # The field descriptions must use the same names as the JSON template; the
 # last one previously said "keyword" although the template field is "keywords".
 prompt = (
     f"Provide a json formatted response of the following: {request}"
     ". Your json response should look like so: {startTime: xxx, endTime:zzz, speakerName: zzz, keywords: nnn} ."
     "startTime represents the time in the video this segment begins. "
     "endTime represents the time in the video this segment ends and another topic begins (segment can be multiple lines long). "
     "speakerName represents the primary speaker talking in that segment. "
     "keywords represents a one-word description of the segment as a title of that segment. "
 )
 response_stream = query_engine.query(prompt)
 # Print the answer as it streams in.
 response_stream.print_response_stream()
diff --git a/kaltura_llama_txt_simple.py b/kaltura_llama_txt_simple.py
 import logging
 import sys
 from llama_index import GPTVectorStoreIndex, download_loader

 # Kaltura credentials and session settings.
 # The values below are placeholders -- fill in your own account details before running.
 PARTNER_ID: int = 0
 API_SECRET: str = "xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
 USER_ID: str = "LlamaTester"
 KS_TYPE: int = 2  # NOTE(review): presumably the admin session type -- confirm against the Kaltura API
 KS_EXPIRY: int = 86400  # NOTE(review): presumably seconds (24 hours) -- confirm
 KS_PRIVILEGES: str = "disableentitlement"
 KALTURA_API_ENDPOINT: str = "https://cdnapi-ev.kaltura.com/"
 REQUEST_TIMEOUT: int = 500
 SHOULD_LOG_API_CALLS: bool = True
 # NOTE(review): MAX_ENTRIES is not read anywhere in this script; the load_data()
 # call passes max_entries=5 instead.
 MAX_ENTRIES = 1  # how many entries to load (pageSize)
 # NOTE(review): CATEGORY_IDS_TO_FILTER is not applied either; load_data() is
 # called with category_ids=None.
 CATEGORY_IDS_TO_FILTER: str = "123,56,6846"  # <-- replace this with your categories

 # Send log records of level WARNING and above to stdout. basicConfig() already
 # attaches a stdout handler to the root logger, so no second StreamHandler is
 # added here -- doing so makes every record print twice.
 logging.basicConfig(stream=sys.stdout, level=logging.WARNING)

 # Resolve the KalturaESearchReader loader class through llama_index's
 # download_loader, pointing it at a local llama-hub path and at the raw
 # loader_hub URL of the zoharbabin fork.
 KalturaESearchReader = download_loader(
     loader_class="KalturaESearchReader",
     custom_path="../llama-hub/loader_hub",
     loader_hub_url="https://raw.githubusercontent.com/zoharbabin/llama-hub/main/loader_hub/",
 )

 # Instantiate the reader from the module-level settings defined above.
 reader = KalturaESearchReader(
     # account identity
     partner_id=PARTNER_ID, api_secret=API_SECRET, user_id=USER_ID,
     # Kaltura session (KS) parameters
     ks_type=KS_TYPE, ks_expiry=KS_EXPIRY, ks_privileges=KS_PRIVILEGES,
     # API transport and logging
     kaltura_api_endpoint=KALTURA_API_ENDPOINT,
     request_timeout=REQUEST_TIMEOUT,
     should_log_api_calls=SHOULD_LOG_API_CALLS,
 )  # type: ignore  # the loader class is only resolved at runtime

 # Load up to 5 captioned entries matching the free text "education".
 # category_ids is None, so the CATEGORY_IDS_TO_FILTER constant is not applied.
 entry_docs = reader.load_data(
     search_operator_and=True,
     free_text="education",
     category_ids=None,
     with_captions=True,
     max_entries=5,
 )

 # Index the loaded documents and query them with the default query engine.
 index = GPTVectorStoreIndex.from_documents(entry_docs)
 query_engine = index.as_query_engine()

 request = "the top 5 video segments where the speaker discusses the future of events in education"
 # The field descriptions must use the same names as the JSON template; the
 # last one previously said "keyword" although the template field is "keywords".
 prompt = (
     f"Provide a json formatted response of the following: {request}"
     ". Your json response should look like so: {startTime: xxx, endTime:zzz, speakerName: zzz, keywords: nnn} ."
     "startTime represents the time in the video this segment begins. "
     "endTime represents the time in the video this segment ends and another topic begins (segment can be multiple lines long). "
     "speakerName represents the primary speaker talking in that segment. "
     "keywords represents a one-word description of the segment as a title of that segment. "
 )
 response = query_engine.query(prompt)
 print(response)
	import logging
	import sys
	from llama_index import (
	download_loader,
	GPTVectorStoreIndex,
	LLMPredictor,
	ServiceContext
	)
	from langchain.llms import OpenAI
	from KalturaClient.Plugins.Core import KalturaMediaType
	from KalturaClient.Plugins.ElasticSearch import (
	KalturaESearchSortOrder, KalturaESearchEntryOrderByFieldName,
	KalturaESearchOrderBy, KalturaESearchEntryOrderByItem, KalturaESearchCaptionItem,
	KalturaESearchEntryItem, KalturaESearchEntryFieldName, KalturaESearchCaptionFieldName,
	KalturaESearchEntryParams, KalturaESearchCategoryEntryItem, KalturaESearchEntryOperator,
	KalturaESearchOperatorType, KalturaESearchItemType, KalturaCategoryEntryStatus, KalturaESearchCategoryEntryFieldName
	)

	# Kaltura credentials and session settings.
	# The values below are placeholders -- fill in your own account details before running.
	PARTNER_ID: int = 0
	API_SECRET: str = "xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
	USER_ID: str = "LlamaTester"
	KS_TYPE: int = 2  # NOTE(review): presumably the admin session type -- confirm against the Kaltura API
	KS_EXPIRY: int = 86400  # NOTE(review): presumably seconds (24 hours) -- confirm
	KS_PRIVILEGES: str = "disableentitlement"
	KALTURA_API_ENDPOINT: str = "https://cdnapi-ev.kaltura.com/"
	REQUEST_TIMEOUT: int = 500
	SHOULD_LOG_API_CALLS: bool = True
	# NOTE(review): MAX_ENTRIES is not read anywhere in this script; the load_data()
	# call passes a literal 5 instead.
	MAX_ENTRIES = 1  # how many entries to load (pageSize)
	CATEGORY_NAME_TO_FILTER: str = "categoryname"  # <-- replace this with your category name

	# Send log records of level WARNING and above to stdout. basicConfig() already
	# attaches a stdout handler to the root logger, so no second StreamHandler is
	# added here -- doing so makes every record print twice.
	logging.basicConfig(stream=sys.stdout, level=logging.WARNING)

	# Resolve the KalturaESearchReader loader class through llama_index's
	# download_loader, pointing it at a local llama-hub path and at the raw
	# loader_hub URL of the zoharbabin fork.
	KalturaESearchReader = download_loader(
	    loader_class="KalturaESearchReader",
	    custom_path="../llama-hub/loader_hub",
	    loader_hub_url="https://raw.githubusercontent.com/zoharbabin/llama-hub/main/loader_hub/",
	)

	# Instantiate the reader from the module-level settings defined above.
	reader = KalturaESearchReader(
	    # account identity
	    partner_id=PARTNER_ID, api_secret=API_SECRET, user_id=USER_ID,
	    # Kaltura session (KS) parameters
	    ks_type=KS_TYPE, ks_expiry=KS_EXPIRY, ks_privileges=KS_PRIVILEGES,
	    # API transport and logging
	    kaltura_api_endpoint=KALTURA_API_ENDPOINT,
	    request_timeout=REQUEST_TIMEOUT,
	    should_log_api_calls=SHOULD_LOG_API_CALLS,
	)  # type: ignore  # the loader class is only resolved at runtime

	# Build the eSearch request: results ordered by last update (descending) and
	# restricted by three conditions that are combined with AND.
	search_params = KalturaESearchEntryParams()

	# Ordering -- a single order item: entry UPDATED_AT, descending.
	search_params.orderBy = KalturaESearchOrderBy()
	search_params.orderBy.orderItems = [KalturaESearchEntryOrderByItem()]
	search_params.orderBy.orderItems[0].sortOrder = KalturaESearchSortOrder.ORDER_BY_DESC
	search_params.orderBy.orderItems[0].sortField = KalturaESearchEntryOrderByFieldName.UPDATED_AT

	# Condition 1 -- the entry has caption content (EXISTS on the caption CONTENT field).
	caption_item = KalturaESearchCaptionItem()
	caption_item.itemType = KalturaESearchItemType.EXISTS
	caption_item.fieldName = KalturaESearchCaptionFieldName.CONTENT

	# Condition 2 -- the entry is actively assigned to a category whose name
	# exactly matches CATEGORY_NAME_TO_FILTER.
	category_item = KalturaESearchCategoryEntryItem()
	category_item.fieldName = KalturaESearchCategoryEntryFieldName.NAME
	category_item.itemType = KalturaESearchItemType.EXACT_MATCH
	category_item.searchTerm = CATEGORY_NAME_TO_FILTER
	category_item.categoryEntryStatus = KalturaCategoryEntryStatus.ACTIVE
	category_item.addHighlight = False

	# Condition 3 -- the entry's media type is exactly KalturaMediaType.VIDEO.
	entry_item = KalturaESearchEntryItem()
	entry_item.fieldName = KalturaESearchEntryFieldName.MEDIA_TYPE
	entry_item.itemType = KalturaESearchItemType.EXACT_MATCH
	entry_item.searchTerm = KalturaMediaType.VIDEO
	entry_item.addHighlight = False

	# Combine the three conditions under one AND operator, in the same order
	# they were defined.
	search_params.searchOperator = KalturaESearchEntryOperator()
	search_params.searchOperator.operator = KalturaESearchOperatorType.AND_OP
	search_params.searchOperator.searchItems = [caption_item, category_item, entry_item]

	# Run the search and load the matching entries as documents.
	# NOTE(review): the arguments are passed positionally -- confirm that True and 5
	# land on the intended load_data() parameters (presumably with_captions and
	# max_entries).
	entry_docs = reader.load_data(search_params, True, 5)
	# To hand the documents to LangChain instead:
	#   [d.to_langchain_format() for d in entry_docs]

	# LLM predictor (text-davinci-003, temperature 0, streaming) + service context
	llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="text-davinci-003", streaming=True))
	service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

	# Index the loaded documents and create a streaming query engine
	# (similarity_top_k=10).
	index = GPTVectorStoreIndex.from_documents(entry_docs, service_context=service_context)
	query_engine = index.as_query_engine(streaming=True, similarity_top_k=10)

	request = "the top 5 video segments where the speaker discusses the future of events in education"
	# The field descriptions must use the same names as the JSON template; the
	# last one previously said "keyword" although the template field is "keywords".
	prompt = (
	    f"Provide a json formatted response of the following: {request}"
	    ". Your json response should look like so: {startTime: xxx, endTime:zzz, speakerName: zzz, keywords: nnn} ."
	    "startTime represents the time in the video this segment begins. "
	    "endTime represents the time in the video this segment ends and another topic begins (segment can be multiple lines long). "
	    "speakerName represents the primary speaker talking in that segment. "
	    "keywords represents a one-word description of the segment as a title of that segment. "
	)
	response_stream = query_engine.query(prompt)
	# Print the answer as it streams in.
	response_stream.print_response_stream()
	import logging
	import sys
	from llama_index import GPTVectorStoreIndex, download_loader

	# Kaltura credentials and session settings.
	# The values below are placeholders -- fill in your own account details before running.
	PARTNER_ID: int = 0
	API_SECRET: str = "xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
	USER_ID: str = "LlamaTester"
	KS_TYPE: int = 2  # NOTE(review): presumably the admin session type -- confirm against the Kaltura API
	KS_EXPIRY: int = 86400  # NOTE(review): presumably seconds (24 hours) -- confirm
	KS_PRIVILEGES: str = "disableentitlement"
	KALTURA_API_ENDPOINT: str = "https://cdnapi-ev.kaltura.com/"
	REQUEST_TIMEOUT: int = 500
	SHOULD_LOG_API_CALLS: bool = True
	# NOTE(review): MAX_ENTRIES is not read anywhere in this script; the load_data()
	# call passes max_entries=5 instead.
	MAX_ENTRIES = 1  # how many entries to load (pageSize)
	# NOTE(review): CATEGORY_IDS_TO_FILTER is not applied either; load_data() is
	# called with category_ids=None.
	CATEGORY_IDS_TO_FILTER: str = "123,56,6846"  # <-- replace this with your categories

	# Send log records of level WARNING and above to stdout. basicConfig() already
	# attaches a stdout handler to the root logger, so no second StreamHandler is
	# added here -- doing so makes every record print twice.
	logging.basicConfig(stream=sys.stdout, level=logging.WARNING)

	# Resolve the KalturaESearchReader loader class through llama_index's
	# download_loader, pointing it at a local llama-hub path and at the raw
	# loader_hub URL of the zoharbabin fork.
	KalturaESearchReader = download_loader(
	    loader_class="KalturaESearchReader",
	    custom_path="../llama-hub/loader_hub",
	    loader_hub_url="https://raw.githubusercontent.com/zoharbabin/llama-hub/main/loader_hub/",
	)

	# Instantiate the reader from the module-level settings defined above.
	reader = KalturaESearchReader(
	    # account identity
	    partner_id=PARTNER_ID, api_secret=API_SECRET, user_id=USER_ID,
	    # Kaltura session (KS) parameters
	    ks_type=KS_TYPE, ks_expiry=KS_EXPIRY, ks_privileges=KS_PRIVILEGES,
	    # API transport and logging
	    kaltura_api_endpoint=KALTURA_API_ENDPOINT,
	    request_timeout=REQUEST_TIMEOUT,
	    should_log_api_calls=SHOULD_LOG_API_CALLS,
	)  # type: ignore  # the loader class is only resolved at runtime

	# Load up to 5 captioned entries matching the free text "education".
	# category_ids is None, so the CATEGORY_IDS_TO_FILTER constant is not applied.
	entry_docs = reader.load_data(
	    search_operator_and=True,
	    free_text="education",
	    category_ids=None,
	    with_captions=True,
	    max_entries=5,
	)

	# Index the loaded documents and query them with the default query engine.
	index = GPTVectorStoreIndex.from_documents(entry_docs)
	query_engine = index.as_query_engine()

	request = "the top 5 video segments where the speaker discusses the future of events in education"
	# The field descriptions must use the same names as the JSON template; the
	# last one previously said "keyword" although the template field is "keywords".
	prompt = (
	    f"Provide a json formatted response of the following: {request}"
	    ". Your json response should look like so: {startTime: xxx, endTime:zzz, speakerName: zzz, keywords: nnn} ."
	    "startTime represents the time in the video this segment begins. "
	    "endTime represents the time in the video this segment ends and another topic begins (segment can be multiple lines long). "
	    "speakerName represents the primary speaker talking in that segment. "
	    "keywords represents a one-word description of the segment as a title of that segment. "
	)
	response = query_engine.query(prompt)
	print(response)