Skip to content

Instantly share code, notes, and snippets.

@tspannhw
Last active July 4, 2024 15:29
Show Gist options
  • Select an option

  • Save tspannhw/93fbc60c03a2933ba814118503636698 to your computer and use it in GitHub Desktop.

Select an option

Save tspannhw/93fbc60c03a2933ba814118503636698 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "eb201b96-3bd6-4f54-9f3e-0de2aa1a5b2b",
"metadata": {},
"source": [
"## 03-July-2024 == BM25 with Tim slides\n",
"\n",
"#### Tim Spann @PaaSDev\n",
"\n",
"### Milvus - Attu\n",
"\n",
"![milvuslogo](https://milvus.io/images/milvus_logo.svg)\n",
"\n",
"\n",
"### CODE + COMMUNITY\n",
"\n",
"Please join my meetup group NJ/NYC/Philly/Virtual. \n",
"\n",
"[https://www.meetup.com/unstructured-data-meetup-new-york/](https://www.meetup.com/unstructured-data-meetup-new-york/)\n",
"\n",
"\n",
"#### Contact Us\n",
"\n",
"Get Milvused! [https://milvus.io/](https://milvus.io/)\n",
"\n",
"Read my Newsletter every week! [https://github.com/tspannhw/FLiPStackWeekly/blob/main/142-17June2024.md](https://github.com/tspannhw/FLiPStackWeekly/blob/main/142-17June2024.md)\n",
"\n",
"For more cool Unstructured Data, AI and Vector Database videos check out the Milvus vector database videos here\n",
"[https://www.youtube.com/@MilvusVectorDatabase/videos](https://www.youtube.com/@MilvusVectorDatabase/videos)\n",
"\n",
"#### Unstructured Data Meetups \n",
"\n",
"[https://www.meetup.com/pro/unstructureddata/](https://www.meetup.com/pro/unstructureddata/)\n",
"[https://zilliz.com/community/unstructured-data-meetup](https://zilliz.com/community/unstructured-data-meetup)\n",
"[https://zilliz.com/event](https://zilliz.com/event)\n",
"\n",
"#### [https://x.com/milvusio](Twitter/X) \n",
"\n",
"#### [https://www.linkedin.com/company/zilliz/](LinkedIn)\n",
"\n",
"#### [https://discord.com/invite/FjCMmaJng6](Discord)\n",
"\n",
"#### [https://milvusio.medium.com/](Blog)\n",
"\n",
"#### Please star our [https://github.com/milvus-io/milvus](Github)\n",
"\n",
"#### [https://www.youtube.com/@FLaNK-Stack](Youtube)\n",
"\n",
"#### [https://medium.com/@tspann/subscribe](Blog)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "4deca7a7-2817-4f18-b6ab-d9397b3735cb",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pymilvus import MilvusClient\n",
"from pymilvus import (\n",
" utility,\n",
" FieldSchema, CollectionSchema, DataType,\n",
" Collection, AnnSearchRequest, RRFRanker, connections,\n",
")\n",
"from pymilvus.model.sparse.bm25.tokenizers import build_default_analyzer\n",
"from pymilvus.model.sparse import BM25EmbeddingFunction\n",
"from pymilvus import model\n",
"\n",
"DIMENSION = 64 \n",
"MILVUS_URL = \"http://192.168.1.163:19530\" \n",
"COLLECTION_NAME = \"traveladvisories\"\n",
"TRAVEL_URL = \"https://travel.state.gov/_res/rss/TAsTWs.xml\"\n",
"\n",
"# -----------------------------------------------------------------------------\n",
"# Connect to Milvus\n",
"\n",
"# Local Docker Server\n",
"milvus_client = MilvusClient( uri=MILVUS_URL )"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "e4cd2db0-3d15-4980-a3b7-40634755402e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'state': <LoadState: NotLoad>}\n"
]
}
],
"source": [
"from pymilvus import connections\n",
"from pymilvus import utility\n",
"from pymilvus import FieldSchema, CollectionSchema, DataType, Collection\n",
"import pprint\n",
"\n",
"## schema\n",
"schema = milvus_client.create_schema(auto_id=True, enable_dynamic_fields=True)\n",
"schema.add_field(field_name=\"pk\", datatype=DataType.VARCHAR, is_primary=True, max_length=100)\n",
"schema.add_field(field_name=\"title\", datatype=DataType.VARCHAR, max_length=512)\n",
"schema.add_field(field_name=\"link\", datatype=DataType.VARCHAR, max_length=512)\n",
"schema.add_field(field_name=\"summary\", datatype=DataType.VARCHAR, max_length=50000)\n",
"schema.add_field(field_name=\"publisheddate\", datatype=DataType.VARCHAR, max_length=100)\n",
"schema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)\n",
"\n",
"## index\n",
"##index_params = milvus_client.prepare_index_params()\n",
"\n",
"##index_params.add_index(\n",
"## field_name=\"summaryvector\",\n",
"## index_type=\"SPARSE_INVERTED_INDEX\",\n",
"## metric_type=\"IP\"\n",
"##)\n",
"# index_params=index_params\n",
"## create collection\n",
"milvus_client.create_collection(\n",
" collection_name = COLLECTION_NAME,\n",
" schema=schema\n",
")\n",
"\n",
"res = milvus_client.get_load_state(\n",
" collection_name = COLLECTION_NAME\n",
")\n",
"\n",
"print(res)"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "763d98dd-cc2b-4d56-9461-bbfec7b39194",
"metadata": {},
"outputs": [],
"source": [
"# Build Corpous\n",
"\n",
"analyzer = build_default_analyzer(language=\"en\") \n",
"\n",
"# Create corpus based samples from documents\n",
"corpus = [\n",
" \"Reissued after periodic review without changes. Exercise normal precautions in Bhutan. Read the country information page for additional information on travel to Bhutan. If you decide to travel to Bhutan: Enroll in the Smart Traveler Enrollment Program ( STEP ) to receive Alerts and make it easier to locate you in an emergency. Follow the Department of State on Facebook and Twitter . Review the Country Security Report for Bhutan. Visit the CDC page for the latest Travel Health Information related to your travel. Prepare a contingency plan for emergency situations. Review the Traveler’s Checklist\",\n",
" \"Reissued with obsolete COVID-19 page links removed\",\n",
" \"Exercise increased caution in Tajikistan due to terrorism, unexploded landmines, and occasional violence near the border with Kyrgyzstan\",\n",
" \"Prepare a contingency plan for emergency situations. Review the Traveler’s Checklist\",\n",
" \"If you decide to travel to Zambia: Enroll in the Smart Traveler Enrollment Program ( STEP ) to receive Alerts and make it easier to locate you in an emergency. Follow the Department of State on Facebook and Twitter . Follow the U.S. Embassy in Zambia on Facebook and Twitter . Review the Country Security Report for Zambia. Prepare a contingency plan for emergency situations. Review the Traveler’s Checklist . Visit the CDC page for the latest Travel Health Information related to your travel\",\n",
" \"Exercise normal precautions in Barbados. Read the country information page for additional information on travel to Barbados. If you decide to travel to Barbados\",\n",
"]\n",
"\n",
"# Use the analyzer to instantiate the BM25EmbeddingFunction\n",
"bm25_ef = BM25EmbeddingFunction(analyzer)\n",
"\n",
"# Fit the model on the corpus to get the statstics of the corpus\n",
"bm25_ef.fit(corpus)\n"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "a7a384ba-86fe-4df7-8086-d50a4158f271",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Bhutan - Level 1: Exercise Normal Precautions\n",
"Senegal - Level 1: Exercise Normal Precautions\n",
"Tajikistan - Level 2: Exercise Increased Caution\n",
"Iceland - Level 1: Exercise Normal Precautions\n",
"Mainland China, Hong Kong & Macau - See Summaries - Level 3: Reconsider Travel\n",
"Zambia - Level 1: Exercise Normal Precautions\n",
"Armenia - Level 2: Exercise Increased Caution\n",
"Syria - Level 4: Do Not Travel\n",
"Bolivia - Level 2: Exercise Increased Caution\n",
"Taiwan - Level 1: Exercise Normal Precautions\n",
"Rwanda - Level 1: Exercise Normal Precautions\n",
"Uruguay - Level 2: Exercise Increased Caution\n",
"Barbados - Level 1: Exercise Normal Precautions\n",
"North Korea - Level 4: Do Not Travel\n",
"Chad - Level 3: Reconsider Travel\n",
"Hungary - Level 1: Exercise Normal Precautions\n",
"Malta - Level 1: Exercise Normal Precautions\n",
"Burma (Myanmar) - Level 4: Do Not Travel\n",
"Qatar - Level 1: Exercise Normal Precautions\n",
"Saudi Arabia - Level 3: Reconsider Travel\n",
"Estonia - Level 1: Exercise Normal Precautions\n",
"British Virgin Islands - Level 1: Exercise Normal Precautions\n",
"The Bahamas - Level 2: Exercise Increased Caution\n",
"Montenegro - Level 1: Exercise Normal Precautions\n",
"Argentina - Level 1: Exercise Normal Precautions\n",
"Belgium - Level 2: Exercise Increased Caution\n",
"Jordan - Level 2: Exercise Increased Caution\n",
"Sudan - Level 4: Do Not Travel\n",
"Cuba - Level 2: Exercise Increased Caution\n",
"Slovakia - Level 1: Exercise Normal Precautions\n",
"Guinea-Bissau - Level 3: Reconsider Travel\n",
"Cyprus - Level 1: Exercise Normal Precautions\n",
"Eritrea - Level 2: Exercise Increased Caution\n",
"Morocco - Level 2: Exercise Increased Caution\n",
"Germany - Level 2: Exercise Increased Caution\n",
"Jamaica - Level 3: Reconsider Travel\n",
"Algeria - Level 2: Exercise Increased Caution\n",
"Paraguay - Level 1: Exercise Normal Precautions\n",
"Andorra - Level 1: Exercise Normal Precautions\n",
"Lesotho - Level 1: Exercise Normal Precautions\n",
"Dominica - Level 1: Exercise Normal Precautions\n",
"French West Indies - Level 1: Exercise Normal Precautions\n",
"Turkey - Level 2: Exercise Increased Caution\n",
"Czech Republic - Level 1: Exercise Normal Precautions\n",
"Portugal - Level 1: Exercise Normal Precautions\n",
"Afghanistan - Level 4: Do Not Travel\n",
"Suriname - Level 1: Exercise Normal Precautions\n",
"Guatemala - Level 3: Reconsider Travel\n",
"Curaçao - Level 1: Exercise Normal Precautions\n",
"Oman - Level 2: Exercise Increased Caution\n",
"Brazil - Level 2: Exercise Increased Caution\n",
"Austria - Level 1: Exercise Normal Precautions\n",
"Azerbaijan - Level 2: Exercise Increased Caution\n",
"Cameroon - Level 2: Exercise Increased Caution\n",
"Colombia - Level 3: Reconsider Travel\n",
"Georgia - Level 1: Exercise Normal Precautions\n",
"Saint Vincent and the Grenadines - Level 1: Exercise Normal Precautions\n",
"Chile - Level 2: Exercise Increased Caution\n",
"Canada - Level 1: Exercise Normal Precautions\n",
"Belarus - Level 4: Do Not Travel\n",
"Angola - Level 2: Exercise Increased Caution\n",
"Luxembourg - Level 1: Exercise Normal Precautions\n",
"Tuvalu - Level 1: Exercise Normal Precautions\n",
"Kiribati - Level 1: Exercise Normal Precautions\n",
"Zimbabwe - Level 2: Exercise Increased Caution\n",
"Anguilla - Level 1: Exercise Normal Precautions\n",
"North Macedonia - Level 1: Exercise Normal Precautions\n",
"Japan - Level 1: Exercise Normal Precautions\n",
"Bangladesh - Level 2: Exercise Increased Caution\n",
"Ghana - Level 2: Exercise Increased Caution\n",
"Aruba - Level 1: Exercise Normal Precautions\n",
"Sweden - Level 2: Exercise Increased Caution\n",
"French Guiana - Level 1: Exercise Normal Precautions\n",
"Saint Kitts and Nevis - Level 1: Exercise Normal Precautions\n",
"Gabon - Level 2: Exercise Increased Caution\n",
"Mongolia - Level 1: Exercise Normal Precautions\n",
"El Salvador - Level 3: Reconsider Travel\n",
"Madagascar - Level 2: Exercise Increased Caution\n",
"Poland - Level 1: Exercise Normal Precautions\n",
"Mauritius - Level 1: Exercise Normal Precautions\n",
"Moldova - Level 2: Exercise Increased Caution\n",
"Namibia - Level 2: Exercise Increased Caution\n",
"Nigeria - Level 3: Reconsider Travel\n",
"Tunisia - Level 2: Exercise Increased Caution\n",
"Maldives - Level 2: Exercise Increased Caution\n",
"Greece - Level 1: Exercise Normal Precautions\n",
"Central African Republic - Level 4: Do Not Travel\n",
"Somalia - Level 4: Do Not Travel\n",
"Grenada - Level 1: Exercise Normal Precautions\n",
"Norway - Level 1: Exercise Normal Precautions\n",
"Tanzania - Level 2: Exercise Increased Caution\n",
"Tonga - Level 1: Exercise Normal Precautions\n",
"South Sudan - Level 4: Do Not Travel\n",
"Ukraine - Level 4: Do Not Travel\n",
"Ireland - Level 1: Exercise Normal Precautions\n",
"Palau - Level 1: Exercise Normal Precautions\n",
"Russia - Level 4: Do Not Travel\n",
"Sao Tome and Principe - Level 1: Exercise Normal Precautions\n",
"Antarctica - Level 2: Exercise Increased Caution\n",
"Democratic Republic of the Congo - Level 3: Reconsider Travel\n",
"Ecuador - Level 2: Exercise Increased Caution\n",
"Indonesia - Level 2: Exercise Increased Caution\n",
"Latvia - Level 1: Exercise Normal Precautions\n",
"Philippines - Level 2: Exercise Increased Caution\n",
"Marshall Islands - Level 1: Exercise Normal Precautions\n",
"Togo - Level 1: Exercise Normal Precautions\n",
"Uganda - Level 3: Reconsider Travel\n",
"Finland - Level 1: Exercise Normal Precautions\n",
"Croatia - Level 1: Exercise Normal Precautions\n",
"United Kingdom - Level 2: Exercise Increased Caution\n",
"Turks and Caicos Islands - Level 2: Exercise Increased Caution\n",
"Nicaragua - Level 3: Reconsider Travel\n",
"Cambodia - Level 1: Exercise Normal Precautions\n",
"Bosnia and Herzegovina - Level 2: Exercise Increased Caution\n",
"Dominican Republic - Level 2: Exercise Increased Caution\n",
"Spain - Level 2: Exercise Increased Caution\n",
"Liechtenstein - Level 1: Exercise Normal Precautions\n",
"Brunei - Level 1: Exercise Normal Precautions\n",
"Uzbekistan - Level 1: Exercise Normal Precautions\n",
"Kenya - Level 2: Exercise Increased Caution\n",
"Saint Lucia - Level 1: Exercise Normal Precautions\n",
"Benin - Level 2: Exercise Increased Caution\n",
"Papua New Guinea - Level 3: Reconsider Travel\n",
"Slovenia - Level 1: Exercise Normal Precautions\n",
"Sri Lanka - Level 2: Exercise Increased Caution\n",
"French Polynesia - Level 1: Exercise Normal Precautions\n",
"Vietnam - Level 1: Exercise Normal Precautions\n",
"Antigua and Barbuda - Level 1: Exercise Normal Precautions\n",
"Burkina Faso - Level 4: Do Not Travel\n",
"Switzerland - Level 1: Exercise Normal Precautions\n",
"See Individual Summaries -\n",
"Singapore - Level 1: Exercise Normal Precautions\n",
"Iraq - Level 4: Do Not Travel\n",
"Albania - Level 2: Exercise Increased Caution\n",
"The Gambia - Level 2: Exercise Increased Caution\n",
"Netherlands - Level 2: Exercise Increased Caution\n",
"Nauru - Level 1: Exercise Normal Precautions\n",
"Seychelles - Level 1: Exercise Normal Precautions\n",
"Yemen - Level 4: Do Not Travel\n",
"Comoros - Level 2: Exercise Increased Caution\n",
"Trinidad and Tobago - Level 3: Reconsider Travel\n",
"Mozambique - Level 2: Exercise Increased Caution\n",
"Micronesia - Level 1: Exercise Normal Precautions\n",
"Kingdom of Denmark - Level 2: Exercise Increased Caution\n",
"Nepal - Level 2: Exercise Increased Caution\n",
"Honduras - Level 3: Reconsider Travel\n",
"Mali - Level 4: Do Not Travel\n",
"Equatorial Guinea - Level 2: Exercise Increased Caution\n",
"Kazakhstan - Level 1: Exercise Normal Precautions\n",
"Laos - Level 2: Exercise Increased Caution\n",
"Djibouti - Level 2: Exercise Increased Caution\n",
"Sierra Leone - Level 2: Exercise Increased Caution\n",
"Peru - Level 2: Exercise Increased Caution\n",
"Lithuania - Level 1: Exercise Normal Precautions\n",
"Fiji - Level 1: Exercise Normal Precautions\n",
"Egypt - Level 3: Reconsider Travel\n",
"Serbia - Level 2: Exercise Increased Caution\n",
"Cabo Verde - Level 1: Exercise Normal Precautions\n",
"Haiti - Level 4: Do Not Travel\n",
"Thailand - Level 1: Exercise Normal Precautions\n",
"New Caledonia - Level 3: Reconsider Travel\n",
"Burundi - Level 3: Reconsider Travel\n",
"Costa Rica - Level 2: Exercise Increased Caution\n",
"South Korea - Level 1: Exercise Normal Precautions\n",
"Pakistan - Level 3: Reconsider Travel\n",
"Venezuela - Level 4: Do Not Travel\n",
"Solomon Islands - Level 2: Exercise Increased Caution\n",
"Belize - Level 2: Exercise Increased Caution\n",
"Malaysia - Level 1: Exercise Normal Precautions\n",
"United Arab Emirates - Level 2: Exercise Increased Caution\n",
"Liberia - Level 2: Exercise Increased Caution\n",
"Timor-Leste - Level 2: Exercise Increased Caution\n",
"Iran - Level 4: Do Not Travel\n",
"Mauritania - Level 3: Reconsider Travel\n",
"Guinea - Level 3: Reconsider Travel\n",
"Guyana - Level 3: Reconsider Travel\n",
"Niger - Level 3: Reconsider Travel\n",
"Republic of the Congo - Level 2: Exercise Increased Caution\n",
"Botswana - Level 2: Exercise Increased Caution\n",
"Samoa - Level 1: Exercise Normal Precautions\n",
"Bahrain - Level 2: Exercise Increased Caution\n",
"Kosovo - Level 2: Exercise Increased Caution\n",
"Cayman Islands - Level 1: Exercise Normal Precautions\n",
"Malawi - Level 2: Exercise Increased Caution\n",
"Kyrgyzstan - Level 1: Exercise Normal Precautions\n",
"Bulgaria - Level 1: Exercise Normal Precautions\n",
"Eswatini - Level 2: Exercise Increased Caution\n",
"Turkmenistan - Level 1: Exercise Normal Precautions\n",
"Australia - Level 1: Exercise Normal Precautions\n",
"Bermuda - Level 1: Exercise Normal Precautions\n",
"France - Level 2: Exercise Increased Caution\n",
"Sint Maarten - Level 1: Exercise Normal Precautions\n",
"Kuwait - Level 1: Exercise Normal Precautions\n",
"Libya - Level 4: Do Not Travel\n",
"New Zealand - Level 1: Exercise Normal Precautions\n",
"Ethiopia - Level 3: Reconsider Travel\n",
"South Africa - Level 2: Exercise Increased Caution\n",
"Panama - Level 2: Exercise Increased Caution\n",
"Romania - Level 1: Exercise Normal Precautions\n",
"Italy - Level 2: Exercise Increased Caution\n",
"Cote d Ivoire - Level 2: Exercise Increased Caution\n",
"Montserrat - Level 1: Exercise Normal Precautions\n",
"Lebanon - Level 3: Reconsider Travel\n",
"Sint Eustatius - Level 1: Exercise Normal Precautions\n",
"Bonaire - Level 1: Exercise Normal Precautions\n",
"Saba - Level 1: Exercise Normal Precautions\n",
"Mexico - See State Summaries - Level 3: Reconsider Travel\n",
"India - Level 2: Exercise Increased Caution\n",
"Worldwide Caution - Caution\n",
"Vanuatu - Level 1: Exercise Normal Precautions\n",
"Hong Kong - Level 2: Exercise Increased Caution\n",
"Macau - Level 3: Reconsider Travel\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"RPC error: [insert_rows], <MilvusException: (code=1100, message=the length (5479) of 7th string exceeds max length (4096): invalid parameter[expected=valid length string][actual=string length exceeds max length])>, <Time:{'RPC start': '2024-07-04 11:28:36.813465', 'RPC error': '2024-07-04 11:28:36.870016'}>\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"<MilvusException: (code=1100, message=the length (5479) of 7th string exceeds max length (4096): invalid parameter[expected=valid length string][actual=string length exceeds max length])>\n"
]
}
],
"source": [
"import feedparser\n",
"from bs4 import BeautifulSoup\n",
"\n",
"# Function to remove tags\n",
"def remove_tags(html):\n",
"\n",
" # parse html content\n",
" soup = BeautifulSoup(html, \"html.parser\")\n",
"\n",
" for data in soup(['style', 'script']):\n",
" # Remove tags\n",
" data.decompose()\n",
"\n",
" # return data by retrieving the tag content\n",
" return ' '.join(soup.stripped_strings)\n",
"\n",
"# Lets Explore What our Data Looks like\n",
"# Travel Advisories\n",
"feed = feedparser.parse(TRAVEL_URL)\n",
"\n",
"# print(feed)\n",
"\n",
"summaries = [] \n",
"titles = []\n",
"links = []\n",
"publisheddates = []\n",
"\n",
"for post in feed.entries:\n",
" try:\n",
" print(post.title)\n",
" summary = remove_tags(str(post.summary))\n",
" summaries.append(summary)\n",
" titles.append(str(post.title))\n",
" links.append(str(post.link))\n",
" publisheddates.append(str(post.published))\n",
" except Exception as e: \n",
" print(e)\n",
"\n",
"try:\n",
" summaryvector = bm25_ef.encode_documents(summaries)\n",
" i = 0\n",
" \n",
" #entities = [\n",
" # {\n",
" # \"title\": titles[i],\n",
" # \"link\": links[i],\n",
" # \"summary\": summaries[i],\n",
" # \"publisheddate\": publisheddates[i],\n",
" # \"sparse_vector\": embedding.todok(),\n",
" # }\n",
" # for embedding in summaryvector\n",
" # ]\n",
"\n",
" entities = []\n",
" i = 0\n",
" for embedding in summaryvector:\n",
" entities.append({\n",
" \"title\": titles[i],\n",
" \"link\": links[i],\n",
" \"summary\": summaries[i],\n",
" \"publisheddate\": publisheddates[i],\n",
" \"sparse_vector\": embedding.todok(),\n",
" })\n",
" i += 1\n",
"\n",
" # for embedding in summaryvector\n",
" # for i in range(summaryvector.shape[0])\n",
" # Insert entities\n",
" res = milvus_client.insert(collection_name=COLLECTION_NAME, data=entities)\n",
" # print(res)\n",
"except Exception as e: \n",
" print(e)\n",
"\n",
"index_params = milvus_client.prepare_index_params()\n",
"index_params.add_index(\n",
" field_name=\"sparse_vector\",\n",
" index_name=\"sparse_inverted_index\",\n",
" index_type=\"SPARSE_INVERTED_INDEX\",\n",
" metric_type=\"IP\",\n",
" params={\"drop_ratio_build\": 0.2},\n",
")\n",
"\n",
"milvus_client.create_index(collection_name=COLLECTION_NAME, index_params=index_params)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2e796aaf-e5dc-4ca1-bbb3-42155be4e6f1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting feedparser\n",
" Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)\n",
"Collecting sgmllib3k (from feedparser)\n",
" Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25hDownloading feedparser-6.0.11-py3-none-any.whl (81 kB)\n",
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.3/81.3 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hBuilding wheels for collected packages: sgmllib3k\n",
" Building wheel for sgmllib3k (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6049 sha256=ee9f7c228edf44b1ad3eaec8ee7c7422c44b552d3a2c0bbe75c69b9f39274883\n",
" Stored in directory: /Users/timothyspann/Library/Caches/pip/wheels/03/f5/1a/23761066dac1d0e8e683e5fdb27e12de53209d05a4a37e6246\n",
"Successfully built sgmllib3k\n",
"Installing collected packages: sgmllib3k, feedparser\n",
"Successfully installed feedparser-6.0.11 sgmllib3k-1.0.0\n"
]
}
],
"source": [
"!pip3 install feedparser"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "1906f66f-bf10-40e8-bcb4-a44757b27de7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: bs4 in ./milvusvenv/lib/python3.12/site-packages (0.0.2)\n",
"Requirement already satisfied: beautifulsoup4 in ./milvusvenv/lib/python3.12/site-packages (from bs4) (4.12.3)\n",
"Requirement already satisfied: soupsieve>1.2 in ./milvusvenv/lib/python3.12/site-packages (from beautifulsoup4->bs4) (2.5)\n"
]
}
],
"source": [
"!pip3 install bs4"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a151f840-91c7-41ab-896f-55143c262a78",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"search_results = milvus_client.search(\n",
" collection_name=COLLECTION_NAME, # Collection name\n",
" data=query_vector, # Replace with your query vector\n",
" search_params={\n",
" \"metric_type\": \"IP\"\n",
" }, # Search parameters\n",
" limit=10, # Max. number of search results to return\n",
" output_fields=[\"pk\",\"title\",\"link\",\"summary\",\"publisheddate\"], # Fields to return in the search results\n",
" consistency_level=\"Eventually\"\n",
")\n",
"\n",
"# Print search results\n",
"for hits in search_results:\n",
" for hit in hits:\n",
" print(f\"Hit: {hit}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment