Created
January 15, 2025 06:17
-
-
Save iamaziz/730e2f895793bd419b08680013604841 to your computer and use it in GitHub Desktop.
RAG: vector search vs. semantic reranker
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "866fc9e5-ae73-466c-bf61-61b2c66cd0ea", | |
"metadata": {}, | |
"source": [ | |
"# RAG: Vector Search vs. Semantic Re-ranking\n", | |
"\n", | |
"---" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "948fd54d-a472-4018-b65b-03ba709b492e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from typing import List, Tuple\n", | |
"\n", | |
"import torch\n", | |
"from sentence_transformers import SentenceTransformer, CrossEncoder, util" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "1b3869a0-d77d-4876-91a4-b8bc54788523", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
class VectorSearch:
    """Embed a document collection once, then answer queries by cosine similarity.

    Uses a bi-encoder (SentenceTransformer): each document is embedded
    independently of any query, so retrieval reduces to one similarity
    computation between the query vector and the precomputed doc matrix.
    """

    def __init__(self, docs: List[str]):
        # Bi-encoder embedding model; documents are embedded once up front.
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.docs = docs
        self.doc_embeddings = self.model.encode(self.docs)

    def search(self, query: str, top_k: int = 5) -> List[Tuple[str, torch.Tensor]]:
        """Return the top_k (document, cosine-similarity) pairs, best first."""
        query_embedding = self.model.encode(query)
        # Cosine similarity of the query against every stored document.
        scores = util.cos_sim(query_embedding, self.doc_embeddings)[0]
        # Pair each document with its score and rank highest-similarity first.
        ranked = sorted(zip(self.docs, scores), key=lambda pair: pair[1], reverse=True)
        return ranked[:top_k]
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "1a015e13-4b24-4444-b6c7-454a86ce5ba7", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
# Toy knowledge base: four on-topic green-tea facts plus three distractor
# sentences, so retrieval quality is easy to eyeball.
documents = [
    "Green tea contains antioxidants that reduce oxidative stress.",
    "Green tea may help with weight loss by boosting metabolism.",
    "Green tea can improve brain function due to its caffeine content.",
    "Green tea is grown in Japan and China.",
    "The cat slept on the windowsill.",
    "She ate a sandwich for lunch yesterday.",
    "New York City's subway system is the oldest in the world.",
]

# Embed the corpus once; subsequent queries are answered by similarity lookup.
vector_db = VectorSearch(documents)
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "79602754-cea3-4fd2-907e-c3e6c1ade79d", | |
"metadata": {}, | |
"source": [ | |
"> # vector search results" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "be0fed76-9ab6-4031-9a75-c0d56c512b75", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Similarity score \t\t Document\n", | |
"0.74 | Green tea contains antioxidants that reduce oxidative stress. \n", | |
"0.66 | Green tea may help with weight loss by boosting metabolism. \n", | |
"0.65 | Green tea can improve brain function due to its caffeine content. \n", | |
"0.55 | Green tea is grown in Japan and China. \n", | |
"0.03 | She ate a sandwich for lunch yesterday. \n" | |
] | |
} | |
], | |
"source": [ | |
query = "What are the health benefits of green tea?"

# Stage 1: fast bi-encoder retrieval (i.e. vector DB similarity search).
similar_docs = vector_db.search(query, top_k=5)

print("Similarity score \t\t Document")
for doc, score in similar_docs:
    print(f"{score:<17.2f} | {doc} ")
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "8a4e5963-0054-441c-b465-fb9f3ad243de", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['Green tea contains antioxidants that reduce oxidative stress.',\n", | |
" 'Green tea may help with weight loss by boosting metabolism.',\n", | |
" 'Green tea can improve brain function due to its caffeine content.',\n", | |
" 'Green tea is grown in Japan and China.',\n", | |
" 'She ate a sandwich for lunch yesterday.']" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
# Keep only the document texts; these become the reranker's candidate set.
initial_results = [doc for doc, _score in similar_docs]
initial_results
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "1d927d38-341d-4e5c-a40d-7d4f4a89d369", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
# Re-Ranker
class SemanticReranker:
    """Re-order candidate documents with a cross-encoder.

    The cross-encoder attends over the query and a document jointly, which
    yields more accurate relevance scores than comparing independent
    embeddings, but it requires a full forward pass per (query, document)
    pair — typically 10-100x slower than bi-encoder vector search. It is
    therefore applied only to a small shortlist of retrieved candidates.
    """

    def __init__(self):
        # Cross-encoder fine-tuned on the MS MARCO passage-ranking task.
        self.model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

    def rerank(self, query: str, docs: List[str], top_k: int = 10) -> List[Tuple[str, float]]:
        """Score every doc against the query; return the top_k, best first."""
        # One (query, passage) pair per candidate document.
        pairs = [[query, doc] for doc in docs]

        # Joint relevance score for each pair.
        scores = self.model.predict(pairs)

        # Highest relevance first; the stable sort keeps tied docs in input order.
        reranked = sorted(zip(docs, scores), key=lambda pair: pair[1], reverse=True)
        return reranked[:top_k]
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "c97aa703-1a6b-4f7c-97ff-56a48ff8d445", | |
"metadata": {}, | |
"source": [ | |
"> # re-ranking results" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "7e1a590f-729d-4cd9-9d1f-0618ee4a6130", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Relevance score \t\t Document\n", | |
"7.93 | Green tea may help with weight loss by boosting metabolism. \n", | |
"7.02 | Green tea contains antioxidants that reduce oxidative stress. \n", | |
"6.80 | Green tea can improve brain function due to its caffeine content. \n", | |
"-0.99 | Green tea is grown in Japan and China. \n", | |
"-11.08 | She ate a sandwich for lunch yesterday. \n" | |
] | |
} | |
], | |
"source": [ | |
# Stage 2: cross-encoder re-ranking of the retrieved shortlist.
reranker = SemanticReranker()
final_results = reranker.rerank(query, initial_results, top_k=10)

print("Relevance score \t\t Document")
for doc, score in final_results:
    print(f"{score:<17.2f} | {doc} ")
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "2e3a8344-81dd-4582-b792-091e5bc125c4", | |
"metadata": {}, | |
"source": [ | |
"---\n", | |
"\n", | |
"# Vector Search (i.e. RAG Retrieval) vs. Semantic Reranking: Key Differences\n", | |
"\n", | |
"While both **vector similarity search** and **semantic reranking** involve semantic similarity, they serve different purposes and operate at different stages of the pipeline.\n", | |
"\n", | |
"---\n", | |
"\n", | |
"## Key Differences Between Retriever and Reranker\n", | |
"\n", | |
"| Feature | Retriever | Semantic Reranker |\n", | |
"|------------------------|------------------------------------|------------------------------------|\n", | |
"| **Scope** | Entire corpus | Retrieved documents (e.g., top-100) |\n", | |
"| **Model** | Lightweight encoder (e.g., BERT) | More powerful model (e.g., cross-encoder) |\n", | |
"| **Efficiency** | Optimized for speed (e.g., FAISS) | Computationally expensive |\n", | |
"| **Output** | Initial set of documents | Reordered documents by relevance |\n", | |
"\n", | |
"---\n", | |
"\n", | |
"## When is Reranking Most Useful?\n", | |
"\n", | |
"- When the retriever’s initial results are noisy or suboptimal.\n", | |
"- When the query requires nuanced understanding (e.g., complex or ambiguous queries).\n", | |
"- When the downstream task (e.g., answer generation) benefits from highly relevant context.\n", | |
"\n", | |
"---\n", | |
"\n", | |
"## Summary\n", | |
"\n", | |
"While RAG retrieval and semantic reranking both leverage semantic similarity, they serve complementary roles:\n", | |
"- The **retriever** quickly fetches a broad set of candidate documents.\n", | |
"- The **reranker** refines this set by reordering documents based on fine-grained semantic relevance.\n", | |
"\n", | |
"By combining the two, RAG achieves a balance between efficiency and accuracy, ensuring that the generator receives the most relevant context for high-quality answer generation." | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.12.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment