Created
March 31, 2025 00:44
-
-
Save harishkotra/476b1eeb71c79fa4d2b560aa50ae584e to your computer and use it in GitHub Desktop.
Convert your CSV file into embeddings stored in a locally running Qdrant instance using this Python script.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer | |
| from qdrant_client import QdrantClient | |
| from qdrant_client.models import PointStruct, Distance, VectorParams | |
| # Configuration: edit these values to match your setup before running the script. | |
| # Input CSV; setup_qdrant() reads it without a header row as a single column of code snippets. | |
| CSV_FILE = "snippets.csv" # Update with your csv path | |
| QDRANT_HOST = "localhost" # Change if using a remote Qdrant instance | |
| QDRANT_PORT = 6333 # Port passed to QdrantClient together with QDRANT_HOST | |
| # Target collection; setup_qdrant() recreates it on every run before inserting points. | |
| COLLECTION_NAME = "solidity_snippets" | |
def setup_qdrant():
    """Embed the snippets in ``CSV_FILE`` and load them into Qdrant.

    Steps:
      1. Connect to Qdrant at ``QDRANT_HOST``:``QDRANT_PORT``.
      2. Read ``CSV_FILE`` as a single header-less column of code snippets,
         skipping empty cells.
      3. Encode all snippets with the ``microsoft/codebert-base`` model.
      4. Replace the ``COLLECTION_NAME`` collection with a fresh one whose
         vector size matches the embedding dimension (cosine distance).
         Any existing collection with that name is dropped.
      5. Upsert one point per snippet: the id is the snippet's position
         among the non-empty rows, the payload is ``{"code": snippet}``.
      6. Ask Qdrant to create a snapshot of the collection.

    Raises:
        ValueError: If the CSV contains no non-empty snippets.
    """
    client = QdrantClient(QDRANT_HOST, port=QDRANT_PORT)

    # Load CSV. Empty cells come back from pandas as NaN, which the encoder
    # cannot handle, so drop them and force everything else to ``str``.
    df = pd.read_csv(CSV_FILE, header=None, names=["solidity_code"])
    codes = df["solidity_code"].dropna().astype(str).tolist()
    if not codes:
        raise ValueError(f"No snippets found in {CSV_FILE!r}; nothing to embed.")

    # Load embedding model
    model = SentenceTransformer("microsoft/codebert-base")

    # Generate embeddings in one batched call instead of one call per row;
    # the result is a 2-D array with one row per snippet.
    embeddings = model.encode(codes, convert_to_numpy=True)

    # Start from a clean collection. This deliberately drops any existing
    # collection of the same name (what the deprecated ``recreate_collection``
    # did) so that stale points never mix with the new ones.
    if client.collection_exists(COLLECTION_NAME):
        client.delete_collection(COLLECTION_NAME)
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(
            size=int(embeddings.shape[1]),
            distance=Distance.COSINE,
        ),
    )

    # Insert embeddings into Qdrant
    points = [
        PointStruct(id=i, vector=vector.tolist(), payload={"code": code})
        for i, (code, vector) in enumerate(zip(codes, embeddings))
    ]
    # Send the points in bounded chunks so a large CSV does not end up in a
    # single oversized request.
    batch_size = 256
    for start in range(0, len(points), batch_size):
        client.upsert(
            collection_name=COLLECTION_NAME,
            points=points[start:start + batch_size],
        )
    print(f"Inserted {len(points)} embeddings into Qdrant.")

    # Create a snapshot
    client.create_snapshot(collection_name=COLLECTION_NAME)
    print("Snapshot created successfully!")
| # Script entry point: run the ingestion only when this file is executed directly, not when imported. | |
| if __name__ == "__main__": | |
| setup_qdrant() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment