Skip to content

Instantly share code, notes, and snippets.

@harishkotra
Created March 31, 2025 00:44
Show Gist options
  • Save harishkotra/476b1eeb71c79fa4d2b560aa50ae584e to your computer and use it in GitHub Desktop.
Save harishkotra/476b1eeb71c79fa4d2b560aa50ae584e to your computer and use it in GitHub Desktop.
Use this Python script to convert a CSV file into embeddings stored in a locally running Qdrant instance.
import os
import pandas as pd
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, Distance, VectorParams
# Configuration: module-level settings read by setup_qdrant().
CSV_FILE = "snippets.csv" # Path of the CSV to load; update with your own csv path
QDRANT_HOST = "localhost" # Host of the Qdrant server; change if using a remote Qdrant instance
QDRANT_PORT = 6333 # Port passed to QdrantClient together with QDRANT_HOST
COLLECTION_NAME = "solidity_snippets" # Collection that is (re)created, filled and snapshotted
def setup_qdrant():
    """Embed every CSV row and load the vectors into a fresh Qdrant collection.

    Reads ``CSV_FILE`` as a single header-less column of code snippets,
    encodes them with the ``microsoft/codebert-base`` model, drops and
    re-creates ``COLLECTION_NAME`` sized to the embedding dimension (cosine
    distance), upserts one point per snippet (id = position in the list,
    payload ``{"code": <snippet>}``) and finally requests a snapshot of the
    collection.

    Raises:
        ValueError: If the CSV yields no snippets. This is raised before the
            collection is touched, so an empty input never wipes existing
            data.
    """
    client = QdrantClient(QDRANT_HOST, port=QDRANT_PORT)

    # Load CSV. Read as text so a purely numeric row is not parsed into a
    # number, and drop empty cells, which the encoder cannot handle.
    df = pd.read_csv(CSV_FILE, header=None, names=["solidity_code"], dtype=str)
    snippets = df["solidity_code"].dropna().tolist()
    if not snippets:
        raise ValueError(f"No snippets found in {CSV_FILE!r}; nothing to embed.")

    # Load embedding model
    model = SentenceTransformer("microsoft/codebert-base")

    # Generate embeddings. Passing the whole list lets the model batch
    # internally instead of running one forward pass per row.
    embeddings = model.encode(snippets, convert_to_numpy=True)

    # Start from an empty collection: any existing collection with this name
    # is dropped first. (This is what the deprecated recreate_collection()
    # did; the two explicit calls avoid the deprecation warning.)
    client.delete_collection(collection_name=COLLECTION_NAME)
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=embeddings.shape[1], distance=Distance.COSINE),
    )

    # Insert embeddings into Qdrant in bounded batches so a large CSV does
    # not end up in one oversized request.
    batch_size = 256
    for start in range(0, len(snippets), batch_size):
        stop = start + batch_size
        points = [
            PointStruct(id=point_id, vector=vector.tolist(), payload={"code": code})
            for point_id, (code, vector) in enumerate(
                zip(snippets[start:stop], embeddings[start:stop]), start=start
            )
        ]
        client.upsert(collection_name=COLLECTION_NAME, points=points)
    print(f"Inserted {len(snippets)} embeddings into Qdrant.")

    # Create a snapshot
    client.create_snapshot(collection_name=COLLECTION_NAME)
    print("Snapshot created successfully!")
# Script entry point: run the import only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    setup_qdrant()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment