Skip to content

Instantly share code, notes, and snippets.

@Filimoa
Created January 19, 2025 02:19
Show Gist options
  • Save Filimoa/73f487716929115be54e219317298f06 to your computer and use it in GitHub Desktop.
Save Filimoa/73f487716929115be54e219317298f06 to your computer and use it in GitHub Desktop.
Polars With Cosine Similarity
# !uv pip install polars polars_distance numpy
import polars_distance as pld
import polars as pl
import numpy as np
import time
# Benchmark: row-wise cosine similarity between two fixed-width vector
# columns using the polars_distance plugin.

# Benchmark configuration: vector width and number of vector pairs.
dims = 512
num_rows = 250_000

# Two columns of random vectors, stored as fixed-width Array columns so the
# plugin's array-distance namespace (`dist_arr`) can operate on them.
df = pl.DataFrame(
    {
        "vec1": [np.random.rand(dims) for _ in range(num_rows)],
        "vec2": [np.random.rand(dims) for _ in range(num_rows)],
    },
    schema={
        "vec1": pl.Array(pl.Float64, dims),
        "vec2": pl.Array(pl.Float64, dims),
    },
)

# perf_counter is monotonic and high-resolution, unlike time.time(), which
# can jump if the system clock is adjusted mid-run.
start = time.perf_counter()
# with_columns returns a NEW DataFrame; rebind `df`, otherwise the computed
# column is thrown away and never shows up in the output below.
# NOTE(review): polars_distance documents `dist_arr.cosine` as a cosine
# *distance* (1 - similarity), so it is converted here to match the
# "cosine_similarity" column name — confirm against the installed version.
df = df.with_columns(
    (1.0 - pld.col("vec1").dist_arr.cosine("vec2")).alias("cosine_similarity")
)
end = time.perf_counter()
print(f"Time taken: {end - start:0.3f} seconds")

# `display` only exists inside IPython/Jupyter; fall back to print so the
# script also runs as a plain Python file.
try:
    display(df.head())
except NameError:
    print(df.head())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment