Skip to content

Instantly share code, notes, and snippets.

@0x0L
Last active January 13, 2025 09:25
Show Gist options
  • Save 0x0L/5bcbe57822d83de6bb308829e74dfcdd to your computer and use it in GitHub Desktop.
Save 0x0L/5bcbe57822d83de6bb308829e74dfcdd to your computer and use it in GitHub Desktop.
arrow chunked_binary_search
use arrow::array::{make_array, ArrayData, Int64Array};
use arrow::pyarrow::PyArrowType;
use pyo3::prelude::*;
/// Finds the insertion index of `val` in a sorted sequence stored as consecutive chunks.
///
/// The chunks are treated as one logical array formed by concatenating them in order.
/// That concatenation must be sorted in ascending order for the result to be meaningful.
/// Empty chunks are ignored.
///
/// * `left == true`: returns the number of elements strictly less than `val`
///   (the leftmost insertion point).
/// * `left == false`: returns the number of elements less than or equal to `val`
///   (the rightmost insertion point).
///
/// The returned index is relative to the start of the concatenated sequence.
fn chunked_binary_search(mut chunks: Vec<&[i64]>, val: i64, left: bool) -> usize {
    // Compare against `val` directly rather than searching for `val + 1`:
    // the increment overflows when `val == i64::MAX`.
    let precedes = |x: i64| if left { x < val } else { x <= val };

    // Empty chunks have no last element to compare against, so drop them in place.
    chunks.retain(|chunk| !chunk.is_empty());

    // First chunk whose last element does not precede the insertion point.
    let idx = chunks.partition_point(|chunk| matches!(chunk.last(), Some(&x) if precedes(x)));
    let offset: usize = chunks[..idx].iter().map(|chunk| chunk.len()).sum();

    match chunks.get(idx) {
        Some(chunk) => offset + chunk.partition_point(|&x| precedes(x)),
        // Every element precedes the insertion point (or there are no elements at all).
        None => offset,
    }
}
#[pyfunction]
#[pyo3(name = "chunked_binary_search", signature = (chunks, val, left=true))]
fn chunked_binary_search_py(chunks: Vec<PyArrowType<ArrayData>>, val: i64, left: bool) -> usize {
let chunks = chunks
.into_iter()
.map(|x| make_array(x.0))
.collect::<Vec<_>>();
let chunks = chunks
.iter()
.map(|x| {
let x: &Int64Array = x.as_any().downcast_ref().unwrap();
&x.values()[..]
})
.collect::<Vec<_>>();
chunked_binary_search(chunks, val, left)
}
/// A Python module implemented in Rust.
#[pymodule]
fn rust_helpers(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // Wrap the Rust search routine as a Python callable, then register it on the module.
    let search_fn = wrap_pyfunction!(chunked_binary_search_py, m)?;
    m.add_function(search_fn)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment