Skip to content

Instantly share code, notes, and snippets.

use arrow::array::ArrayRef;
use arrow::record_batch::RecordBatch;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use parquet::arrow::ArrowWriter;
use std::env;
use std::fs::File;
use std::path::Path;
use std::time::Instant;
fn main() -> Result<(), Box<dyn std::error::Error>> {
[package]
name = "poodah"
version = "0.1.0"
edition = "2021"
[dependencies]
arrow = "25.0.0"
parquet = "25.0.0"
rusoto_core = "0.48.0"
rusoto_mock = "0.48.0"
@mooreniemi
mooreniemi / Cargo.toml
Created April 29, 2024 03:10
Local (non-production) shard assignment via etcd
[package]
name = "cete_node"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
actix-web = "4"
aws-config = "*"
aws-sdk-codecommit = "*"
@mooreniemi
mooreniemi / increment_counter.rs
Created April 29, 2024 01:51
A basic compare-and-swap (CAS) operation for etcd
/// Increments a counter stored in a JSON object within etcd under the specified key.
/// Assumes the JSON structure is {"count": int}. Will try 5 times before failing.
async fn increment_counter(
etcd: &AsyncMutex<Client>,
key: &str,
) -> Result<(), Box<dyn std::error::Error>> {
let key = key.to_string();
let mut client = etcd.lock().await;
// FIXME: actually we don't want to retry - we want to bail out and try to find another assignment
@mooreniemi
mooreniemi / ia_select.rs
Created January 29, 2024 03:54
Example of the IA-Select algorithm
use std::collections::HashMap;
use std::time::Instant;
/**
The IA-Select algorithm translated into Rust from
[Diversifying Search Results](https://www.microsoft.com/en-us/research/wp-content/uploads/2009/02/diversifying-wsdm09.pdf)
which is a "(1 − 1/`e`)-approximation algorithm for `Diversify(k)`." `Diversify(k)` is an NP-hard function that maximizes
the probability that you find a subset of documents from multiple categories of documents that "satisfies the average user."
IA stands for "intent aware" but intents are then mapped to categories.
@mooreniemi
mooreniemi / ray_worker_progress.py
Created July 20, 2023 16:38
Getting worker progress and results with Ray
import ray
import time
import logging
from threading import Thread
log = logging.getLogger(__name__)
@ray.remote
class Worker:
extern crate blas_src;
use hnsw::{Hnsw, Params, Searcher};
use ndarray_npy::{ViewNpyExt, WriteNpyExt};
use memmap2::{Mmap, MmapMut};
use rand_pcg::Pcg64;
use space::Metric;
use std::{fs::OpenOptions, io::BufWriter, time::Instant};
extern crate blas_src;
use hnsw::{Hnsw, Params, Searcher};
use ndarray_npy::{ViewNpyExt, WriteNpyExt};
use memmap2::{Mmap, MmapMut};
use rand_pcg::Pcg64;
use space::Metric;
use std::{fs::OpenOptions, io::BufWriter, time::Instant};
[package]
name = "mem"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
rayon = "1.5"
indicatif = {version = "0.16", features = ["rayon"]}
[package]
name = "mem"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
rayon = "1.5"
indicatif = {version = "0.16", features = ["rayon"]}