Skip to content

Instantly share code, notes, and snippets.

View a10y's full-sized avatar
🇺🇲
DC

Andrew Duffy a10y

🇺🇲
DC
View GitHub Profile
config:
    flowchart:
        defaultRenderer: elk
flowchart TD
   %% Data Types
   Integer[Integer]:::datatype
   Float[Float]:::datatype
   String[String]:::datatype
@a10y
a10y / macros.rs
Created May 8, 2025 13:31
Vortex DType macros to revisit later
/// Create a new struct type. For example:
///
/// ```
/// use vortex_dtype::{struct_type, DType, PType};
///
/// let the_struct = struct_type! {
/// "x" => DType::from(PType::F64),
/// "y" => DType::from(PType::F64),
/// };
///
@a10y
a10y / Dockerfile
Created April 27, 2025 20:50
Get NanoLLM working on AGX Orin JetPack 6.2
FROM dustynv/nano_llm:r36.4.0
RUN apt update && apt install -y gstreamer1.0-nice
package org.apache.iceberg.parquet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@a10y
a10y / jfk_ocr.py
Created March 19, 2025 15:33
JFK Files analysis
import json
import os
import glob
import pytesseract
import multiprocessing
from pdf2image import convert_from_path
def ocr_pdf_to_json(pdf_path):
# Convert PDF to images (one per page)
@a10y
a10y / zstd_bench.rs
Created February 18, 2025 15:04
Benchmarking ZSTD throughput. Code mostly generated with Claude 3.5 Sonnet
// use rand::Rng;
use std::time::Instant;
use zstd::{decode_all, stream::encode_all};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Generate sample data - 64,000 random u32 integers
let count = 64_000;
println!("Generating {} random integers...", count);
// let mut rng = rand::rng();
plugins {
`java-library`
`maven-publish`
`signing`
}
val tokenizerSharedLibrary by configurations.creating {
isCanBeConsumed = false
}
@a10y
a10y / monty.py
Last active February 8, 2025 19:57
"""
A simple simulation of the Monty Hall problem
"""
import random
ROUNDS = 100_000
WINS = 0
import pyarrow.parquet as pq
import vortex as vx
import numpy as np
from time import time
# embedding dimension taken from OpenAI text-embedding-3-small
EMBED_DIM = 1536
N_EMBEDS = 1
@a10y
a10y / fl_bitpack.rs
Last active September 27, 2024 15:30
Rust bit-packing/unpacking for u8/u3
pub fn pack_u8_u3(input: &[u8], packed: &mut [u8]) {
// We have 1024 / u8::BITS == 128 lanes to pull from.
// Each lane accesses 1024 / T elements of data.
const MASK: u8 = 0b111;
const LANES: usize = 1024 / 8;
for lane in 0..LANES {
// First kernel: take in chunks of W values from the lane, and apply the same
// operation. Being careful to shift off each time.
let a = input[128 * 0 + lane] & MASK;
let b = input[128 * 1 + lane] & MASK;