Skip to content

Instantly share code, notes, and snippets.

View a10y's full-sized avatar
🇺🇲
DC

Andrew Duffy a10y

🇺🇲
DC
View GitHub Profile
package org.apache.iceberg.parquet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@a10y
a10y / jfk_ocr.py
Created March 19, 2025 15:33
JFK Files analysis
import json
import os
import glob
import pytesseract
import multiprocessing
from pdf2image import convert_from_path
def ocr_pdf_to_json(pdf_path):
# Convert PDF to images (one per page)
@a10y
a10y / zstd_bench.rs
Created February 18, 2025 15:04
Benchmarking ZSTD throughput. Code mostly generated with Claude 3.5 Sonnet
// use rand::Rng;
use std::time::Instant;
use zstd::{decode_all, stream::encode_all};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Generate sample data - `count` (64,000) random u32 integers
let count = 64_000;
println!("Generating {} random integers...", count);
// let mut rng = rand::rng();
// Gradle plugins applied to this build script: `java-library`, `maven-publish`,
// and `signing`.
plugins {
`java-library`
`maven-publish`
`signing`
}
// Creates a configuration named `tokenizerSharedLibrary` (the name is taken from
// the delegated property) and configures it in the trailing block.
// NOTE(review): presumably used to resolve a native tokenizer shared library —
// confirm against the rest of the build script.
val tokenizerSharedLibrary by configurations.creating {
// Mark the configuration as not consumable.
isCanBeConsumed = false
}
@a10y
a10y / monty.py
Last active February 8, 2025 19:57
"""
A simple simulation of the Monty Hall problem
"""
import random
ROUNDS = 100_000
WINS = 0
import pyarrow.parquet as pq
import vortex as vx
import numpy as np
from time import time
# taken from OpenAI text-3-small
EMBED_DIM = 1536
N_EMBEDS = 1
@a10y
a10y / fl_bitpack.rs
Last active September 27, 2024 15:30
Rust bit-packing/unpacking for u8/u3
pub fn pack_u8_u3(input: &[u8], packed: &mut [u8]) {
// We have 1024 / size_of<T>() == 128 lanes to pull from.
// Each lane accesses 1024 / T elements of data.
const MASK: u8 = 0b111;
const LANES: usize = 1024 / 8;
for lane in 0..LANES {
// First kernel: take in chunks of W values from the lane, and apply the same
// operation. Being careful to shift off each time.
let a = input[128 * 0 + lane] & MASK;
let b = input[128 * 1 + lane] & MASK;
/// Recursively emits one `pub const NAME: u16` per identifier, numbering the
/// constants consecutively from the supplied starting expression.
///
/// Expected invocation shape: `make_encoding_ids_rec!(START, A, B, C)`.
/// Each step defines the leading identifier with the current value, then
/// recurses on the remaining tokens with the value incremented by `1u16`.
macro_rules! make_encoding_ids_rec {
    // Base case: only the running value is left, so nothing more is emitted.
    ($next_id:expr) => {};
    // Recursive case: peel off one identifier, define its constant, and hand
    // the remaining tokens (which start with the next comma) back to the macro.
    ($next_id:expr, $name:ident $($rest:tt)*) => {
        pub const $name: u16 = $next_id;
        make_encoding_ids_rec!(($next_id + 1u16) $($rest)*);
    };
}
macro_rules! make_encoding_ids {
($($encodings:ident),*) => {
@a10y
a10y / send-zc.c
Created August 15, 2024 14:39
io_uring zerocopy example
/* SPDX-License-Identifier: MIT */
/* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */
/* gcc -luring -O2 -o send-zc ./send-zc.c */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <errno.h>
#include <error.h>
#include <limits.h>
@a10y
a10y / eclipse.py
Created March 31, 2024 00:50
Marimo notebook of 2024 Solar Eclipse
import marimo
__generated_with = "0.1.76"
app = marimo.App()
@app.cell
def __():
from astropy.time import Time
from astropy.coordinates import EarthLocation, get_sun, get_moon, AltAz