Skip to content

Instantly share code, notes, and snippets.

@jtanx
jtanx / chunks.py
Created November 15, 2024 23:24
datetime chunking
import datetime as dt
from functools import partial
from typing import Any
import pandas as pd
from dateutil import relativedelta
def next_chunk(st: pd.Timestamp, et: pd.Timestamp, freqs: list[Any]):
if st == et:
@jtanx
jtanx / polars_multi_collect.py
Created September 28, 2024 11:38
Single pass collection of multiple polars lazyframes
import polars as pl
# Two 99-row columns: "A" counts up from 1 to 99, "B" counts down from 100 to 2.
df = pl.LazyFrame({"A": range(1, 100), "B": range(100, 1, -1)})
# Shared filtering step; both predicates are passed to a single filter() call.
df = df.filter(pl.col("A") > 20, pl.col("B") < 50)
# Two lazy frames branching off the same filtered frame, each applying a
# different column expression.
df1 = df.with_columns(pl.col("A") * 2)
df2 = df.with_columns(pl.col("B") / 2)
# https://github.com/pola-rs/polars/issues/13065
# This will result in re-evaluating the initial filter twice
# NOTE(review): the statements the remark above refers to are outside this excerpt.
@jtanx
jtanx / CMakeLists.txt
Created July 5, 2022 11:16
Working example of writing a Parquet file in C++
# Build script for the `parq` executable.
cmake_minimum_required(VERSION 3.10)
project(parq)
# Ask for C++17 and mark the standard as required.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
# Arrow is a hard dependency: REQUIRED makes configuration fail if it is not found.
find_package(Arrow REQUIRED)
# Executable built from a single source file.
# NOTE(review): the source is named park.cc while the target is parq - confirm the filename is intentional.
add_executable(parq park.cc)
@jtanx
jtanx / enctest.c
Created March 21, 2021 10:04
fontforge encoding comparison
#include <stdio.h>
#include <utype.h>
#include <chardata.h>
#include <encoding.h>
static int umodenc(int enc, int modtype)
{
if (modtype == -1)
return (-1);
@jtanx
jtanx / splitbins.py
Created March 21, 2021 09:17
3 vs 4-level lookup tables for unicode data
# Based on the algorithm in https://github.com/python/cpython/blob/master/Tools/unicode/makeunicodedata.py
import math
import sys
def getsize(data):
# return smallest possible integer size for the given array
maxdata = max(data)
if maxdata < 256:
return 1
@jtanx
jtanx / pad.cmake
Last active May 26, 2022 23:11
Padding a string in cmake
function(pad_string output str padchar length)
string(LENGTH "${str}" _strlen)
math(EXPR _strlen "${length} - ${_strlen}")
if(_strlen GREATER 0)
if(${CMAKE_VERSION} VERSION_LESS "3.14")
unset(_pad)
foreach(_i RANGE 1 ${_strlen}) # inclusive
string(APPEND _pad ${padchar})
endforeach()
@jtanx
jtanx / conns.yml
Created March 9, 2020 10:16
Service discovery descriptor
config:
min_port: 1000
max_port: 2000
shmem:
default:
- proc_a
- proc_b
tcp:
default:
- proc_a
@jtanx
jtanx / CustomInstallPath.cmake
Created August 3, 2019 07:22
Custom library install path for MacOS and CMake
# Suppose that under certain circumstances you want to set the library path explicitly,
# but in the usual case you would like it to remain an rpath.
# The install(CODE ...) call below adds a variable to the install script. It is set to
# "@rpath" only when it has not already been defined, so it can be overridden.
install(CODE "
if(NOT DEFINED CUSTOM_LIBRARY_PREFIX)
set(CUSTOM_LIBRARY_PREFIX \"@rpath\")
endif()
")
# The "$" is escaped so the reference to CUSTOM_LIBRARY_PREFIX is kept literally in the
# property value rather than being expanded while this file is processed.
set_property(TARGET my_target PROPERTY INSTALL_NAME_DIR "\${CUSTOM_LIBRARY_PREFIX}")
@jtanx
jtanx / makecombiners.h
Created July 21, 2019 02:38
Combiners parsing for combiners.h
#!/usr/bin/env python3
import sys,os,re
# http://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values
COMBIN = {
0x000: '0',
0x001: 'FF_UNICODE_Overstrike',
0x202: 'FF_UNICODE_Below|FF_UNICODE_Touching',
0x214: 'FF_UNICODE_Above|FF_UNICODE_Touching',
@jtanx
jtanx / gb12345.txt
Created April 6, 2019 09:32
gb12345 FontForge
# gb12345 based on https://github.com/fontforge/fontforge/blob/35de97a/plugins/gb12345.c
0x0 0x0
0x1 0x1
0x2 0x2
0x3 0x3
0x4 0x4
0x5 0x5
0x6 0x6
0x7 0x7
0x8 0x8