thehappycheese’s gists

thehappycheese / calculate_nickmapbi_offset.py

Created March 28, 2023 05:04

Function to calculate approximate lane offset from carriageway and xsp

	def calculate_nickmapbi_offset(carriageway, xsp):
	return {
	"L":{
	f"L{num+1}": +3.5/2 - 3.5*num for num in range(0,7)
	},
	"R":{
	f"R{num+1}": -3.5/2 + 3.5*num for num in range(0,7)
	},
	"S":{
	f"L{num+1}": -3.5/2 - 3.5*num for num in range(0,7)

thehappycheese / fuzzy_column_name_match.py

Created January 24, 2023 08:13

Helper function to generate dictionary map to rename columns in one pandas dataframe to match those in another dataframe

	def fuzzy_column_name_match(list1, list2, threshold=60):
	    """Build a rename map from names in `list1` to their closest match in `list2`.

	    Each name in `list1` is compared against the candidates in `list2` using
	    `fuzzywuzzy.process.extractOne`. The best candidate is kept only when its
	    score is strictly greater than `threshold`; names without a good enough
	    match are left out of the result.

	    Args:
	        list1: Names to look up (for example, the column names to be renamed).
	        list2: Candidate names to match against.
	        threshold: Score a match must exceed to be accepted. Defaults to 60,
	            the value that was previously hard-coded.

	    Returns:
	        A dict mapping each matched name from `list1` to its best match in
	        `list2`.
	    """
	    from fuzzywuzzy import process

	    response = {}
	    for name_to_find in list1:
	        resp_match = process.extractOne(name_to_find, list2)
	        # extractOne returns None when there is no candidate at all
	        # (e.g. `list2` is empty); treat that as "no match" instead of
	        # failing on `None[1]`.
	        if resp_match is None:
	            continue
	        # Index rather than unpack: the result tuple may carry extra
	        # trailing items depending on the type of `list2`.
	        if resp_match[1] > threshold:
	            response[name_to_find] = resp_match[0]
	    return response

thehappycheese / blocking_to_async_wrapper.py

Created January 24, 2023 08:11

A hack to turn a legacy blocking function into an async function

	def run_in_executor(f):
	"""
	This is a hack to turn a legacy blocking function into an async function.
	Thanks to balki https://stackoverflow.com/a/53719009/1782370

	Example:

	The following example shows how to use a blocking
	`azure.identity` credential type with `pandas.read_parquet()`.
	Pandas normally requires that you use one of the limited

thehappycheese / find_nearest.py

Last active January 19, 2023 02:50

Fill blank rows with data from the nearest populated row, using chainage from/to locations

	for group_index, group in df.groupby(["road_number","cway"]):

	blank_rows = group[group["cluster"].isna()]
	filled_rows = group[group["cluster"].notna()]

	for blank_row_index, blank_row in blank_rows.iterrows():

	# find distance by looking for minimum "signed overlap"
	overlap_min = np.maximum(filled_rows["slk_from"], blank_row["slk_from"])
	overlap_max = np.minimum(filled_rows["slk_to"], blank_row["slk_to"])

thehappycheese / use pandas to read azure storage.py

Created November 2, 2022 06:13

Use pandas to read azure cloud storage

	import pandas as pd
	from azure.identity.aio import DefaultAzureCredential
	# Placeholders: fill in the storage container and storage account names.
	CONTAINER = "..."
	STORAGE_ACCOUNT_NAME = "..."

	# Build the abfss:// URL of the parquet file first, then the storage options
	# carrying the Azure credential, and finally hand both to pandas.
	parquet_path = f"abfss://{CONTAINER}@{STORAGE_ACCOUNT_NAME}.dfs.core.windows.net/some/path/example.parquet"
	azure_storage_options = {"credential": DefaultAzureCredential()}
	pd.read_parquet(path=parquet_path, storage_options=azure_storage_options)

thehappycheese / using_fsspec.py

Created November 2, 2022 06:11

How to use fsspec with Azure Blob Storage Account

	# Must use the async io variant of Azure Credentials
	from azure.identity.aio import DefaultAzureCredential
	# Instead of using fsspec directly, use adlfs, which implements the fsspec interface and gives better type hints and autocompletion
	import adlfs

	# Create the credential up front, then open the storage account as a
	# filesystem object authenticated with it.
	azure_credential = DefaultAzureCredential()
	cloud_filesystem = adlfs.AzureBlobFileSystem(
	    credential=azure_credential,
	    account_name="<STORAGE_ACCOUNT_NAME>",
	)

thehappycheese / compile.py

Created July 28, 2022 03:04

Compile a python function from string, and return the function

	# Take a snippet of python code that defines at least one function at the top-level, then returns the last defined function.
	import ast
	def compile_function(source:str):
	# parse first, so we can automatically find the function name later
	parsed = ast.parse(source)
	# compile in specified `scope` dictionary
	exec(compile(source, "", "exec"), scope:={})
	# return the last function definition
	for item in reversed(parsed.body):
	if isinstance(item, ast.FunctionDef):

thehappycheese / overlap.py

Last active July 28, 2022 03:14

Interval overlap (signed distance) for numpy lists of intervals

	import numpy as np
	from numpy import typing as npt
	def overlap(a:npt.ArrayLike, b:npt.ArrayLike, x:npt.ArrayLike, y:npt.ArrayLike) -> npt.NDArray:
	    """Compute the signed overlap length between two lists of intervals.

	    The first list holds intervals `a[i]..b[i]`, the second holds intervals
	    `x[j]..y[j]`. Every interval of the second list is compared against
	    every interval of the first by broadcasting.

	    Inputs may be any array-like (numpy arrays, lists, pandas Series, ...);
	    they are converted with `np.asarray` before use.

	    Args:
	        a: Start values of the first list of intervals.
	        b: End values of the first list of intervals.
	        x: Start values of the second list of intervals.
	        y: End values of the second list of intervals.

	    Returns:
	        For 1-D inputs, an array of shape `(len(x), len(a))` whose entry
	        `[j, i]` equals `min(b[i], y[j]) - max(a[i], x[j])`. Assuming each
	        interval has start <= end, a positive value is the length of the
	        overlap, and a negative value is minus the size of the gap between
	        the two intervals.
	    """
	    a = np.asarray(a)
	    b = np.asarray(b)
	    # Turn x / y into column vectors so they broadcast against a / b (rows).
	    x_column = np.asarray(x).reshape(-1, 1)
	    y_column = np.asarray(y).reshape(-1, 1)
	    overlap_min = np.maximum(a, x_column)
	    overlap_max = np.minimum(b, y_column)
	    signed_overlap_len = overlap_max - overlap_min
	    return signed_overlap_len

thehappycheese / load_from_zip_of_csv.py

Created June 3, 2022 03:11

Load CSV files from Zip and Stack

	import pandas
	from zipfile import ZipFile
	zip_file_path = "some_zip.zip"
	# some_zip.zip/
	# ├─ part1.csv
	# ├─ part2.csv
	# ├─ part3.csv
	zip_file = ZipFile(zip_file_path)
	extracted_data = pd.concat([
	pandas.read_csv(

thehappycheese / print_columns.py

Last active January 17, 2023 09:05

Print a list in nice columns

	# inspired by https://stackoverflow.com/questions/1524126/how-to-print-a-list-more-nicely

	# needs refinement before I post as answer though. I'll update this at some point

	from typing import Iterable, Literal
	def print_columns(data:Iterable, columns:int=3, sep:str=" ", alignment:Literal[">","<","^"]=">"):
	"""Prints a list of objects in columns.

	`data` should be an iterable object, such as a list. Each element of data will be converted to a string using the built in `str()`
	`sep` is a string used to separate the columns. defaults to `' '`