This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def calculate_nickmapbi_offset(carriageway, xsp): | |
return { | |
"L":{ | |
f"L{num+1}": +3.5/2 - 3.5*num for num in range(0,7) | |
}, | |
"R":{ | |
f"R{num+1}": -3.5/2 + 3.5*num for num in range(0,7) | |
}, | |
"S":{ | |
f"L{num+1}": -3.5/2 - 3.5*num for num in range(0,7) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fuzzy_column_name_match(list1, list2, threshold=60):
    """Map each name in `list1` to its closest fuzzy match in `list2`.

    Each name in `list1` is compared against all of `list2` using
    `fuzzywuzzy.process.extractOne`. A name is only included in the result
    when the score of its best match is strictly greater than `threshold`.

    Args:
        list1: Names to look up (e.g. the column names you expect).
        list2: Candidate names to match against (e.g. the column names
            actually present).
        threshold: Minimum score (exclusive) a match must exceed to be kept.
            Defaults to 60.

    Returns:
        A dict of `{name_from_list1: best_match_from_list2}`. Names with no
        sufficiently good match are omitted.
    """
    # Imported lazily so the dependency is only needed when the function is called.
    from fuzzywuzzy import process

    response = {}
    for name_to_find in list1:
        resp_match = process.extractOne(name_to_find, list2)
        # `extractOne` returns None when there is nothing to choose from
        # (e.g. `list2` is empty); treat that as "no match" instead of
        # failing on `None[1]`.
        if resp_match is not None and resp_match[1] > threshold:
            response[name_to_find] = resp_match[0]
    return response
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def run_in_executor(f): | |
""" | |
This is a hack to turn a legacy blocking function into an async function. | |
Thanks to balki https://stackoverflow.com/a/53719009/1782370 | |
Example: | |
The following example shows how to use a blocking | |
`azure.identity` credential type with `pandas.read_parquet()`. | |
Pandas normally requires that you use one of the limited |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for group_index, group in df.groupby(["road_number","cway"]): | |
blank_rows = group[group["cluster"].isna()] | |
filled_rows = group[group["cluster"].notna()] | |
for blank_row_index, blank_row in blank_rows.iterrows(): | |
# find distance by looking for minimum "signed overlap" | |
overlap_min = np.maximum(filled_rows["slk_from"], blank_row["slk_from"]) | |
overlap_max = np.minimum(filled_rows["slk_to"], blank_row["slk_to"]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from azure.identity.aio import DefaultAzureCredential

# Placeholders: fill in the blob container and storage account to read from.
CONTAINER = "..."
STORAGE_ACCOUNT_NAME = "..."

# Read a parquet file directly from Azure storage via an `abfss://` URL.
# The credential object is handed to the underlying filesystem through
# `storage_options`.
pd.read_parquet(
    path=f"abfss://{CONTAINER}@{STORAGE_ACCOUNT_NAME}.dfs.core.windows.net/some/path/example.parquet",
    storage_options={"credential": DefaultAzureCredential()},
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Must use the async io variant of Azure Credentials
from azure.identity.aio import DefaultAzureCredential

# Rather than going through fsspec directly, use adlfs: it implements the
# fsspec interface and gives better type hints and autocompletion.
import adlfs

# Filesystem object for the given storage account, authenticated with the
# default Azure credential chain.
cloud_filesystem = adlfs.AzureBlobFileSystem(
    account_name="<STORAGE_ACCOUNT_NAME>",
    credential=DefaultAzureCredential(),
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Take a snippet of python code that defines at least one function at the top-level, then returns the last defined function. | |
import ast | |
def compile_function(source:str): | |
# parse first, so we can automatically find the function name later | |
parsed = ast.parse(source) | |
# compile in specified `scope` dictionary | |
exec(compile(source, "", "exec"), scope:={}) | |
# return the last function definition | |
for item in reversed(parsed.body): | |
if isinstance(item, ast.FunctionDef): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from numpy import typing as npt | |
def overlap(a: npt.ArrayLike, b: npt.ArrayLike, x: npt.ArrayLike, y: npt.ArrayLike) -> npt.NDArray:
    """Compute the signed overlap length between two lists of intervals.

    The first list of intervals is given by `a` (starts) and `b` (ends); the
    second list by `x` (starts) and `y` (ends). Every interval of the second
    list is compared against every interval of the first.

    For each pair the result is `min(b, y) - max(a, x)`. Assuming each
    interval has start <= end, this is:

    * positive when the intervals overlap (the length of the overlap),
    * zero when they just touch,
    * negative when they are disjoint (minus the size of the gap).

    Args:
        a: Start of each interval in the first list.
        b: End of each interval in the first list.
        x: Start of each interval in the second list.
        y: End of each interval in the second list.

    Returns:
        For 1-D inputs, an array of shape `(len(x), len(a))` where element
        `[i, j]` is the signed overlap between `[x[i], y[i]]` and
        `[a[j], b[j]]`.
    """
    # Accept plain lists/tuples as well as arrays.
    a = np.asarray(a)
    b = np.asarray(b)
    # Reshape the second list into a column so it broadcasts against every
    # interval of the first list, producing the full pairwise table.
    x_col = np.asarray(x).reshape(-1, 1)
    y_col = np.asarray(y).reshape(-1, 1)

    overlap_min = np.maximum(a, x_col)
    overlap_max = np.minimum(b, y_col)
    return overlap_max - overlap_min
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas | |
from zipfile import ZipFile | |
zip_file_path = "some_zip.zip" | |
# some_zip.zip/ | |
# ├─ part1.csv | |
# ├─ part2.csv | |
# ├─ part3.csv | |
zip_file = ZipFile(zip_file_path) | |
extracted_data = pd.concat([ | |
pandas.read_csv( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# inspired by https://stackoverflow.com/questions/1524126/how-to-print-a-list-more-nicely | |
# needs refinement before I post as answer though. I'll update this at some point | |
from typing import Iterable, Literal | |
def print_columns(data:Iterable, columns:int=3, sep:str=" ", alignment:Literal[">","<","^"]=">"): | |
"""Prints a list of objects in columns. | |
`data` should be an iterable object, such as a list. Each element of data will be converted to a string using the built in `str()` | |
`sep` is a string used to separate the columns. defaults to `' '` |