Skip to content

Instantly share code, notes, and snippets.

View wassname2's full-sized avatar
🤖

Michael.Clark2 wassname2

🤖
View GitHub Profile
import pandas as pd
import os
from anycache import anycache
from loguru import logger
import time
def chunking_by_time(startTime: pd.Timestamp, endTime: pd.Timestamp, freqs=['YE', 'ME', 'D']):
"""
How do we cache timespans? We want to take year chunks, then for the remainder months, and so on.
@wassname2
wassname2 / caching_pattern_for_long_running.py
Last active July 8, 2024 01:34
caching_pattern_for_long_running.py
from pathlib import Path
import pandas as pd
from tqdm.auto import tqdm
# Experiment name -> settings for that run (only `lr` is configured here).
experiments = {
    'exp1': dict(lr=1e-4),
}

# Directory for cached results, relative to the current working directory.
cache_dir = Path('../data/30_processed/results_cache1')
# `exist_ok=True` keeps reruns from failing when the directory is already
# there; the parent directory must already exist because `parents` is not set.
cache_dir.mkdir(exist_ok=True)
@wassname2
wassname2 / pd_util.py
Last active May 23, 2024 02:00
pandas utils
import pandas as pd
import numpy as np
def pandas_groupby_agg_mixed_dtype_df(df, groupby: list, agg='max', str_agg='first'):
"""
resample a df with numeric and strings
"""
numerics_cols = list(df.select_dtypes(include='number').columns)+groupby
other_cols = list(df.select_dtypes(exclude='number').columns)+groupby
@wassname2
wassname2 / time_chunking.py
Last active May 14, 2024 02:29
time_chunking.py: a good way to chunk time into human-readable chunks of decreasing size
import pandas as pd
def chunking_by_time(startTime: pd.Timestamp, endTime: pd.Timestamp, freqs=['Y', 'M', 'D']):
"""
How do we cache timespans? We want to take year chunks, then for the remainder months, and so on.
That way old data is cached in big chunks, and new data is rechunked as needed
```py
startTime = pd.to_datetime('2023-01-02 01:01')