Skip to content

Instantly share code, notes, and snippets.

# how to pool processes for prophet
# given chunked data from data_chunker.py
# https://gist.github.com/justinhchae/13d246e8e2e2d521a8d2cce20eb09a09
# given prophet function wrapper
# https://gist.github.com/justinhchae/8ef78743f13f50051ad1aca2106eaa1a
# dependencies
from tqdm import tqdm
from fbprophet import Prophet
import pandas as pd
import os
# https://stackoverflow.com/questions/2125702/how-to-suppress-console-output-in-python
# https://medium.com/spikelab/forecasting-multiples-time-series-using-prophet-in-parallel-2515abd1a245
# https://facebook.github.io/prophet/docs/quick_start.html#python-api
class suppress_stdout_stderr(object):
# how to pool processes for arima
# given chunked data from data_chunker.py
# https://gist.github.com/justinhchae/13d246e8e2e2d521a8d2cce20eb09a09
# given arima function from run_arima.py
# https://gist.github.com/justinhchae/d2a2dc8b71b5f5fbbb0f7eabf68b6850
# dependencies
from tqdm import tqdm
def run_arima(chunked_data, price_col='y', n_prediction_units=1):
# consume chunked data from https://gist.github.com/justinhchae/13d246e8e2e2d521a8d2cce20eb09a09
# suppress trivial warnings from ARIMA
warnings.simplefilter('ignore', ConvergenceWarning)
# initialize a list to hold results (a list of dataframes)
results = []
# enumerate through a list of chunked tuples, each having a pair of dataframes
for idx, (x_i, y_i) in enumerate(chunked_data):
import numpy as np
import math
# helper functions to chunk data for time series forecasting
def index_marks(nrows, chunk_size):
"""
a helper function for split()
return an index of chunk size
https://yaoyao.codes/pandas/2018/01/23/pandas-split-a-dataframe-into-chunks
import pandas as pd
import streamlit as st
from collections import defaultdict
from streamlit.report_thread import get_report_ctx
# implementation
# https://share.streamlit.io/justinhchae/app_helper/main/app.py
class SomeClass():
# given a dataframe
def reduce_precision(df):
import numpy as np
"""
usage: given a dataframe, this function returns an optimized dataframe
df = reduce_precision(df)
reference: https://gist.github.com/enamoria/fa9baa906f23d1636c002e7186516a7b
import pandas as pd
gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/bools.csv'
df = pd.read_csv(gitcsv)
# some columns that are supposed to be bool
cols = ['flag1', 'flag2', 'flag3']
# use np.where to find and match, then replace
import pandas as pd

# sample CSV hosted on GitHub
gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/bools.csv'
# read the remote file into a dataframe (requires network access)
df = pd.read_csv(gitcsv)
# preview the first rows of the dataframe; the print call needs both
# closing parentheses (one for head(), one for print()) to be valid Python
print(df.head())
"""
results of printing dataframe head
category flag1 flag2 flag3
import pandas as pd
import plotly.graph_objects as go
import plotly_express as px
gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/sample.csv'
df = pd.read_csv(gitcsv, index_col=0)
df['dates'] = pd.to_datetime(df['dates'])
freq='M' # or D or Y