justinhchae

14 followers · 8 following

View GitHub Profile

Recently created

Least recently created

Recently updated

Least recently updated

justinhchae / mp_prophet.py

Created February 28, 2021 16:38

	# how to pool processes for prophet

	# given chunked data from data_chunker.py
	# https://gist.github.com/justinhchae/13d246e8e2e2d521a8d2cce20eb09a09

	# given prophet function wrapper
	# https://gist.github.com/justinhchae/8ef78743f13f50051ad1aca2106eaa1a

	# dependencies
	from tqdm import tqdm

justinhchae / run_prophet.py

Last active February 28, 2021 16:37

	from fbprophet import Prophet
	import pandas as pd
	import os

	# https://stackoverflow.com/questions/2125702/how-to-suppress-console-output-in-python
	# https://medium.com/spikelab/forecasting-multiples-time-series-using-prophet-in-parallel-2515abd1a245
	# https://facebook.github.io/prophet/docs/quick_start.html#python-api


	class suppress_stdout_stderr(object):

justinhchae / mp_arima.py

Created February 28, 2021 16:12

	# how to pool processes for arima

	# given chunked data from data_chunker.py
	# https://gist.github.com/justinhchae/13d246e8e2e2d521a8d2cce20eb09a09

	# given arima function from run_arima.py
	# https://gist.github.com/justinhchae/d2a2dc8b71b5f5fbbb0f7eabf68b6850

	# dependencies
	from tqdm import tqdm

justinhchae / run_arima.py

Created February 28, 2021 15:33

	def run_arima(chunked_data, price_col='y', n_prediction_units=1):
	# consume chunked data from https://gist.github.com/justinhchae/13d246e8e2e2d521a8d2cce20eb09a09
	# suppress trivial warnings from ARIMA
	warnings.simplefilter('ignore', ConvergenceWarning)

	# initialize a list to hold results (a list of dataframes)
	results = []

	# enumerate through a list of chunked tuples, each having a pair of dataframes
	for idx, (x_i, y_i) in enumerate(chunked_data):

justinhchae / data_chunker.py

Created February 28, 2021 15:25

	import numpy as np
	import math

	# helper functions to chunk data for time series forecasting

	def index_marks(nrows, chunk_size):
	"""
	a helper function for split()
	return an index of chunk size
	https://yaoyao.codes/pandas/2018/01/23/pandas-split-a-dataframe-into-chunks

justinhchae / persist_dict_streamlit.py

Created January 25, 2021 14:47

	import pandas as pd
	import streamlit as st

	from collections import defaultdict
	from streamlit.report_thread import get_report_ctx

	# implementation
	# https://share.streamlit.io/justinhchae/app_helper/main/app.py

	class SomeClass():

justinhchae / reduce_pandas_precision.py

Created January 19, 2021 20:46

	# given a dataframe

	def reduce_precision(df):
	import numpy as np
	"""
	usage: given a dataframe, this function returns an optimized dataframe

	df = reduce_precision(df)

	reference: https://gist.github.com/enamoria/fa9baa906f23d1636c002e7186516a7b

justinhchae / bools_example2.py

Last active January 13, 2021 14:22

	import pandas as pd

	# URL of the raw sample CSV hosted on GitHub
	gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/bools.csv'

	# load the CSV from the URL into a dataframe
	df = pd.read_csv(gitcsv)

	# some columns that are supposed to be bool
	cols = ['flag1', 'flag2', 'flag3']

	# use np.where to find and match, then replace

justinhchae / bools_example1.py

Last active January 13, 2021 07:10

	import pandas as pd

	# URL of the raw sample CSV hosted on GitHub
	gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/bools.csv'

	# load the CSV from the URL into a dataframe
	df = pd.read_csv(gitcsv)

	# preview the first rows of the dataframe
	# (the print call was missing its closing parenthesis, which is a syntax error)
	print(df.head())
	"""
	results of printing dataframe head
	category flag1 flag2 flag3

justinhchae / plotly_time_regression.py

Created January 12, 2021 15:41

	import pandas as pd
	import plotly.graph_objects as go
	import plotly_express as px

	# URL of the raw sample CSV hosted on GitHub
	gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/sample.csv'
	# load the CSV, using its first column as the dataframe index
	df = pd.read_csv(gitcsv, index_col=0)

	# parse the 'dates' column into pandas datetime values
	df['dates'] = pd.to_datetime(df['dates'])

	# frequency code; presumably consumed by grouping/resampling code further
	# down the script (not visible here) -- TODO confirm
	freq='M' # or D or Y