justinhchae

14 followers · 8 following

View GitHub Profile

Recently created

Least recently created

Recently updated

Least recently updated

justinhchae / grouper_example3.py

Last active January 12, 2021 15:11

	# Load the sample data; the first CSV column becomes the index.
	gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/sample.csv'
	df = pd.read_csv(gitcsv, index_col=0)

	# Parse the 'dates' column into datetimes so it can be bucketed by period.
	df['dates'] = pd.to_datetime(df['dates'])

	# Width of each date bucket, given as a pandas offset alias.
	freq = 'M'

	# Count the non-null 'types' entries for every (type, date bucket) pair,
	# then move the group keys back into ordinary columns.
	date_buckets = pd.Grouper(key='dates', freq=freq)
	counts_by_bucket = df.groupby(['types', date_buckets])['types'].agg(['count'])
	df = counts_by_bucket.reset_index()

justinhchae / grouper_example4.py

Last active January 12, 2021 14:59

	import plotly.graph_objects as go
	import plotly_express as px

	# Group the dataframe by its 'types' column. `df` is not defined in this
	# snippet and is expected to exist already.
	group = df.groupby('types')

	# Create an empty Plotly figure to act as a blank canvas.
	fig = go.Figure()

	# each group iteration returns a tuple

justinhchae / grouper_example5.py

Last active January 12, 2021 15:27

	import plotly.graph_objects as go
	import plotly_express as px

	# Group the dataframe by its 'types' column. `df` is not defined in this
	# snippet and is expected to exist already.
	group = df.groupby('types')

	# Create an empty Plotly figure to act as a blank canvas.
	fig = go.Figure()

	# each group iteration returns a tuple

justinhchae / plotly_time_regression.py

Created January 12, 2021 15:41

	import pandas as pd
	import plotly.graph_objects as go
	import plotly_express as px

	# Load the sample data; the first CSV column becomes the index.
	gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/sample.csv'
	df = pd.read_csv(gitcsv, index_col=0)

	# Parse the 'dates' column into datetimes.
	df['dates'] = pd.to_datetime(df['dates'])

	# Frequency alias; presumably used for date grouping further down -- confirm
	# against the rest of the script, which is not visible here.
	freq='M' # or 'D' or 'Y'

justinhchae / bools_example1.py

Last active January 13, 2021 07:10

	import pandas as pd

	# Location of the sample CSV used by this example.
	gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/bools.csv'

	# Load the file with pandas' default parsing options.
	df = pd.read_csv(gitcsv)

	# Show the first rows of the dataframe.
	print(df.head())
	"""
	results of printing dataframe head
	category flag1 flag2 flag3

justinhchae / bools_example2.py

Last active January 13, 2021 14:22

	import pandas as pd

	# Location of the sample CSV used by this example.
	gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/bools.csv'

	# Load the file with pandas' default parsing options.
	df = pd.read_csv(gitcsv)

	# Names of the columns that are supposed to hold boolean values.
	cols = ['flag1', 'flag2', 'flag3']

	# use np.where to find and match, then replace

justinhchae / reduce_pandas_precision.py

Created January 19, 2021 20:46

	# given a dataframe

	def reduce_precision(df):
	import numpy as np
	"""
	usage: given a dataframe, this function returns an optimized dataframe

	df = reduce_precision(df)

	reference: https://gist.github.com/enamoria/fa9baa906f23d1636c002e7186516a7b

justinhchae / persist_dict_streamlit.py

Created January 25, 2021 14:47

	import pandas as pd
	import streamlit as st

	from collections import defaultdict
	from streamlit.report_thread import get_report_ctx

	# implementation
	# https://share.streamlit.io/justinhchae/app_helper/main/app.py

	class SomeClass():

justinhchae / data_chunker.py

Created February 28, 2021 15:25

	import numpy as np
	import math

	# helper functions to chunk data for time series forecasting

	def index_marks(nrows, chunk_size):
	"""
	a helper function for split()
	return an index of chunk size
	https://yaoyao.codes/pandas/2018/01/23/pandas-split-a-dataframe-into-chunks

justinhchae / run_arima.py

Created February 28, 2021 15:33

	def run_arima(chunked_data, price_col='y', n_prediction_units=1):
	# consume chunked data from https://gist.github.com/justinhchae/13d246e8e2e2d521a8d2cce20eb09a09
	# suppress trivial warnings from ARIMA
	warnings.simplefilter('ignore', ConvergenceWarning)

	# initialize a list to hold results (a list of dataframes)
	results = []

	# enumerate through a list of chunked tuples, each having a pair of dataframes
	for idx, (x_i, y_i) in enumerate(chunked_data):