Skip to content

Instantly share code, notes, and snippets.

# Raw GitHub URL of the sample CSV.
gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/sample.csv'
# Grouping frequency handed to pd.Grouper below.
freq = 'M'
# Load the CSV (first column becomes the index) and parse 'dates' as datetimes.
df = pd.read_csv(gitcsv, index_col=0)
df['dates'] = pd.to_datetime(df['dates'])
# Count rows for every ('types', period) pair, then turn the group keys
# back into ordinary columns.
df = (
    df.groupby(['types', pd.Grouper(key='dates', freq=freq)])['types']
    .agg(['count'])
    .reset_index()
)
import plotly.graph_objects as go
import plotly_express as px
# group the dataframe by its 'types' column
group = df.groupby('types')
# create a blank canvas: an empty Plotly figure with no traces yet
fig = go.Figure()
# each group iteration returns a tuple
import plotly.graph_objects as go
import plotly_express as px
# group the dataframe by its 'types' column
group = df.groupby('types')
# create a blank canvas: an empty Plotly figure with no traces yet
fig = go.Figure()
# each group iteration returns a tuple
import pandas as pd
import plotly.graph_objects as go
import plotly_express as px
# raw GitHub URL of the sample CSV
gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/sample.csv'
# load the CSV, using its first column as the index
df = pd.read_csv(gitcsv, index_col=0)
# parse the 'dates' column into pandas datetimes
df['dates'] = pd.to_datetime(df['dates'])
freq='M' # or D or Y
import pandas as pd
# Raw GitHub URL of the bools.csv sample.
gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/bools.csv'
# Load the CSV with default settings.
df = pd.read_csv(gitcsv)
# Show the first rows of the loaded dataframe (the closing parenthesis of
# print() was missing, which made this line a syntax error).
print(df.head())
"""
results of printing dataframe head
category flag1 flag2 flag3
import pandas as pd
# raw GitHub URL of the bools.csv sample
gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/bools.csv'
# load the CSV with default settings
df = pd.read_csv(gitcsv)
# some columns that are supposed to be bool
cols = ['flag1', 'flag2', 'flag3']
# use np.where to find and match, then replace
# given a dataframe
def reduce_precision(df):
import numpy as np
"""
usage: given a dataframe, this function returns an optimized dataframe
df = reduce_precision(df)
reference: https://gist.github.com/enamoria/fa9baa906f23d1636c002e7186516a7b
import pandas as pd
import streamlit as st
from collections import defaultdict
from streamlit.report_thread import get_report_ctx
# implementation
# https://share.streamlit.io/justinhchae/app_helper/main/app.py
class SomeClass():
import numpy as np
import math
# helper functions to chunk data for time series forecasting
def index_marks(nrows, chunk_size):
"""
a helper function for split()
return an index of chunk size
https://yaoyao.codes/pandas/2018/01/23/pandas-split-a-dataframe-into-chunks
def run_arima(chunked_data, price_col='y', n_prediction_units=1):
# consume chunked data from https://gist.github.com/justinhchae/13d246e8e2e2d521a8d2cce20eb09a09
# suppress trivial warnings from ARIMA
warnings.simplefilter('ignore', ConvergenceWarning)
# initialize a list to hold results (a list of dataframes)
results = []
# enumerate through a list of chunked tuples, each having a pair of dataframes
for idx, (x_i, y_i) in enumerate(chunked_data):