Skip to content

Instantly share code, notes, and snippets.

import plotly.graph_objects as go
# NOTE(review): `plotly_express` is the legacy standalone package; plotly >= 4
# bundles the same API as `plotly.express` -- confirm which one is installed.
import plotly_express as px

# Split the rows by their 'types' value. `df` is not created in this snippet:
# it must already exist and carry a 'types' column.
group = df.groupby('types')
# Start from an empty figure -- a blank canvas.
fig = go.Figure()
# each group iteration returns a tuple (group key, sub-DataFrame)
import pandas as pd  # `pd` is used below but was never imported

# Load the sample data; the first CSV column becomes the index.
gitcsv = 'https://raw.githubusercontent.com/justinhchae/medium/main/sample.csv'
df = pd.read_csv(gitcsv, index_col=0)
# Parse the date strings so the rows can be bucketed with pd.Grouper.
df['dates'] = pd.to_datetime(df['dates'])
# Bucket width: 'M' groups by calendar month.
# NOTE(review): pandas >= 2.2 prefers the alias 'ME' for month-end; 'M' is
# kept here so older pandas versions keep working -- confirm target version.
freq = 'M'
# Count the rows of each type inside each period, then flatten the
# (types, dates) group keys back into ordinary columns next to 'count'.
df = (
    df.groupby(['types', pd.Grouper(key='dates', freq=freq)])['types']
    .agg(['count'])
    .reset_index()
)
# Bucket width: 'M' here; 'D' or 'Y' are drop-in alternatives.
freq = 'M'
# Keep just the two relevant columns, bucket the rows by period on 'dates',
# and count the non-null 'types' entries that fall into each bucket.
df = (
    df[['dates', 'types']]
    .groupby([pd.Grouper(key='dates', freq=freq)])
    .count()
    .reset_index()
)
"""
dates count
2 2012-07-31 0
1 2012-06-30 1
3 2012-08-31 1
0 2012-05-31 2
data = {'dates':
['2012-05-04',
'2012-05-04',
'2012-06-04',
'2012-08-08'],
'types':
['a',
'a',
'z',
'z',],
@justinhchae
justinhchae / simple_lambdas_example.py
Last active January 1, 2021 02:15
Another example of a lambda that changes all values
# Overwrite every entry of col1 with the same calendar date (2109-01-01)
# by calling .replace(...) on each element.
df[col1] = df[col1].apply(
    lambda stamp: stamp.replace(year=2109, month=1, day=1)
)
# Show the first two rewritten values.
print(df[col1].head(2))
# output
"""
event_date
2109-01-01
2109-01-01
@justinhchae
justinhchae / simple_lambdas_ondf.py
Created December 30, 2020 22:14
A simple, do-nothing lambda function on a DataFrame
# Name of the new column: col1's name with a '_new' suffix.
col_new = str(col1 + '_new')
# Row by row (axis=1), hand back that row's col2 value unchanged, so the
# new column ends up as a copy of col2.
df[col_new] = df.apply(lambda row: row[col2], axis=1)
# Preview the first two rows of the three columns side by side.
print(df[[col1, col_new, col2]].head(2))
# output
"""
event_date event_date_new received_date
2011-01-05 2011-01-31 2011-01-31
@justinhchae
justinhchae / lambda_pandas_one_conditiona.py
Last active January 1, 2021 02:15
Lambdas on Pandas DF with apply and one conditional
# Build col_new row by row: where col1's year is later than curr_year, take
# col1 with its year swapped for col2's year; otherwise keep col1 untouched.
# NOTE(review): .replace(year=...) raises ValueError for a Feb 29 value when
# the target year is not a leap year -- confirm the data cannot hit this.
df[col_new] = df.apply(
    lambda row: (
        row[col1].replace(year=row[col2].year)
        if row[col1].year > curr_year
        else row[col1]
    ),
    axis=1,
)
# Keep only the rows whose col1 year is later than curr_year.
df = df[df[col1].dt.year > curr_year]
# Preview original, imputed and reference columns for the first two rows.
print(df[[col1, col_new, col2]].head(2))
# output
@justinhchae
justinhchae / lambda_pandas_two_conditionals.py
Last active January 3, 2021 20:24
Lambda function with two conditional statements
# Lambda with two chained conditions: a col1 year above curr_year, or below
# past_year, is swapped for col2's year; any other value is left as it is.
df[col_new] = df.apply(
    lambda row: (
        row[col1].replace(year=row[col2].year)
        if row[col1].year > curr_year
        else (
            row[col1].replace(year=row[col2].year)
            if row[col1].year < past_year
            else row[col1]
        )
    ),
    axis=1,
)
@justinhchae
justinhchae / impute_lambdas_changelog.py
Last active January 1, 2021 02:17
Impute dates with apply and lambdas, with a change log
# Keep the row-level rule in a named lambda so it can be reused: a col1 year
# above curr_year, or below past_year, is swapped for col2's year; any other
# value is returned unchanged.
impute = lambda x: (
    x[col1].replace(year=x[col2].year)
    if x[col1].year > curr_year
    else (
        x[col1].replace(year=x[col2].year)
        if x[col1].year < past_year
        else x[col1]
    )
)
# Applying the stored rule to every row (axis=1) keeps this line short.
df[col_new] = df.apply(impute, axis=1)
# change_log: a new dataframe holding the rows whose col1 year is later than
# curr_year.
# NOTE(review): impute also rewrites rows whose year is below past_year, and
# those rows are not selected here -- confirm whether that is intended.
change_log = df[df[col1].dt.year > curr_year]