justinhchae’s gists

justinhchae / impute_dates.py

Last active January 3, 2021 20:22

impute_dates

	import pandas as pd


	# the two date columns the snippets work with
	col1, col2 = 'event_date', 'received_date'
	# name of the column that receives the adjusted col1 values
	col_new = f'{col1}_new'
	# years outside past_year..curr_year are treated as out of range
	past_year, curr_year = 2010, 2020

	# given a dataframe df with col1 and col2 as datetime columns

justinhchae / impute_dates_iteration.py

Created December 30, 2020 22:01

impute with iteration

	# iterate through a DataFrame with iterrows()
	curr_year = 2020
	past_year = 2010
	# loop through index and rows of df
	for idx, row in df.iterrows():
	# compare years as integers
	if row[col1].year > curr_year:

	# return received_date's year as integer
	new_year = row[col2].year

justinhchae / impute_dates_iter_change_log.py

Last active December 30, 2020 22:07

impute dates with iterative solution and change log

	# same iterrows() solution with change log
	curr_year = 2020
	past_year = 2010
	change_log = []
	# loop through index and rows of df
	for idx, row in df.iterrows():
	if row[col1].year > curr_year:
	new_year = row[col2].year
	# save reference data as tuple
	records = tuple((row['case_id']

justinhchae / impute_lambdas_changelog.py

Last active January 1, 2021 02:17

impute dates with apply and lambdas with change log

	def impute(x):
	    """Return the ``col1`` date of row ``x``, borrowing the ``col2`` year when out of range.

	    A date is out of range when its year is after ``curr_year`` or before
	    ``past_year``; any other value is returned unchanged.
	    """
	    event = x[col1]
	    if event.year > curr_year or event.year < past_year:
	        new_year = x[col2].year
	        try:
	            return event.replace(year=new_year)
	        except ValueError:
	            # Feb 29 does not exist in a non-leap year; fall back to Feb 28
	            # rather than aborting the whole apply()
	            if (event.month, event.day) != (2, 29):
	                raise
	            return event.replace(year=new_year, day=28)
	    return event

	# simplify the code later by calling impute
	df[col_new] = df.apply(impute, axis=1)

	# a new dataframe called change_log
	# NOTE(review): only rows dated after curr_year are selected here, although
	# impute also rewrites rows dated before past_year -- confirm this is intended
	change_log = df[(df[col1].dt.year > curr_year)]

justinhchae / lambda_pandas_two_conditionals.py

Last active January 3, 2021 20:24

Lambda function with two conditional statements

	def _with_year(date, year):
	    """Return ``date`` with its year set to ``year``; Feb 29 falls back to Feb 28."""
	    try:
	        return date.replace(year=year)
	    except ValueError:
	        # only the leap-day case is recoverable; anything else is re-raised
	        if (date.month, date.day) != (2, 29):
	            raise
	        return date.replace(year=year, day=28)

	# lambda with two conditional statements:
	# a col1 year after curr_year or before past_year is replaced by the col2 year,
	# otherwise the original col1 value is kept
	df[col_new] = df.apply(
	    lambda x: _with_year(x[col1], x[col2].year) if x[col1].year > curr_year
	    else _with_year(x[col1], x[col2].year) if x[col1].year < past_year
	    else x[col1],
	    axis=1,
	)

justinhchae / lambda_pandas_one_conditiona.py

Last active January 1, 2021 02:15

Lambdas on Pandas DF with apply and one conditional

	def _with_year(date, year):
	    """Return ``date`` with its year set to ``year``; Feb 29 falls back to Feb 28."""
	    try:
	        return date.replace(year=year)
	    except ValueError:
	        # only the leap-day case is recoverable; anything else is re-raised
	        if (date.month, date.day) != (2, 29):
	            raise
	        return date.replace(year=year, day=28)

	# replace col1 year with col2 year on a condition
	# if does not meet condition, use the original col1 value
	df[col_new] = df.apply(
	    lambda x: _with_year(x[col1], x[col2].year) if x[col1].year > curr_year
	    else x[col1],
	    axis=1,
	)

	# filter df where year is greater than current year
	# (df is rebound to the filtered rows, so only those remain afterwards)
	df = df[(df[col1].dt.year > curr_year)]
	print(df[[col1, col_new, col2]].head(2))
	# output

justinhchae / simple_lambdas_ondf.py

Created December 30, 2020 22:14

A simple, do-nothing lambda function on a DataFrame

	# build the new column name, then fill it row by row with the col2 value
	col_new = col1 + '_new'
	df[col_new] = df.apply(lambda row: row[col2], axis=1)
	# show the first two rows of the original, new and source columns
	preview_cols = [col1, col_new, col2]
	print(df[preview_cols].head(2))

	# output

	"""
	event_date event_date_new received_date
	2011-01-05 2011-01-31 2011-01-31

justinhchae / simple_lambdas_example.py

Last active January 1, 2021 02:15

Another example of a lambda that changes all values

	# set the year, month and day of every col1 value to 2109-01-01
	fixed_parts = {'year': 2109, 'month': 1, 'day': 1}
	df[col1] = df[col1].apply(lambda ts: ts.replace(**fixed_parts))
	print(df[col1].head(2))

	# output

	"""
	event_date
	2109-01-01
	2109-01-01

justinhchae / grouper_example1.py

Last active January 12, 2021 15:45

justinhchae / grouper_example2.py

Last active January 12, 2021 02:59

	# grouping frequency: 'M', or 'D' or 'Y'
	freq = 'M'
	by_period = pd.Grouper(key='dates', freq=freq)
	dated_types = df[['dates', 'types']]
	# count the non-null 'types' values per period and restore 'dates' as a column
	df = dated_types.groupby([by_period]).agg('count').reset_index()

	"""
	dates count
	2 2012-07-31 0
	1 2012-06-30 1
	3 2012-08-31 1
	0 2012-05-31 2