Skip to content

Instantly share code, notes, and snippets.

@skrawcz
Last active November 13, 2021 06:45
Show Gist options
  • Save skrawcz/7a9aee5b0840b1ad0506e150fdd00769 to your computer and use it in GitHub Desktop.
Save skrawcz/7a9aee5b0840b1ad0506e150fdd00769 to your computer and use it in GitHub Desktop.
Hamilton with a single value node
# --- my_functions.py
import pandas as pd
def avg_3wk_spend(spend: pd.Series) -> pd.Series:
    """Rolling 3 week average spend.

    Computes the mean over a sliding window of three consecutive rows.
    NOTE(review): "3 week" assumes each row of ``spend`` is one week -- confirm
    against the data source.

    :param spend: spend values, one entry per period.
    :return: series of the same length as ``spend``; the first two entries are
        NaN because a full window of three values is not yet available.
    """
    return spend.rolling(3).mean()
def spend_per_signup(spend: pd.Series, signups: pd.Series) -> pd.Series:
    """The cost per signup in relation to spend.

    Divides ``spend`` by ``signups`` element-wise.

    :param spend: spend values.
    :param signups: signup counts, expected to share an index with ``spend``.
    :return: series holding ``spend / signups`` for each entry.
    """
    return spend / signups
def single_avg(spend: pd.Series) -> float:
    """Function that returns a single value: the mean of ``spend``.

    Unlike the other functions in this module, the result is a scalar rather
    than a series.

    :param spend: spend values.
    :return: the arithmetic mean of all entries in ``spend``.
    """
    return spend.mean()
def spend_per_sigup_time_average(single_avg: float, spend_per_signup: pd.Series) -> pd.Series:
    """Function that uses an intermediary single value as input.

    Scales every entry of ``spend_per_signup`` by the scalar ``single_avg``.

    NOTE: the name is misspelled ("sigup"), but it is kept as-is because the
    output column is requested by this exact name.

    :param single_avg: scalar multiplier.
    :param spend_per_signup: series to scale.
    :return: series holding ``single_avg * spend_per_signup`` for each entry.
    """
    return single_avg * spend_per_signup
# --- my_script.py -- Run this
import importlib
import logging
import sys
import pandas as pd
from hamilton import driver
# Send log output to stdout.
logging.basicConfig(stream=sys.stdout)
logger = logging.getLogger(__name__)

# Hamilton loads its function definitions from this module.
module_name = 'my_functions'
module = importlib.import_module(module_name)

# Initial input data -- load from actuals or wherever.
initial_columns = {
    'signups': pd.Series([1, 10, 50, 100, 200, 400]),
    'spend': pd.Series([10, 10, 20, 40, 40, 50]),
}

# Build the driver from the inputs and the function module
# (more than one module can be passed in).
dr = driver.Driver(initial_columns, module)

# Columns requested in the final dataframe.
output_columns = [
    'spend',
    'signups',
    'avg_3wk_spend',
    'spend_per_signup',
    'spend_per_sigup_time_average',  # <--- the column built from the single value node.
]

# Create the dataframe (also asks for the graph to be displayed) and show it.
df = dr.execute(output_columns, display_graph=True)
print(df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment