Last active
November 13, 2021 06:45
-
-
Save skrawcz/7a9aee5b0840b1ad0506e150fdd00769 to your computer and use it in GitHub Desktop.
Hamilton with a single value node
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- my_functions.py
import pandas as pd
def avg_3wk_spend(spend: pd.Series) -> pd.Series:
    """Return the rolling 3-entry mean of ``spend``.

    The name refers to a 3-week average, which presumes one entry per week.

    :param spend: series of spend values.
    :return: series with the same index as ``spend``; the first two entries
        are NaN because a full window of three values is not yet available.
    """
    return spend.rolling(3).mean()
def spend_per_signup(spend: pd.Series, signups: pd.Series) -> pd.Series:
    """Return the cost per signup: ``spend`` divided element-wise by ``signups``.

    :param spend: series of spend values.
    :param signups: series of signup counts.
    :return: series of ``spend / signups``.
    """
    return spend / signups
def single_avg(spend: pd.Series) -> float:
    """Return the mean of ``spend`` as a single scalar value.

    :param spend: series of spend values.
    :return: the mean over all entries of ``spend``.
    """
    return spend.mean()
def spend_per_sigup_time_average(single_avg: float, spend_per_signup: pd.Series) -> pd.Series:
    """Scale ``spend_per_signup`` by the scalar ``single_avg``.

    Demonstrates using an intermediary single value as input. The parameter
    names match the ``single_avg`` and ``spend_per_signup`` functions defined
    in this module.

    NOTE: "sigup" in the function name is a typo for "signup"; it is kept
    because the name is the public identifier callers request.

    :param single_avg: scalar multiplier.
    :param spend_per_signup: series to scale.
    :return: series of ``single_avg * spend_per_signup``.
    """
    return single_avg * spend_per_signup
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- my_script.py -- Run this
import importlib
import logging
import sys

import pandas as pd
from hamilton import driver

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout)

# Initial data used as input -- load from actuals or wherever.
initial_columns = {
    'signups': pd.Series([1, 10, 50, 100, 200, 400]),
    'spend': pd.Series([10, 10, 20, 40, 40, 50]),
}

# We need to tell Hamilton where to load function definitions from.
module_name = 'my_functions'
module = importlib.import_module(module_name)
dr = driver.Driver(initial_columns, module)  # can pass in multiple modules

# We need to specify what we want in the final dataframe.
output_columns = [
    'spend',
    'signups',
    'avg_3wk_spend',
    'spend_per_signup',
    'spend_per_sigup_time_average',  # <--- this is the column we want.
]

# Let's create the dataframe!
df = dr.execute(output_columns, display_graph=True)
print(df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment