Skip to content

Instantly share code, notes, and snippets.

@elijahbenizzy
Created July 13, 2023 14:21
Show Gist options
  • Save elijahbenizzy/7ddd44af73d1cbff5eb65d5e01f71bb8 to your computer and use it in GitHub Desktop.
Save elijahbenizzy/7ddd44af73d1cbff5eb65d5e01f71bb8 to your computer and use it in GitHub Desktop.
from typing import Dict, Any, List
from hamilton.ad_hoc_utils import create_temporary_module
from hamilton import driver
from hamilton.driver import Variable
from hamilton.function_modifiers import parameterize_sources
import pandas as pd
# Name fragments used to build the parameterized column/output names below:
# every (foo, bar) pair yields one generated sum.
foos = ["a", "b", "c"]
bars = ["d", "e", "f"]
# One parameterization is generated per (foo, bar) pair. Each key names a
# generated output ("<foo>_<bar>_sum"); its value maps this function's
# parameters to the upstream columns they are read from.
@parameterize_sources(
    **{
        f"{left}_{right}_sum": {
            "column_1": f"{left}_{right}_column",
            "column_2": f"{right}_{left}_column",
        }
        for left in foos
        for right in bars
        if left != right
    }
)
def sum_columns(column_1: pd.Series, column_2: pd.Series) -> pd.Series:
    """Add two columns together.

    :param column_1: First column to add
    :param column_2: Second column to add
    :return: The result of ``column_1 + column_2``
    """
    total = column_1 + column_2
    return total
def list_executable_variables(dr: driver.Driver, inputs: Dict[str, Any]) -> List[Variable]:
    """List the variables downstream of the inputs that were actually provided.

    Only runtime inputs are considered. Overrides and config are not taken
    into account yet, although both can also be accessed by a node; adding
    them would be straightforward.

    NOTE(review): the result is whatever ``dr.what_is_downstream_of`` reports
    for the provided input names. That may include variables that also depend
    on inputs which were *not* provided -- confirm against the driver's
    semantics before treating every returned variable as runnable.

    :param dr: Driver we've instantiated
    :param inputs: Inputs provided at runtime (only membership by name is used)
    :return: Variables downstream of the external inputs found in ``inputs``
    """
    provided_input_names = []
    for variable in dr.list_available_variables():
        # Keep only variables the graph expects from outside *and* that the
        # caller has supplied.
        if variable.is_external_input and variable.name in inputs:
            provided_input_names.append(variable.name)
    return dr.what_is_downstream_of(*provided_input_names)
if __name__ == "__main__":
    # Only the a/d and b/e column pairs are supplied; the other pairs that
    # sum_columns is parameterized over have no input columns here.
    df = pd.DataFrame(
        data=[
            [1, 2, 3, 4],
            [3, 4, 5, 6],
        ],
        columns=["a_d_column", "d_a_column", "b_e_column", "e_b_column"],
    )
    # Wrap the decorated function in a throwaway module for the driver.
    module = create_temporary_module(sum_columns)
    # The DataFrame is passed both to the Driver and as the runtime inputs.
    dr = driver.Driver(df, module)
    executable_variables = list_executable_variables(dr, df)
    print(executable_variables)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment