Last active
April 18, 2021 21:37
-
-
Save kvnkho/a4a7b429aa21fb055bb4d3b51eede8cc to your computer and use it in GitHub Desktop.
Fugue Example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import statements | |
from fugue import FugueWorkflow, SparkExecutionEngine | |
from typing import List, Any, Dict, Iterable | |
# Area code (first three characters of the phone number) to state
area_to_state = {"217": "IL", "312": "IL", "415": "CA", "352": "FL"}


# schema: *, inferred_state:str
def fill_location(df: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
    """Add an ``inferred_state`` value to every row of ``df``.

    For each row the first non-null value among ``home_state``,
    ``work_state`` and the state looked up from the first three characters
    of ``phone`` is stored under ``inferred_state``. If all three are
    ``None`` the result is ``None``.

    Args:
        df: Rows as dictionaries; each must contain the keys
            ``home_state``, ``work_state`` and ``phone``.

    Yields:
        The same row dictionaries, updated in place with ``inferred_state``.

    Raises:
        KeyError: If a row lacks one of the three required keys.
    """
    for row in df:
        phone = row["phone"]
        # A missing phone or an area code absent from the table is treated
        # as "no information" rather than an error, so the fallback chain
        # below can still yield None instead of raising.
        phone_state = None if phone is None else area_to_state.get(phone[0:3])
        candidates = (row["home_state"], row["work_state"], phone_state)
        # First non-null candidate wins; None when every candidate is None.
        row["inferred_state"] = next(
            (val for val in candidates if val is not None), None
        )
        yield row
# Apply fill_location to the input data inside a Fugue workflow.
# NOTE(review): `df` must already be bound to the input data before this
# point; its definition is not part of this snippet.
with FugueWorkflow() as dag:
    # The output schema for the transform presumably comes from the
    # "# schema:" comment placed directly above fill_location; confirm
    # against the Fugue transformer documentation.
    df = dag.df(df).transform(fill_location)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment