This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import statements | |
from fugue import FugueWorkflow, SparkExecutionEngine | |
from typing import List, Any, Dict, Iterable | |
# Area code to state: maps a three-character phone area code to a US state code.
area_to_state = {"217": "IL", "312": "IL", "415": "CA", "352": "FL"}
# schema: *, inferred_state:str | |
def fill_location(df:Iterable[Dict[str,Any]]) -> Iterable[Dict[str,Any]]: | |
for row in df: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Comparison of creating inferred_state column | |
# Lookup table from three-character phone area code to US state code.
area_to_state = {"217": "IL", "312": "IL", "415": "CA", "352": "FL"}
# Pandas implementation
# Fill inferred_state from the first available source, in order:
#   1. home_state
#   2. work_state
#   3. the state mapped from the first three characters of phone
# Parentheses are used instead of backslash continuations so trailing
# whitespace cannot break the statement.
df['inferred_state'] = (
    df['home_state']
    .fillna(df['work_state'])
    .fillna(df['phone'].str.slice(0, 3).map(area_to_state))
)
# Spark implementation | |
from pyspark.sql.functions import coalesce, col, substring, create_map, lit |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#schema: user_id:int, median:double
def get_median(df: pd.DataFrame) -> pd.DataFrame:
    """Return a one-row frame with the group's user_id and median measurement.

    Args:
        df: Frame with ``user_id`` and ``measurement`` columns. The
            ``user_id`` is read from the first row only, so the caller is
            presumably passing a single-user partition (confirm at call site).

    Returns:
        A single-row DataFrame with columns ``user_id`` and ``median``.

    Raises:
        IndexError: If ``df`` has no rows.
    """
    # Select the column before the row so the id keeps its own dtype instead
    # of being upcast through a mixed-type row Series.
    user_id = df['user_id'].iloc[0]
    # Series.median() gives a scalar; the DataFrame form (df[['measurement']])
    # would give a Series and put a nested object into the cell.
    median = df['measurement'].median()
    return pd.DataFrame({'user_id': [user_id], 'median': [median]})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandera as pa | |
# Pandera schema: the "Price" column is declared as pa.Int and checked to lie
# in the 5-20 range.
price_check = pa.DataFrameSchema({
    "Price": pa.Column(pa.Int, pa.Check.in_range(min_value=5, max_value=20)),
})
# schema: *
def price_validation(df: pd.DataFrame) -> pd.DataFrame:
    """Validate ``df`` against the module-level ``price_check`` schema.

    Args:
        df: Frame to validate.

    Returns:
        The same ``df`` object that was passed in; the value returned by
        ``price_check.validate`` is intentionally not used.

    Raises:
        Whatever ``price_check.validate`` raises when validation fails
        (pandera documents ``SchemaError`` for this).
    """
    price_check.validate(df)
    return df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandera as pa | |
# Schema for the "Price" column: pa.Int dtype with an in_range check of 5 to 20.
price_check = pa.DataFrameSchema({
    "Price": pa.Column(pa.Int, pa.Check.in_range(min_value=5, max_value=20)),
})
# schema: *
def price_validation(df: pd.DataFrame) -> pd.DataFrame:
    """Run the ``price_check`` schema over ``df`` and hand ``df`` back.

    Args:
        df: Frame whose contents are checked by ``price_check``.

    Returns:
        The input ``df`` itself, not the object returned by ``validate``.

    Raises:
        Any exception raised by ``price_check.validate`` on a failed check
        (``SchemaError`` per the pandera documentation).
    """
    price_check.validate(df)
    return df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
# Sample data: three cities for each of two states, with an integer price each.
df = pd.DataFrame({
    'State': ['FL', 'FL', 'FL', 'CA', 'CA', 'CA'],
    'City': ['Tampa', 'Orlando', 'Miami', 'Oakland', 'San Francisco', 'San Jose'],
    'Price': [8, 12, 10, 16, 20, 16],
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandera as pa | |
from pandera import Column, Check, DataFrameSchema | |
from fugue import FugueWorkflow | |
from fugue_spark import SparkExecutionEngine | |
# Pandera schema with a pa.Float "Price" column checked to lie in the 7-13 range.
# NOTE(review): presumably meant for rows where State == 'FL' - confirm where it is used.
price_check_FL = pa.DataFrameSchema({
    "Price": Column(pa.Float, Check.in_range(min_value=7, max_value=13)),
})
price_check_CA = pa.DataFrameSchema({ |
This file has been truncated, but you can view the full file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html lang="en"> | |
<head> | |
<meta charset="utf-8"> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pycaret.datasets import get_data | |
# Load the "titanic" dataset via the get_data helper imported from pycaret.datasets.
df = get_data("titanic")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pycaret.classification import * | |
# Set up the PyCaret classification experiment on df, predicting "Survived".
# NOTE(review): session_id is fixed, presumably for reproducible runs; the
# silent/verbose/html flags look intended to suppress interactive output.
# Confirm all of these against the installed PyCaret version.
clf = setup(
    data=df,
    target="Survived",
    session_id=123,
    silent=True,
    verbose=False,
    html=False,
)
models = compare_models(fold = 5, |
OlderNewer