Skip to content

Instantly share code, notes, and snippets.

@gvyshnya
Created July 1, 2020 09:52
Show Gist options
  • Save gvyshnya/1db09b61267233d8a1dbf6b2bd1dcb3d to your computer and use it in GitHub Desktop.
Save gvyshnya/1db09b61267233d8a1dbf6b2bd1dcb3d to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
import pdpipe as pdp
# ... data reading code goes here
# Transformation pipeline, declared as one ordered list of pdpipe stages.
# NOTE(review): pdpipe documents ApplyByCols as element-wise, so the lambdas
# below presumably receive single cell values rather than whole columns --
# confirm against the installed pdpipe version.
pipeline_1 = pdp.PdPipeline([
    # Coerce the coordinate columns to numeric values.
    pdp.ApplyByCols(
        ['lat', 'lon', 'lat_inspection_location', 'lon_inspection_location'],
        lambda value: pd.to_numeric(value),
    ),
    # Give the plain lat/lon pair an explicit "quote location" name.
    pdp.ColRename({'lat': 'lat_quote_location', 'lon': 'lon_quote_location'}),
    # Parse the timestamp columns and strip any timezone information.
    # TODO: timezone handling still needs to be checked.
    pdp.ApplyByCols(
        ['lead_created_at', 'booking_created', 'booking_appointment_date'],
        lambda value: pd.to_datetime(value).tz_localize(None),
    ),
    # mean_price: midpoint between min_price and max_price.
    pdp.ApplyToRows(
        lambda record: (record['min_price'] + record['max_price']) * 0.5,
        colname='mean_price',
    ),
    # distance_nan: flag for a missing distance; the source column is kept.
    pdp.ApplyByCols(
        ['distance'],
        pd.isna,
        result_columns=['distance_nan'],
        drop=False,
    ),
    # Bin distance on the given cut points; the source column is kept.
    pdp.Bin({'distance': [0, 2, 4, 10, 20, 50, 100]}, drop=False),
    # c_age: year the lead was created minus the value in the 'year' column.
    pdp.ApplyToRows(
        lambda record: record['lead_created_at'].year - record['year'],
        colname='c_age',
    ),
])

# Run every stage, in order, over the dataframe.
df = pipeline_1.apply(df)
# Columns that make up the ML-ready dataframe; every other column is dropped.
selected_cols = [
    'make',
    'model',
    'trim',
    'year',
    'c_age',
    'day_appointment_quote',
    'day_appointment_created',
    'mean_price',
    'distance_nan',
    'distance_bin',
    'booking_location',
    'city',
    'show_up',
]

# Everything currently in the dataframe that is not on the keep-list,
# in the dataframe's own column order.
_retained = frozenset(selected_cols)
cols_to_drop = [label for label in df.columns if label not in _retained]

# Drop the non-selected columns through a pdpipe stage.
pipeline_drop_non_ml_cols = pdp.ColDrop(cols_to_drop)
df = pipeline_drop_non_ml_cols.apply(df)
# One-hot encode the categorical features.
# NOTE(review): with pdpipe's default arguments OneHotEncode presumably drops
# the source columns after encoding -- confirm against the pdpipe docs.
list_dummie = [
    'distance_nan',
    'distance_bin',
    'make',
    'model',
    'trim',
    'booking_location',
    'city',
]
pipeline_3 = pdp.OneHotEncode(list_dummie)

# Encoded frame built from the reduced dataframe.
XY_onehot = pipeline_3.apply(df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment