Created October 21, 2019 11:24
-
-
Save analyticsindiamagazine/1652d80524e09caf81e562664575fd08 to your computer and use it in GitHub Desktop.
CreatingTabularList.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Defining the keyword arguments for fastai's TabularList and building the
# training/validation/test data from `train_data` and `test_data`.

# Path / default location for saving/loading models
path = ''

# The dependent variable/target
dep_var = 'Price'

# The list of categorical features in the dataset
cat_names = ['Brand', 'Model', 'Location', 'Fuel_Type', 'Transmission', 'Owner_Type']

# The list of continuous features in the dataset
# (the dependent variable 'Price' is deliberately excluded)
cont_names = ['Year', 'Kilometers_Driven', 'Mileage', 'Engine', 'Power', 'Seats', 'New_Price']

# List of processes/transforms to be applied to the dataset
procs = [FillMissing, Categorify, Normalize]

# The validation set is the trailing int(20%) of the rows of train_data:
# positions start_indx (inclusive) through end_indx (exclusive).
start_indx = len(train_data) - int(len(train_data) * 0.2)
end_indx = len(train_data)

# TabularList for validation, built from a copy of the trailing rows.
# NOTE(review): this list is created without `procs` and is not referenced
# again in this snippet; the split used for `data` below comes from
# `split_by_idx` -- confirm whether `val` is still needed by later code.
val = TabularList.from_df(
    train_data.iloc[start_indx:end_indx].copy(),
    path=path,
    cat_names=cat_names,
    cont_names=cont_names,
)

# TabularList for the test set
test = TabularList.from_df(
    test_data,
    path=path,
    cat_names=cat_names,
    cont_names=cont_names,
    procs=procs,
)

# TabularList for training: split off the same trailing index range,
# label with the dependent variable, attach the test list, and build the
# databunch.
data = (
    TabularList.from_df(
        train_data,
        path=path,
        cat_names=cat_names,
        cont_names=cont_names,
        procs=procs,
    )
    .split_by_idx(list(range(start_indx, end_indx)))
    .label_from_df(cols=dep_var)
    .add_test(test)
    .databunch()
)
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.