analyticsindiamagazine · October 21, 2019 11:24
diff --git a/CreatingTabularList.py b/CreatingTabularList.py
 # Keyword arguments shared by the fastai TabularList constructors below.

 # Path / default location used when saving or loading models.
 path = ''

 # Name of the dependent variable (the prediction target).
 dep_var = 'Price'

 # Dataset columns handled as categorical features.
 cat_names = [
     'Brand',
     'Model',
     'Location',
     'Fuel_Type',
     'Transmission',
     'Owner_Type',
 ]

 # Dataset columns handled as continuous features; the dependent
 # variable 'Price' is intentionally not listed here.
 cont_names = [
     'Year',
     'Kilometers_Driven',
     'Mileage',
     'Engine',
     'Power',
     'Seats',
     'New_Price',
 ]

 # Processes/transforms handed to TabularList through `procs`.
 procs = [FillMissing, Categorify, Normalize]

 # The validation rows are the tail of train_data, so the slice stops at
 # the total row count ...
 end_indx = len(train_data)

 # ... and begins int(20% of the row count) rows before that end.
 start_indx = end_indx - int(end_indx * 0.2)

 # TabularList over a copy of the validation slice. No `procs` are passed
 # here, and nothing further down in this snippet reads `val`.
 val = TabularList.from_df(
     train_data.iloc[start_indx:end_indx].copy(),
     path=path,
     cat_names=cat_names,
     cont_names=cont_names,
 )

 # TabularList over the test data, using the same columns plus `procs`.
 test = TabularList.from_df(
     test_data,
     path=path,
     cat_names=cat_names,
     cont_names=cont_names,
     procs=procs,
 )

 # Training pipeline: wrap all of train_data, split off the row positions
 # start_indx .. end_indx - 1 by index, label with dep_var, attach the
 # test list, and finish with databunch().
 data = (
     TabularList.from_df(
         train_data,
         path=path,
         cat_names=cat_names,
         cont_names=cont_names,
         procs=procs,
     )
     .split_by_idx(list(range(start_indx, end_indx)))
     .label_from_df(cols=dep_var)
     .add_test(test)
     .databunch()
 )
	# Keyword arguments shared by the fastai TabularList constructors below.

	# Path / default location used when saving or loading models.
	path = ''

	# Name of the dependent variable (the prediction target).
	dep_var = 'Price'

	# Dataset columns handled as categorical features.
	cat_names = [
		'Brand',
		'Model',
		'Location',
		'Fuel_Type',
		'Transmission',
		'Owner_Type',
	]

	# Dataset columns handled as continuous features; the dependent
	# variable 'Price' is intentionally not listed here.
	cont_names = [
		'Year',
		'Kilometers_Driven',
		'Mileage',
		'Engine',
		'Power',
		'Seats',
		'New_Price',
	]

	# Processes/transforms handed to TabularList through `procs`.
	procs = [FillMissing, Categorify, Normalize]

	# The validation rows are the tail of train_data, so the slice stops at
	# the total row count ...
	end_indx = len(train_data)

	# ... and begins int(20% of the row count) rows before that end.
	start_indx = end_indx - int(end_indx * 0.2)

	# TabularList over a copy of the validation slice. No `procs` are passed
	# here, and nothing further down in this snippet reads `val`.
	val = TabularList.from_df(
		train_data.iloc[start_indx:end_indx].copy(),
		path=path,
		cat_names=cat_names,
		cont_names=cont_names,
	)

	# TabularList over the test data, using the same columns plus `procs`.
	test = TabularList.from_df(
		test_data,
		path=path,
		cat_names=cat_names,
		cont_names=cont_names,
		procs=procs,
	)

	# Training pipeline: wrap all of train_data, split off the row positions
	# start_indx .. end_indx - 1 by index, label with dep_var, attach the
	# test list, and finish with databunch().
	data = (
		TabularList.from_df(
			train_data,
			path=path,
			cat_names=cat_names,
			cont_names=cont_names,
			procs=procs,
		)
		.split_by_idx(list(range(start_indx, end_indx)))
		.label_from_df(cols=dep_var)
		.add_test(test)
		.databunch()
	)