
@Eligijus112
Created June 2, 2022 04:14
sklearn feature importance pipeline
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Loading the data
_cali_data = fetch_california_housing(as_frame=True)
X, y = _cali_data.data, _cali_data.target
# Dropping the geo coordinate features
X = X.drop(columns=['Latitude', 'Longitude'])
# Dropping the population feature; in real-world modeling it could serve as a sample weight.
# For educational and inference purposes, we drop it.
X = X.drop(columns=['Population'])
# Saving the feature names
features = X.columns.tolist()
# Train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
# Defining the hyperparameters
hps = {
    'max_depth': 3,
    'min_samples_split': 4,
    'random_state': 0
}
# Initializing the decision tree regressor
tree = DecisionTreeRegressor(**hps)
# Fitting on the training data
tree.fit(X_train, y_train)
# Extracting the impurity-based importances computed by sklearn
importances_sk = tree.feature_importances_
# Creating a dataframe with the feature importance by sklearn
feature_importance_sk = {}
for i, feature in enumerate(features):
    feature_importance_sk[feature] = round(importances_sk[i], 3)
print(f"Feature importance by sklearn: {feature_importance_sk}")
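Not part of the original gist, but a useful cross-check: sklearn's `feature_importances_` are the normalized weighted impurity decreases of each feature's splits. The sketch below recomputes them by hand from the fitted `tree_` structure and verifies they match, under the same data preparation and hyperparameters as above.

```python
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Same preparation as in the gist
X, y = fetch_california_housing(as_frame=True, return_X_y=True)
X = X.drop(columns=['Latitude', 'Longitude', 'Population'])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)
tree = DecisionTreeRegressor(
    max_depth=3, min_samples_split=4, random_state=0
).fit(X_train, y_train)

# Recomputing importances from the low-level tree structure
t = tree.tree_
importances = np.zeros(X.shape[1])
for node in range(t.node_count):
    left, right = t.children_left[node], t.children_right[node]
    if left == -1:  # leaf node: no split, no impurity decrease
        continue
    n = t.weighted_n_node_samples
    # Weighted impurity decrease attributed to the splitting feature
    importances[t.feature[node]] += (
        n[node] * t.impurity[node]
        - n[left] * t.impurity[left]
        - n[right] * t.impurity[right]
    )
importances /= importances.sum()  # normalize to sum to 1

assert np.allclose(importances, tree.feature_importances_)
```

Note that these impurity-based importances are computed on the training data only; for a held-out estimate, `sklearn.inspection.permutation_importance` on `X_test` is a common alternative.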