floew2 · August 5, 2023 14:48
diff --git a/satellite_image_classification.py b/satellite_image_classification.py
 '''
 A workflow for classifying Landsat remote sensing imagery 
 involves utilizing soft voting with SVM and Gradient Boosting classifiers. 
 The Isolation Forest algorithm is applied to identify outliers in the 
 training data. Feature selection is performed through a Recursive Feature 
 Elimination approach. The outcome consists of classification and classification 
 probability raster images saved in TIF format.

 Author: Fabian Löw
 Date: June 2023
 '''

 # Import required modules
 import skimage.io as io
 import numpy as np
 from sklearn.feature_selection import RFE
 from sklearn.ensemble import GradientBoostingClassifier, IsolationForest
 from sklearn.externals import joblib
 from sklearn.model_selection  import train_test_split
 import numpy.ma as ma
 import os, shutil

 def Classification():
 	raster = path_to_tiff + ".tif"
 	samples = path_to_tiff + "_samples.tif"
 	tfw_old = path_to_tiff + ".tfw"

 	classification = rootdir + "Classification\\" + pathrow + "_" + year + ".tif"
 					
 	# read Landsat data as TIF
 	img_ds = io.imread(raster)
 	img = np.array(img_ds, dtype='uint16')

 	# read training samples as TIF with same dimensions as the Landsat image
 	roi_ds = io.imread(samples)
 	roi = np.array(roi_ds, dtype='uint8')

 	labels = np.unique(roi[roi > 0])
 	print('The training data include {n} classes: {classes}'.format(n=labels.size, classes=labels))

 	X = img[roi > 0, :]
 	Y = roi[roi > 0]

 	# splitting of training & test data in 80% - 20% for outlier analysis
 	X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8, test_size=0.2, random_state=None)
 	iso = IsolationForest(n_estimators = 500,
 				max_samples = 1.0,
 				contamination = 0.001,
 				max_features = 1.0).fit(X_train)

 	# Outliers are flagged and labeled as "-1"

 	X_train_outlier = iso.predict(X_train)
 	mask = ma.masked_equal(X_train_outlier, -1)
 	Y_mask = mask * Y_train

 	# further splitting of new training data, cleaned from outliers in 80% - 20%
 	X_train_new, X_test_new, Y_train_new, Y_test_new = train_test_split(X_train, Y_mask, train_size=0.8, test_size=0.2, random_state=None)

 	gbt = GradientBoostingClassifier(n_estimators = 100,
 					min_samples_leaf = 1,
 					min_samples_split = 10,
 					max_depth = 10,
 					max_features = 'sqrt',
 					learning_rate = 0.1,
 					subsample = 0.8,
 					random_state = None,
 					warm_start = True).fit(X_train_new, Y_train_new)

 	svm = SVC(C = 1.0,
 		kernel = 'rbf',
 		gamma = 1.0,
 		tol = 0.005,
 		probability = True,
 		class_weight = 'balanced',
 		cache_size = 8000,
 		decision_function_shape = 'ovr',
 		max_iter = 1000).fit(X_train_new, Y_train_new)

 	# Voting classifier for Gradient Boosting and SVM
 	clf = VotingClassifier(estimators=[('gbt', gbt), ('svm', svm)], voting = 'soft', weights = [1,1]).fit(X_train, Y_train)

 	# Feature Importances of the Gradient Boosting classifier
 	print gbt.feature_importances_

 	# Feature Selection method, e.g. 10 features/bands
 	selector = RFE(clf, 10, step=1, verbose=0).fit(X_train_new, Y_train_new)
 	print selector.score(X_test_new, Y_test_new), raster

 	# save the classification model
 	model = rootdir + "MODEL\\" + pathrow + "_" + year + ".pkl"
 	joblib.dump(selector, model)

 	# call the classification model
 	clf = joblib.load(model)

 	# reshaping of the array with 10 features/bands
 	new_arr = (img.shape[0] * img.shape[1], img.shape[2])
 	new_img = img[:, :, :10].reshape(new_arr)

 	row = img.shape[0]
 	col = img.shape[1]

 	# calculating classification probability, e.g. in this case with 7 classes
 	class_estimation = clf.predict_proba(new_img)

 	for n in range(0,7):
 		idx = str(n + 1)
 		class_prob = prob[:, n]
 		class_prob = class_prob.reshape((row, col))

 		probability = rootdir + "Probability\\" + pathrow + "_" + year + "_" + idx + ".tif"
 		io.imsave(probability, class_prob)

 		tfw_new = probability.split(".tif")[0] + ".tfw"
 		shutil.copy(tfw_old, tfw_new)

 	# saving the classification results
 	class_prediction = clf.predict(new_img)
 	class_prediction = class_prediction.reshape(img[:, :, 0].shape)

 	io.imsave(classification, class_prediction)

 	tfw_new = classification.split(".tif")[0] + ".tfw"
 	shutil.copy(tfw_old, tfw_new)
	'''
	A workflow for classifying Landsat remote sensing imagery
	involves utilizing soft voting with SVM and Gradient Boosting classifiers.
	The Isolation Forest algorithm is applied to identify outliers in the
	training data. Feature selection is performed through a Recursive Feature
	Elimination approach. The outcome consists of classification and classification
	probability raster images saved in TIF format.

	Author: Fabian Löw
	Date: June 2023
	'''

	# Import required modules
	import skimage.io as io
	import numpy as np
	from sklearn.feature_selection import RFE
	from sklearn.ensemble import GradientBoostingClassifier, IsolationForest
	from sklearn.externals import joblib
	from sklearn.model_selection import train_test_split
	import numpy.ma as ma
	import os, shutil

	def Classification():
	raster = path_to_tiff + ".tif"
	samples = path_to_tiff + "_samples.tif"
	tfw_old = path_to_tiff + ".tfw"

	classification = rootdir + "Classification\\" + pathrow + "_" + year + ".tif"

	# read Landsat data as TIF
	img_ds = io.imread(raster)
	img = np.array(img_ds, dtype='uint16')

	# read training samples as TIF with same dimensions as the Landsat image
	roi_ds = io.imread(samples)
	roi = np.array(roi_ds, dtype='uint8')

	labels = np.unique(roi[roi > 0])
	print('The training data include {n} classes: {classes}'.format(n=labels.size, classes=labels))

	X = img[roi > 0, :]
	Y = roi[roi > 0]

	# splitting of training & test data in 80% - 20% for outlier analysis
	X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8, test_size=0.2, random_state=None)
	iso = IsolationForest(n_estimators = 500,
	max_samples = 1.0,
	contamination = 0.001,
	max_features = 1.0).fit(X_train)

	# Outliers are flagged and labeled as "-1"

	X_train_outlier = iso.predict(X_train)
	mask = ma.masked_equal(X_train_outlier, -1)
	Y_mask = mask * Y_train

	# further splitting of new training data, cleaned from outliers in 80% - 20%
	X_train_new, X_test_new, Y_train_new, Y_test_new = train_test_split(X_train, Y_mask, train_size=0.8, test_size=0.2, random_state=None)

	gbt = GradientBoostingClassifier(n_estimators = 100,
	min_samples_leaf = 1,
	min_samples_split = 10,
	max_depth = 10,
	max_features = 'sqrt',
	learning_rate = 0.1,
	subsample = 0.8,
	random_state = None,
	warm_start = True).fit(X_train_new, Y_train_new)

	svm = SVC(C = 1.0,
	kernel = 'rbf',
	gamma = 1.0,
	tol = 0.005,
	probability = True,
	class_weight = 'balanced',
	cache_size = 8000,
	decision_function_shape = 'ovr',
	max_iter = 1000).fit(X_train_new, Y_train_new)

	# Voting classifier for Gradient Boosting and SVM
	clf = VotingClassifier(estimators=[('gbt', gbt), ('svm', svm)], voting = 'soft', weights = [1,1]).fit(X_train, Y_train)

	# Feature Importances of the Gradient Boosting classifier
	print gbt.feature_importances_

	# Feature Selection method, e.g. 10 features/bands
	selector = RFE(clf, 10, step=1, verbose=0).fit(X_train_new, Y_train_new)
	print selector.score(X_test_new, Y_test_new), raster

	# save the classification model
	model = rootdir + "MODEL\\" + pathrow + "_" + year + ".pkl"
	joblib.dump(selector, model)

	# call the classification model
	clf = joblib.load(model)

	# reshaping of the array with 10 features/bands
	new_arr = (img.shape[0] * img.shape[1], img.shape[2])
	new_img = img[:, :, :10].reshape(new_arr)

	row = img.shape[0]
	col = img.shape[1]

	# calculating classification probability, e.g. in this case with 7 classes
	class_estimation = clf.predict_proba(new_img)

	for n in range(0,7):
	idx = str(n + 1)
	class_prob = prob[:, n]
	class_prob = class_prob.reshape((row, col))

	probability = rootdir + "Probability\\" + pathrow + "_" + year + "_" + idx + ".tif"
	io.imsave(probability, class_prob)

	tfw_new = probability.split(".tif")[0] + ".tfw"
	shutil.copy(tfw_old, tfw_new)

	# saving the classification results
	class_prediction = clf.predict(new_img)
	class_prediction = class_prediction.reshape(img[:, :, 0].shape)

	io.imsave(classification, class_prediction)

	tfw_new = classification.split(".tif")[0] + ".tfw"
	shutil.copy(tfw_old, tfw_new)