Last active
August 5, 2023 14:48
-
-
Save floew2/a42906e7e2ac0444ac0c909f93b82a58 to your computer and use it in GitHub Desktop.
A workflow for classifying Landsat satellite images
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
A workflow for classifying Landsat remote sensing imagery | |
involves utilizing soft voting with SVM and Gradient Boosting classifiers. | |
The Isolation Forest algorithm is applied to identify outliers in the | |
training data. Feature selection is performed through a Recursive Feature | |
Elimination approach. The outcome consists of classification and classification | |
probability raster images saved in TIF format. | |
Author: Fabian Löw | |
Date: June 2023 | |
''' | |
# Import required modules | |
import skimage.io as io | |
import numpy as np | |
from sklearn.feature_selection import RFE | |
from sklearn.ensemble import GradientBoostingClassifier, IsolationForest | |
from sklearn.externals import joblib | |
from sklearn.model_selection import train_test_split | |
import numpy.ma as ma | |
import os, shutil | |
def Classification(): | |
raster = path_to_tiff + ".tif" | |
samples = path_to_tiff + "_samples.tif" | |
tfw_old = path_to_tiff + ".tfw" | |
classification = rootdir + "Classification\\" + pathrow + "_" + year + ".tif" | |
# read Landsat data as TIF | |
img_ds = io.imread(raster) | |
img = np.array(img_ds, dtype='uint16') | |
# read training samples as TIF with same dimensions as the Landsat image | |
roi_ds = io.imread(samples) | |
roi = np.array(roi_ds, dtype='uint8') | |
labels = np.unique(roi[roi > 0]) | |
print('The training data include {n} classes: {classes}'.format(n=labels.size, classes=labels)) | |
X = img[roi > 0, :] | |
Y = roi[roi > 0] | |
# splitting of training & test data in 80% - 20% for outlier analysis | |
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8, test_size=0.2, random_state=None) | |
iso = IsolationForest(n_estimators = 500, | |
max_samples = 1.0, | |
contamination = 0.001, | |
max_features = 1.0).fit(X_train) | |
# Outliers are flagged and labeled as "-1" | |
X_train_outlier = iso.predict(X_train) | |
mask = ma.masked_equal(X_train_outlier, -1) | |
Y_mask = mask * Y_train | |
# further splitting of new training data, cleaned from outliers in 80% - 20% | |
X_train_new, X_test_new, Y_train_new, Y_test_new = train_test_split(X_train, Y_mask, train_size=0.8, test_size=0.2, random_state=None) | |
gbt = GradientBoostingClassifier(n_estimators = 100, | |
min_samples_leaf = 1, | |
min_samples_split = 10, | |
max_depth = 10, | |
max_features = 'sqrt', | |
learning_rate = 0.1, | |
subsample = 0.8, | |
random_state = None, | |
warm_start = True).fit(X_train_new, Y_train_new) | |
svm = SVC(C = 1.0, | |
kernel = 'rbf', | |
gamma = 1.0, | |
tol = 0.005, | |
probability = True, | |
class_weight = 'balanced', | |
cache_size = 8000, | |
decision_function_shape = 'ovr', | |
max_iter = 1000).fit(X_train_new, Y_train_new) | |
# Voting classifier for Gradient Boosting and SVM | |
clf = VotingClassifier(estimators=[('gbt', gbt), ('svm', svm)], voting = 'soft', weights = [1,1]).fit(X_train, Y_train) | |
# Feature Importances of the Gradient Boosting classifier | |
print gbt.feature_importances_ | |
# Feature Selection method, e.g. 10 features/bands | |
selector = RFE(clf, 10, step=1, verbose=0).fit(X_train_new, Y_train_new) | |
print selector.score(X_test_new, Y_test_new), raster | |
# save the classification model | |
model = rootdir + "MODEL\\" + pathrow + "_" + year + ".pkl" | |
joblib.dump(selector, model) | |
# call the classification model | |
clf = joblib.load(model) | |
# reshaping of the array with 10 features/bands | |
new_arr = (img.shape[0] * img.shape[1], img.shape[2]) | |
new_img = img[:, :, :10].reshape(new_arr) | |
row = img.shape[0] | |
col = img.shape[1] | |
# calculating classification probability, e.g. in this case with 7 classes | |
class_estimation = clf.predict_proba(new_img) | |
for n in range(0,7): | |
idx = str(n + 1) | |
class_prob = prob[:, n] | |
class_prob = class_prob.reshape((row, col)) | |
probability = rootdir + "Probability\\" + pathrow + "_" + year + "_" + idx + ".tif" | |
io.imsave(probability, class_prob) | |
tfw_new = probability.split(".tif")[0] + ".tfw" | |
shutil.copy(tfw_old, tfw_new) | |
# saving the classification results | |
class_prediction = clf.predict(new_img) | |
class_prediction = class_prediction.reshape(img[:, :, 0].shape) | |
io.imsave(classification, class_prediction) | |
tfw_new = classification.split(".tif")[0] + ".tfw" | |
shutil.copy(tfw_old, tfw_new) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment