Skip to content

Instantly share code, notes, and snippets.

@floew2
Last active August 5, 2023 14:48
Show Gist options
  • Save floew2/a42906e7e2ac0444ac0c909f93b82a58 to your computer and use it in GitHub Desktop.
Save floew2/a42906e7e2ac0444ac0c909f93b82a58 to your computer and use it in GitHub Desktop.
A workflow for classifying Landsat satellite images
'''
A workflow for classifying Landsat remote sensing imagery
involves utilizing soft voting with SVM and Gradient Boosting classifiers.
The Isolation Forest algorithm is applied to identify outliers in the
training data. Feature selection is performed through a Recursive Feature
Elimination approach. The outcome consists of classification and classification
probability raster images saved in TIF format.
Author: Fabian Löw
Date: June 2023
'''
# Import required modules
import skimage.io as io
import numpy as np
from sklearn.feature_selection import RFE
from sklearn.ensemble import GradientBoostingClassifier, IsolationForest
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
import numpy.ma as ma
import os, shutil
def Classification():
raster = path_to_tiff + ".tif"
samples = path_to_tiff + "_samples.tif"
tfw_old = path_to_tiff + ".tfw"
classification = rootdir + "Classification\\" + pathrow + "_" + year + ".tif"
# read Landsat data as TIF
img_ds = io.imread(raster)
img = np.array(img_ds, dtype='uint16')
# read training samples as TIF with same dimensions as the Landsat image
roi_ds = io.imread(samples)
roi = np.array(roi_ds, dtype='uint8')
labels = np.unique(roi[roi > 0])
print('The training data include {n} classes: {classes}'.format(n=labels.size, classes=labels))
X = img[roi > 0, :]
Y = roi[roi > 0]
# splitting of training & test data in 80% - 20% for outlier analysis
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8, test_size=0.2, random_state=None)
iso = IsolationForest(n_estimators = 500,
max_samples = 1.0,
contamination = 0.001,
max_features = 1.0).fit(X_train)
# Outliers are flagged and labeled as "-1"
X_train_outlier = iso.predict(X_train)
mask = ma.masked_equal(X_train_outlier, -1)
Y_mask = mask * Y_train
# further splitting of new training data, cleaned from outliers in 80% - 20%
X_train_new, X_test_new, Y_train_new, Y_test_new = train_test_split(X_train, Y_mask, train_size=0.8, test_size=0.2, random_state=None)
gbt = GradientBoostingClassifier(n_estimators = 100,
min_samples_leaf = 1,
min_samples_split = 10,
max_depth = 10,
max_features = 'sqrt',
learning_rate = 0.1,
subsample = 0.8,
random_state = None,
warm_start = True).fit(X_train_new, Y_train_new)
svm = SVC(C = 1.0,
kernel = 'rbf',
gamma = 1.0,
tol = 0.005,
probability = True,
class_weight = 'balanced',
cache_size = 8000,
decision_function_shape = 'ovr',
max_iter = 1000).fit(X_train_new, Y_train_new)
# Voting classifier for Gradient Boosting and SVM
clf = VotingClassifier(estimators=[('gbt', gbt), ('svm', svm)], voting = 'soft', weights = [1,1]).fit(X_train, Y_train)
# Feature Importances of the Gradient Boosting classifier
print gbt.feature_importances_
# Feature Selection method, e.g. 10 features/bands
selector = RFE(clf, 10, step=1, verbose=0).fit(X_train_new, Y_train_new)
print selector.score(X_test_new, Y_test_new), raster
# save the classification model
model = rootdir + "MODEL\\" + pathrow + "_" + year + ".pkl"
joblib.dump(selector, model)
# call the classification model
clf = joblib.load(model)
# reshaping of the array with 10 features/bands
new_arr = (img.shape[0] * img.shape[1], img.shape[2])
new_img = img[:, :, :10].reshape(new_arr)
row = img.shape[0]
col = img.shape[1]
# calculating classification probability, e.g. in this case with 7 classes
class_estimation = clf.predict_proba(new_img)
for n in range(0,7):
idx = str(n + 1)
class_prob = prob[:, n]
class_prob = class_prob.reshape((row, col))
probability = rootdir + "Probability\\" + pathrow + "_" + year + "_" + idx + ".tif"
io.imsave(probability, class_prob)
tfw_new = probability.split(".tif")[0] + ".tfw"
shutil.copy(tfw_old, tfw_new)
# saving the classification results
class_prediction = clf.predict(new_img)
class_prediction = class_prediction.reshape(img[:, :, 0].shape)
io.imsave(classification, class_prediction)
tfw_new = classification.split(".tif")[0] + ".tfw"
shutil.copy(tfw_old, tfw_new)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment