Created
July 23, 2020 14:18
-
-
Save Huud/8e0823fa7be2dcd1bb9f3c418cb94c19 to your computer and use it in GitHub Desktop.
How to calculate the mean and standard deviation of an image dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Calculate the per-channel mean and standard deviation of an image dataset
from pathlib import Path

import cv2
import numpy as np
def load_rgb_image(path):
    """Read the image at *path* and return it as a float RGB array scaled by 1/255.

    Args:
        path: Location of the image file (anything ``str()`` turns into a path).

    Returns:
        The decoded image converted from BGR to RGB, cast to float and
        divided by 255.

    Raises:
        ValueError: If OpenCV could not read the file.
    """
    im = cv2.imread(str(path))
    # cv2.imread reports failure by returning None rather than raising, so
    # check here to fail with the offending path instead of inside cvtColor.
    if im is None:
        raise ValueError(f"could not read image: {path}")
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    return im.astype(float) / 255.


def compute_mean_std(files, load_image=load_rgb_image):
    """Return the per-channel mean and standard deviation over a set of images.

    Every image contributes equally: the mean is the average of the per-image
    channel means, and the std is the square root of the average per-image
    mean squared deviation from that overall mean.

    Args:
        files: Iterable of image paths.
        load_image: Callable that maps one path to a height x width x 3 float
            array. Defaults to ``load_rgb_image``.

    Returns:
        A ``(mean, std)`` tuple of arrays with one entry per channel.

    Raises:
        ValueError: If *files* is empty.
    """
    files = list(files)
    if not files:
        # Without this guard the division below would yield NaN silently.
        raise ValueError("no image files given; cannot compute mean and std")
    numSamples = len(files)

    # Since the std can't be calculated by simply finding it for each image
    # and averaging like the mean can be, we first calculate the overall mean
    # in a first run, then run over the images again to get the std.
    mean = np.zeros(3)
    for path in files:
        mean += load_image(path).mean(axis=(0, 1))
    mean /= numSamples

    stdTemp = np.zeros(3)
    for path in files:
        im = load_image(path)
        # Mean squared deviation per channel; mean is broadcast over pixels.
        stdTemp += ((im - mean) ** 2).mean(axis=(0, 1))
    std = np.sqrt(stdTemp / numSamples)

    return mean, std


if __name__ == "__main__":
    imageFilesDir = Path(r'C:\your\dataset\dir\here\trainData')
    files = list(imageFilesDir.rglob('*.png'))

    mean, std = compute_mean_std(files)
    print(mean)
    print(std)

    # out:
    #[0.50707516 0.48654887 0.44091784]
    #[0.26733429 0.25643846 0.27615047]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment