Last active
April 28, 2019 20:55
-
-
Save mark-szabo/2d828615098d64971435dfae8988c0db to your computer and use it in GitHub Desktop.
Preprocessing camera images for MNIST-based neural networks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using SixLabors.ImageSharp; | |
using SixLabors.ImageSharp.PixelFormats; | |
using SixLabors.ImageSharp.Processing; | |
using SixLabors.Primitives; | |
using System; | |
using System.IO; | |
namespace ImagePreprocessingService | |
{ | |
public class MNISTPreprocessor | |
{ | |
private static readonly Rgba32 _backgroundColor = Rgba32.White; | |
private static readonly Rgba32 _foregroundColor = Rgba32.Black; | |
/// <summary>
/// Preprocess camera images for MNIST-based neural networks.
/// </summary>
/// <param name="input">Encoded source image in a byte array.</param>
/// <returns>Preprocessed image, PNG-encoded, in a byte array.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
public static byte[] Preprocess(byte[] input)
{
    if (input == null)
    {
        throw new ArgumentNullException(nameof(input));
    }

    // Both the decoded image and the stream are IDisposable; release them
    // deterministically once the PNG bytes have been copied out.
    using (Image<Rgba32> image = Image.Load(input))
    using (var stream = new MemoryStream())
    {
        Image<Rgba32> processed = Preprocess(image);
        processed.SaveAsPng(stream);

        // ToArray copies the written bytes, so the result outlives the stream.
        return stream.ToArray();
    }
}
/// <summary>
/// Preprocess camera images for MNIST-based neural networks.
/// </summary>
/// <remarks>
/// The image is mutated in place and the same instance is returned.
/// The steps are: grayscale, white vignette, binary threshold, crop to the
/// foreground bounding box, pad to a square, resize to 20x20 and pad to 28x28.
/// </remarks>
/// <param name="image">Source image in a file format agnostic structure in memory as a series of Rgba32 pixels.</param>
/// <returns>Preprocessed image in a file format agnostic structure in memory as a series of Rgba32 pixels.</returns>
/// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
public static Image<Rgba32> Preprocess(Image<Rgba32> image)
{
    if (image == null)
    {
        throw new ArgumentNullException(nameof(image));
    }

    // Brightness cut-off separating background (above) from foreground (below).
    const float threshold = 0.6f;
    // Side length the cropped content is scaled to.
    const int contentSize = 20;
    // Side length of the final image; leaves a 4 pixel margin around the content.
    const int outputSize = 28;

    // Step 1: Apply a grayscale filter
    image.Mutate(i => i.Grayscale());

    // Step 2: Apply a white vignette on the corners to remove shadow marks
    image.Mutate(i => i.Vignette(Rgba32.White));

    // Step 3: Separate foreground and background with a threshold and set the correct colors
    image.Mutate(i => i.BinaryThreshold(threshold, _backgroundColor, _foregroundColor));

    // Step 4: Crop to bounding box.
    // Skip the crop when the box has a non-positive width or height, so that
    // Crop is never handed an invalid rectangle; the uncropped image is
    // processed instead.
    var boundingBox = FindBoundingBox(image);
    if (boundingBox.Width > 0 && boundingBox.Height > 0)
    {
        image.Mutate(i => i.Crop(boundingBox));
    }

    // Step 5: Make the image a square (size is read after the crop on purpose)
    var maxWidthHeight = Math.Max(image.Width, image.Height);
    image.Mutate(i => i.Pad(maxWidthHeight, maxWidthHeight).BackgroundColor(_backgroundColor));

    // Step 6: Downscale to 20x20
    image.Mutate(i => i.Resize(contentSize, contentSize));

    // Step 7: Add 4 pixel margin
    image.Mutate(i => i.Pad(outputSize, outputSize).BackgroundColor(_backgroundColor));

    return image;
}
/// <summary>
/// Finds the smallest rectangle containing every pixel that differs from the background color.
/// </summary>
/// <param name="image">Image to scan; expected to be already thresholded to background/foreground colors.</param>
/// <returns>
/// The tight bounding box of all non-background pixels, or the full image
/// bounds when the image contains no such pixel.
/// </returns>
private static Rectangle FindBoundingBox(Image<Rgba32> image)
{
    int width = image.Width;
    int height = image.Height;

    // Scan columns left to right for the first one holding a foreground pixel.
    int left = 0;
    while (left < width && !ColumnHasForeground(left))
    {
        left++;
    }

    if (left == width)
    {
        // No foreground anywhere: return the whole image so cropping is a no-op
        // instead of producing a rectangle with negative dimensions.
        return new Rectangle(0, 0, width, height);
    }

    // The remaining scans are guaranteed to terminate because column `left`
    // contains at least one foreground pixel.

    // Scan columns right to left.
    int right = width - 1;
    while (!ColumnHasForeground(right))
    {
        right--;
    }

    // Scan rows top to bottom.
    int top = 0;
    while (!RowHasForeground(top))
    {
        top++;
    }

    // Scan rows bottom to top.
    int bottom = height - 1;
    while (!RowHasForeground(bottom))
    {
        bottom--;
    }

    // Edges are inclusive pixel indices, hence the +1 when converting to a size.
    return new Rectangle(left, top, right - left + 1, bottom - top + 1);

    // True when any pixel in column x differs from the background color.
    bool ColumnHasForeground(int x)
    {
        for (int y = 0; y < height; y++)
        {
            if (image[x, y] != _backgroundColor)
            {
                return true;
            }
        }

        return false;
    }

    // True when any pixel in row y differs from the background color.
    // Only columns left..right are checked; all foreground lies between them.
    bool RowHasForeground(int y)
    {
        for (int x = left; x <= right; x++)
        {
            if (image[x, y] != _backgroundColor)
            {
                return true;
            }
        }

        return false;
    }
}
/// <summary>
/// Flattens an image into a row-major array of inverted grayscale intensities.
/// </summary>
/// <param name="image">Image to convert.</param>
/// <returns>
/// One entry per pixel (Width * Height values, 784 for a 28x28 image), each
/// computed as 255 minus the average of the R, G and B channels, so white
/// maps to 0 and black maps to 255.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
public static int[] ConvertImageToArray(Image<Rgba32> image)
{
    if (image == null)
    {
        throw new ArgumentNullException(nameof(image));
    }

    int width = image.Width;
    int height = image.Height;

    // Size the buffer from the image rather than assuming 28x28, so larger
    // images no longer overrun a fixed 784-element array.
    var pixels = new int[width * height];
    var index = 0;

    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
            // Read the pixel once instead of going through the indexer per channel.
            Rgba32 pixel = image[x, y];
            pixels[index] = 255 - ((pixel.R + pixel.G + pixel.B) / 3);
            index++;
        }
    }

    return pixels;
}
/// <summary>
/// Writes the inverted intensity of every pixel to the console as a
/// zero-padded three-digit number, one image row per console line.
/// </summary>
/// <param name="image">Image to print.</param>
private static void PrintToConsole(Image<Rgba32> image)
{
    var pixels = ConvertImageToArray(image);

    // Row length comes from the image instead of a hard-coded 28.
    int rowLength = image.Width;

    for (int i = 0; i < pixels.Length; i++)
    {
        Console.Write(pixels[i].ToString("D3"));

        // End the console line after the last pixel of each image row.
        if ((i + 1) % rowLength == 0)
        {
            Console.WriteLine();
        }
    }
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment