Last active
June 26, 2024 18:29
-
-
Save HybridDog/dd95a99d411972f030fb18543280ad60 to your computer and use it in GitHub Desktop.
SSIM-based perceptual image downscaling for PNG images
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// SPDX-License-Identifier: MIT | |
// | |
// This program contains an implementation of SSIM-based perceptual image | |
// downscaling for PNG images. | |
// The program can be twice as fast when compiled with -Ofast. | |
// The program behaviour can be adjusted with predefined preprocessor macros: | |
// * -DTILEABLE: Assume images wrap around at corners. This should be enabled | |
// when downscaling tileable textures. | |
// * -DGAMMA_INCORRECT: Downscale without applying the sRGB EOTF and OETF. | |
// When downscaling images with symbolic meaning, e.g. screenshots of text or | |
// function plots, disabled gamma correction can look subjectively better. | |
// | |
// The algorithm is based on "Perceptually Based Downscaling of Images" | |
// by A. Cengiz Öztireli and Markus Gross | |
// https://www.cl.cam.ac.uk/~aco41/Files/Sig15PerceptualDownscaling.pdf | |
#include <stdlib.h> // malloc, EXIT_* | |
#include <string.h> // memset | |
#include <math.h> | |
#include <png.h> | |
#define SQR_NP 2 // squareroot of the patch size, recommended: 2 | |
#define EXIT_PNG(F) if (!F) { \ | |
fprintf(stderr, "%s\n", bild.message); \ | |
return EXIT_FAILURE; \ | |
} | |
#define CLAMP(V, A, B) ((V) < (A) ? (A) : (V) > (B) ? (B) : (V)) | |
#define MIN(V, R) ((V) < (R) ? (V) : (R)) | |
#define MAX(V, R) ((V) > (R) ? (V) : (R)) | |
#define INDEX(X, Y, STRIDE) ((Y) * (STRIDE) + (X)) | |
#define u8 unsigned char | |
#define f32 float | |
struct pixel { | |
u8 r; | |
u8 g; | |
u8 b; | |
u8 a; | |
}; | |
#define PIXELBYTES 4 | |
struct matrix { | |
int w; | |
int h; | |
f32 *data; | |
}; | |
struct image { | |
int w; | |
int h; | |
struct pixel *pixels; | |
}; | |
#if !GAMMA_INCORRECT | |
/*! \brief linear to sRGB conversion | |
* | |
* taken from https://github.com/tobspr/GLSL-Color-Spaces/ | |
*/ | |
f32 linear_to_srgb(f32 v) | |
{ | |
if (v > 0.0031308f) | |
return 1.055f * powf(v, 1.0f / 2.4f) - 0.055f; | |
return 12.92f * v; | |
} | |
f32 srgb_to_linear(f32 v) | |
{ | |
if (v > 0.04045f) | |
return powf((v + 0.055f) / 1.055f, 2.4f); | |
return v / 12.92f; | |
} | |
/*! \brief sRGB to linear table for a slight performance increase | |
*/ | |
static f32 *srgb2lin; | |
static void get_srgb2lin_map() | |
{ | |
srgb2lin = malloc(256 * sizeof(f32)); | |
f32 divider = 1.0f / 255.0f; | |
for (int i = 0; i < 256; ++i) | |
srgb2lin[i] = srgb_to_linear(i * divider); | |
} | |
#endif // !GAMMA_INCORRECT | |
/*! \brief get y, cb and cr values each in [0;1] from u8 r, g and b values | |
* | |
* there's gamma correction, | |
* see http://www.ericbrasseur.org/gamma.html?i=1#Assume_a_gamma_of_2.2 | |
* 0.5 is added to cb and cr to have them in [0;1] | |
*/ | |
static void rgb2ycbcr(u8 or, u8 og, u8 ob, f32 *y, f32 *cb, f32 *cr) | |
{ | |
#if GAMMA_INCORRECT | |
f32 r = or / 255.0f; | |
f32 g = og / 255.0f; | |
f32 b = ob / 255.0f; | |
#else | |
f32 r = srgb2lin[or]; | |
f32 g = srgb2lin[og]; | |
f32 b = srgb2lin[ob]; | |
#endif | |
*y = (0.299f * r + 0.587f * g + 0.114f * b); | |
*cb = (-0.168736f * r - 0.331264f * g + 0.5f * b) + 0.5f; | |
*cr = (0.5f * r - 0.418688f * g - 0.081312f * b) + 0.5f; | |
} | |
/*! \brief the inverse of the function above | |
* | |
* numbers from http://www.equasys.de/colorconversion.html | |
* if values are too big or small, they're clamped | |
*/ | |
static void ycbcr2rgb(f32 y, f32 cb, f32 cr, u8 *r, u8 *g, u8 *b) | |
{ | |
f32 vr = (y + 1.402f * (cr - 0.5f)); | |
f32 vg = (y - 0.344136f * (cb - 0.5f) - 0.714136f * (cr - 0.5f)); | |
f32 vb = (y + 1.772f * (cb - 0.5f)); | |
#if !GAMMA_INCORRECT | |
vr = linear_to_srgb(vr); | |
vg = linear_to_srgb(vg); | |
vb = linear_to_srgb(vb); | |
#endif | |
*r = CLAMP(vr * 255.0f, 0, 255); | |
*g = CLAMP(vg * 255.0f, 0, 255); | |
*b = CLAMP(vb * 255.0f, 0, 255); | |
} | |
/*! \brief Convert an rgba image to 4 ycbcr matrices with values in [0, 1] | |
*/ | |
static struct matrix *image_to_matrices(struct image *bild) | |
{ | |
int w = bild->w; | |
int h = bild->h; | |
struct matrix *matrices = malloc( | |
PIXELBYTES * sizeof(struct matrix)); | |
for (int i = 0; i < PIXELBYTES; ++i) { | |
matrices[i].w = w; | |
matrices[i].h = h; | |
matrices[i].data = malloc(w * h * sizeof(f32)); | |
} | |
for (int i = 0; i < w * h; ++i) { | |
struct pixel px = bild->pixels[i]; | |
// put y, cb, cr and transpatency into the matrices | |
rgb2ycbcr(px.r, px.g, px.b, | |
&matrices[0].data[i], &matrices[1].data[i], &matrices[2].data[i]); | |
f32 divider = 1.0f / 255.0f; | |
matrices[3].data[i] = px.a * divider; | |
} | |
return matrices; | |
} | |
/*! \brief Convert 4 matrices to an rgba image | |
* | |
* Note that matrices becomes freed. | |
*/ | |
static struct image *matrices_to_image(struct matrix *matrices) | |
{ | |
struct image *bild = malloc(sizeof(struct image)); | |
int w = matrices[0].w; | |
int h = matrices[0].h; | |
bild->w = w; | |
bild->h = h; | |
struct pixel *pixels = malloc(w * h * PIXELBYTES); | |
for (int i = 0; i < w * h; ++i) { | |
struct pixel *px = &pixels[i]; | |
ycbcr2rgb(matrices[0].data[i], matrices[1].data[i], matrices[2].data[i], | |
&px->r, &px->g, &px->b); | |
f32 a = matrices[3].data[i] * 255; | |
px->a = CLAMP(a, 0, 255); | |
} | |
for (int i = 0; i < PIXELBYTES; ++i) { | |
free(matrices[i].data); | |
} | |
free(matrices); | |
bild->pixels = pixels; | |
return bild; | |
} | |
/*! \brief The actual downscaling algorithm | |
* | |
* \param mat The 4 matrices obtained form image_to_matrices. | |
* \param s The factor by which the image should become downscaled. | |
*/ | |
static void downscale_perc(struct matrix *mat, int s) | |
{ | |
// preparation | |
int w = mat->w; // input width | |
int h = mat->h; | |
f32 *input = mat->data; | |
int w2 = w / s; // output width | |
int h2 = h / s; | |
int input_size = w * h * sizeof(f32); | |
int output_size = input_size / (s * s); | |
//~ fprintf(stderr, "w, h, s: %d, %d, %d\n", w,h,s); | |
f32 *l = malloc(output_size); | |
f32 *l2 = malloc(output_size); | |
f32 *m_all = malloc(output_size); | |
f32 *r_all = malloc(output_size); | |
f32 *d = malloc(output_size); | |
// get l and l2, the input image and it's size are used only here | |
f32 divider_s = 1.0f / (s * s); | |
for (int y_start = 0; y_start < h2; ++y_start) { | |
for (int x_start = 0; x_start < w2; ++x_start) { | |
// x_start and y_start are coordinates for the subsampled image | |
int x = x_start * s; | |
int y = y_start * s; | |
f32 acc = 0; | |
f32 acc2 = 0; | |
for (int yc = y; yc < y + s; ++yc) { | |
for (int xc = x; xc < x + s; ++xc) { | |
// xc, yc are always inside bounds | |
f32 v = input[INDEX(xc, yc, w)]; | |
acc += v; | |
acc2 += v * v; | |
} | |
} | |
int i = INDEX(x_start, y_start, w2); | |
l[i] = acc * divider_s; | |
l2[i] = acc2 * divider_s; | |
} | |
} | |
f32 patch_sz_div = 1.0f / (SQR_NP * SQR_NP); | |
// Calculate m and r for all patch offsets | |
for (int y_start = 0; y_start < h2; ++y_start) { | |
for (int x_start = 0; x_start < w2; ++x_start) { | |
f32 acc_m = 0; | |
f32 acc_r_1 = 0; | |
f32 acc_r_2 = 0; | |
for (int y = y_start; y < y_start + SQR_NP; ++y) { | |
for (int x = x_start; x < x_start + SQR_NP; ++x) { | |
int xi = x; | |
int yi = y; | |
#if TILEABLE | |
xi = xi % w2; | |
yi = yi % h2; | |
#else | |
xi = MIN(xi, w2-1); | |
yi = MIN(yi, h2-1); | |
#endif | |
int i = INDEX(xi, yi, w2); | |
acc_m += l[i]; | |
acc_r_1 += l[i] * l[i]; | |
acc_r_2 += l2[i]; | |
} | |
} | |
f32 mv = acc_m * patch_sz_div; | |
f32 slv = acc_r_1 * patch_sz_div - mv * mv; | |
f32 shv = acc_r_2 * patch_sz_div - mv * mv; | |
int i = INDEX(x_start, y_start, w2); | |
m_all[i] = mv; | |
if (slv >= 0.000001f) // epsilon is 10⁻⁶ | |
r_all[i] = sqrtf(shv / slv); | |
else | |
r_all[i] = 2.0f; | |
} | |
} | |
// Calculate the average of the results of all possible patch sets | |
// d is the output | |
for (int y = 0; y < h2; ++y) { | |
for (int x = 0; x < w2; ++x) { | |
int i = INDEX(x, y, w2); | |
f32 liner_scaled = l[i]; | |
f32 acc_d = 0; | |
for (int y_offset = 0; y_offset > -SQR_NP; --y_offset) { | |
for (int x_offset = 0; x_offset > -SQR_NP; --x_offset) { | |
int x_patch_off = x + x_offset; | |
int y_patch_off = y + y_offset; | |
#if TILEABLE | |
x_patch_off = (x_patch_off + w2) % w2; | |
y_patch_off = (y_patch_off + h2) % h2; | |
#else | |
x_patch_off = MAX(x_patch_off, 0); | |
y_patch_off = MAX(y_patch_off, 0); | |
#endif | |
int i_patch_off = INDEX(x_patch_off, y_patch_off, w2); | |
f32 mv = m_all[i_patch_off]; | |
f32 rv = r_all[i_patch_off]; | |
acc_d += mv + rv * liner_scaled - rv * mv; | |
} | |
} | |
d[i] = acc_d * patch_sz_div; | |
} | |
} | |
// update the matrix | |
mat->data = d; | |
mat->w = w2; | |
mat->h = h2; | |
// tidy up | |
free(input); | |
free(l); | |
free(l2); | |
free(m_all); | |
free(r_all); | |
} | |
/*! \brief Function which calls functions for downscaling | |
* | |
* \param bild The image, it's content is changed when finished. | |
* \param downscale_factor Must be a natural number. | |
*/ | |
void downscale_an_image(struct image **bild, int downscale_factor) | |
{ | |
struct matrix *matrices = image_to_matrices(*bild); | |
for (int i = 0; i < PIXELBYTES; ++i) { | |
downscale_perc(&(matrices[i]), downscale_factor); | |
} | |
*bild = matrices_to_image(matrices); | |
} | |
int main(int argc, char **args) | |
{ | |
if (argc != 2) { | |
fprintf(stderr, "Missing arguments, usage: <cmdname> " | |
"<downscaling_factor>\n" | |
"A png image is read from Stdin and written to Stdout.\n"); | |
return EXIT_FAILURE; | |
} | |
int downscaling_factor = atoi(args[1]); | |
if (downscaling_factor < 2) { | |
fprintf(stderr, "Invalid downscaling factor: %d\n", | |
downscaling_factor); | |
return EXIT_FAILURE; | |
} | |
png_image bild; | |
memset(&bild, 0, sizeof(bild)); | |
bild.version = PNG_IMAGE_VERSION; | |
EXIT_PNG(png_image_begin_read_from_stdio(&bild, stdin)) | |
int w = bild.width; | |
int h = bild.height; | |
bild.format = PNG_FORMAT_RGBA; | |
struct pixel *pixels = malloc(w * h * 4); | |
EXIT_PNG(png_image_finish_read(&bild, NULL, pixels, 0, NULL)) | |
if (w % downscaling_factor || h % downscaling_factor) { | |
fprintf(stderr, "Image size is not a multiple of the downscaling " | |
"factor; %d,%d pixels will be discarded from the right,bottom " | |
"borders\n", w % downscaling_factor, h % downscaling_factor); | |
} | |
#if !GAMMA_INCORRECT | |
get_srgb2lin_map(); | |
#endif | |
struct image origpic = {w = w, h = h, pixels = pixels}; | |
struct image *newpic = &origpic; | |
downscale_an_image(&newpic, downscaling_factor); | |
bild.width = newpic->w; | |
bild.height = newpic->h; | |
free(pixels); | |
pixels = newpic->pixels; | |
free(newpic); | |
EXIT_PNG(png_image_write_to_stdio(&bild, stdout, 0, pixels, 0, NULL)); | |
free(pixels); // redundant free to feed valgrind | |
return EXIT_SUCCESS; | |
} |
Ultimately I ended up switching over to implementing this: https://dl.acm.org/doi/pdf/10.1145/2980179.2980239 since it's creative-commons/implementation is BSD 3-Clause.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I've added an SPDX-License-Identifier line to specify that it has MIT license now.
I think it's the IMAGE PROCESSING SYSTEM FOR DOWNSCALING IMAGES USING PERCEPTUAL DOWNSCALING METHOD WO2017017584A1 patent.
I haven't read the whole patent and don't fully understand the WHAT IS CLAIMED IS section on page 29.
It first mentions downscaling an input image to a second image and then upscaling this second image such that the result has a third of the resolution of the input image, which sounds different than the algorithm I have implemented.
It also mentions a recursive adjustment of values until an image perception value matches a value within a threshold, which sounds like an iterative optimisation, which is also not part of my implementation.
Perhaps the patent covers only a subset of the possible ways to implement the downscaling.