-
-
Save HybridDog/dd95a99d411972f030fb18543280ad60 to your computer and use it in GitHub Desktop.
// SPDX-License-Identifier: MIT | |
// | |
// This program contains an implementation of SSIM-based perceptual image | |
// downscaling for PNG images. | |
// The program can be twice as fast when compiled with -Ofast. | |
// The program behaviour can be adjusted with predefined preprocessor macros: | |
// * -DTILEABLE: Assume images wrap around at corners. This should be enabled | |
// when downscaling tileable textures. | |
// * -DGAMMA_INCORRECT: Downscale without applying the sRGB EOTF and OETF. | |
// When downscaling images with symbolic meaning, e.g. screenshots of text or | |
// function plots, disabled gamma correction can look subjectively better. | |
// | |
// The algorithm is based on "Perceptually Based Downscaling of Images" | |
// by A. Cengiz Öztireli and Markus Gross | |
// https://www.cl.cam.ac.uk/~aco41/Files/Sig15PerceptualDownscaling.pdf | |
#include <stdlib.h> // malloc, EXIT_* | |
#include <string.h> // memset | |
#include <math.h> | |
#include <png.h> | |
#define SQR_NP 2 // squareroot of the patch size, recommended: 2 | |
#define EXIT_PNG(F) if (!F) { \ | |
fprintf(stderr, "%s\n", bild.message); \ | |
return EXIT_FAILURE; \ | |
} | |
#define CLAMP(V, A, B) ((V) < (A) ? (A) : (V) > (B) ? (B) : (V)) | |
#define MIN(V, R) ((V) < (R) ? (V) : (R)) | |
#define MAX(V, R) ((V) > (R) ? (V) : (R)) | |
#define INDEX(X, Y, STRIDE) ((Y) * (STRIDE) + (X)) | |
#define u8 unsigned char | |
#define f32 float | |
struct pixel { | |
u8 r; | |
u8 g; | |
u8 b; | |
u8 a; | |
}; | |
#define PIXELBYTES 4 | |
struct matrix { | |
int w; | |
int h; | |
f32 *data; | |
}; | |
struct image { | |
int w; | |
int h; | |
struct pixel *pixels; | |
}; | |
#if !GAMMA_INCORRECT | |
/*! \brief linear to sRGB conversion | |
* | |
* taken from https://github.com/tobspr/GLSL-Color-Spaces/ | |
*/ | |
f32 linear_to_srgb(f32 v) | |
{ | |
if (v > 0.0031308f) | |
return 1.055f * powf(v, 1.0f / 2.4f) - 0.055f; | |
return 12.92f * v; | |
} | |
f32 srgb_to_linear(f32 v) | |
{ | |
if (v > 0.04045f) | |
return powf((v + 0.055f) / 1.055f, 2.4f); | |
return v / 12.92f; | |
} | |
/*! \brief sRGB to linear table for a slight performance increase | |
*/ | |
static f32 *srgb2lin; | |
static void get_srgb2lin_map() | |
{ | |
srgb2lin = malloc(256 * sizeof(f32)); | |
f32 divider = 1.0f / 255.0f; | |
for (int i = 0; i < 256; ++i) | |
srgb2lin[i] = srgb_to_linear(i * divider); | |
} | |
#endif // !GAMMA_INCORRECT | |
/*! \brief get y, cb and cr values each in [0;1] from u8 r, g and b values | |
* | |
* there's gamma correction, | |
* see http://www.ericbrasseur.org/gamma.html?i=1#Assume_a_gamma_of_2.2 | |
* 0.5 is added to cb and cr to have them in [0;1] | |
*/ | |
static void rgb2ycbcr(u8 or, u8 og, u8 ob, f32 *y, f32 *cb, f32 *cr) | |
{ | |
#if GAMMA_INCORRECT | |
f32 r = or / 255.0f; | |
f32 g = og / 255.0f; | |
f32 b = ob / 255.0f; | |
#else | |
f32 r = srgb2lin[or]; | |
f32 g = srgb2lin[og]; | |
f32 b = srgb2lin[ob]; | |
#endif | |
*y = (0.299f * r + 0.587f * g + 0.114f * b); | |
*cb = (-0.168736f * r - 0.331264f * g + 0.5f * b) + 0.5f; | |
*cr = (0.5f * r - 0.418688f * g - 0.081312f * b) + 0.5f; | |
} | |
/*! \brief the inverse of the function above | |
* | |
* numbers from http://www.equasys.de/colorconversion.html | |
* if values are too big or small, they're clamped | |
*/ | |
static void ycbcr2rgb(f32 y, f32 cb, f32 cr, u8 *r, u8 *g, u8 *b) | |
{ | |
f32 vr = (y + 1.402f * (cr - 0.5f)); | |
f32 vg = (y - 0.344136f * (cb - 0.5f) - 0.714136f * (cr - 0.5f)); | |
f32 vb = (y + 1.772f * (cb - 0.5f)); | |
#if !GAMMA_INCORRECT | |
vr = linear_to_srgb(vr); | |
vg = linear_to_srgb(vg); | |
vb = linear_to_srgb(vb); | |
#endif | |
*r = CLAMP(vr * 255.0f, 0, 255); | |
*g = CLAMP(vg * 255.0f, 0, 255); | |
*b = CLAMP(vb * 255.0f, 0, 255); | |
} | |
/*! \brief Convert an rgba image to 4 ycbcr matrices with values in [0, 1] | |
*/ | |
static struct matrix *image_to_matrices(struct image *bild) | |
{ | |
int w = bild->w; | |
int h = bild->h; | |
struct matrix *matrices = malloc( | |
PIXELBYTES * sizeof(struct matrix)); | |
for (int i = 0; i < PIXELBYTES; ++i) { | |
matrices[i].w = w; | |
matrices[i].h = h; | |
matrices[i].data = malloc(w * h * sizeof(f32)); | |
} | |
for (int i = 0; i < w * h; ++i) { | |
struct pixel px = bild->pixels[i]; | |
// put y, cb, cr and transpatency into the matrices | |
rgb2ycbcr(px.r, px.g, px.b, | |
&matrices[0].data[i], &matrices[1].data[i], &matrices[2].data[i]); | |
f32 divider = 1.0f / 255.0f; | |
matrices[3].data[i] = px.a * divider; | |
} | |
return matrices; | |
} | |
/*! \brief Convert 4 matrices to an rgba image | |
* | |
* Note that matrices becomes freed. | |
*/ | |
static struct image *matrices_to_image(struct matrix *matrices) | |
{ | |
struct image *bild = malloc(sizeof(struct image)); | |
int w = matrices[0].w; | |
int h = matrices[0].h; | |
bild->w = w; | |
bild->h = h; | |
struct pixel *pixels = malloc(w * h * PIXELBYTES); | |
for (int i = 0; i < w * h; ++i) { | |
struct pixel *px = &pixels[i]; | |
ycbcr2rgb(matrices[0].data[i], matrices[1].data[i], matrices[2].data[i], | |
&px->r, &px->g, &px->b); | |
f32 a = matrices[3].data[i] * 255; | |
px->a = CLAMP(a, 0, 255); | |
} | |
for (int i = 0; i < PIXELBYTES; ++i) { | |
free(matrices[i].data); | |
} | |
free(matrices); | |
bild->pixels = pixels; | |
return bild; | |
} | |
/*! \brief The actual downscaling algorithm | |
* | |
* \param mat The 4 matrices obtained form image_to_matrices. | |
* \param s The factor by which the image should become downscaled. | |
*/ | |
static void downscale_perc(struct matrix *mat, int s) | |
{ | |
// preparation | |
int w = mat->w; // input width | |
int h = mat->h; | |
f32 *input = mat->data; | |
int w2 = w / s; // output width | |
int h2 = h / s; | |
int input_size = w * h * sizeof(f32); | |
int output_size = input_size / (s * s); | |
//~ fprintf(stderr, "w, h, s: %d, %d, %d\n", w,h,s); | |
f32 *l = malloc(output_size); | |
f32 *l2 = malloc(output_size); | |
f32 *m_all = malloc(output_size); | |
f32 *r_all = malloc(output_size); | |
f32 *d = malloc(output_size); | |
// get l and l2, the input image and it's size are used only here | |
f32 divider_s = 1.0f / (s * s); | |
for (int y_start = 0; y_start < h2; ++y_start) { | |
for (int x_start = 0; x_start < w2; ++x_start) { | |
// x_start and y_start are coordinates for the subsampled image | |
int x = x_start * s; | |
int y = y_start * s; | |
f32 acc = 0; | |
f32 acc2 = 0; | |
for (int yc = y; yc < y + s; ++yc) { | |
for (int xc = x; xc < x + s; ++xc) { | |
// xc, yc are always inside bounds | |
f32 v = input[INDEX(xc, yc, w)]; | |
acc += v; | |
acc2 += v * v; | |
} | |
} | |
int i = INDEX(x_start, y_start, w2); | |
l[i] = acc * divider_s; | |
l2[i] = acc2 * divider_s; | |
} | |
} | |
f32 patch_sz_div = 1.0f / (SQR_NP * SQR_NP); | |
// Calculate m and r for all patch offsets | |
for (int y_start = 0; y_start < h2; ++y_start) { | |
for (int x_start = 0; x_start < w2; ++x_start) { | |
f32 acc_m = 0; | |
f32 acc_r_1 = 0; | |
f32 acc_r_2 = 0; | |
for (int y = y_start; y < y_start + SQR_NP; ++y) { | |
for (int x = x_start; x < x_start + SQR_NP; ++x) { | |
int xi = x; | |
int yi = y; | |
#if TILEABLE | |
xi = xi % w2; | |
yi = yi % h2; | |
#else | |
xi = MIN(xi, w2-1); | |
yi = MIN(yi, h2-1); | |
#endif | |
int i = INDEX(xi, yi, w2); | |
acc_m += l[i]; | |
acc_r_1 += l[i] * l[i]; | |
acc_r_2 += l2[i]; | |
} | |
} | |
f32 mv = acc_m * patch_sz_div; | |
f32 slv = acc_r_1 * patch_sz_div - mv * mv; | |
f32 shv = acc_r_2 * patch_sz_div - mv * mv; | |
int i = INDEX(x_start, y_start, w2); | |
m_all[i] = mv; | |
if (slv >= 0.000001f) // epsilon is 10⁻⁶ | |
r_all[i] = sqrtf(shv / slv); | |
else | |
r_all[i] = 2.0f; | |
} | |
} | |
// Calculate the average of the results of all possible patch sets | |
// d is the output | |
for (int y = 0; y < h2; ++y) { | |
for (int x = 0; x < w2; ++x) { | |
int i = INDEX(x, y, w2); | |
f32 liner_scaled = l[i]; | |
f32 acc_d = 0; | |
for (int y_offset = 0; y_offset > -SQR_NP; --y_offset) { | |
for (int x_offset = 0; x_offset > -SQR_NP; --x_offset) { | |
int x_patch_off = x + x_offset; | |
int y_patch_off = y + y_offset; | |
#if TILEABLE | |
x_patch_off = (x_patch_off + w2) % w2; | |
y_patch_off = (y_patch_off + h2) % h2; | |
#else | |
x_patch_off = MAX(x_patch_off, 0); | |
y_patch_off = MAX(y_patch_off, 0); | |
#endif | |
int i_patch_off = INDEX(x_patch_off, y_patch_off, w2); | |
f32 mv = m_all[i_patch_off]; | |
f32 rv = r_all[i_patch_off]; | |
acc_d += mv + rv * liner_scaled - rv * mv; | |
} | |
} | |
d[i] = acc_d * patch_sz_div; | |
} | |
} | |
// update the matrix | |
mat->data = d; | |
mat->w = w2; | |
mat->h = h2; | |
// tidy up | |
free(input); | |
free(l); | |
free(l2); | |
free(m_all); | |
free(r_all); | |
} | |
/*! \brief Function which calls functions for downscaling | |
* | |
* \param bild The image, it's content is changed when finished. | |
* \param downscale_factor Must be a natural number. | |
*/ | |
void downscale_an_image(struct image **bild, int downscale_factor) | |
{ | |
struct matrix *matrices = image_to_matrices(*bild); | |
for (int i = 0; i < PIXELBYTES; ++i) { | |
downscale_perc(&(matrices[i]), downscale_factor); | |
} | |
*bild = matrices_to_image(matrices); | |
} | |
int main(int argc, char **args) | |
{ | |
if (argc != 2) { | |
fprintf(stderr, "Missing arguments, usage: <cmdname> " | |
"<downscaling_factor>\n" | |
"A png image is read from Stdin and written to Stdout.\n"); | |
return EXIT_FAILURE; | |
} | |
int downscaling_factor = atoi(args[1]); | |
if (downscaling_factor < 2) { | |
fprintf(stderr, "Invalid downscaling factor: %d\n", | |
downscaling_factor); | |
return EXIT_FAILURE; | |
} | |
png_image bild; | |
memset(&bild, 0, sizeof(bild)); | |
bild.version = PNG_IMAGE_VERSION; | |
EXIT_PNG(png_image_begin_read_from_stdio(&bild, stdin)) | |
int w = bild.width; | |
int h = bild.height; | |
bild.format = PNG_FORMAT_RGBA; | |
struct pixel *pixels = malloc(w * h * 4); | |
EXIT_PNG(png_image_finish_read(&bild, NULL, pixels, 0, NULL)) | |
if (w % downscaling_factor || h % downscaling_factor) { | |
fprintf(stderr, "Image size is not a multiple of the downscaling " | |
"factor; %d,%d pixels will be discarded from the right,bottom " | |
"borders\n", w % downscaling_factor, h % downscaling_factor); | |
} | |
#if !GAMMA_INCORRECT | |
get_srgb2lin_map(); | |
#endif | |
struct image origpic = {w = w, h = h, pixels = pixels}; | |
struct image *newpic = &origpic; | |
downscale_an_image(&newpic, downscaling_factor); | |
bild.width = newpic->w; | |
bild.height = newpic->h; | |
free(pixels); | |
pixels = newpic->pixels; | |
free(newpic); | |
EXIT_PNG(png_image_write_to_stdio(&bild, stdout, 0, pixels, 0, NULL)); | |
free(pixels); // redundant free to feed valgrind | |
return EXIT_SUCCESS; | |
} |
Awesome! Yeah, I ended up rewriting it in C# for Unity and customizing a bunch of it (mostly to make it more appropriate for data textures). I've reached out to the original author of the paper since I saw someone mentioning a patent, but I haven't been able to find any other evidence of that?
This+SSAA might be excellent for VR purposes actually, someone should give that a shot. I forget what exactly SteamVR uses.
Would you mind slapping a MIT license precursor up at the top just to square everything away?
Just so you're aware, this does appear to be patented, held by a company called Percim: https://percim.com/#intro
I've added an SPDX-License-Identifier line to specify that it has MIT license now.
Just so you're aware, this does appear to be patented, held by a company called Percim: https://percim.com/#intro
I think it's the IMAGE PROCESSING SYSTEM FOR DOWNSCALING IMAGES USING PERCEPTUAL DOWNSCALING METHOD WO2017017584A1 patent.
I haven't read the whole patent and don't fully understand the WHAT IS CLAIMED IS section on page 29.
It first mentions downscaling an input image to a second image and then upscaling this second image such that the result has a third of the resolution of the input image, which sounds different than the algorithm I have implemented.
It also mentions a recursive adjustment of values until an image perception value matches a value within a threshold, which sounds like an iterative optimisation, which is also not part of my implementation.
Perhaps the patent covers only a subset of the possible ways to implement the downscaling.
Ultimately I ended up switching over to implementing this: https://dl.acm.org/doi/pdf/10.1145/2980179.2980239 since it's creative-commons/implementation is BSD 3-Clause.
Yes, I'd be glad to see more widespread use of the algorithm.
I've also modified it for mipmap generation in Minetest: luanti-org/luanti#14144
If I remember correctly, my first intent was to have a good image downscaling algorithm for myself, so I've implemented it in C and later uploaded it to this gist.
In my opinion the algorithm also works well for SSAA, so I've also implemented it in GLSL for Minetest: luanti-org/luanti#13959
The algorithm is explained in this paper: "Perceptually Based Downscaling of Images" by A. Cengiz Öztireli and Markus Gross