-
-
Save HybridDog/dd95a99d411972f030fb18543280ad60 to your computer and use it in GitHub Desktop.
// SPDX-License-Identifier: MIT | |
// | |
// This program contains an implementation of SSIM-based perceptual image | |
// downscaling for PNG images. | |
// The program can be twice as fast when compiled with -Ofast. | |
// The program behaviour can be adjusted with predefined preprocessor macros: | |
// * -DTILEABLE: Assume images wrap around at corners. This should be enabled | |
// when downscaling tileable textures. | |
// * -DGAMMA_INCORRECT: Downscale without applying the sRGB EOTF and OETF. | |
// When downscaling images with symbolic meaning, e.g. screenshots of text or | |
// function plots, disabled gamma correction can look subjectively better. | |
// | |
// The algorithm is based on "Perceptually Based Downscaling of Images" | |
// by A. Cengiz Öztireli and Markus Gross | |
// https://www.cl.cam.ac.uk/~aco41/Files/Sig15PerceptualDownscaling.pdf | |
#include <stdlib.h> // malloc, EXIT_* | |
#include <string.h> // memset | |
#include <math.h> | |
#include <png.h> | |
#define SQR_NP 2 // squareroot of the patch size, recommended: 2 | |
#define EXIT_PNG(F) if (!F) { \ | |
fprintf(stderr, "%s\n", bild.message); \ | |
return EXIT_FAILURE; \ | |
} | |
#define CLAMP(V, A, B) ((V) < (A) ? (A) : (V) > (B) ? (B) : (V)) | |
#define MIN(V, R) ((V) < (R) ? (V) : (R)) | |
#define MAX(V, R) ((V) > (R) ? (V) : (R)) | |
#define INDEX(X, Y, STRIDE) ((Y) * (STRIDE) + (X)) | |
#define u8 unsigned char | |
#define f32 float | |
struct pixel { | |
u8 r; | |
u8 g; | |
u8 b; | |
u8 a; | |
}; | |
#define PIXELBYTES 4 | |
struct matrix { | |
int w; | |
int h; | |
f32 *data; | |
}; | |
struct image { | |
int w; | |
int h; | |
struct pixel *pixels; | |
}; | |
#if !GAMMA_INCORRECT | |
/*! \brief linear to sRGB conversion | |
* | |
* taken from https://github.com/tobspr/GLSL-Color-Spaces/ | |
*/ | |
f32 linear_to_srgb(f32 v) | |
{ | |
if (v > 0.0031308f) | |
return 1.055f * powf(v, 1.0f / 2.4f) - 0.055f; | |
return 12.92f * v; | |
} | |
f32 srgb_to_linear(f32 v) | |
{ | |
if (v > 0.04045f) | |
return powf((v + 0.055f) / 1.055f, 2.4f); | |
return v / 12.92f; | |
} | |
/*! \brief sRGB to linear table for a slight performance increase | |
*/ | |
static f32 *srgb2lin; | |
static void get_srgb2lin_map() | |
{ | |
srgb2lin = malloc(256 * sizeof(f32)); | |
f32 divider = 1.0f / 255.0f; | |
for (int i = 0; i < 256; ++i) | |
srgb2lin[i] = srgb_to_linear(i * divider); | |
} | |
#endif // !GAMMA_INCORRECT | |
/*! \brief get y, cb and cr values each in [0;1] from u8 r, g and b values | |
* | |
* there's gamma correction, | |
* see http://www.ericbrasseur.org/gamma.html?i=1#Assume_a_gamma_of_2.2 | |
* 0.5 is added to cb and cr to have them in [0;1] | |
*/ | |
static void rgb2ycbcr(u8 or, u8 og, u8 ob, f32 *y, f32 *cb, f32 *cr) | |
{ | |
#if GAMMA_INCORRECT | |
f32 r = or / 255.0f; | |
f32 g = og / 255.0f; | |
f32 b = ob / 255.0f; | |
#else | |
f32 r = srgb2lin[or]; | |
f32 g = srgb2lin[og]; | |
f32 b = srgb2lin[ob]; | |
#endif | |
*y = (0.299f * r + 0.587f * g + 0.114f * b); | |
*cb = (-0.168736f * r - 0.331264f * g + 0.5f * b) + 0.5f; | |
*cr = (0.5f * r - 0.418688f * g - 0.081312f * b) + 0.5f; | |
} | |
/*! \brief the inverse of the function above | |
* | |
* numbers from http://www.equasys.de/colorconversion.html | |
* if values are too big or small, they're clamped | |
*/ | |
static void ycbcr2rgb(f32 y, f32 cb, f32 cr, u8 *r, u8 *g, u8 *b) | |
{ | |
f32 vr = (y + 1.402f * (cr - 0.5f)); | |
f32 vg = (y - 0.344136f * (cb - 0.5f) - 0.714136f * (cr - 0.5f)); | |
f32 vb = (y + 1.772f * (cb - 0.5f)); | |
#if !GAMMA_INCORRECT | |
vr = linear_to_srgb(vr); | |
vg = linear_to_srgb(vg); | |
vb = linear_to_srgb(vb); | |
#endif | |
*r = CLAMP(vr * 255.0f, 0, 255); | |
*g = CLAMP(vg * 255.0f, 0, 255); | |
*b = CLAMP(vb * 255.0f, 0, 255); | |
} | |
/*! \brief Convert an rgba image to 4 ycbcr matrices with values in [0, 1] | |
*/ | |
static struct matrix *image_to_matrices(struct image *bild) | |
{ | |
int w = bild->w; | |
int h = bild->h; | |
struct matrix *matrices = malloc( | |
PIXELBYTES * sizeof(struct matrix)); | |
for (int i = 0; i < PIXELBYTES; ++i) { | |
matrices[i].w = w; | |
matrices[i].h = h; | |
matrices[i].data = malloc(w * h * sizeof(f32)); | |
} | |
for (int i = 0; i < w * h; ++i) { | |
struct pixel px = bild->pixels[i]; | |
// put y, cb, cr and transpatency into the matrices | |
rgb2ycbcr(px.r, px.g, px.b, | |
&matrices[0].data[i], &matrices[1].data[i], &matrices[2].data[i]); | |
f32 divider = 1.0f / 255.0f; | |
matrices[3].data[i] = px.a * divider; | |
} | |
return matrices; | |
} | |
/*! \brief Convert 4 matrices to an rgba image | |
* | |
* Note that matrices becomes freed. | |
*/ | |
static struct image *matrices_to_image(struct matrix *matrices) | |
{ | |
struct image *bild = malloc(sizeof(struct image)); | |
int w = matrices[0].w; | |
int h = matrices[0].h; | |
bild->w = w; | |
bild->h = h; | |
struct pixel *pixels = malloc(w * h * PIXELBYTES); | |
for (int i = 0; i < w * h; ++i) { | |
struct pixel *px = &pixels[i]; | |
ycbcr2rgb(matrices[0].data[i], matrices[1].data[i], matrices[2].data[i], | |
&px->r, &px->g, &px->b); | |
f32 a = matrices[3].data[i] * 255; | |
px->a = CLAMP(a, 0, 255); | |
} | |
for (int i = 0; i < PIXELBYTES; ++i) { | |
free(matrices[i].data); | |
} | |
free(matrices); | |
bild->pixels = pixels; | |
return bild; | |
} | |
/*! \brief The actual downscaling algorithm | |
* | |
* \param mat The 4 matrices obtained form image_to_matrices. | |
* \param s The factor by which the image should become downscaled. | |
*/ | |
static void downscale_perc(struct matrix *mat, int s) | |
{ | |
// preparation | |
int w = mat->w; // input width | |
int h = mat->h; | |
f32 *input = mat->data; | |
int w2 = w / s; // output width | |
int h2 = h / s; | |
int input_size = w * h * sizeof(f32); | |
int output_size = input_size / (s * s); | |
//~ fprintf(stderr, "w, h, s: %d, %d, %d\n", w,h,s); | |
f32 *l = malloc(output_size); | |
f32 *l2 = malloc(output_size); | |
f32 *m_all = malloc(output_size); | |
f32 *r_all = malloc(output_size); | |
f32 *d = malloc(output_size); | |
// get l and l2, the input image and it's size are used only here | |
f32 divider_s = 1.0f / (s * s); | |
for (int y_start = 0; y_start < h2; ++y_start) { | |
for (int x_start = 0; x_start < w2; ++x_start) { | |
// x_start and y_start are coordinates for the subsampled image | |
int x = x_start * s; | |
int y = y_start * s; | |
f32 acc = 0; | |
f32 acc2 = 0; | |
for (int yc = y; yc < y + s; ++yc) { | |
for (int xc = x; xc < x + s; ++xc) { | |
// xc, yc are always inside bounds | |
f32 v = input[INDEX(xc, yc, w)]; | |
acc += v; | |
acc2 += v * v; | |
} | |
} | |
int i = INDEX(x_start, y_start, w2); | |
l[i] = acc * divider_s; | |
l2[i] = acc2 * divider_s; | |
} | |
} | |
f32 patch_sz_div = 1.0f / (SQR_NP * SQR_NP); | |
// Calculate m and r for all patch offsets | |
for (int y_start = 0; y_start < h2; ++y_start) { | |
for (int x_start = 0; x_start < w2; ++x_start) { | |
f32 acc_m = 0; | |
f32 acc_r_1 = 0; | |
f32 acc_r_2 = 0; | |
for (int y = y_start; y < y_start + SQR_NP; ++y) { | |
for (int x = x_start; x < x_start + SQR_NP; ++x) { | |
int xi = x; | |
int yi = y; | |
#if TILEABLE | |
xi = xi % w2; | |
yi = yi % h2; | |
#else | |
xi = MIN(xi, w2-1); | |
yi = MIN(yi, h2-1); | |
#endif | |
int i = INDEX(xi, yi, w2); | |
acc_m += l[i]; | |
acc_r_1 += l[i] * l[i]; | |
acc_r_2 += l2[i]; | |
} | |
} | |
f32 mv = acc_m * patch_sz_div; | |
f32 slv = acc_r_1 * patch_sz_div - mv * mv; | |
f32 shv = acc_r_2 * patch_sz_div - mv * mv; | |
int i = INDEX(x_start, y_start, w2); | |
m_all[i] = mv; | |
if (slv >= 0.000001f) // epsilon is 10⁻⁶ | |
r_all[i] = sqrtf(shv / slv); | |
else | |
r_all[i] = 2.0f; | |
} | |
} | |
// Calculate the average of the results of all possible patch sets | |
// d is the output | |
for (int y = 0; y < h2; ++y) { | |
for (int x = 0; x < w2; ++x) { | |
int i = INDEX(x, y, w2); | |
f32 liner_scaled = l[i]; | |
f32 acc_d = 0; | |
for (int y_offset = 0; y_offset > -SQR_NP; --y_offset) { | |
for (int x_offset = 0; x_offset > -SQR_NP; --x_offset) { | |
int x_patch_off = x + x_offset; | |
int y_patch_off = y + y_offset; | |
#if TILEABLE | |
x_patch_off = (x_patch_off + w2) % w2; | |
y_patch_off = (y_patch_off + h2) % h2; | |
#else | |
x_patch_off = MAX(x_patch_off, 0); | |
y_patch_off = MAX(y_patch_off, 0); | |
#endif | |
int i_patch_off = INDEX(x_patch_off, y_patch_off, w2); | |
f32 mv = m_all[i_patch_off]; | |
f32 rv = r_all[i_patch_off]; | |
acc_d += mv + rv * liner_scaled - rv * mv; | |
} | |
} | |
d[i] = acc_d * patch_sz_div; | |
} | |
} | |
// update the matrix | |
mat->data = d; | |
mat->w = w2; | |
mat->h = h2; | |
// tidy up | |
free(input); | |
free(l); | |
free(l2); | |
free(m_all); | |
free(r_all); | |
} | |
/*! \brief Function which calls functions for downscaling | |
* | |
* \param bild The image, it's content is changed when finished. | |
* \param downscale_factor Must be a natural number. | |
*/ | |
void downscale_an_image(struct image **bild, int downscale_factor) | |
{ | |
struct matrix *matrices = image_to_matrices(*bild); | |
for (int i = 0; i < PIXELBYTES; ++i) { | |
downscale_perc(&(matrices[i]), downscale_factor); | |
} | |
*bild = matrices_to_image(matrices); | |
} | |
int main(int argc, char **args) | |
{ | |
if (argc != 2) { | |
fprintf(stderr, "Missing arguments, usage: <cmdname> " | |
"<downscaling_factor>\n" | |
"A png image is read from Stdin and written to Stdout.\n"); | |
return EXIT_FAILURE; | |
} | |
int downscaling_factor = atoi(args[1]); | |
if (downscaling_factor < 2) { | |
fprintf(stderr, "Invalid downscaling factor: %d\n", | |
downscaling_factor); | |
return EXIT_FAILURE; | |
} | |
png_image bild; | |
memset(&bild, 0, sizeof(bild)); | |
bild.version = PNG_IMAGE_VERSION; | |
EXIT_PNG(png_image_begin_read_from_stdio(&bild, stdin)) | |
int w = bild.width; | |
int h = bild.height; | |
bild.format = PNG_FORMAT_RGBA; | |
struct pixel *pixels = malloc(w * h * 4); | |
EXIT_PNG(png_image_finish_read(&bild, NULL, pixels, 0, NULL)) | |
if (w % downscaling_factor || h % downscaling_factor) { | |
fprintf(stderr, "Image size is not a multiple of the downscaling " | |
"factor; %d,%d pixels will be discarded from the right,bottom " | |
"borders\n", w % downscaling_factor, h % downscaling_factor); | |
} | |
#if !GAMMA_INCORRECT | |
get_srgb2lin_map(); | |
#endif | |
struct image origpic = {w = w, h = h, pixels = pixels}; | |
struct image *newpic = &origpic; | |
downscale_an_image(&newpic, downscaling_factor); | |
bild.width = newpic->w; | |
bild.height = newpic->h; | |
free(pixels); | |
pixels = newpic->pixels; | |
free(newpic); | |
EXIT_PNG(png_image_write_to_stdio(&bild, stdout, 0, pixels, 0, NULL)); | |
free(pixels); // redundant free to feed valgrind | |
return EXIT_SUCCESS; | |
} |
I've simplified the algorithm; I don't understand why the tileable case differs from the previous one
Heya @HybridDog -
Tried to find your email but didn't have any luck - Would you be okay with us shipping a derivative of this in VRChat? From digging around it looks like the original intent of this was to ship in GDK-PixBuf (and therefore MIT), but it never actually landed. I was able to hackily use this for mipmap generation with super promising results!
Yes, I'd be glad to see more widespread use of the algorithm.
I've also modified it for mipmap generation in Minetest: luanti-org/luanti#14144
If I remember correctly, my first intent was to have a good image downscaling algorithm for myself, so I've implemented it in C and later uploaded it to this gist.
In my opinion the algorithm also works well for SSAA, so I've also implemented it in GLSL for Minetest: luanti-org/luanti#13959
The algorithm is explained in this paper: "Perceptually Based Downscaling of Images" by A. Cengiz Öztireli and Markus Gross
Awesome! Yeah, I ended up rewriting it in C# for Unity and customizing a bunch of it (mostly to make it more appropriate for data textures). I've reached out to the original author of the paper since I saw someone mentioning a patent, but I haven't been able to find any other evidence of that?
This+SSAA might be excellent for VR purposes actually, someone should give that a shot. I forget what exactly SteamVR uses.
Would you mind slapping a MIT license precursor up at the top just to square everything away?
Just so you're aware, this does appear to be patented, held by a company called Percim: https://percim.com/#intro
I've added an SPDX-License-Identifier line to specify that it has MIT license now.
Just so you're aware, this does appear to be patented, held by a company called Percim: https://percim.com/#intro
I think it's the IMAGE PROCESSING SYSTEM FOR DOWNSCALING IMAGES USING PERCEPTUAL DOWNSCALING METHOD WO2017017584A1 patent.
I haven't read the whole patent and don't fully understand the WHAT IS CLAIMED IS section on page 29.
It first mentions downscaling an input image to a second image and then upscaling this second image such that the result has a third of the resolution of the input image, which sounds different than the algorithm I have implemented.
It also mentions a recursive adjustment of values until an image perception value matches a value within a threshold, which sounds like an iterative optimisation, which is also not part of my implementation.
Perhaps the patent covers only a subset of the possible ways to implement the downscaling.
Ultimately I ended up switching over to implementing this: https://dl.acm.org/doi/pdf/10.1145/2980179.2980239 since it's creative-commons/implementation is BSD 3-Clause.
Mi 29. Aug 13:27:29 CEST 2018