Skip to content

Instantly share code, notes, and snippets.

@jef-sure
Last active February 20, 2026 17:06
Show Gist options
  • Select an option

  • Save jef-sure/64fb93774aca34c92cd5b1807eabcddd to your computer and use it in GitHub Desktop.

Select an option

Save jef-sure/64fb93774aca34c92cd5b1807eabcddd to your computer and use it in GitHub Desktop.
/* -*- C++ -*-
* File: dht_nn_demosaic.cpp
* Copyright 2026 Anton Petrusevich
* Created: Tue Feb 10, 2026
*
* This code is licensed under one of two licenses as you choose:
*
* 1. GNU LESSER GENERAL PUBLIC LICENSE version 2.1
* (See file LICENSE.LGPL provided in LibRaw distribution archive for
* details).
*
* 2. COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
* (See file LICENSE.CDDL provided in LibRaw distribution archive for
* details).
*
*/
#include "../../internal/dmp_include.h"
#include "../../internal/dht_nn_network.h"
#include "../../internal/dht_nn_cl.h"
#include <cmath>
#include <vector>
#include <algorithm>
#include <random>
#include <string>
/* Full-scale value of a 16-bit raw sample. */
const float RawChannelMaximum = 65535.0f;
/* Normalization divisor for the log2-encoded float domain: raw values are
 * mapped through log2 (see raw_to_float) and divided by 16 = log2(65536),
 * so the working range is approximately [0, 1]. */
const float FloatChannelMaximum = 16.0f;
/* Compose the weights/checkpoint filename for a network configuration,
 * e.g. "dht_nn_v10_p5_L4.weights" or "dht_nn_v10_p5_L4.ckpt".
 * The name encodes the format version, patch radius and hidden-layer
 * count so that incompatible files are never loaded by mistake. */
static std::string dht_nn_filename(const DHTNNNetwork &net, const char *ext)
{
  char name[128];
  snprintf(name, sizeof(name), "dht_nn_v%u_p%d_L%d.%s", DHTNNNetwork::VERSION,
           net.patch_r, net.n_hidden, ext);
  return std::string(name);
}
/* Convert a 16-bit raw sample into the normalized log2 working domain.
 * Values below RT go through a scaled sqrt instead of log2 so the curve
 * stays finite near zero and is continuous at raw == RT:
 * k * sqrt(8) == log2(8) == 3 with k = 3 / sqrt(8) = 1.06066...
 * Returns a value in roughly [0, 1] after division by FloatChannelMaximum.
 * Fix: the crossover constant RT was declared but the magic literal 8 was
 * used in the comparison; use RT (same value, same behavior). */
static inline float raw_to_float(ushort raw)
{
  const static ushort RT = 8;               // sqrt/log2 crossover point
  const static float k = 1.06066017177982f; // 3 / sqrt(8)
  return (raw < RT ? k * sqrtf(raw) : log2f(raw)) / FloatChannelMaximum;
}
/* Inverse of raw_to_float(): map a normalized log2-domain value back to a
 * 16-bit raw sample. Below the crossover (3.0 in un-normalized units) the
 * sqrt branch is inverted by squaring; above it, exp2 undoes the log2. */
static inline ushort float_to_raw(float val)
{
  const static float k = 1.06066017177982f;
  const static float threshold = 3.0f; // k * sqrt(8) = log2(8) = 3.0
  float u = val * FloatChannelMaximum;
  if (u < threshold)
    return (ushort)((u * u) / (k * k));
  return (ushort)exp2f(u);
}
/* Absolute difference of two log-domain values. */
static inline float calc_dist(float a, float b)
{
  return fabsf(b - a);
}
/* Reciprocal of a distance, capped at 1e6 for (near-)zero distances so
 * weight computations never divide by zero or overflow. */
static inline float safe_inv_dist(float d)
{
  if (d > 1e-6f)
    return 1.f / d;
  return 1e6f;
}
/*
 * Working state for the DHT-NN demosaic: a margin-padded float copy of the
 * Bayer mosaic (log2-normalized via raw_to_float()), a per-pixel direction
 * map, per-channel value bounds, and optional buffers used by the
 * neural-network refinement stages.
 */
struct DHTNNF
{
/* Padded working-buffer dimensions: image size plus 2*margin each way. */
int nr_height, nr_width;
static const int nr_topmargin = 4, nr_leftmargin = 4;
/* Padded working image, one float per RGB channel per pixel. */
float (*nraw)[3];
/* Per-channel extrema of the nonzero input samples (set in the ctor,
 * used to clamp interpolation results). */
float channel_maximum[3];
float channel_minimum[3];
LibRaw &libraw;
/* Per-pixel flags stored in ndir. */
enum
{
HVSH = 1,  /* sharp horizontal/vertical edge */
HOR = 2,   /* horizontal gradient direction */
VER = 4,   /* vertical gradient direction */
HORSH = HOR | HVSH,
VERSH = VER | HVSH,
DIASH = 8, /* sharp diagonal edge */
LURD = 16, /* left-up -> right-down diagonal */
RULD = 32, /* right-up -> left-down diagonal */
LURDSH = LURD | DIASH,
RULDSH = RULD | DIASH,
HOT = 64   /* hot (defective) pixel, see hide_hots() */
};
/* Hot-pixel confirmation: |a - b| > 0.375 in the normalized log2 domain
 * (0.375 * 16 = 6 stops of brightness difference).
 * NOTE(review): keeps the historical "Treshold" spelling. */
static inline bool TresholdHot(float a, float b) throw()
{
return fabsf(a - b) > 0.375f; // 6 stops between values
}
/* Sharpness threshold for the horizontal/vertical direction metric. */
static inline float Tg(void) throw()
{
return 0.5f; // 8 stops between values
}
static inline float T(void) throw()
{
// Diagonal edge sharpness threshold. In the original DHT algorithm (integer raw space),
// this was approximately sqrt(2) ≈ 1.4 representing a brightness ratio threshold.
// In normalized float space (0-1, log₂-encoded), 0.1 corresponds to ~1.6 stops or 3× brightness.
return 0.1f;
}
/* Per-pixel direction/state map (flags from the enum above). */
char *ndir;
float (*nn_feat)[16]; // precomputed per-pixel features: nraw[3], bayer[3], hue_h, hue_v, hue_change_h, hue_change_v, ld_ud, ld_lr, ld_lu, ld_ld, ld_ru, ld_rd
bool *nn_green_written; // true for pixels where nn_blend_green already wrote green (make_gline skips these)
/* Linear index of (row, col) inside the padded working buffers. */
inline int nr_offset(int row, int col) throw() { return (row * nr_width + col); }
/* Horizontal/vertical direction metric at a non-green site (known color
 * kc). Compares green-vs-color consistency above/below against
 * left/right; returns HOR or VER, OR-ed with HVSH when the two metrics
 * differ by more than Tg(). */
int get_hv_grb(int x, int y, int kc)
{
float hv1 = nraw[nr_offset(y - 1, x)][1] - (nraw[nr_offset(y - 2, x)][kc] + nraw[nr_offset(y, x)][kc]) * 0.5f;
float hv2 = nraw[nr_offset(y + 1, x)][1] - (nraw[nr_offset(y + 2, x)][kc] + nraw[nr_offset(y, x)][kc]) * 0.5f;
float kv = calc_dist(hv1, hv2) *
calc_dist(nraw[nr_offset(y, x)][kc] * nraw[nr_offset(y, x)][kc], (nraw[nr_offset(y - 2, x)][kc] * nraw[nr_offset(y + 2, x)][kc]));
// raise the metric to the 8th power to strongly favor the smoother axis
kv *= kv;
kv *= kv;
kv *= kv;
float dv = kv * calc_dist(nraw[nr_offset(y - 3, x)][1] * nraw[nr_offset(y + 3, x)][1], nraw[nr_offset(y - 1, x)][1] * nraw[nr_offset(y + 1, x)][1]);
float hh1 = nraw[nr_offset(y, x - 1)][1] - (nraw[nr_offset(y, x - 2)][kc] + nraw[nr_offset(y, x)][kc]) * 0.5f;
float hh2 = nraw[nr_offset(y, x + 1)][1] - (nraw[nr_offset(y, x + 2)][kc] + nraw[nr_offset(y, x)][kc]) * 0.5f;
float kh = calc_dist(hh1, hh2) *
calc_dist(nraw[nr_offset(y, x)][kc] * nraw[nr_offset(y, x)][kc], (nraw[nr_offset(y, x - 2)][kc] * nraw[nr_offset(y, x + 2)][kc]));
kh *= kh;
kh *= kh;
kh *= kh;
float dh = kh * calc_dist(nraw[nr_offset(y, x - 3)][1] * nraw[nr_offset(y, x + 3)][1], nraw[nr_offset(y, x - 1)][1] * nraw[nr_offset(y, x + 1)][1]);
float e = calc_dist(dh, dv);
char d = dh < dv ? (e > Tg() ? HORSH : HOR) : (e > Tg() ? VERSH : VER);
return d;
}
/* Same metric as get_hv_grb() but evaluated at a green site: hc is the
 * known non-green color on this row, hc ^ 2 the one on adjacent rows. */
int get_hv_rbg(int x, int y, int hc)
{
float hv1 = nraw[nr_offset(y - 1, x)][hc ^ 2] - (nraw[nr_offset(y - 2, x)][1] + nraw[nr_offset(y, x)][1]) * 0.5f;
float hv2 = nraw[nr_offset(y + 1, x)][hc ^ 2] - (nraw[nr_offset(y + 2, x)][1] + nraw[nr_offset(y, x)][1]) * 0.5f;
float kv =
calc_dist(hv1, hv2) * calc_dist(nraw[nr_offset(y, x)][1] * nraw[nr_offset(y, x)][1], (nraw[nr_offset(y - 2, x)][1] * nraw[nr_offset(y + 2, x)][1]));
// 8th power, as in get_hv_grb()
kv *= kv;
kv *= kv;
kv *= kv;
float dv = kv * calc_dist(nraw[nr_offset(y - 3, x)][hc ^ 2] * nraw[nr_offset(y + 3, x)][hc ^ 2],
nraw[nr_offset(y - 1, x)][hc ^ 2] * nraw[nr_offset(y + 1, x)][hc ^ 2]);
float hh1 = nraw[nr_offset(y, x - 1)][hc] - (nraw[nr_offset(y, x - 2)][1] + nraw[nr_offset(y, x)][1]) * 0.5f;
float hh2 = nraw[nr_offset(y, x + 1)][hc] - (nraw[nr_offset(y, x + 2)][1] + nraw[nr_offset(y, x)][1]) * 0.5f;
float kh =
calc_dist(hh1, hh2) * calc_dist(nraw[nr_offset(y, x)][1] * nraw[nr_offset(y, x)][1], (nraw[nr_offset(y, x - 2)][1] * nraw[nr_offset(y, x + 2)][1]));
kh *= kh;
kh *= kh;
kh *= kh;
float dh = kh * calc_dist(nraw[nr_offset(y, x - 3)][hc] * nraw[nr_offset(y, x + 3)][hc], nraw[nr_offset(y, x - 1)][hc] * nraw[nr_offset(y, x + 1)][hc]);
float e = calc_dist(dh, dv);
char d = dh < dv ? (e > Tg() ? HORSH : HOR) : (e > Tg() ? VERSH : VER);
return d;
}
/**
 * @brief Determines the optimal diagonal interpolation direction for non-green (red/blue) pixel locations.
 *
 * This function analyzes two competing diagonal directions at a Bayer pattern location where
 * a non-green color (red or blue, specified by kc) is known, to decide which diagonal path
 * provides the smoothest gradient for interpolation purposes.
 *
 * The two diagonal directions evaluated are:
 * - LURD (Left-Up to Right-Down): compares pixels at (y-1,x-1) and (y+1,x+1)
 * - RULD (Right-Up to Left-Down): compares pixels at (y-1,x+1) and (y+1,x-1)
 *
 * Direction selection methodology:
 * 1. Computes green-to-color ratios at diagonal neighbors to assess color consistency
 * 2. Multiplies ratio differences by geometric mean differences of green values
 * 3. Selects the direction with the SMALLER metric (smoother gradient)
 * 4. If the difference between metrics exceeds T() threshold, marks edge as "sharp" (DIASH flag)
 *
 * The T() threshold controls diagonal edge sharpness detection. In the original DHT algorithm
 * (integer raw space 0-65535), this was approximately sqrt(2) ≈ 1.4, representing a brightness
 * ratio threshold. In the current normalized float space (0-1, log₂-encoded), the equivalent
 * value needs recalculation based on the perceptual encoding used by raw_to_float().
 *
 * @param x Horizontal coordinate in the normalized raw array (includes margin offset)
 * @param y Vertical coordinate in the normalized raw array (includes margin offset)
 * @param kc Color channel index (0=red, 2=blue) of the known color at position (y,x)
 *
 * @return Direction flags: LURD or RULD for the selected diagonal, with DIASH bit set
 * if the edge is sharp (|dlurd - druld| > T())
 *
 * @see get_diag_rbg() — Diagonal direction detection for green pixel locations
 * @see get_hv_grb() — Horizontal/vertical direction detection for non-green pixels
 * @see T() — Threshold function for diagonal edge sharpness (currently 0.1 = ~1.6 stops)
 */
int get_diag_grb(int x, int y, int kc)
{
float hlu = nraw[nr_offset(y - 1, x - 1)][1] / nraw[nr_offset(y - 1, x - 1)][kc];
float hrd = nraw[nr_offset(y + 1, x + 1)][1] / nraw[nr_offset(y + 1, x + 1)][kc];
float dlurd = calc_dist(hlu, hrd) *
calc_dist(nraw[nr_offset(y - 1, x - 1)][1] * nraw[nr_offset(y + 1, x + 1)][1], nraw[nr_offset(y, x)][1] * nraw[nr_offset(y, x)][1]);
float druld = calc_dist(hlu, hrd) *
calc_dist(nraw[nr_offset(y - 1, x + 1)][1] * nraw[nr_offset(y + 1, x - 1)][1], nraw[nr_offset(y, x)][1] * nraw[nr_offset(y, x)][1]);
float e = calc_dist(dlurd, druld);
char d = druld < dlurd ? (e > T() ? RULDSH : RULD) : (e > T() ? LURDSH : LURD);
return d;
}
/* Diagonal direction metric at a green site: compares the products of the
 * two diagonal green pairs against the squared center green. The hue term
 * of get_diag_grb() is not needed here, so hc is unused. */
int get_diag_rbg(int x, int y, int /* hc */)
{
float dlurd = calc_dist(nraw[nr_offset(y - 1, x - 1)][1] * nraw[nr_offset(y + 1, x + 1)][1], nraw[nr_offset(y, x)][1] * nraw[nr_offset(y, x)][1]);
float druld = calc_dist(nraw[nr_offset(y - 1, x + 1)][1] * nraw[nr_offset(y + 1, x - 1)][1], nraw[nr_offset(y, x)][1] * nraw[nr_offset(y, x)][1]);
float e = calc_dist(dlurd, druld);
char d = druld < dlurd ? (e > T() ? RULDSH : RULD) : (e > T() ? LURDSH : LURD);
return d;
}
/* Softly compress a value ec that overshoots base (sqrt-shaped rolloff). */
static inline float scale_over(float ec, float base)
{
float s = base * .4f;
float o = ec - base;
return base + sqrt(s * (o + s)) - s;
}
/* Softly compress a value ec that undershoots base. */
static inline float scale_under(float ec, float base)
{
float s = base * .6f;
float o = base - ec;
return base - sqrt(s * (o + s)) + s;
}
~DHTNNF();
DHTNNF(LibRaw &_libraw);
/* Output / pipeline stages (implemented below in this file). */
void copy_to_image();
void make_greens();
void make_diag_dirs();
void make_hv_dirs();
void refine_hv_dirs(int i, int js, const char *ndir_src = NULL);
void refine_diag_dirs(int i, int js, const char *ndir_src = NULL);
void refine_ihv_dirs(int i, const char *ndir_src = NULL);
void refine_idiag_dirs(int i, const char *ndir_src = NULL);
/* Debug visualization helpers. */
void illustrate_dirs();
void illustrate_dline(int i);
void make_hv_dline(int i);
void make_diag_dline(int i);
void make_gline(int i);
void make_rbdiag(int i);
void make_rbhv(int i);
void make_rb();
void hide_hots();
void restore_hots();
/* Neural-network stages (implemented elsewhere; declarations only here). */
void build_nn_features();
void extract_patch(int i, int j, float *patch, int patch_r);
float compute_green_forced(int i, int j, int kc, bool use_ver);
void nn_refine_hv_dirs(DHTNNNetwork &net);
void nn_blend_green(DHTNNNetwork &net);
void train_nn(DHTNNNetwork &net, float lr, float (*gt_rgb)[3], int gt_w, int gt_h);
void benchmark_nn(DHTNNNetwork &net);
};
typedef float float3[3]; /* one RGB pixel: three contiguous floats */
/*
* информация о цветах копируется во float в общем то с одной целью -- чтобы
* вместо 0 можно было писать 0.5, что больше подходит для вычисления яркостной
* разницы. причина: в целых числах разница в 1 стоп составляет ряд 8,4,2,1,0 --
последнее число должно быть 0.5, которое непредставимо в целых числах. так же
* это изменение позволяет не думать о специальных случаях деления на ноль.
*
* альтернативное решение: умножить все данные на 2 и в младший бит внести 1.
* правда, всё равно придётся следить, чтобы при интерпретации зелёного цвета не
* получился 0 при округлении, иначе проблема при интерпретации синих и красных.
*
*/
/* Build the padded working buffers from the LibRaw image: convert each known
 * Bayer sample to the normalized log2 domain with raw_to_float(), store it in
 * its channel slot, and track per-channel min/max over nonzero samples. */
DHTNNF::DHTNNF(LibRaw &_libraw) : libraw(_libraw), nn_feat(NULL), nn_green_written(NULL)
{
nr_height = libraw.imgdata.sizes.iheight + nr_topmargin * 2;
nr_width = libraw.imgdata.sizes.iwidth + nr_leftmargin * 2;
// NOTE(review): malloc/calloc results are not checked; an allocation failure
// would crash on first write — confirm whether callers guard against this.
nraw = (float3 *)malloc(nr_height * nr_width * sizeof(float3));
int iwidth = libraw.imgdata.sizes.iwidth;
ndir = (char *)calloc(nr_height * nr_width, 1);
// start extrema inverted so any observed sample updates them
channel_maximum[0] = channel_maximum[1] = channel_maximum[2] = 0.0f;
channel_minimum[0] = channel_minimum[1] = channel_minimum[2] = 1.0f;
for (int i = 0; i < nr_height * nr_width; ++i)
nraw[i][0] = nraw[i][1] = nraw[i][2] = 0;
for (int i = 0; i < libraw.imgdata.sizes.iheight; ++i)
{
// cache one period of CFA colors; assumes the pattern period divides 48
// columns (indexed below via j % 48) — TODO confirm for all sensor layouts
int col_cache[48];
for (int j = 0; j < 48; ++j)
{
int l = libraw.COLOR(i, j);
if (l == 3) l = 1; // both greens are equal
col_cache[j] = l;
}
for (int j = 0; j < iwidth; ++j)
{
int l = col_cache[j % 48];
unsigned short c = libraw.imgdata.image[i * iwidth + j][l];
float fv = raw_to_float(c);
nraw[nr_offset(i + nr_topmargin, j + nr_leftmargin)][l] = fv;
if (c != 0) // zero samples (masked / missing) are excluded from the extrema
{
if (fv > channel_maximum[l]) channel_maximum[l] = fv;
if (fv < channel_minimum[l]) channel_minimum[l] = fv;
}
}
}
}
/**
* @brief Detects and temporarily replaces hot (defective) pixels in the raw image data.
*
* Hot pixels are sensor defects that produce abnormally high or low values compared
* to their surroundings. This method identifies them by checking whether a pixel's
* value is a strict local extremum (either maximum or minimum) among all 8 of its
* nearest neighbors — including both same-color pixels at distance 2 and green
* pixels at distance 1 in the Bayer pattern.
*
* For each candidate hot pixel, an average of the 8 surrounding same-color pixels
* (at distance 2 in a cross+X pattern) is computed. If the difference between the
* pixel's value and this average exceeds the hot pixel threshold (0.375 in
* log-domain, corresponding to ~6 stops of brightness difference), the pixel is
* flagged with the HOT bit in the direction map.
*
* The flagged pixel's value is then replaced with an interpolated estimate: the
* method compares vertical and horizontal gradient consistency (using products of
* neighboring color and green values) and selects the direction with the smoother
* gradient. The replacement value is the average of the two same-color neighbors
* along that direction.
*
* This two-phase approach (non-green pixels first, then green pixels) processes
* each Bayer color plane independently. The original hot pixel values are preserved
* in the raw image buffer (libraw.imgdata.image) and can be restored later via
* @ref restore_hots() after demosaicing is complete, ensuring that the hot pixel
* suppression only aids the interpolation process without permanently altering
* the original captured data.
*
* @note This method is parallelized with OpenMP over image rows.
*
* @see restore_hots() — restores original values of pixels flagged as HOT.
* @see TresholdHot() — the threshold function used to confirm hot pixel candidates.
*/
void DHTNNF::hide_hots()
{
int iwidth = libraw.imgdata.sizes.iwidth;
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
for (int i = 0; i < libraw.imgdata.sizes.iheight; ++i)
{
int js = libraw.COLOR(i, 0) & 1;
int kc = libraw.COLOR(i, js);
/*
 * js -- starting x parity that lands off the known green
 * kc -- the known (non-green) color at those sites
 */
/* Phase 1: red/blue sites of this row. */
for (int j = js; j < iwidth; j += 2)
{
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
float c = nraw[nr_offset(y, x)][kc];
/* Candidate hot pixel: strict local extremum versus the 4 same-color
 * neighbors at distance 2 and the 4 green neighbors at distance 1. */
if ((c > nraw[nr_offset(y, x + 2)][kc] && c > nraw[nr_offset(y, x - 2)][kc] && c > nraw[nr_offset(y - 2, x)][kc] &&
c > nraw[nr_offset(y + 2, x)][kc] && c > nraw[nr_offset(y, x + 1)][1] && c > nraw[nr_offset(y, x - 1)][1] &&
c > nraw[nr_offset(y - 1, x)][1] && c > nraw[nr_offset(y + 1, x)][1]) ||
(c < nraw[nr_offset(y, x + 2)][kc] && c < nraw[nr_offset(y, x - 2)][kc] && c < nraw[nr_offset(y - 2, x)][kc] &&
c < nraw[nr_offset(y + 2, x)][kc] && c < nraw[nr_offset(y, x + 1)][1] && c < nraw[nr_offset(y, x - 1)][1] &&
c < nraw[nr_offset(y - 1, x)][1] && c < nraw[nr_offset(y + 1, x)][1]))
{
/* Average of the 8 surrounding same-color samples at distance 2. */
float avg = 0;
for (int k = -2; k < 3; k += 2)
for (int m = -2; m < 3; m += 2)
if (m == 0 && k == 0) continue;
else avg += nraw[nr_offset(y + k, x + m)][kc];
avg /= 8;
if (TresholdHot(c, avg))
{
ndir[nr_offset(y, x)] |= HOT;
/* Replace along the smoother axis (smaller cross-difference). */
float dv =
calc_dist(nraw[nr_offset(y - 2, x)][kc] * nraw[nr_offset(y - 1, x)][1], nraw[nr_offset(y + 2, x)][kc] * nraw[nr_offset(y + 1, x)][1]);
float dh =
calc_dist(nraw[nr_offset(y, x - 2)][kc] * nraw[nr_offset(y, x - 1)][1], nraw[nr_offset(y, x + 2)][kc] * nraw[nr_offset(y, x + 1)][1]);
if (dv > dh) nraw[nr_offset(y, x)][kc] = (nraw[nr_offset(y, x + 2)][kc] + nraw[nr_offset(y, x - 2)][kc]) / 2;
else nraw[nr_offset(y, x)][kc] = (nraw[nr_offset(y - 2, x)][kc] + nraw[nr_offset(y + 2, x)][kc]) / 2;
}
}
}
/* Phase 2: green sites of this row (opposite column parity). */
for (int j = js ^ 1; j < iwidth; j += 2)
{
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
float c = nraw[nr_offset(y, x)][1];
/* Same extremum test; horizontal non-green neighbors carry kc, the
 * vertical ones carry the other color (kc ^ 2). */
if ((c > nraw[nr_offset(y, x + 2)][1] && c > nraw[nr_offset(y, x - 2)][1] && c > nraw[nr_offset(y - 2, x)][1] && c > nraw[nr_offset(y + 2, x)][1] &&
c > nraw[nr_offset(y, x + 1)][kc] && c > nraw[nr_offset(y, x - 1)][kc] && c > nraw[nr_offset(y - 1, x)][kc ^ 2] &&
c > nraw[nr_offset(y + 1, x)][kc ^ 2]) ||
(c < nraw[nr_offset(y, x + 2)][1] && c < nraw[nr_offset(y, x - 2)][1] && c < nraw[nr_offset(y - 2, x)][1] && c < nraw[nr_offset(y + 2, x)][1] &&
c < nraw[nr_offset(y, x + 1)][kc] && c < nraw[nr_offset(y, x - 1)][kc] && c < nraw[nr_offset(y - 1, x)][kc ^ 2] &&
c < nraw[nr_offset(y + 1, x)][kc ^ 2]))
{
float avg = 0;
for (int k = -2; k < 3; k += 2)
for (int m = -2; m < 3; m += 2)
if (k == 0 && m == 0) continue;
else avg += nraw[nr_offset(y + k, x + m)][1];
avg /= 8;
if (TresholdHot(c, avg))
{
ndir[nr_offset(y, x)] |= HOT;
float dv = calc_dist(nraw[nr_offset(y - 2, x)][1] * nraw[nr_offset(y - 1, x)][kc ^ 2],
nraw[nr_offset(y + 2, x)][1] * nraw[nr_offset(y + 1, x)][kc ^ 2]);
float dh =
calc_dist(nraw[nr_offset(y, x - 2)][1] * nraw[nr_offset(y, x - 1)][kc], nraw[nr_offset(y, x + 2)][1] * nraw[nr_offset(y, x + 1)][kc]);
if (dv > dh) nraw[nr_offset(y, x)][1] = (nraw[nr_offset(y, x + 2)][1] + nraw[nr_offset(y, x - 2)][1]) / 2;
else nraw[nr_offset(y, x)][1] = (nraw[nr_offset(y - 2, x)][1] + nraw[nr_offset(y + 2, x)][1]) / 2;
}
}
}
}
}
/* Undo hide_hots(): for every pixel flagged HOT, re-read the original raw
 * sample from libraw.imgdata.image and convert it back into nraw, so the
 * captured data is preserved after demosaicing. */
void DHTNNF::restore_hots()
{
  int iwidth = libraw.imgdata.sizes.iwidth;
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided) collapse(2)
#endif
  for (int row = 0; row < libraw.imgdata.sizes.iheight; ++row)
  {
    for (int col = 0; col < iwidth; ++col)
    {
      int y = row + nr_topmargin;
      int x = col + nr_leftmargin;
      if (!(ndir[nr_offset(y, x)] & HOT))
        continue;
      int c = libraw.COLOR(row, col);
      if (c == 3)
        c = 1; // both greens share channel 1
      nraw[nr_offset(y, x)][c] = raw_to_float(libraw.imgdata.image[row * iwidth + col][c]);
    }
  }
}
void DHTNNF::make_diag_dirs()
{
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
for (int i = 0; i < libraw.imgdata.sizes.iheight; ++i)
{
make_diag_dline(i);
}
// #if defined(LIBRAW_USE_OPENMP)
// #pragma omp parallel for schedule(guided)
// #endif
// for (int i = 0; i < libraw.imgdata.sizes.iheight; ++i) {
// refine_diag_dirs(i, i & 1);
// }
// #if defined(LIBRAW_USE_OPENMP)
// #pragma omp parallel for schedule(guided)
// #endif
// for (int i = 0; i < libraw.imgdata.sizes.iheight; ++i) {
// refine_diag_dirs(i, (i & 1) ^ 1);
// }
char *snap = (char *)malloc(nr_height * nr_width);
memcpy(snap, ndir, nr_height * nr_width);
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
for (int i = 0; i < libraw.imgdata.sizes.iheight; ++i)
{
refine_idiag_dirs(i, snap);
}
free(snap);
}
void DHTNNF::make_hv_dirs()
{
int iheight = libraw.imgdata.sizes.iheight;
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
for (int i = 0; i < iheight; ++i)
{
make_hv_dline(i);
}
/* Refinement passes use a snapshot for reads to avoid data races
* when parallelized: each thread reads neighbor directions from the
* snapshot while writing updated directions to ndir (Jacobi iteration). */
char *snap = (char *)malloc(nr_height * nr_width);
memcpy(snap, ndir, nr_height * nr_width);
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
for (int i = 0; i < iheight; ++i)
{
refine_hv_dirs(i, i & 1, snap);
}
memcpy(snap, ndir, nr_height * nr_width);
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
for (int i = 0; i < iheight; ++i)
{
refine_hv_dirs(i, (i & 1) ^ 1, snap);
}
memcpy(snap, ndir, nr_height * nr_width);
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
for (int i = 0; i < iheight; ++i)
{
refine_ihv_dirs(i, snap);
}
free(snap);
}
void DHTNNF::refine_hv_dirs(int i, int js, const char *ndir_src)
{
const char *nd = ndir_src ? ndir_src : ndir;
int iwidth = libraw.imgdata.sizes.iwidth;
for (int j = js; j < iwidth; j += 2)
{
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
if (nd[nr_offset(y, x)] & HVSH) continue;
int nv = (nd[nr_offset(y - 1, x)] & VER) + (nd[nr_offset(y + 1, x)] & VER) + (nd[nr_offset(y, x - 1)] & VER) + (nd[nr_offset(y, x + 1)] & VER);
int nh = (nd[nr_offset(y - 1, x)] & HOR) + (nd[nr_offset(y + 1, x)] & HOR) + (nd[nr_offset(y, x - 1)] & HOR) + (nd[nr_offset(y, x + 1)] & HOR);
bool codir = (nd[nr_offset(y, x)] & VER) ? ((nd[nr_offset(y - 1, x)] & VER) || (nd[nr_offset(y + 1, x)] & VER))
: ((nd[nr_offset(y, x - 1)] & HOR) || (nd[nr_offset(y, x + 1)] & HOR));
nv /= VER;
nh /= HOR;
if ((nd[nr_offset(y, x)] & VER) && (nh > 2 && !codir))
{
ndir[nr_offset(y, x)] &= ~VER;
ndir[nr_offset(y, x)] |= HOR;
}
if ((nd[nr_offset(y, x)] & HOR) && (nv > 2 && !codir))
{
ndir[nr_offset(y, x)] &= ~HOR;
ndir[nr_offset(y, x)] |= VER;
}
}
}
void DHTNNF::refine_ihv_dirs(int i, const char *ndir_src)
{
const char *nd = ndir_src ? ndir_src : ndir;
int iwidth = libraw.imgdata.sizes.iwidth;
for (int j = 0; j < iwidth; j++)
{
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
if (nd[nr_offset(y, x)] & HVSH) continue;
int nv = (nd[nr_offset(y - 1, x)] & VER) + (nd[nr_offset(y + 1, x)] & VER) + (nd[nr_offset(y, x - 1)] & VER) + (nd[nr_offset(y, x + 1)] & VER);
int nh = (nd[nr_offset(y - 1, x)] & HOR) + (nd[nr_offset(y + 1, x)] & HOR) + (nd[nr_offset(y, x - 1)] & HOR) + (nd[nr_offset(y, x + 1)] & HOR);
nv /= VER;
nh /= HOR;
if ((nd[nr_offset(y, x)] & VER) && nh > 3)
{
ndir[nr_offset(y, x)] &= ~VER;
ndir[nr_offset(y, x)] |= HOR;
}
if ((nd[nr_offset(y, x)] & HOR) && nv > 3)
{
ndir[nr_offset(y, x)] &= ~HOR;
ndir[nr_offset(y, x)] |= VER;
}
}
}
/*
 * For every interpolation point we compute an oriented gradient metric in
 * both (horizontal and vertical) directions and compare them. If the metric
 * in one direction is significantly larger than in the other, the point is
 * marked as oriented in the smoother direction; if the metrics differ by
 * more than the sharpness threshold, the point is additionally marked sharp.
 *
 * The gradient metric uses products of neighboring color and green values,
 * which accounts for both brightness and color changes. The larger the
 * difference between the two directional metrics, the more confidently the
 * gradient orientation can be determined.
 *
 * This direction information is used later, during interpolation, to choose
 * the best way to reconstruct the missing color channels, preserving detail
 * and reducing demosaic artifacts.
 */
void DHTNNF::make_hv_dline(int i)
{
int iwidth = libraw.imgdata.sizes.iwidth;
int js = libraw.COLOR(i, 0) & 1;
int kc = libraw.COLOR(i, js);
/*
 * js -- starting x parity that lands off the known green
 * kc -- the known (non-green) color at the interpolation point
 */
for (int j = 0; j < iwidth; j++)
{
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
char d = 0;
if ((j & 1) == js)
{
d = get_hv_grb(x, y, kc); // non-green site
}
else
{
d = get_hv_rbg(x, y, kc); // green site
}
ndir[nr_offset(y, x)] |= d;
}
}
/* Compute diagonal gradient direction flags (LURD/RULD, plus DIASH when
 * sharp) for every pixel of row i and OR them into ndir. Non-green sites
 * use get_diag_grb(), green sites use get_diag_rbg(). */
void DHTNNF::make_diag_dline(int i)
{
  int iwidth = libraw.imgdata.sizes.iwidth;
  int js = libraw.COLOR(i, 0) & 1; // column parity that misses the known green
  int kc = libraw.COLOR(i, js);    // known non-green color on this row
  int y = i + nr_topmargin;
  for (int j = 0; j < iwidth; j++)
  {
    int x = j + nr_leftmargin;
    char d;
    if ((j & 1) == js)
      d = (char)get_diag_grb(x, y, kc);
    else
      d = (char)get_diag_rbg(x, y, kc);
    ndir[nr_offset(y, x)] |= d;
  }
}
/* One checkerboard pass (column phase js) of diagonal direction refinement
 * for row i: a non-sharp pixel whose 8-neighborhood strongly votes
 * (count > 4) for the opposite diagonal -- and which has no co-oriented
 * neighbor along its own diagonal -- gets flipped. Neighbor reads come from
 * ndir_src (snapshot) when provided; writes go to ndir. */
void DHTNNF::refine_diag_dirs(int i, int js, const char *ndir_src)
{
const char *nd = ndir_src ? ndir_src : ndir;
int iwidth = libraw.imgdata.sizes.iwidth;
for (int j = js; j < iwidth; j += 2)
{
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
if (nd[nr_offset(y, x)] & DIASH) continue;
/* Count LURD and RULD votes among all 8 neighbors. */
int nv = (nd[nr_offset(y - 1, x)] & LURD) + (nd[nr_offset(y + 1, x)] & LURD) + (nd[nr_offset(y, x - 1)] & LURD) + (nd[nr_offset(y, x + 1)] & LURD) +
(nd[nr_offset(y - 1, x - 1)] & LURD) + (nd[nr_offset(y - 1, x + 1)] & LURD) + (nd[nr_offset(y + 1, x - 1)] & LURD) +
(nd[nr_offset(y + 1, x + 1)] & LURD);
int nh = (nd[nr_offset(y - 1, x)] & RULD) + (nd[nr_offset(y + 1, x)] & RULD) + (nd[nr_offset(y, x - 1)] & RULD) + (nd[nr_offset(y, x + 1)] & RULD) +
(nd[nr_offset(y - 1, x - 1)] & RULD) + (nd[nr_offset(y - 1, x + 1)] & RULD) + (nd[nr_offset(y + 1, x - 1)] & RULD) +
(nd[nr_offset(y + 1, x + 1)] & RULD);
/* Does either neighbor along this pixel's own diagonal agree with it? */
bool codir = (nd[nr_offset(y, x)] & LURD) ? ((nd[nr_offset(y - 1, x - 1)] & LURD) || (nd[nr_offset(y + 1, x + 1)] & LURD))
: ((nd[nr_offset(y - 1, x + 1)] & RULD) || (nd[nr_offset(y + 1, x - 1)] & RULD));
nv /= LURD; // convert bit sums to neighbor counts
nh /= RULD;
if ((nd[nr_offset(y, x)] & LURD) && (nh > 4 && !codir))
{
ndir[nr_offset(y, x)] &= ~LURD;
ndir[nr_offset(y, x)] |= RULD;
}
if ((nd[nr_offset(y, x)] & RULD) && (nv > 4 && !codir))
{
ndir[nr_offset(y, x)] &= ~RULD;
ndir[nr_offset(y, x)] |= LURD;
}
}
}
/* Isolated-pixel diagonal cleanup for row i: flip a non-sharp pixel's
 * diagonal only when all 8 neighbors vote for the opposite one (count > 7).
 * Unlike refine_diag_dirs(), no co-direction escape hatch applies and every
 * column is visited. Reads from ndir_src (snapshot) when provided. */
void DHTNNF::refine_idiag_dirs(int i, const char *ndir_src)
{
const char *nd = ndir_src ? ndir_src : ndir;
int iwidth = libraw.imgdata.sizes.iwidth;
for (int j = 0; j < iwidth; j++)
{
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
if (nd[nr_offset(y, x)] & DIASH) continue;
int nv = (nd[nr_offset(y - 1, x)] & LURD) + (nd[nr_offset(y + 1, x)] & LURD) + (nd[nr_offset(y, x - 1)] & LURD) + (nd[nr_offset(y, x + 1)] & LURD) +
(nd[nr_offset(y - 1, x - 1)] & LURD) + (nd[nr_offset(y - 1, x + 1)] & LURD) + (nd[nr_offset(y + 1, x - 1)] & LURD) +
(nd[nr_offset(y + 1, x + 1)] & LURD);
int nh = (nd[nr_offset(y - 1, x)] & RULD) + (nd[nr_offset(y + 1, x)] & RULD) + (nd[nr_offset(y, x - 1)] & RULD) + (nd[nr_offset(y, x + 1)] & RULD) +
(nd[nr_offset(y - 1, x - 1)] & RULD) + (nd[nr_offset(y - 1, x + 1)] & RULD) + (nd[nr_offset(y + 1, x - 1)] & RULD) +
(nd[nr_offset(y + 1, x + 1)] & RULD);
nv /= LURD; // convert bit sums to neighbor counts
nh /= RULD;
if ((nd[nr_offset(y, x)] & LURD) && nh > 7)
{
ndir[nr_offset(y, x)] &= ~LURD;
ndir[nr_offset(y, x)] |= RULD;
}
if ((nd[nr_offset(y, x)] & RULD) && nv > 7)
{
ndir[nr_offset(y, x)] &= ~RULD;
ndir[nr_offset(y, x)] |= LURD;
}
}
}
/*
 * Interpolate the missing green values at every red/blue site,
 * one image row at a time (rows are independent, so the loop is
 * parallelized when OpenMP is available).
 */
void DHTNNF::make_greens()
{
  int iheight = libraw.imgdata.sizes.iheight;
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
  for (int row = 0; row < iheight; ++row)
  {
    make_gline(row);
  }
}
/* Interpolate the missing green at every non-green site of row i: blend the
 * two green neighbors along the direction chosen in ndir, weighting by the
 * inverse distance of the same-color samples, then clamp the result to the
 * local neighbor range (with a small log-domain margin) and to the global
 * green channel bounds. */
void DHTNNF::make_gline(int i)
{
int iwidth = libraw.imgdata.sizes.iwidth;
int js = libraw.COLOR(i, 0) & 1;
int kc = libraw.COLOR(i, js);
/*
 * js -- starting x parity that lands off the known green
 * kc -- the known (non-green) color at the interpolation point
 */
for (int j = js; j < iwidth; j += 2)
{
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
/* Skip pixels where nn_blend_green already wrote the blended green */
if (nn_green_written && nn_green_written[i * iwidth + j]) continue;
int dx, dy, dx2, dy2;
float d1, d2;
if (ndir[nr_offset(y, x)] & VER)
{
dx = dx2 = 0;
dy = -1;
dy2 = 1;
// log-space: difference = green − geometric mean of colors
d1 = nraw[nr_offset(y - 1, x)][1] - (nraw[nr_offset(y - 2, x)][kc] + nraw[nr_offset(y, x)][kc]) * 0.5f;
d2 = nraw[nr_offset(y + 1, x)][1] - (nraw[nr_offset(y + 2, x)][kc] + nraw[nr_offset(y, x)][kc]) * 0.5f;
}
else
{
dy = dy2 = 0;
dx = 1;
dx2 = -1;
d1 = nraw[nr_offset(y, x + 1)][1] - (nraw[nr_offset(y, x + 2)][kc] + nraw[nr_offset(y, x)][kc]) * 0.5f;
d2 = nraw[nr_offset(y, x - 1)][1] - (nraw[nr_offset(y, x - 2)][kc] + nraw[nr_offset(y, x)][kc]) * 0.5f;
}
/* Squared inverse-distance weights from the same-color neighbors. */
float b1 = safe_inv_dist(calc_dist(nraw[nr_offset(y, x)][kc], nraw[nr_offset(y + dy * 2, x + dx * 2)][kc]));
float b2 = safe_inv_dist(calc_dist(nraw[nr_offset(y, x)][kc], nraw[nr_offset(y + dy2 * 2, x + dx2 * 2)][kc]));
b1 *= b1;
b2 *= b2;
// log-space: sum replaces product
float eg = nraw[nr_offset(y, x)][kc] + (b1 * d1 + b2 * d2) / (b1 + b2);
/* Clamp to the range of the two green neighbors, widened slightly. */
float min, max;
min = MIN(nraw[nr_offset(y + dy, x + dx)][1], nraw[nr_offset(y + dy2, x + dx2)][1]);
max = MAX(nraw[nr_offset(y + dy, x + dx)][1], nraw[nr_offset(y + dy2, x + dx2)][1]);
// log-space: ×1.2 → +log₂(1.2)/16, ÷1.2 → −log₂(1.2)/16
static const float margin = log2f(1.2f) / FloatChannelMaximum;
min -= margin;
max += margin;
if (eg < min) eg = min;
else if (eg > max) eg = max;
/* Final clamp to the observed global green bounds. */
if (eg > channel_maximum[1]) eg = channel_maximum[1];
else if (eg < channel_minimum[1]) eg = channel_minimum[1];
nraw[nr_offset(y, x)][1] = eg;
}
}
/*
 * Debug function: overwrite the working image with a visualization of the
 * per-pixel direction/HOT flags, one row at a time.
 */
void DHTNNF::illustrate_dirs()
{
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
for (int i = 0; i < libraw.imgdata.sizes.iheight; ++i)
{
illustrate_dline(i);
}
}
/* Debug visualization of one row: paint every pixel mid-gray, then tint HOT
 * pixels red and all others blue. Note: l is first computed from the DIASH
 * bit but then overridden to 1, so currently the tint intensity is constant
 * (deliberate debug shortcut; the original `l >>= 3` mapping is disabled). */
void DHTNNF::illustrate_dline(int i)
{
int iwidth = libraw.imgdata.sizes.iwidth;
for (int j = 0; j < iwidth; j++)
{
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
nraw[nr_offset(y, x)][0] = nraw[nr_offset(y, x)][1] = nraw[nr_offset(y, x)][2] = 0.5;
int l = ndir[nr_offset(y, x)] & 8;
// l >>= 3; // disabled: would map the DIASH bit to 0/1
l = 1; // debug override: constant intensity, DIASH bit ignored
if (ndir[nr_offset(y, x)] & HOT) nraw[nr_offset(y, x)][0] = l * channel_maximum[0] / 4.f + channel_maximum[0] / 4.f;
else nraw[nr_offset(y, x)][2] = l * channel_maximum[2] / 4.f + channel_maximum[2] / 4.f;
}
}
/*
 * Red and blue interpolation.
 *
 * First, the missing non-green color is interpolated at sites whose known
 * same-color neighbors lie along the diagonals; after that the situation
 * reduces to the same case as the green interpolation (see make_rbhv).
 */
void DHTNNF::make_rbdiag(int i)
{
int iwidth = libraw.imgdata.sizes.iwidth;
int js = libraw.COLOR(i, 0) & 1;
int uc = libraw.COLOR(i, js);
int cl = uc ^ 2;
/*
 * js -- starting x parity that lands on an already interpolated green
 * uc -- the known color (other than green) at the interpolation point
 * cl -- the unknown color (uc ^ 2)
 */
for (int j = js; j < iwidth; j += 2)
{
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
/* Pick the diagonal chosen by the direction map. */
int dx, dy, dx2, dy2;
if (ndir[nr_offset(y, x)] & LURD)
{
dx = -1;
dx2 = 1;
dy = -1;
dy2 = 1;
}
else
{
dx = -1;
dx2 = 1;
dy = 1;
dy2 = -1;
}
/* Cubed inverse-distance weights from the green channel. */
float g1 = safe_inv_dist(calc_dist(nraw[nr_offset(y, x)][1], nraw[nr_offset(y + dy, x + dx)][1]));
float g2 = safe_inv_dist(calc_dist(nraw[nr_offset(y, x)][1], nraw[nr_offset(y + dy2, x + dx2)][1]));
g1 *= g1 * g1;
g2 *= g2 * g2;
// log-space: C/G ratio → difference, G×ratio → sum
float d1 = nraw[nr_offset(y + dy, x + dx)][cl] - nraw[nr_offset(y + dy, x + dx)][1];
float d2 = nraw[nr_offset(y + dy2, x + dx2)][cl] - nraw[nr_offset(y + dy2, x + dx2)][1];
float eg = nraw[nr_offset(y, x)][1] + (g1 * d1 + g2 * d2) / (g1 + g2);
/* Clamp to the diagonal-neighbor range (widened), then global bounds. */
float min, max;
min = MIN(nraw[nr_offset(y + dy, x + dx)][cl], nraw[nr_offset(y + dy2, x + dx2)][cl]);
max = MAX(nraw[nr_offset(y + dy, x + dx)][cl], nraw[nr_offset(y + dy2, x + dx2)][cl]);
static const float margin = log2f(1.2f) / FloatChannelMaximum;
min -= margin;
max += margin;
if (eg < min) eg = min;
else if (eg > max) eg = max;
if (eg > channel_maximum[cl]) eg = channel_maximum[cl];
else if (eg < channel_minimum[cl]) eg = channel_minimum[cl];
eg = std::max(0.f, std::min(1.f, eg));
nraw[nr_offset(y, x)][cl] = eg;
}
}
/*
 * Interpolation of red and blue at sites where only green was known;
 * the directions here are horizontal or vertical.
 */
void DHTNNF::make_rbhv(int i)
{
int iwidth = libraw.imgdata.sizes.iwidth;
int js = (libraw.COLOR(i, 0) & 1) ^ 1;
for (int j = js; j < iwidth; j += 2)
{
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
/*
 * Since all the required red and blue values above/below and left/right
 * already exist (filled by make_rbdiag), the best direction can be chosen
 * using information from both colors.
 */
int dx, dy, dx2, dy2;
if (ndir[nr_offset(y, x)] & VER)
{
dx = dx2 = 0;
dy = -1;
dy2 = 1;
}
else
{
dy = dy2 = 0;
dx = 1;
dx2 = -1;
}
/* Squared inverse-distance weights from the green channel. */
float g1 = safe_inv_dist(calc_dist(nraw[nr_offset(y, x)][1], nraw[nr_offset(y + dy, x + dx)][1]));
float g2 = safe_inv_dist(calc_dist(nraw[nr_offset(y, x)][1], nraw[nr_offset(y + dy2, x + dx2)][1]));
g1 *= g1;
g2 *= g2;
// log-space: R/G and B/G ratios → differences, G×ratio → sum
float d1_r = nraw[nr_offset(y + dy, x + dx)][0] - nraw[nr_offset(y + dy, x + dx)][1];
float d2_r = nraw[nr_offset(y + dy2, x + dx2)][0] - nraw[nr_offset(y + dy2, x + dx2)][1];
float d1_b = nraw[nr_offset(y + dy, x + dx)][2] - nraw[nr_offset(y + dy, x + dx)][1];
float d2_b = nraw[nr_offset(y + dy2, x + dx2)][2] - nraw[nr_offset(y + dy2, x + dx2)][1];
float eg_r = nraw[nr_offset(y, x)][1] + (g1 * d1_r + g2 * d2_r) / (g1 + g2);
float eg_b = nraw[nr_offset(y, x)][1] + (g1 * d1_b + g2 * d2_b) / (g1 + g2);
/* Clamp both estimates to the neighbor range (widened), then global bounds. */
float min_r, max_r;
min_r = MIN(nraw[nr_offset(y + dy, x + dx)][0], nraw[nr_offset(y + dy2, x + dx2)][0]);
max_r = MAX(nraw[nr_offset(y + dy, x + dx)][0], nraw[nr_offset(y + dy2, x + dx2)][0]);
float min_b, max_b;
min_b = MIN(nraw[nr_offset(y + dy, x + dx)][2], nraw[nr_offset(y + dy2, x + dx2)][2]);
max_b = MAX(nraw[nr_offset(y + dy, x + dx)][2], nraw[nr_offset(y + dy2, x + dx2)][2]);
static const float margin = log2f(1.2f) / FloatChannelMaximum;
min_r -= margin;
max_r += margin;
min_b -= margin;
max_b += margin;
if (eg_r < min_r) eg_r = min_r;
else if (eg_r > max_r) eg_r = max_r;
if (eg_b < min_b) eg_b = min_b;
else if (eg_b > max_b) eg_b = max_b;
if (eg_r > channel_maximum[0]) eg_r = channel_maximum[0];
else if (eg_r < channel_minimum[0]) eg_r = channel_minimum[0];
if (eg_b > channel_maximum[2]) eg_b = channel_maximum[2];
else if (eg_b < channel_minimum[2]) eg_b = channel_minimum[2];
eg_r = std::max(0.f, std::min(1.f, eg_r));
eg_b = std::max(0.f, std::min(1.f, eg_b));
nraw[nr_offset(y, x)][0] = eg_r;
nraw[nr_offset(y, x)][2] = eg_b;
}
}
/*
 * Interpolate red and blue over the whole image: first the diagonal pass
 * (R at B sites and vice versa), then the horizontal/vertical pass
 * (R/B at original green sites). Both passes are row-parallel.
 */
void DHTNNF::make_rb()
{
  const int rows = libraw.imgdata.sizes.iheight;
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
  for (int row = 0; row < rows; ++row)
    make_rbdiag(row);
  /* The implicit barrier at the end of the first parallel-for guarantees
   * every diagonal result is in place before make_rbhv reads it. */
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
  for (int row = 0; row < rows; ++row)
    make_rbhv(row);
}
/*
 * Transfer the demosaiced image into the LibRaw output array.
 * Non-finite values are repaired (G falls back to 0, R/B fall back to G),
 * every channel is clamped to [0,1] and converted back to raw ushort.
 */
void DHTNNF::copy_to_image()
{
  int iwidth = libraw.imgdata.sizes.iwidth;
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided) collapse(2)
#endif
  for (int row = 0; row < libraw.imgdata.sizes.iheight; ++row)
  {
    for (int col = 0; col < iwidth; ++col)
    {
      int src = nr_offset(row + nr_topmargin, col + nr_leftmargin);
      /* Repair NaN/Inf: green first, then red/blue inherit the fixed green */
      float g = nraw[src][1];
      if (!std::isfinite(g))
        g = 0.0f;
      float r = std::isfinite(nraw[src][0]) ? nraw[src][0] : g;
      float b = std::isfinite(nraw[src][2]) ? nraw[src][2] : g;
      /* Clamp to the legal perceptual-float domain */
      r = std::min(1.0f, std::max(0.0f, r));
      g = std::min(1.0f, std::max(0.0f, g));
      b = std::min(1.0f, std::max(0.0f, b));
      ushort *dst = libraw.imgdata.image[row * iwidth + col];
      dst[0] = float_to_raw(r);
      dst[2] = float_to_raw(b);
      /* Both green channels (G and G2) receive the same value */
      dst[1] = dst[3] = float_to_raw(g);
    }
  }
}
/* Release every buffer owned by the demosaic context.
 * free(NULL) is a no-op, so members that were never allocated
 * (e.g. nn_feat / nn_green_written on the rule-based path) are safe. */
DHTNNF::~DHTNNF()
{
free(nraw);
free(ndir);
free(nn_feat);
free(nn_green_written);
}
/*
 * Precompute per-pixel 16-float feature vectors for the entire image.
 * nn_feat[off][0..2] = nraw[off][0..2]
 * nn_feat[off][3..5] = one-hot Bayer channel (R, G, B)
 * nn_feat[off][6] = hue_h: horizontal hue at center G-mean(KC)
 * nn_feat[off][7] = hue_v: vertical hue at center G-mean(KC)
 * nn_feat[off][8] = hue_change_h: |hue(left)-hue(right)| horizontal hue consistency
 * nn_feat[off][9] = hue_change_v: |hue(top)-hue(bottom)| vertical hue consistency
 * nn_feat[off][10] = ld_ud: hue_aprox(g_u,g_d, c0,c_u2,c_d2) — cardinal vertical
 * nn_feat[off][11] = ld_lr: hue_aprox(g_l,g_r, c0,c_l2,c_r2) — cardinal horizontal
 * nn_feat[off][12] = ld_lu: hue_aprox(g_l,g_u, c0,c_l2,c_u2) — diagonal left-up
 * nn_feat[off][13] = ld_ld: hue_aprox(g_l,g_d, c0,c_l2,c_d2) — diagonal left-down
 * nn_feat[off][14] = ld_ru: hue_aprox(g_r,g_u, c0,c_r2,c_u2) — diagonal right-up
 * nn_feat[off][15] = ld_rd: hue_aprox(g_r,g_d, c0,c_r2,c_d2) — diagonal right-down
 *
 * Must be called (or re-called) whenever ndir changes before NN inference/training.
 */
void DHTNNF::build_nn_features()
{
int npix = nr_height * nr_width;
int iwidth = libraw.imgdata.sizes.iwidth;
int iheight = libraw.imgdata.sizes.iheight;
/* NOTE(review): malloc result is not checked; the memset below would
 * crash on OOM — consider a guard, matching the rest of the file. */
if (!nn_feat) nn_feat = (float (*)[16])malloc(npix * 16 * sizeof(float));
/* Zero out (border pixels keep zeros) */
memset(nn_feat, 0, npix * 16 * sizeof(float));
/* Fill the active image area */
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
for (int i = 0; i < iheight; i++)
{
/* Bayer pattern repeats every 2 rows and columns;
 * precompute a row-local cache of COLOR for this row.
 * 48 columns cover any period LibRaw's COLOR() can have. */
int col_cache[48];
for (int c = 0; c < 48; c++)
{
int ch = libraw.COLOR(i, c);
if (ch == 3) ch = 1; /* fold G2 into G */
col_cache[c] = ch;
}
for (int j = 0; j < iwidth; j++)
{
int off = nr_offset(i + nr_topmargin, j + nr_leftmargin);
float *f = nn_feat[off];
/* f[0..2]: current (possibly partially interpolated) RGB */
f[0] = nraw[off][0];
f[1] = nraw[off][1];
f[2] = nraw[off][2];
/* f[3..5]: one-hot encoding of the Bayer channel at this site */
int ch = col_cache[j % 48];
f[3] = (ch == 0) ? 1.0f : 0.0f;
f[4] = (ch == 1) ? 1.0f : 0.0f;
f[5] = (ch == 2) ? 1.0f : 0.0f;
/* Hue features in log-domain: G - mean(KC) along each direction.
 * In log space, subtraction = ratio, and (a+b)/2 ≈ log(geomean).
 * For green pixels: G_center - (ng_left + ng_right) / 2
 * For non-green: (G_left + G_right) / 2 - KC_center */
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
if (ch == 1) {
/* Green pixel: non-green neighbors at distance 1.
 * R and B slots are mutually exclusive on a Bayer neighbor,
 * so [0]+[2] picks up whichever channel is present. */
float ng_l = nraw[nr_offset(y, x - 1)][0] + nraw[nr_offset(y, x - 1)][2];
float ng_r = nraw[nr_offset(y, x + 1)][0] + nraw[nr_offset(y, x + 1)][2];
float ng_u = nraw[nr_offset(y - 1, x)][0] + nraw[nr_offset(y - 1, x)][2];
float ng_d = nraw[nr_offset(y + 1, x)][0] + nraw[nr_offset(y + 1, x)][2];
f[6] = nraw[off][1] - (ng_l + ng_r) * 0.5f;
f[7] = nraw[off][1] - (ng_u + ng_d) * 0.5f;
} else {
/* Non-green pixel: green neighbors at distance 1, known color at center */
f[6] = (nraw[nr_offset(y, x - 1)][1] + nraw[nr_offset(y, x + 1)][1]) * 0.5f
- nraw[off][ch];
f[7] = (nraw[nr_offset(y - 1, x)][1] + nraw[nr_offset(y + 1, x)][1]) * 0.5f
- nraw[off][ch];
}
/* Hue CHANGE: |hue(neighbor1) - hue(neighbor2)| along each axis.
 * Measures hue consistency in horizontal / vertical directions.
 * Same calculation as calc_dist(hh1,hh2) / calc_dist(hv1,hv2) in get_hv_grb/rbg. */
if (ch == 1) {
/* Green center: non-green at ±1, green at ±2 (same parity as center) */
float kc_l = nraw[nr_offset(y, x - 1)][0] + nraw[nr_offset(y, x - 1)][2];
float kc_r = nraw[nr_offset(y, x + 1)][0] + nraw[nr_offset(y, x + 1)][2];
float hh1 = kc_l - (nraw[nr_offset(y, x - 2)][1] + nraw[off][1]) * 0.5f;
float hh2 = kc_r - (nraw[off][1] + nraw[nr_offset(y, x + 2)][1]) * 0.5f;
f[8] = fabsf(hh1 - hh2);
float kc_u = nraw[nr_offset(y - 1, x)][0] + nraw[nr_offset(y - 1, x)][2];
float kc_d = nraw[nr_offset(y + 1, x)][0] + nraw[nr_offset(y + 1, x)][2];
float hv1 = kc_u - (nraw[nr_offset(y - 2, x)][1] + nraw[off][1]) * 0.5f;
float hv2 = kc_d - (nraw[off][1] + nraw[nr_offset(y + 2, x)][1]) * 0.5f;
f[9] = fabsf(hv1 - hv2);
} else {
/* Non-green center: green at ±1, same ch at ±2 */
float hh1 = nraw[nr_offset(y, x - 1)][1] - (nraw[nr_offset(y, x - 2)][ch] + nraw[off][ch]) * 0.5f;
float hh2 = nraw[nr_offset(y, x + 1)][1] - (nraw[off][ch] + nraw[nr_offset(y, x + 2)][ch]) * 0.5f;
f[8] = fabsf(hh1 - hh2);
float hv1 = nraw[nr_offset(y - 1, x)][1] - (nraw[nr_offset(y - 2, x)][ch] + nraw[off][ch]) * 0.5f;
float hv2 = nraw[nr_offset(y + 1, x)][1] - (nraw[off][ch] + nraw[nr_offset(y + 2, x)][ch]) * 0.5f;
f[9] = fabsf(hv1 - hv2);
}
/* Features f[10..15]: all 6 LD (lightness-directed) hue_aprox directions.
 * hue_aprox(G1,G2,C0,C1,C2) = (G1+G2)/2 - (C1+C2-2*C0)/4
 * G1,G2 = green at ±1 neighbors along the two chosen directions.
 * C1,C2 = same-channel values at ±2 in those directions.
 * Cardinal: UD, LR. Diagonal: LU, LD, RU, RD. */
{
float c0 = nraw[off][ch];
float g_u = nraw[nr_offset(y - 1, x)][1];
float g_d = nraw[nr_offset(y + 1, x)][1];
float g_l = nraw[nr_offset(y, x - 1)][1];
float g_r = nraw[nr_offset(y, x + 1)][1];
float c_u2 = nraw[nr_offset(y - 2, x)][ch];
float c_d2 = nraw[nr_offset(y + 2, x)][ch];
float c_l2 = nraw[nr_offset(y, x - 2)][ch];
float c_r2 = nraw[nr_offset(y, x + 2)][ch];
f[10] = (g_u + g_d) * 0.5f - (c_u2 + c_d2 - 2.0f * c0) * 0.25f; // ld_ud
f[11] = (g_l + g_r) * 0.5f - (c_l2 + c_r2 - 2.0f * c0) * 0.25f; // ld_lr
f[12] = (g_l + g_u) * 0.5f - (c_l2 + c_u2 - 2.0f * c0) * 0.25f; // ld_lu
f[13] = (g_l + g_d) * 0.5f - (c_l2 + c_d2 - 2.0f * c0) * 0.25f; // ld_ld
f[14] = (g_r + g_u) * 0.5f - (c_r2 + c_u2 - 2.0f * c0) * 0.25f; // ld_ru
f[15] = (g_r + g_d) * 0.5f - (c_r2 + c_d2 - 2.0f * c0) * 0.25f; // ld_rd
}
}
}
}
/*
 * Gather the precomputed feature vectors of a square neighbourhood
 * centred at image pixel (i, j) into a flat buffer.
 * patch_r: patch radius (e.g. 3 -> 7x7 pixels).
 * Output: patch[(2*patch_r+1)^2 * FEAT_PER_PX] floats, row-major.
 */
void DHTNNF::extract_patch(int i, int j, float *patch, int patch_r)
{
  float *dst = patch;
  const int cx = j + nr_leftmargin; /* center column in nraw coordinates */
  for (int dy = -patch_r; dy <= patch_r; dy++)
  {
    /* Offset of the patch-row center; neighbors are base + dx */
    const int base = ((i + dy) + nr_topmargin) * nr_width + cx;
    for (int dx = -patch_r; dx <= patch_r; dx++)
    {
      memcpy(dst, nn_feat[base + dx], DHTNNNetwork::FEAT_PER_PX * sizeof(float));
      dst += DHTNNNetwork::FEAT_PER_PX;
    }
  }
}
/*
 * Compute interpolated green at non-green pixel (i,j) with a forced direction.
 * Duplicates the make_gline formula but with explicit direction control.
 * kc: known non-green channel at this pixel (0=R, 2=B).
 * use_ver: true=interpolate vertically, false=horizontally.
 * Returns the clamped log-space green estimate (does not write nraw).
 */
float DHTNNF::compute_green_forced(int i, int j, int kc, bool use_ver)
{
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
int dx, dy, dx2, dy2;
float d1, d2;
/* d1/d2: log-space hue (G - mean of same-channel values) towards each
 * of the two neighbors along the chosen axis */
if (use_ver)
{
dx = dx2 = 0;
dy = -1;
dy2 = 1;
d1 = nraw[nr_offset(y - 1, x)][1] - (nraw[nr_offset(y - 2, x)][kc] + nraw[nr_offset(y, x)][kc]) * 0.5f;
d2 = nraw[nr_offset(y + 1, x)][1] - (nraw[nr_offset(y + 2, x)][kc] + nraw[nr_offset(y, x)][kc]) * 0.5f;
}
else
{
dy = dy2 = 0;
dx = 1;
dx2 = -1;
d1 = nraw[nr_offset(y, x + 1)][1] - (nraw[nr_offset(y, x + 2)][kc] + nraw[nr_offset(y, x)][kc]) * 0.5f;
d2 = nraw[nr_offset(y, x - 1)][1] - (nraw[nr_offset(y, x - 2)][kc] + nraw[nr_offset(y, x)][kc]) * 0.5f;
}
/* Weights: inverse squared distance of the known channel at ±2 */
float b1 = safe_inv_dist(calc_dist(nraw[nr_offset(y, x)][kc], nraw[nr_offset(y + dy * 2, x + dx * 2)][kc]));
float b2 = safe_inv_dist(calc_dist(nraw[nr_offset(y, x)][kc], nraw[nr_offset(y + dy2 * 2, x + dx2 * 2)][kc]));
b1 *= b1;
b2 *= b2;
float eg = nraw[nr_offset(y, x)][kc] + (b1 * d1 + b2 * d2) / (b1 + b2);
/* Limit to the neighbor green range plus a small log-space margin
 * (log2(1.2) ≈ 20% linear headroom) to suppress overshoot */
float min, max;
min = MIN(nraw[nr_offset(y + dy, x + dx)][1], nraw[nr_offset(y + dy2, x + dx2)][1]);
max = MAX(nraw[nr_offset(y + dy, x + dx)][1], nraw[nr_offset(y + dy2, x + dx2)][1]);
static const float margin = log2f(1.2f) / FloatChannelMaximum;
min -= margin;
max += margin;
if (eg < min) eg = min;
else if (eg > max) eg = max;
/* Final clamp to the observed green range of the whole image */
if (eg > channel_maximum[1]) eg = channel_maximum[1];
else if (eg < channel_minimum[1]) eg = channel_minimum[1];
return eg;
}
/*
 * Use neural network to refine HV directions (direction flags only, no green write).
 * GPU path via OpenCL when available; CPU fallback runs a per-pixel forward
 * pass. Full green inference is handled by nn_blend_green.
 */
void DHTNNF::nn_refine_hv_dirs(DHTNNNetwork &net)
{
  int iwidth = libraw.imgdata.sizes.iwidth;
  int iheight = libraw.imgdata.sizes.iheight;
  int PR = net.patch_r;
#ifdef USE_OPENCL
  {
    DHTNNCLContext &cl = get_cl_context();
    float *gpu_green = (float *)malloc((size_t)iwidth * iheight * sizeof(float));
    if (gpu_green && cl.run_inference(nraw, ndir, gpu_green, nr_height, nr_width, iwidth, iheight, nr_topmargin, nr_leftmargin, (unsigned int)libraw.imgdata.idata.filters, channel_minimum[1], channel_maximum[1], net))
    {
      free(gpu_green); /* nn_refine_hv_dirs only needs ndir, discard green */
      fprintf(stderr, "DHT-NN: GPU inference done (%dx%d)\n", iwidth, iheight);
      return;
    }
    free(gpu_green);
    fprintf(stderr, "DHT-NN: OpenCL failed, falling back to CPU\n");
  }
#endif
  /* Precompute per-pixel feature vectors for fast patch extraction */
  build_nn_features();
  /* CPU fallback: OpenMP-parallel per-pixel forward pass.
   * Predictions are written to a copy of ndir so that concurrent readers
   * of ndir inside the loop see only the original directions. */
  char *new_ndir = (char *)malloc(nr_height * nr_width);
  if (!new_ndir)
    return; /* OOM: keep the rule-based directions unchanged */
  memcpy(new_ndir, ndir, nr_height * nr_width);
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
  for (int i = PR; i < iheight - PR; i++)
  {
    /* BUGFIX: the patch buffer used to be alloca()'d inside the inner
     * pixel loop. alloca storage is released only when the function
     * returns, so the stack grew by input_size floats per pixel and
     * could overflow on large images. One heap buffer per row
     * (thread-private under OpenMP) is allocated instead. */
    std::vector<float> patch(net.input_size);
    for (int j = PR; j < iwidth - PR; j++)
    {
      int x = j + nr_leftmargin;
      int y = i + nr_topmargin;
      if (ndir[nr_offset(y, x)] & HVSH) continue; /* already decisively sharp */
      extract_patch(i, j, patch.data(), net.patch_r);
      float output[DHTNNNetwork::OUTPUT_SIZE];
      int prediction = net.forward(patch.data(), output);
      new_ndir[nr_offset(y, x)] &= ~(HOR | VER);
      new_ndir[nr_offset(y, x)] |= (prediction == 0) ? HOR : VER;
    }
  }
  memcpy(ndir, new_ndir, nr_height * nr_width);
  free(new_ndir);
}
/*
 * NN inference with soft H/V blend:
 *
 *   float2 hv = softmax(out[0], out[1]);
 *   float H_weight = hv.x, V_weight = hv.y;
 *   float S = sigmoid(out[2]);
 *   float soft = H_weight * H_val + V_weight * V_val;
 *   float hard = H_weight > V_weight ? H_val : V_val;
 *   float green = (1 - S) * soft + S * hard;
 *
 * For GPU path the kernel computes blended green and writes to green_out buffer.
 * For CPU path, green is computed and written here; make_gline skips those pixels.
 */
void DHTNNF::nn_blend_green(DHTNNNetwork &net)
{
  int iwidth = libraw.imgdata.sizes.iwidth;
  int iheight = libraw.imgdata.sizes.iheight;
  int PR = net.patch_r;
#ifdef USE_OPENCL
  {
    /* Allocate (or reuse) per-pixel green-written flags */
    if (!nn_green_written)
      nn_green_written = (bool *)calloc(iwidth * iheight, sizeof(bool));
    else
      memset(nn_green_written, 0, iwidth * iheight * sizeof(bool));
    float *gpu_green = (float *)malloc((size_t)iwidth * iheight * sizeof(float));
    DHTNNCLContext &cl = get_cl_context();
    if (gpu_green && nn_green_written && cl.run_inference(nraw, ndir, gpu_green, nr_height, nr_width, iwidth, iheight, nr_topmargin, nr_leftmargin, (unsigned int)libraw.imgdata.idata.filters, channel_minimum[1], channel_maximum[1], net))
    {
      /* Apply GPU-computed green values to nraw; negative = "not computed" */
      for (int i = 0; i < iheight; i++)
        for (int j = 0; j < iwidth; j++)
        {
          float g = gpu_green[i * iwidth + j];
          if (g >= 0.0f)
          {
            nraw[nr_offset(i + nr_topmargin, j + nr_leftmargin)][1] = g;
            nn_green_written[i * iwidth + j] = true;
          }
        }
      free(gpu_green);
      fprintf(stderr, "DHT-NN: GPU blend done (%dx%d)\n", iwidth, iheight);
      return;
    }
    free(gpu_green);
    fprintf(stderr, "DHT-NN: OpenCL failed, falling back to CPU blend\n");
  }
#endif
  build_nn_features();
  /* Allocate (or reuse) per-pixel green-written flags */
  if (!nn_green_written)
    nn_green_written = (bool *)calloc(iwidth * iheight, sizeof(bool));
  char *new_ndir = (char *)malloc(nr_height * nr_width);
  if (!nn_green_written || !new_ndir)
  {
    /* OOM: skip the NN blend entirely; make_greens will fill all pixels */
    free(new_ndir);
    return;
  }
  memset(nn_green_written, 0, iwidth * iheight * sizeof(bool));
  memcpy(new_ndir, ndir, nr_height * nr_width);
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
  for (int i = PR; i < iheight - PR; i++)
  {
    int js = libraw.COLOR(i, 0) & 1;
    int kc = libraw.COLOR(i, js);
    int j_start = PR + (((PR & 1) != js) ? 1 : 0); // first non-green j >= PR
    /* BUGFIX: the patch buffer used to be alloca()'d per pixel inside the
     * inner loop. alloca storage is released only on function return, so
     * the stack grew without bound over the image. One heap buffer per
     * row (thread-private under OpenMP) is allocated instead. */
    std::vector<float> patch(net.input_size);
    for (int j = j_start; j < iwidth - PR; j += 2)
    {
      int x = j + nr_leftmargin;
      int y = i + nr_topmargin;
      if (ndir[nr_offset(y, x)] & HVSH) continue; /* decisive pixels keep rule-based green */
      extract_patch(i, j, patch.data(), net.patch_r);
      float output[DHTNNNetwork::OUTPUT_SIZE];
      net.forward(patch.data(), output);
      /* Softmax over H/V logits (max-subtracted for numeric stability) */
      float max_hv = output[0] > output[1] ? output[0] : output[1];
      float eh = expf(output[0] - max_hv);
      float ev = expf(output[1] - max_hv);
      float sum_hv = eh + ev;
      float H_weight = eh / sum_hv;
      float V_weight = ev / sum_hv;
      /* Sigmoid sharpness */
      float S = 1.0f / (1.0f + expf(-output[2]));
      /* H and V green estimates */
      float H_val = compute_green_forced(i, j, kc, false);
      float V_val = compute_green_forced(i, j, kc, true);
      /* Blend: S=0 → weighted avg (ambiguous), S=1 → hard select (sharp edge) */
      float soft = H_weight * H_val + V_weight * V_val;
      float hard = H_weight > V_weight ? H_val : V_val;
      float green = (1.0f - S) * soft + S * hard;
      /* Clamp to the observed green range */
      if (green > channel_maximum[1]) green = channel_maximum[1];
      if (green < channel_minimum[1]) green = channel_minimum[1];
      nraw[nr_offset(y, x)][1] = green;
      nn_green_written[i * iwidth + j] = true;
      /* Keep ndir HOR/VER for make_rb */
      new_ndir[nr_offset(y, x)] &= ~(HOR | VER);
      new_ndir[nr_offset(y, x)] |= (H_weight > V_weight) ? HOR : VER;
    }
  }
  memcpy(ndir, new_ndir, nr_height * nr_width);
  free(new_ndir);
}
/*
 * Train the NN using objective ground truth from a downsampled AAHD demosaic.
 *
 * gt_rgb: half-resolution ground truth image in perceptual float space [gt_h * gt_w][3].
 *         All three channels are known at every pixel (from AAHD + 2x downsample).
 * gt_w, gt_h: dimensions of the ground truth image.
 *             NOTE(review): gt_h is currently unused in the body — the loop
 *             bounds come from imgdata.sizes; confirm callers always pass
 *             matching dimensions.
 *
 * v15 training (make_nnt_answers pattern):
 * Output is 3 values: logit(H), logit(V) [softmax], logit(sharpness) [sigmoid].
 * For every non-green pixel, compute all 6 LD hue_aprox estimates and compare
 * each to the known gt_green. Assign soft scores (per JND threshold) and derive:
 *   - target_h / target_v : H/V soft labels (normalized from per-direction scores)
 *   - target_s            : sharpness = f(bestDiff gap)
 * No ambiguous pixel is discarded; ALL non-green pixels get a training signal.
 */
void DHTNNF::train_nn(DHTNNNetwork &net, float lr, float (*gt_rgb)[3], int gt_w, int gt_h)
{
int iwidth = libraw.imgdata.sizes.iwidth;
int iheight = libraw.imgdata.sizes.iheight;
int PR = net.patch_r;
build_nn_features();
static constexpr float JND_ACCEPT = 0.005f; // per-estimate acceptance radius (how close to truth = "good")
static constexpr float JND_DECISIVE = 0.010f; // cardinal gap threshold for sharpness label
const int N_EPOCHS = 3;
/* ── Phase 1: Collect training samples (parallel) ── */
struct Sample {
int i, j;
float target_h; // soft label for HOR (softmax target)
float target_v; // soft label for VER (softmax target)
float target_s; // soft label for sharpness (sigmoid target)
float confidence; // |target_h - target_v|: sample weight for gradient
};
std::vector<Sample> samples;
samples.reserve(iwidth * iheight / 2);
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel
{
#endif
/* Each thread collects into a local vector, merged under a critical
 * section below — avoids contention on the shared vector. */
std::vector<Sample> local_samples;
#if defined(LIBRAW_USE_OPENMP)
#pragma omp for schedule(guided) nowait
#endif
for (int i = PR; i < iheight - PR; i++)
{
for (int j = PR; j < iwidth - PR; j++)
{
int x = j + nr_leftmargin;
int y = i + nr_topmargin;
if (ndir[nr_offset(y, x)] & HVSH) continue;
/* Only non-green Bayer sites produce samples */
int js = libraw.COLOR(i, 0) & 1;
if ((j & 1) != js) continue;
float gt_green = gt_rgb[i * gt_w + j][1];
/* All 6 LD green estimates from precomputed features:
 * f[10]=ld_ud(VER), f[11]=ld_lr(HOR), f[12..15]=diagonals */
const float *f = nn_feat[nr_offset(i + nr_topmargin, j + nr_leftmargin)];
const float aG[6] = { f[10], f[11], f[12], f[13], f[14], f[15] };
/* Per-estimate soft score: linear-in-gap within JND_ACCEPT, else 0.
 * Best estimate always gets full credit regardless. */
float score[6];
int bestI = 0;
float bestDiff = fabsf(gt_green - aG[0]);
for (int ai = 0; ai < 6; ai++)
{
float diff = fabsf(gt_green - aG[ai]);
if (diff < bestDiff) { bestI = ai; bestDiff = diff; }
score[ai] = (diff < JND_ACCEPT) ? 0.9f + 0.1f * (JND_ACCEPT - diff) / JND_ACCEPT : 0.0f;
}
score[bestI] = 1.0f; // best always gets full credit
/* H/V soft labels:
 * UD(0) → purely VER, LR(1) → purely HOR, diagonals(2-5) → 0.5 each */
float h_score = score[1] + 0.5f * (score[2] + score[3] + score[4] + score[5]);
float v_score = score[0] + 0.5f * (score[2] + score[3] + score[4] + score[5]);
float sum_hv = h_score + v_score;
float target_h = (sum_hv > 1e-6f) ? h_score / sum_hv : 0.5f;
float target_v = (sum_hv > 1e-6f) ? v_score / sum_hv : 0.5f;
/* Sharpness: how decisive the cardinal H/V choice is. */
float diff_H = fabsf(gt_green - aG[1]); // ld_lr (cardinal HOR)
float diff_V = fabsf(gt_green - aG[0]); // ld_ud (cardinal VER)
float target_s = tanhf(fabsf(diff_H - diff_V) / JND_DECISIVE);
/* Confidence weight: how decisive the H/V label is.
 * Ambiguous (50/50) samples get near-zero weight → less noisy gradient. */
float confidence = fabsf(target_h - target_v);
local_samples.push_back({i, j, target_h, target_v, target_s, confidence});
}
}
#if defined(LIBRAW_USE_OPENMP)
#pragma omp critical
#endif
{
samples.insert(samples.end(), local_samples.begin(), local_samples.end());
}
#if defined(LIBRAW_USE_OPENMP)
}
#endif
/* Shuffle samples to prevent catastrophic forgetting of previous rows */
std::shuffle(samples.begin(), samples.end(), std::minstd_rand(std::random_device{}()));
int total_samples = (int)samples.size();
/* Count confident samples (|target_h - target_v| > 0.2) for separate accuracy tracking */
int n_confident = 0;
for (int s = 0; s < total_samples; s++)
if (samples[s].confidence > 0.2f) n_confident++;
fprintf(stderr, "DHT-NN train: %d samples (%d confident, %d ambiguous), soft H/V+sharpness labels\n",
total_samples, n_confident, total_samples - n_confident);
/* ── Phase 2: Mini-batch parallel training ── */
int correct_total = 0;
int correct_confident = 0; // accuracy on confident samples only
long long adam_step = net.t;
int nw = net.num_weights();
const int BATCH_SIZE = 256;
if (!net.optimizer_allocated) net.alloc_optimizer();
int bp_max_act, bp_max_wt;
net.backprop_buf_sizes(bp_max_act, bp_max_wt);
int n_threads;
#if defined(LIBRAW_USE_OPENMP)
n_threads = omp_get_max_threads();
#else
n_threads = 1;
#endif
/* Per-thread buffers: forward activations, backprop temps, gradient accumulators.
 * NOTE(review): all malloc results below are unchecked, and both backprop
 * temp buffers are sized with bp_max_act while bp_max_wt is never used —
 * confirm against DHTNNNetwork::backprop_buf_sizes that this is intended. */
float **t_patch = (float **)malloc(n_threads * sizeof(float *));
float ***t_h = (float ***)malloc(n_threads * sizeof(float **));
float **t_bp_da = (float **)malloc(n_threads * sizeof(float *));
float **t_bp_db = (float **)malloc(n_threads * sizeof(float *));
float ***t_acc_gW = (float ***)malloc(n_threads * sizeof(float **));
float ***t_acc_gb = (float ***)malloc(n_threads * sizeof(float **));
/* Separate buffers for averaged gradients (reduction target) */
float **avg_gW = (float **)malloc(nw * sizeof(float *));
float **avg_gb = (float **)malloc(nw * sizeof(float *));
int *t_correct = (int *)calloc(n_threads, sizeof(int));
int *t_correct_conf = (int *)calloc(n_threads, sizeof(int));
for (int t = 0; t < n_threads; t++)
{
t_patch[t] = (float *)malloc(net.input_size * sizeof(float));
t_h[t] = (float **)malloc(net.n_hidden * sizeof(float *));
for (int l = 0; l < net.n_hidden; l++)
t_h[t][l] = (float *)malloc(net.hidden[l] * sizeof(float));
t_bp_da[t] = (float *)malloc(bp_max_act * sizeof(float));
t_bp_db[t] = (float *)malloc(bp_max_act * sizeof(float));
t_acc_gW[t] = (float **)malloc(nw * sizeof(float *));
t_acc_gb[t] = (float **)malloc(nw * sizeof(float *));
for (int l = 0; l < nw; l++)
{
t_acc_gW[t][l] = (float *)malloc(net.weight_out(l) * net.weight_in(l) * sizeof(float));
t_acc_gb[t][l] = (float *)malloc(net.weight_out(l) * sizeof(float));
}
}
/* Allocate averaged gradient buffers */
for (int l = 0; l < nw; l++)
{
avg_gW[l] = (float *)malloc(net.weight_out(l) * net.weight_in(l) * sizeof(float));
avg_gb[l] = (float *)malloc(net.weight_out(l) * sizeof(float));
}
for (int epoch = 0; epoch < N_EPOCHS; epoch++)
{
/* Re-shuffle between epochs (first epoch uses the shuffle above) */
if (epoch > 0)
std::shuffle(samples.begin(), samples.end(), std::minstd_rand(std::random_device{}()));
correct_total = 0;
correct_confident = 0;
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel
#endif
{
#if defined(LIBRAW_USE_OPENMP)
int tid = omp_get_thread_num();
int nt = omp_get_num_threads();
#else
int tid = 0;
int nt = 1;
#endif
/* All threads walk the same batch sequence; work inside each batch
 * is split by the omp-for loops below. */
for (int batch_start = 0; batch_start < total_samples; batch_start += BATCH_SIZE)
{
int batch_end = batch_start + BATCH_SIZE;
if (batch_end > total_samples) batch_end = total_samples;
int batch_sz = batch_end - batch_start;
/* Zero this thread's gradient accumulators */
for (int l = 0; l < nw; l++)
{
memset(t_acc_gW[tid][l], 0, net.weight_out(l) * net.weight_in(l) * sizeof(float));
memset(t_acc_gb[tid][l], 0, net.weight_out(l) * sizeof(float));
}
t_correct[tid] = 0;
t_correct_conf[tid] = 0;
/* Parallel forward + gradient accumulation */
#if defined(LIBRAW_USE_OPENMP)
#pragma omp for schedule(static)
#endif
for (int s = batch_start; s < batch_end; s++)
{
extract_patch(samples[s].i, samples[s].j, t_patch[tid], net.patch_r);
float output[DHTNNNetwork::OUTPUT_SIZE];
int pred = net.forward_train(t_patch[tid], t_h[tid], output);
/* accuracy: argmax(H,V) matches dominant soft label */
int expected = (samples[s].target_h >= samples[s].target_v) ? 0 : 1;
if (pred == expected) {
t_correct[tid]++;
if (samples[s].confidence > 0.2f) t_correct_conf[tid]++;
}
float samp_targets[3] = { samples[s].target_h, samples[s].target_v, samples[s].target_s };
/* Weight gradient by confidence: ambiguous 50/50 samples contribute less */
float sw = 0.1f + 0.9f * samples[s].confidence; // floor=0.1 to keep some signal
net.accumulate_grads(t_patch[tid], t_h[tid], output, samp_targets,
sw,
t_acc_gW[tid], t_acc_gb[tid],
t_bp_da[tid], t_bp_db[tid]);
}
/* implicit barrier after omp for */
/* Parallel gradient reduction + averaging into separate buffers */
{
float inv_batch = 1.0f / batch_sz;
for (int l = 0; l < nw; l++)
{
int wt_sz = net.weight_out(l) * net.weight_in(l);
#if defined(LIBRAW_USE_OPENMP)
#pragma omp for schedule(static)
#endif
for (int i = 0; i < wt_sz; i++)
{
float s = 0.0f;
for (int t = 0; t < nt; t++)
s += t_acc_gW[t][l][i];
avg_gW[l][i] = s * inv_batch;
}
/* implicit barrier */
int b_sz = net.weight_out(l);
#if defined(LIBRAW_USE_OPENMP)
#pragma omp for schedule(static)
#endif
for (int i = 0; i < b_sz; i++)
{
float s = 0.0f;
for (int t = 0; t < nt; t++)
s += t_acc_gb[t][l][i];
avg_gb[l][i] = s * inv_batch;
}
/* implicit barrier */
}
}
/* Adam update + progress reporting (single thread) */
#if defined(LIBRAW_USE_OPENMP)
#pragma omp single
#endif
{
adam_step++;
net.adam_update_all(avg_gW, avg_gb, lr, adam_step);
for (int t = 0; t < nt; t++)
{
correct_total += t_correct[t];
correct_confident += t_correct_conf[t];
}
}
/* implicit barrier after omp single */
}
}
fprintf(stderr, "DHT-NN train: epoch %d/%d, %d samples, accuracy %.1f%% (confident: %.1f%%)\n",
epoch + 1, N_EPOCHS, total_samples,
100.0f * correct_total / (total_samples ? total_samples : 1),
100.0f * correct_confident / (n_confident ? n_confident : 1));
}
/* Cleanup per-thread buffers */
for (int t = 0; t < n_threads; t++)
{
free(t_patch[t]);
for (int l = 0; l < net.n_hidden; l++)
free(t_h[t][l]);
free(t_h[t]);
free(t_bp_da[t]); free(t_bp_db[t]);
for (int l = 0; l < nw; l++)
{
free(t_acc_gW[t][l]); free(t_acc_gb[t][l]);
}
free(t_acc_gW[t]); free(t_acc_gb[t]);
}
free(t_patch); free(t_h);
free(t_bp_da); free(t_bp_db);
free(t_acc_gW); free(t_acc_gb);
/* Cleanup averaged gradient buffers */
for (int l = 0; l < nw; l++)
{
free(avg_gW[l]); free(avg_gb[l]);
}
free(avg_gW); free(avg_gb);
free(t_correct);
free(t_correct_conf);
/* Persist the Adam step count into the network for checkpoint resume */
net.t = adam_step;
fprintf(stderr, "DHT-NN train: %d samples x %d epochs, soft labels, final accuracy %.1f%% (confident: %.1f%%)\n",
total_samples, N_EPOCHS,
100.0f * correct_total / (total_samples ? total_samples : 1),
100.0f * correct_confident / (n_confident ? n_confident : 1));
}
/*
 * q=13: Inference mode.
 * Loads trained weights (versioned filename from dht_nn_filename) and uses
 * the NN to blend green; falls back to the rule-based pipeline if the
 * weights file is not found, and to AHD for non-Bayer filter patterns.
 */
void LibRaw::dht_nn_interpolate()
{
  const unsigned filt = imgdata.idata.filters;
  const bool plain_bayer = filt == 0x16161616 || filt == 0x61616161 ||
                           filt == 0x49494949 || filt == 0x94949494;
  if (!plain_bayer)
  {
    ahd_interpolate();
    return;
  }
  DHTNNNetwork net;
  std::string weights_path = dht_nn_filename(net, "weights");
  const bool have_nn = net.load(weights_path.c_str());
  if (have_nn)
    fprintf(stderr, "DHT-NN: loaded weights from %s\n", weights_path.c_str());
  else
    fprintf(stderr, "DHT-NN: weights not found at %s, falling back to rule-based\n", weights_path.c_str());
  DHTNNF dht(*this);
  dht.hide_hots();
  if (have_nn)
  {
    /* NN path: initial detection + NN refinement */
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
    for (int row = 0; row < imgdata.sizes.iheight; ++row)
      dht.make_hv_dline(row);
    dht.nn_blend_green(net);
  }
  else
  {
    /* Fallback: full rule-based pipeline */
    dht.make_hv_dirs();
  }
  dht.make_greens(); /* handles HVSH + margin pixels nn_blend_green skipped */
  dht.make_diag_dirs();
  dht.make_rb();
  dht.restore_hots();
  dht.copy_to_image();
}
/*
 * q=14: Training mode.
 * 1. AAHD-demosaic the full-resolution raw → full RGB at every pixel.
 * 2. Downsample 2× (in linear space) → half-res ground truth with all channels known.
 * 3. Re-mosaic the half-res image → synthetic Bayer raw.
 * 4. Build DHTNNF from the synthetic half-res raw, run initial HV detection.
 * 5. For each non-green pixel: compare H/V green interpolation to known GT → label.
 * 6. Train the NN on half-res patches with objective ground truth.
 * 7. Restore original raw and demosaic with NN.
 */
void LibRaw::dht_nn_interpolate_train()
{
if (imgdata.idata.filters != 0x16161616 && imgdata.idata.filters != 0x61616161 && imgdata.idata.filters != 0x49494949 &&
imgdata.idata.filters != 0x94949494)
{
/* Only plain Bayer patterns are supported; defer to AHD otherwise */
ahd_interpolate();
return;
}
/* Lower learning rate when resuming so an existing model is not disturbed */
const float lr_fresh = 0.001f;
const float lr_resume = 0.0005f;
float lr = lr_fresh;
DHTNNNetwork net;
std::string ckpt_path = dht_nn_filename(net, "ckpt");
std::string weights_path = dht_nn_filename(net, "weights");
if (net.load_checkpoint(ckpt_path.c_str()))
{
lr = lr_resume;
fprintf(stderr, "DHT-NN: resumed from checkpoint %s (lr=%.6f)\n", ckpt_path.c_str(), lr);
}
else if (net.load(weights_path.c_str()))
{
fprintf(stderr, "DHT-NN: loaded weights from %s, starting fresh training state (lr=%.6f)\n", weights_path.c_str(), lr);
net.alloc_optimizer();
}
else
{
fprintf(stderr, "DHT-NN: no checkpoint/weights found, training from random init (lr=%.6f)\n", lr);
net.alloc_optimizer();
}
int iwidth = imgdata.sizes.iwidth;
int iheight = imgdata.sizes.iheight;
/* ── Step 1: Save original raw mosaic ──
 * NOTE(review): malloc results in this function are unchecked. */
size_t img_size = (size_t)iwidth * iheight;
ushort(*saved_image)[4] = (ushort(*)[4])malloc(img_size * sizeof(ushort[4]));
memcpy(saved_image, imgdata.image, img_size * sizeof(ushort[4]));
/* ── Step 2: AAHD demosaic → full RGB in imgdata.image ── */
fprintf(stderr, "DHT-NN train: running AAHD demosaic for ground truth...\n");
aahd_interpolate();
/* ── Step 3: Downsample 2× in linear space → half-res ground truth ──
 * Odd trailing row/column (if any) is simply dropped by the division. */
int half_w = iwidth / 2;
int half_h = iheight / 2;
float3 *gt_rgb = (float3 *)malloc(half_w * half_h * sizeof(float3));
for (int i = 0; i < half_h; i++)
{
for (int j = 0; j < half_w; j++)
{
int si = i * 2, sj = j * 2;
for (int c = 0; c < 3; c++)
{
/* Average 2×2 block in linear space, convert to perceptual float.
 * imgdata.image channels: [0]=R, [1]=G, [2]=B, [3]=G2; use [c] for c=0,1,2.
 * +0.5f rounds to nearest before the ushort truncation. */
float sum = 0;
sum += (float)imgdata.image[(si + 0) * iwidth + (sj + 0)][c];
sum += (float)imgdata.image[(si + 0) * iwidth + (sj + 1)][c];
sum += (float)imgdata.image[(si + 1) * iwidth + (sj + 0)][c];
sum += (float)imgdata.image[(si + 1) * iwidth + (sj + 1)][c];
gt_rgb[i * half_w + j][c] = raw_to_float((ushort)(sum * 0.25f + 0.5f));
}
}
}
fprintf(stderr, "DHT-NN train: ground truth %dx%d -> %dx%d\n", iwidth, iheight, half_w, half_h);
/* ── Step 4: Restore original raw and build half-res synthetic raw ── */
/* Re-mosaic: for each half-res pixel, the "known" channel comes from gt_rgb,
 * and the Bayer pattern is preserved (it's 2×2-periodic, same at half res). */
memcpy(imgdata.image, saved_image, img_size * sizeof(ushort[4]));
/* Temporarily set half-res dimensions so DHTNNF below sees the synthetic raw */
int saved_iwidth = imgdata.sizes.iwidth;
int saved_iheight = imgdata.sizes.iheight;
imgdata.sizes.iwidth = half_w;
imgdata.sizes.iheight = half_h;
/* Write synthetic half-res raw into imgdata.image.
 * For each pixel, only the Bayer-known channel has a value; others are 0.
 * We convert from perceptual float back to ushort for the known channel. */
memset(imgdata.image, 0, (size_t)half_w * half_h * sizeof(ushort[4]));
for (int i = 0; i < half_h; i++)
{
for (int j = 0; j < half_w; j++)
{
int ch = COLOR(i, j);
if (ch == 3) ch = 1; /* fold G2 into G */
imgdata.image[i * half_w + j][ch] = float_to_raw(gt_rgb[i * half_w + j][ch]);
}
}
/* ── Step 5: Build DHTNNF from half-res synthetic raw, detect initial HV dirs ── */
{
DHTNNF dht_half(*this);
/* Update channel limits from ground truth to avoid clipping errors during training.
 * The synthetic raw image has discarded pixels which might contain the true extrema.
 */
for (int c = 0; c < 3; ++c)
{
dht_half.channel_maximum[c] = -1e30f;
dht_half.channel_minimum[c] = 1e30f;
}
for (int i = 0; i < half_h; i++)
{
for (int j = 0; j < half_w; j++)
{
for (int c = 0; c < 3; c++)
{
float val = gt_rgb[i * half_w + j][c];
if (val > dht_half.channel_maximum[c]) dht_half.channel_maximum[c] = val;
if (val < dht_half.channel_minimum[c]) dht_half.channel_minimum[c] = val;
}
}
}
dht_half.hide_hots();
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
for (int i = 0; i < half_h; ++i)
dht_half.make_hv_dline(i);
/* ── Step 6: Train NN with ground truth labels ──
 * NOTE(review): passes float3* where train_nn takes float(*)[3];
 * assumes float3 is layout-compatible with float[3] — confirm typedef. */
dht_half.train_nn(net, lr, gt_rgb, half_w, half_h);
}
free(gt_rgb);
/* ── Step 7: Restore original dimensions and raw ── */
imgdata.sizes.iwidth = saved_iwidth;
imgdata.sizes.iheight = saved_iheight;
memcpy(imgdata.image, saved_image, img_size * sizeof(ushort[4]));
free(saved_image);
/* Save checkpoint and inference weights */
if (net.save_checkpoint(ckpt_path.c_str())) fprintf(stderr, "DHT-NN: saved checkpoint to %s\n", ckpt_path.c_str());
if (net.save(weights_path.c_str()))
fprintf(stderr, "DHT-NN: saved weights to %s (%d floats, %zu bytes)\n", weights_path.c_str(), net.total_weights(), (size_t)net.total_weights() * sizeof(float));
/* Now demosaic the original full-res raw using the trained NN */
DHTNNF dht(*this);
dht.hide_hots();
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
for (int i = 0; i < iheight; ++i)
dht.make_hv_dline(i);
dht.nn_blend_green(net);
dht.make_greens(); /* handles HVSH + margin pixels nn_blend_green skipped */
dht.make_diag_dirs();
dht.make_rb();
dht.restore_hots();
dht.copy_to_image();
}
/*
 * Benchmark: compare NN direction refinement against rule-based refinement.
 *
 * Starting from the current (unrefined) ndir state, runs the NN refinement,
 * snapshots its result, restores the unrefined state, runs the rule-based
 * refinement passes, then prints agreement metrics to stderr.
 * On return, ndir holds the rule-based directions.
 *
 * Fix vs. original: the two scratch direction buffers were raw malloc()s
 * with no null check and manual free() — replaced with std::vector<char>
 * (RAII: exception/early-return safe, no leak, no null-deref).
 */
void DHTNNF::benchmark_nn(DHTNNNetwork &net)
{
  int iwidth = libraw.imgdata.sizes.iwidth;
  int iheight = libraw.imgdata.sizes.iheight;
  const size_t ndir_size = (size_t)nr_height * nr_width;
  /* Save unrefined directions */
  std::vector<char> initial_ndir(ndir, ndir + ndir_size);
  /* Run NN refinement and snapshot its result */
  nn_refine_hv_dirs(net);
  std::vector<char> nn_ndir(ndir, ndir + ndir_size);
  /* Restore the unrefined state and run the rule-based refinement passes
   * (two alternating-parity HV passes, then the inverse-HV pass). */
  memcpy(ndir, initial_ndir.data(), ndir_size);
  for (int i = 0; i < iheight; i++)
    refine_hv_dirs(i, i & 1);
  for (int i = 0; i < iheight; i++)
    refine_hv_dirs(i, (i & 1) ^ 1);
  for (int i = 0; i < iheight; i++)
    refine_ihv_dirs(i);
  /* Compare NN vs rule-based (skip a 3-pixel border where context is partial) */
  long long total = 0, agree = 0;
  long long hor_agree = 0, ver_agree = 0;
  long long nn_hor = 0, nn_ver = 0, rb_hor = 0, rb_ver = 0;
  for (int i = 3; i < iheight - 3; ++i)
  {
    for (int j = 3; j < iwidth - 3; ++j)
    {
      int off = nr_offset(i + nr_topmargin, j + nr_leftmargin);
      char nn_d = nn_ndir[off];
      char rb_d = ndir[off];
      bool nn_is_hor = (nn_d & HOR) != 0;
      bool nn_is_ver = (nn_d & VER) != 0;
      bool rb_is_hor = (rb_d & HOR) != 0;
      bool rb_is_ver = (rb_d & VER) != 0;
      /* Skip pixels where either method is ambiguous (both or neither flag set) */
      if (nn_is_hor == nn_is_ver) continue;
      if (rb_is_hor == rb_is_ver) continue;
      total++;
      if (nn_is_hor) nn_hor++;
      else nn_ver++;
      if (rb_is_hor) rb_hor++;
      else rb_ver++;
      if (nn_is_hor == rb_is_hor)
      {
        agree++;
        if (nn_is_hor) hor_agree++;
        else ver_agree++;
      }
    }
  }
  fprintf(stderr, "DHT-NN test: %lld decisive pixels\n", total);
  if (total > 0)
  {
    fprintf(stderr, "DHT-NN test: agreement: %lld / %lld = %.2f%%\n", agree, total, 100.0 * agree / total);
    fprintf(stderr, "DHT-NN test: NN HOR=%lld VER=%lld (%.1f%% / %.1f%%)\n", nn_hor, nn_ver, 100.0 * nn_hor / total, 100.0 * nn_ver / total);
    fprintf(stderr, "DHT-NN test: RB HOR=%lld VER=%lld (%.1f%% / %.1f%%)\n", rb_hor, rb_ver, 100.0 * rb_hor / total, 100.0 * rb_ver / total);
    if (rb_hor > 0) fprintf(stderr, "DHT-NN test: HOR precision: %lld / %lld = %.2f%%\n", hor_agree, rb_hor, 100.0 * hor_agree / rb_hor);
    if (rb_ver > 0) fprintf(stderr, "DHT-NN test: VER precision: %lld / %lld = %.2f%%\n", ver_agree, rb_ver, 100.0 * ver_agree / rb_ver);
  }
  /* ndir now contains rule-based directions */
}
/*
* q=15: Test / benchmark mode.
* Loads trained weights, runs both NN and rule-based direction refinement,
* compares them and prints accuracy metrics, then demosaics using rule-based
* directions as the reference output.
*/
void LibRaw::dht_nn_interpolate_test()
{
if (imgdata.idata.filters != 0x16161616 && imgdata.idata.filters != 0x61616161 && imgdata.idata.filters != 0x49494949 &&
imgdata.idata.filters != 0x94949494)
{
ahd_interpolate();
return;
}
DHTNNNetwork net;
std::string weights_path = dht_nn_filename(net, "weights");
bool have_nn = net.load(weights_path.c_str());
if (!have_nn)
{
fprintf(stderr, "DHT-NN test: weights not found at %s, nothing to benchmark\n", weights_path.c_str());
DHTNNF dht(*this);
dht.hide_hots();
dht.make_hv_dirs();
dht.make_greens();
dht.make_diag_dirs();
dht.make_rb();
dht.restore_hots();
dht.copy_to_image();
return;
}
fprintf(stderr, "DHT-NN test: loaded weights from %s\n", weights_path.c_str());
DHTNNF dht(*this);
dht.hide_hots();
/* Initial HV detection (unrefined) */
#if defined(LIBRAW_USE_OPENMP)
#pragma omp parallel for schedule(guided)
#endif
for (int i = 0; i < imgdata.sizes.iheight; ++i)
dht.make_hv_dline(i);
/* Benchmark: NN vs rule-based, prints metrics, leaves ndir rule-based */
dht.benchmark_nn(net);
/* Demosaic using rule-based directions (reference output) */
dht.make_greens();
dht.make_diag_dirs();
dht.make_rb();
dht.restore_hots();
dht.copy_to_image();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment