Last active
February 20, 2026 17:06
-
-
Save jef-sure/c0ff8bdf894a77fb4dce4351b4076524 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* -*- C++ -*- | |
| * File: dht_nn_network.h | |
| * Copyright 2026 Anton Petrusevich | |
| * | |
| * Neural network for DHT demosaicing direction refinement. | |
| * Architecture: input_size -> N hidden layers (LeakyReLU α=0.01) -> 3 (softmax H/V + sigmoid S) | |
| * Input: (2*patch_r+1)^2 patch x 16 features/pixel | |
| * Output: logit(HOR), logit(VER) [softmax], logit(sharpness) [sigmoid] | |
| * | |
| * Patch radius, number and sizes of hidden layers are runtime-configurable. | |
| * The binary file format is self-describing (stores architecture). | |
| * | |
| * This code is licensed under one of two licenses as you choose: | |
| * | |
| * 1. GNU LESSER GENERAL PUBLIC LICENSE version 2.1 | |
| * (See file LICENSE.LGPL provided in LibRaw distribution archive for details). | |
| * | |
| * 2. COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0 | |
| * (See file LICENSE.CDDL provided in LibRaw distribution archive for details). | |
| */ | |
| #ifndef DHT_NN_NETWORK_H | |
| #define DHT_NN_NETWORK_H | |
| #include <cmath> | |
| #include <cstdio> | |
| #include <cstdlib> | |
| #include <cstring> | |
| #include <cstdint> | |
| #include <alloca.h> | |
struct DHTNNNetwork
{
    /* Fixed constants */
    static const int FEAT_PER_PX = 16; // nraw[3], one-hot bayer[3], hue_h, hue_v, hue_change_h, hue_change_v, ld_ud, ld_lr, ld_lu, ld_ld, ld_ru, ld_rd
    static const int OUTPUT_SIZE = 3;  // HOR=0, VER=1, S=2 (sharpness)
    static const uint32_t MAGIC = 0x4E4E4448; // "HDNN"
    static const uint32_t VERSION = 16;       // v16: LeakyReLU (alpha=0.01) instead of ReLU
    /* Upper bound on layer count accepted from a file; rejects corrupt headers
     * before they drive a huge malloc. */
    static const uint32_t MAX_FILE_LAYERS = 1024;

    /* Runtime architecture (set at construction or load time) */
    int patch_r;    // e.g. 2, 3, 4, ...
    int patch_side; // 2*patch_r + 1
    int patch_size; // patch_side^2
    int input_size; // patch_size * FEAT_PER_PX
    int n_hidden;   // number of hidden layers
    int *hidden;    // hidden[n_hidden]: size of each hidden layer

    /* Dynamically allocated weight arrays (row-major).
     * num_weights() = n_hidden + 1 weight matrices:
     *   W[0]:        input_size -> hidden[0]
     *   W[l]:        hidden[l-1] -> hidden[l]   (for 0 < l < n_hidden)
     *   W[n_hidden]: hidden[n_hidden-1] -> OUTPUT_SIZE
     */
    float **W, **b;
    /* Adam optimizer state */
    float **mW, **mb; // 1st moment
    float **vW, **vb; // 2nd moment
    long long t;      // timestep
    bool weights_allocated;
    bool optimizer_allocated;

    /* Helpers: number of weight matrices and fan-in/fan-out of matrix l */
    int num_weights() const { return n_hidden + 1; }
    int weight_out(int l) const { return (l < n_hidden) ? hidden[l] : OUTPUT_SIZE; }
    int weight_in(int l) const { return (l == 0) ? input_size : hidden[l - 1]; }

    /* Default: patch_r=5 (11x11), 4 hidden layers (256, 128, 64, 32).
     * If _hidden is NULL the default topology is used regardless of _n_hidden. */
    DHTNNNetwork(int _patch_r = 5, int _n_hidden = 4, const int *_hidden = NULL)
        : patch_r(0), patch_side(0), patch_size(0), input_size(0),
          n_hidden(0), hidden(NULL), W(NULL), b(NULL),
          mW(NULL), mb(NULL), vW(NULL), vb(NULL), t(0),
          weights_allocated(false), optimizer_allocated(false)
    {
        static const int default_hidden[] = {256, 128, 64, 32};
        if (!_hidden)
        {
            _hidden = default_hidden;
            _n_hidden = 4;
        }
        set_arch(_patch_r, _n_hidden, _hidden);
        init_random();
    }

    ~DHTNNNetwork()
    {
        free_optimizer();
        free_weights();
        free(hidden);
    }

    /* Reconfigure architecture — reallocates weight arrays.
     * Any optimizer state is discarded (free_weights() drops it first). */
    void set_arch(int _patch_r, int _n_hidden, const int *_hidden)
    {
        free_weights();
        patch_r = _patch_r;
        patch_side = 2 * patch_r + 1;
        patch_size = patch_side * patch_side;
        input_size = patch_size * FEAT_PER_PX;
        n_hidden = _n_hidden;
        free(hidden);
        hidden = (int *)malloc(n_hidden * sizeof(int));
        for (int i = 0; i < n_hidden; i++)
            hidden[i] = _hidden[i];
        alloc_weights();
    }

    /* Total number of floats across all weight matrices and bias vectors */
    int total_weights() const
    {
        int total = 0;
        int nw = num_weights();
        for (int l = 0; l < nw; l++)
            total += weight_out(l) * weight_in(l) + weight_out(l);
        return total;
    }

    void alloc_weights()
    {
        if (weights_allocated) return;
        int nw = num_weights();
        W = (float **)calloc(nw, sizeof(float *));
        b = (float **)calloc(nw, sizeof(float *));
        for (int l = 0; l < nw; l++)
        {
            W[l] = (float *)calloc(weight_out(l) * weight_in(l), sizeof(float));
            b[l] = (float *)calloc(weight_out(l), sizeof(float));
        }
        weights_allocated = true;
    }

    void free_weights()
    {
        /* Optimizer shapes depend on the current architecture, so it must go first */
        free_optimizer();
        if (!weights_allocated) return;
        int nw = num_weights();
        for (int l = 0; l < nw; l++)
        {
            free(W[l]);
            free(b[l]);
        }
        free(W);
        free(b);
        W = b = NULL;
        weights_allocated = false;
    }

    /* He (Kaiming) uniform initialization: W ~ U(-sqrt(6/fan_in), +sqrt(6/fan_in)),
     * biases zeroed. (Previously mislabeled "Xavier"; Glorot would use 6/(fan_in+fan_out).) */
    void init_random()
    {
        if (!weights_allocated) return;
        int nw = num_weights();
        for (int l = 0; l < nw; l++)
        {
            int in_sz = weight_in(l);
            int out_sz = weight_out(l);
            float scale = sqrtf(6.0f / in_sz);
            for (int i = 0; i < out_sz * in_sz; i++)
                W[l][i] = scale * ((float)rand() / RAND_MAX - 0.5f) * 2.0f;
            for (int i = 0; i < out_sz; i++)
                b[l][i] = 0.0f;
        }
    }

    /*
     * Forward pass (inference).
     * input[input_size] -> output[OUTPUT_SIZE] (raw logits)
     * Returns argmax of the first two logits: 0=HOR, 1=VER.
     * Uses two ping-pong alloca buffers for intermediate activations;
     * hidden layers apply LeakyReLU (alpha=0.01), the output layer is linear.
     */
    int forward(const float *input, float *output) const
    {
        int nw = num_weights();
        int max_h = OUTPUT_SIZE;
        for (int i = 0; i < n_hidden; i++)
            if (hidden[i] > max_h) max_h = hidden[i];
        float *buf[2];
        buf[0] = (float *)alloca(max_h * sizeof(float));
        buf[1] = (float *)alloca(max_h * sizeof(float));
        const float *prev = input;
        for (int l = 0; l < nw; l++)
        {
            int out_sz = weight_out(l);
            int in_sz = weight_in(l);
            float *cur = (l < nw - 1) ? buf[l & 1] : output;
            for (int o = 0; o < out_sz; o++)
            {
                float sum = b[l][o];
                const float *row = W[l] + o * in_sz;
                for (int k = 0; k < in_sz; k++)
                    sum += prev[k] * row[k];
                cur[o] = (l < nw - 1) ? (sum > 0.0f ? sum : 0.01f * sum) : sum;
            }
            prev = cur;
        }
        return output[0] > output[1] ? 0 : 1;
    }

    /*
     * Forward pass with saved activations for backpropagation.
     * Caller must provide h[n_hidden] arrays, each h[l] of size hidden[l],
     * and output[OUTPUT_SIZE]. h[l] stores post-LeakyReLU activations.
     */
    int forward_train(const float *input, float **h, float *output) const
    {
        int nw = num_weights();
        const float *prev = input;
        for (int l = 0; l < nw; l++)
        {
            int out_sz = weight_out(l);
            int in_sz = weight_in(l);
            float *cur = (l < n_hidden) ? h[l] : output;
            for (int o = 0; o < out_sz; o++)
            {
                float sum = b[l][o];
                const float *row = W[l] + o * in_sz;
                for (int k = 0; k < in_sz; k++)
                    sum += prev[k] * row[k];
                cur[o] = (l < n_hidden) ? (sum > 0.0f ? sum : 0.01f * sum) : sum;
            }
            prev = cur;
        }
        return output[0] > output[1] ? 0 : 1;
    }

    /* Allocate Adam optimizer buffers (zeroed; timestep reset) */
    void alloc_optimizer()
    {
        if (optimizer_allocated) return;
        if (!weights_allocated) alloc_weights();
        int nw = num_weights();
        mW = (float **)calloc(nw, sizeof(float *));
        mb = (float **)calloc(nw, sizeof(float *));
        vW = (float **)calloc(nw, sizeof(float *));
        vb = (float **)calloc(nw, sizeof(float *));
        for (int l = 0; l < nw; l++)
        {
            int szW = weight_out(l) * weight_in(l);
            int szb = weight_out(l);
            mW[l] = (float *)calloc(szW, sizeof(float));
            mb[l] = (float *)calloc(szb, sizeof(float));
            vW[l] = (float *)calloc(szW, sizeof(float));
            vb[l] = (float *)calloc(szb, sizeof(float));
        }
        optimizer_allocated = true;
        t = 0;
    }

    void free_optimizer()
    {
        if (!optimizer_allocated) return;
        int nw = num_weights();
        for (int l = 0; l < nw; l++)
        {
            free(mW[l]);
            free(mb[l]);
            free(vW[l]);
            free(vb[l]);
        }
        free(mW);
        free(mb);
        free(vW);
        free(vb);
        mW = mb = vW = vb = NULL;
        optimizer_allocated = false;
        t = 0;
    }

    /*
     * Adam parameter update (single-threaded).
     * m[] and v[] are the moment buffers; lr_t must already be bias-corrected
     * (see adam_lr_t).
     */
    static void adam_update(float *param, float *m, float *v, const float *grad,
                            int size, float lr_t)
    {
        const float beta1 = 0.9f;
        const float beta2 = 0.999f;
        const float eps = 1e-8f;
        for (int i = 0; i < size; i++)
        {
            float g = grad[i];
            m[i] = beta1 * m[i] + (1.0f - beta1) * g;
            v[i] = beta2 * v[i] + (1.0f - beta2) * g * g;
            param[i] += -lr_t * m[i] / (sqrtf(v[i]) + eps);
        }
    }

    /* Compute the bias-corrected learning rate for a given timestep.
     * lr_t = lr * sqrt(1 - beta2^t) / (1 - beta1^t); at t=0 both corrections
     * are zero and the original expression was 0/0 (NaN), so clamp t to 1. */
    static float adam_lr_t(float lr, long long t)
    {
        const float beta1 = 0.9f;
        const float beta2 = 0.999f;
        if (t < 1) t = 1; // avoid 0/0 on the very first step
        float b1t = powf(beta1, (float)t);
        float b2t = powf(beta2, (float)t);
        return lr * sqrtf(1.0f - b2t) / (1.0f - b1t);
    }

    /*
     * Compute buffer sizes needed by backward().
     * max_act: maximum activation size (for delta buffers and bias grad)
     * max_wt:  maximum weight matrix size (for weight grad buffer)
     */
    void backprop_buf_sizes(int &max_act, int &max_wt) const
    {
        int nw = num_weights();
        max_act = OUTPUT_SIZE;
        max_wt = 0;
        for (int l = 0; l < nw; l++)
        {
            int out_sz = weight_out(l), in_sz = weight_in(l);
            if (out_sz > max_act) max_act = out_sz;
            if (in_sz > max_act) max_act = in_sz;
            int wsz = out_sz * in_sz;
            if (wsz > max_wt) max_wt = wsz;
        }
    }

    /*
     * Backward pass with Adam update for a single sample.
     * thread_mW/mb/vW/vb: per-thread moment buffers (same shape as W/b).
     * bp_delta_a/b, bp_gW, bp_gb: pre-allocated temp buffers sized per
     * backprop_buf_sizes().
     * iter: global sample timestep for bias-correction (computed once per call).
     * targets[3]: soft labels {target_h, target_v, target_s}
     */
    void backward(const float *input, float *const *h, const float *output,
                  const float *targets, float lr, long long iter,
                  float **thread_mW, float **thread_mb,
                  float **thread_vW, float **thread_vb,
                  float *bp_delta_a, float *bp_delta_b,
                  float *bp_gW, float *bp_gb)
    {
        int nw = num_weights();
        /* Pre-compute bias-corrected lr for this timestep (one powf pair total) */
        float lr_t = adam_lr_t(lr, iter);
        /* Softmax of H/V logits -> CE gradient; sigmoid of sharpness -> BCE gradient */
        float max_hv = output[0] > output[1] ? output[0] : output[1];
        float exp_h = expf(output[0] - max_hv);
        float exp_v = expf(output[1] - max_hv);
        float sum_hv = exp_h + exp_v;
        float p_s = 1.0f / (1.0f + expf(-output[2]));
        /* dL/d(output): softmax CE for [0,1], sigmoid BCE for [2] */
        float dout[OUTPUT_SIZE];
        dout[0] = exp_h / sum_hv - targets[0];
        dout[1] = exp_v / sum_hv - targets[1];
        dout[2] = p_s - targets[2];
        float *delta = bp_delta_a;
        float *new_delta = bp_delta_b;
        float *gW = bp_gW;
        float *gb = bp_gb;
        memcpy(delta, dout, OUTPUT_SIZE * sizeof(float));
        for (int l = nw - 1; l >= 0; l--)
        {
            int out_sz = weight_out(l);
            int in_sz = weight_in(l);
            const float *layer_input = (l == 0) ? input : h[l - 1];
            /* Compute weight and bias gradients */
            for (int o = 0; o < out_sz; o++)
            {
                gb[o] = delta[o];
                for (int k = 0; k < in_sz; k++)
                    gW[o * in_sz + k] = delta[o] * layer_input[k];
            }
            /* Propagate delta to previous hidden layer (before weight update).
             * LeakyReLU preserves sign, so h[l-1][k] > 0 iff the pre-activation
             * was > 0 and the derivative is 1 (else 0.01). */
            if (l > 0)
            {
                for (int k = 0; k < in_sz; k++)
                {
                    float sum = 0.0f;
                    for (int o = 0; o < out_sz; o++)
                        sum += W[l][o * in_sz + k] * delta[o];
                    new_delta[k] = h[l - 1][k] > 0.0f ? sum : 0.01f * sum;
                }
            }
            /* Adam update using thread-local moments (no race on m/v) */
            adam_update(W[l], thread_mW[l], thread_vW[l], gW, out_sz * in_sz, lr_t);
            adam_update(b[l], thread_mb[l], thread_vb[l], gb, out_sz, lr_t);
            /* Swap delta buffers */
            if (l > 0)
            {
                float *tmp = delta;
                delta = new_delta;
                new_delta = tmp;
            }
        }
    }

    /*
     * Accumulate gradients for a single sample into per-thread buffers.
     * Thread-safe: only reads from W[] (shared), writes to acc_gW/acc_gb
     * (thread-local).
     * acc_gW[l][out*in], acc_gb[l][out]: caller must zero before first call
     * in a batch.
     * bp_delta_a, bp_delta_b: pre-allocated temp buffers (backprop_buf_sizes
     * max_act).
     * targets[3]: soft labels {target_h, target_v, target_s}
     * sample_weight: multiplier on the gradient (default 1.0f). Higher = more
     * influence.
     */
    void accumulate_grads(const float *input, float *const *h, const float *output,
                          const float *targets, float sample_weight,
                          float **acc_gW, float **acc_gb,
                          float *bp_delta_a, float *bp_delta_b) const
    {
        int nw = num_weights();
        float max_hv = output[0] > output[1] ? output[0] : output[1];
        float exp_h = expf(output[0] - max_hv);
        float exp_v = expf(output[1] - max_hv);
        float sum_hv = exp_h + exp_v;
        float p_s = 1.0f / (1.0f + expf(-output[2]));
        float dout[OUTPUT_SIZE];
        dout[0] = (exp_h / sum_hv - targets[0]) * sample_weight;
        dout[1] = (exp_v / sum_hv - targets[1]) * sample_weight;
        dout[2] = (p_s - targets[2]) * sample_weight;
        float *delta = bp_delta_a;
        float *new_delta = bp_delta_b;
        memcpy(delta, dout, OUTPUT_SIZE * sizeof(float));
        for (int l = nw - 1; l >= 0; l--)
        {
            int out_sz = weight_out(l);
            int in_sz = weight_in(l);
            const float *layer_input = (l == 0) ? input : h[l - 1];
            for (int o = 0; o < out_sz; o++)
            {
                float d = delta[o];
                acc_gb[l][o] += d;
                for (int k = 0; k < in_sz; k++)
                    acc_gW[l][o * in_sz + k] += d * layer_input[k];
            }
            if (l > 0)
            {
                for (int k = 0; k < in_sz; k++)
                {
                    float sum = 0.0f;
                    for (int o = 0; o < out_sz; o++)
                        sum += W[l][o * in_sz + k] * delta[o];
                    new_delta[k] = h[l - 1][k] > 0.0f ? sum : 0.01f * sum;
                }
                float *tmp = delta;
                delta = new_delta;
                new_delta = tmp;
            }
        }
    }

    /*
     * Apply Adam update to all layers using pre-averaged gradients.
     * avg_gW[l], avg_gb[l]: gradient arrays (already divided by batch size).
     * Modifies W[], b[], mW[], mb[], vW[], vb[].
     */
    void adam_update_all(float **avg_gW, float **avg_gb, float lr, long long iter)
    {
        float lr_t = adam_lr_t(lr, iter);
        int nw = num_weights();
        for (int l = 0; l < nw; l++)
        {
            adam_update(W[l], mW[l], vW[l], avg_gW[l], weight_out(l) * weight_in(l), lr_t);
            adam_update(b[l], mb[l], vb[l], avg_gb[l], weight_out(l), lr_t);
        }
    }

    /*
     * Save inference weights (and optional optimizer state) to a binary file.
     * Format: magic(4) | version(4) | endian_check(4) | n_layers(4) |
     *         layer_sizes[n_layers](4 each) | weights | [optimizer state]
     * The top bit of the version word flags the presence of optimizer state.
     * Every fwrite and the final fclose are checked so that a short write
     * (e.g. full disk) is reported as failure instead of silent success.
     */
    bool save(const char *path, bool save_optimizer = false) const
    {
        FILE *f = fopen(path, "wb");
        if (!f) return false;
        bool ok = true;
        uint32_t magic = MAGIC;
        uint32_t ver = VERSION | (save_optimizer && optimizer_allocated ? 0x80000000u : 0);
        float endian_check = 1.0f;
        uint32_t n_layers = (uint32_t)(n_hidden + 2);
        ok = ok && fwrite(&magic, 4, 1, f) == 1;
        ok = ok && fwrite(&ver, 4, 1, f) == 1;
        ok = ok && fwrite(&endian_check, 4, 1, f) == 1;
        ok = ok && fwrite(&n_layers, 4, 1, f) == 1;
        /* Layer sizes: [input_size, hidden[0], ..., hidden[n_hidden-1], OUTPUT_SIZE] */
        uint32_t lsz;
        lsz = (uint32_t)input_size;
        ok = ok && fwrite(&lsz, 4, 1, f) == 1;
        for (int i = 0; i < n_hidden; i++)
        {
            lsz = (uint32_t)hidden[i];
            ok = ok && fwrite(&lsz, 4, 1, f) == 1;
        }
        lsz = (uint32_t)OUTPUT_SIZE;
        ok = ok && fwrite(&lsz, 4, 1, f) == 1;
        /* Weights */
        int nw = num_weights();
        for (int l = 0; l < nw; l++)
        {
            size_t szW = (size_t)(weight_out(l) * weight_in(l));
            size_t szb = (size_t)weight_out(l);
            ok = ok && fwrite(W[l], sizeof(float), szW, f) == szW;
            ok = ok && fwrite(b[l], sizeof(float), szb, f) == szb;
        }
        /* Optimizer state */
        if (ver & 0x80000000u)
        {
            ok = ok && fwrite(&t, sizeof(long long), 1, f) == 1;
            for (int l = 0; l < nw; l++)
            {
                size_t szW = (size_t)(weight_out(l) * weight_in(l));
                size_t szb = (size_t)weight_out(l);
                ok = ok && fwrite(mW[l], sizeof(float), szW, f) == szW;
                ok = ok && fwrite(mb[l], sizeof(float), szb, f) == szb;
                ok = ok && fwrite(vW[l], sizeof(float), szW, f) == szW;
                ok = ok && fwrite(vb[l], sizeof(float), szb, f) == szb;
            }
        }
        /* fclose flushes buffered data; it can fail even when fwrite succeeded */
        if (fclose(f) != 0) ok = false;
        return ok;
    }

    /*
     * Load inference weights from a binary file.
     * Reconfigures the architecture to match the file (derives patch_r from
     * the stored input size).
     * Returns false on any format error, allocation failure, or short read —
     * including a truncated optimizer-state section, which previously was
     * read but never validated.
     */
    bool load(const char *path)
    {
        FILE *f = fopen(path, "rb");
        if (!f) return false;
        uint32_t magic, ver;
        float endian_check;
        if (fread(&magic, 4, 1, f) != 1 || magic != MAGIC)
        { fclose(f); return false; }
        if (fread(&ver, 4, 1, f) != 1)
        { fclose(f); return false; }
        uint32_t file_ver = ver & 0x7FFFFFFFu;
        bool has_optimizer = (ver & 0x80000000u) != 0;
        if (file_ver != VERSION) // only the current format version is compatible
        { fclose(f); return false; }
        if (fread(&endian_check, 4, 1, f) != 1 || endian_check != 1.0f)
        { fclose(f); return false; }
        uint32_t n_layers;
        /* MAX_FILE_LAYERS bounds the allocation driven by untrusted file data */
        if (fread(&n_layers, 4, 1, f) != 1 || n_layers < 2 || n_layers > MAX_FILE_LAYERS)
        { fclose(f); return false; }
        uint32_t *lsizes = (uint32_t *)malloc(n_layers * sizeof(uint32_t));
        if (!lsizes)
        { fclose(f); return false; }
        if (fread(lsizes, 4, n_layers, f) != n_layers)
        { free(lsizes); fclose(f); return false; }
        if (lsizes[n_layers - 1] != (uint32_t)OUTPUT_SIZE)
        { free(lsizes); fclose(f); return false; }
        /* Derive patch_r from input_size: input = (2*patch_r+1)^2 * FEAT_PER_PX */
        int file_input = (int)lsizes[0];
        if (file_input % FEAT_PER_PX != 0)
        { free(lsizes); fclose(f); return false; }
        int ps = file_input / FEAT_PER_PX;
        int side = (int)roundf(sqrtf((float)ps));
        if (side * side != ps || side < 1 || (side & 1) == 0)
        { free(lsizes); fclose(f); return false; }
        int pr = (side - 1) / 2;
        int file_nh = (int)n_layers - 2;
        int *file_hidden = (int *)malloc(file_nh * sizeof(int));
        if (!file_hidden)
        { free(lsizes); fclose(f); return false; }
        for (int i = 0; i < file_nh; i++)
            file_hidden[i] = (int)lsizes[i + 1];
        free(lsizes);
        /* Reconfigure to match file (drops any existing optimizer state) */
        set_arch(pr, file_nh, file_hidden);
        free(file_hidden);
        size_t r = 0;
        int nw = num_weights();
        for (int l = 0; l < nw; l++)
        {
            r += fread(W[l], sizeof(float), weight_out(l) * weight_in(l), f);
            r += fread(b[l], sizeof(float), weight_out(l), f);
        }
        bool ok = (r == (size_t)total_weights());
        if (has_optimizer)
        {
            alloc_optimizer();
            size_t r2 = 0, expect2 = 1; // 1 item for the timestep
            r2 += fread(&t, sizeof(long long), 1, f);
            for (int l = 0; l < nw; l++)
            {
                size_t szW = (size_t)(weight_out(l) * weight_in(l));
                size_t szb = (size_t)weight_out(l);
                r2 += fread(mW[l], sizeof(float), szW, f);
                r2 += fread(mb[l], sizeof(float), szb, f);
                r2 += fread(vW[l], sizeof(float), szW, f);
                r2 += fread(vb[l], sizeof(float), szb, f);
                expect2 += 2 * (szW + szb);
            }
            /* Previously r2 was discarded — a truncated checkpoint loaded
             * "successfully" with garbage moments. Now it is validated. */
            if (r2 != expect2) ok = false;
        }
        else if (optimizer_allocated)
        {
            /* Weights file without optimizer state: stale moments no longer
             * match the loaded weights, so drop them. */
            free_optimizer();
        }
        fclose(f);
        return ok;
    }

    /*
     * Save training checkpoint (weights + optimizer state).
     */
    bool save_checkpoint(const char *path) const
    {
        return save(path, true);
    }

    /*
     * Load from file — auto-detects checkpoint vs inference-only via the
     * version word's top bit. Reconfigures architecture to match the file.
     */
    bool load_checkpoint(const char *path)
    {
        return load(path);
    }
};
| #endif /* DHT_NN_NETWORK_H */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment