Skip to content

Instantly share code, notes, and snippets.

@leok7v
Last active October 7, 2024 05:14
Show Gist options
  • Save leok7v/aaa612e795fb3debe0ee19420761e581 to your computer and use it in GitHub Desktop.
Save leok7v/aaa612e795fb3debe0ee19420761e581 to your computer and use it in GitHub Desktop.
Fenwick tree implementation
#ifndef ft_header_included
#define ft_header_included
// Copyright (c) 2024, Leo Kuznetsov
// This code and the accompanying materials are made available under the
// terms of the BSD-3 license, which accompanies this distribution. The full
// text of the license may be found at https://opensource.org/license/bsd-3-clause
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
// https://en.wikipedia.org/wiki/Fenwick_tree
#define ft_max_bits 31
static_assert(ft_max_bits <= 31, "arrays are indexed by int32_t");
static inline int32_t ft_lsb(int32_t i) { // least significant bit only
assert(0 < i && i < (1uLL << ft_max_bits)); // 0 will lead to endless loop
return i & (~i + 1); // (i & -i)
}
static void ft_update(uint64_t tree[], size_t n, int32_t i, uint64_t inc) {
assert(2 <= n && n <= (1u << ft_max_bits));
const int32_t m = (int32_t)n;
assert(0 <= i && i < m);
i++; // 1-based indexing
assert(i < m + 1);
while (i < m + 1) {
assert(tree[i - 1] <= UINT64_MAX - inc);
tree[i - 1] += inc;
i += ft_lsb(i); // Move to the sibling
}
}
static void ft_init(uint64_t tree[], size_t n, uint64_t a[]) {
assert(2 <= n && n <= (1u << ft_max_bits));
const int32_t m = (int32_t)n;
for (int32_t i = 0; i < m; i++) { tree[i] = a[i]; }
for (int32_t i = 1; i <= m; i++) {
int32_t parent = i + ft_lsb(i);
if (parent <= m) {
assert(tree[parent - 1] < UINT64_MAX - tree[i - 1]);
tree[parent - 1] += tree[i - 1];
}
}
}
static uint64_t ft_query(const uint64_t tree[], size_t n, int32_t i) {
// ft_query() cumulative sum of all a[j] for j < i
assert(2 <= n && n <= (1u << ft_max_bits));
if (i == -1) { return 0; } // index can be -1
i++; // 1-based indexing
uint64_t sum = 0;
while (i > 0) {
if (i <= (int32_t)n) { // grandparent can be in tree when parent is not
sum += tree[i - 1];
}
i -= ft_lsb(i); // Clear lsb - move to the parent.
}
return sum;
}
static int32_t ft_index_of(uint64_t tree[], size_t n, uint64_t const sum) {
// returns index 'i' of an element such that sum of all a[j] for j < i
// is less of equal to sum.
// returns -1 if sum is less than any element of a[]
assert(2 <= n && n <= (1u << ft_max_bits));
assert((n & (n - 1)) == 0); // only works for power 2
if (sum >= tree[n - 1]) { return (int32_t)(n - 1); }
uint64_t value = sum;
uint32_t i = 0;
uint32_t mask = (uint32_t)(n >> 1);
while (mask != 0) {
uint32_t t = i + mask;
if (t <= n && value >= tree[t - 1]) {
i = t;
value -= tree[t - 1];
}
mask >>= 1;
}
return (i == 0 && value < sum) ? -1 : (int32_t)(i - 1);
}
// Why ft_max_bits is 31 and int32_t is used for array indexing?
// (for 16-bit ft_max_bits may be easily replaced by 15 or 16)
// Older Microsoft C89 compiler cl.exe for x86 used 32-bit signed
// int to index arrays (it is possible to overcome that limitation
// by using *(pointer + size_t) instead if absolutely necessary but
// it will make code a bit uglier.
#endif
#ifndef ft_test_header_included
#define ft_test_header_included
// Copyright (c) 2024, Leo Kuznetsov
// This code and the accompanying materials are made available under the terms
// of BSD-3 license, which accompanies this distribution. The full text of the
// license may be found at https://opensource.org/license/bsd-3-clause
#include "ft.h"
#include <stdio.h>
#ifndef countof
#define countof(a) (sizeof(a) / sizeof((a)[0]))
#endif
#define ft_max_test_bits 10 // because ft_test() is exponentially expensive
#define ft_max_test_n (1U << ft_max_test_bits)
static uint64_t sum_of(uint64_t* a, int i, int j) { // [i..j] inclusive freq[j]
uint64_t sum = 0;
for (int32_t k = i; k <= j; k++) { sum += a[k]; }
return sum;
}
static void ft_test(uint64_t tree[], size_t n, uint64_t* a, bool verbose) {
assert(2 <= n && n <= (1u << ft_max_bits));
const int32_t m = (int32_t)n;
ft_init(tree, n, a);
const uint64_t total = tree[n - 1];
assert(ft_query(tree, n, -1) == 0);
assert(sum_of(a, 0, -1) == 0);
for (int32_t i = 0; i < m; i++) {
if (verbose) { printf("sum_of[0,%2d]: %3lld\n", i, sum_of(a, 0, i)); }
assert(sum_of(a, 0, i) == ft_query(tree, n, i));
for (int32_t j = i + 1; j < m + 1; j++) {
assert(sum_of(a, i, j - 1) == ft_query(tree, n, j - 1) - ft_query(tree, n, i - 1));
}
}
for (uint64_t sum = 0; sum <= total; sum++) {
int32_t i = ft_index_of(tree, n, sum);
if (verbose) {
printf("sum: %3lld: sum_of[0,%2d]: %3lld ft_query(%2d): %3lld\n",
sum, i, sum_of(a, 0, i), i, ft_query(tree, n, i));
}
assert(ft_query(tree, n, i) <= sum);
if (i + 1 < (int32_t)m) {
assert(sum < ft_query(tree, n, i + 1));
}
}
if (verbose) { printf("%lld total: %lld\n", (int64_t)n, total); }
}
static uint64_t a[ft_max_test_n];
static uint64_t tree[ft_max_test_n];
static void ft_tests(bool verbose) {
static_assert(ft_max_test_bits <= ft_max_bits, "");
static_assert(ft_max_test_bits <= 12, "ft_max_bits <= 12");
for (int8_t pass = 0; pass < 2; pass++) {
for (uint32_t i = 0; i < countof(a); i++) { a[i] = i + pass; }
const size_t n = 2;
ft_init(tree, n, a);
assert(ft_index_of(tree, n, tree[n - 1] + 1) == (int32_t)(n - 1));
assert(ft_index_of(tree, n, UINT64_MAX) == (int32_t)(n - 1));
// ft_index_of() only returns -1 if all a[i] > 0
uint32_t zeros = 0;
for (uint32_t i = 0; i < countof(a); i++) { zeros += (a[i] == 0); }
if (zeros == 0) {
assert(ft_index_of(tree, n, 0) == -1);
} else {
assert(ft_index_of(tree, n, 0) == 0);
}
}
for (int8_t bits = 1; bits <= ft_max_test_bits; bits++) {
const size_t n = (size_t)(1uLL << bits);
ft_test(tree, n, a, verbose);
}
{
const size_t n = 4;
ft_test(tree, n, a, true);
}
}
#endif
#include "ft_test.h"
#include <stdbool.h>
#include <string.h>
int main(int argc, const char* argv[]) {
bool verbose = false;
for (int i = 1; i < argc && verbose; i++) {
verbose = strcmp(argv[i], "-v") == 0 ||
strcmp(argv[i], "--verbose") == 0;
}
ft_tests(verbose);
return 0;
}
BSD 3-Clause License
Copyright (c) 2024, Leo Kuznetsov
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Copyright (c) 2024, Leo Kuznetsov

References

https://en.wikipedia.org/wiki/Fenwick_tree

Code style:

There is K&R C, ANSI C, C89, C99, C11, C18, C2x and there is "C as I like it".

There is language itself, preprocessor, ancient runtime, several bickering standardization committees corporate and open source flavors and plethora of coding styles (e.g. MISRA C).

I have chosen to stick to the "C as I like it" style because it pleases my eye as long as it does not offend wider audience too much. I am aware that it is not the most popular style. I listen to critics and I am willing to improve the code to make it more readable and more portable.

  • '''if (foo) { bar(); }''' without putting the compound block at a l ine of its own might be considered less readable.
  • using enum {} for constants is at mercy of compiler int type bit width but at least it does not leak elements names into global scope as #define does.
  • The code is not always suitable for 16 and 8 bit architectures some tweaks may be required.
  • #define CONSTANTS_IN_SHOUTING_ALL_CAPS is a great tradition but not a requirement. As a person, who has first hand experience with FORTRAN in 1976, I beg to be excused for ALL_CAPS 'readability' requirements.
  • #define null ((void*)0) personally invented this decades before C++ nullptr was even discussed. It's mine and I will stick to it.
  • IMHO countof() should be part of the language and yes do pay attention to the fact that it should not be used on arrays decaying to pointers.
  • assert(bool, "printf formatted message", argv) is super easy to implement and super useful in debugging. If you don't like it #define assert(b, ...) assert(b) as a good compromise.
  • swear(bool, "printf formatted message", argv) is release mode fail fast fatal assertion.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment