Last active
December 5, 2020 08:28
-
-
Save KubaO/2b2fb35c237e66d00705d925acc868c1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// complete compileable example begins | |
#include <assert.h> | |
#include <math.h> | |
#include <stdarg.h> | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/time.h> | |
#define TRACK_MEMORY_USE 1 | |
#define TRACK_LIFETIME 0 | |
#define TRACK_VIEWS 0 | |
#define TRACK_DUMPS 0 | |
#define TRACK_ALLOCS 1 | |
#define KERNEL_SIZE 16 // 16-32 is the sweet spot usually | |
#define TRANSPOSE_B 1 // enabling it improves things very slightly | |
#if defined(__GNUC__) || defined(__clang__) | |
#define _nodiscard_ __attribute__((warn_unused_result)) | |
#else | |
#define _nodiscard_ | |
#endif | |
enum TrackEvent { | |
TE_CREATE, // param = size of object | |
TE_USE, | |
TE_DESTROY, // param = size of object | |
TE_ISEMPTY, | |
TE_COUNT, | |
TE_ALLOC, | |
TE_MAX_COUNT, | |
TE_MAX_ALLOC, | |
}; | |
#if TRACK_MEMORY_USE || TRACK_LIFETIME | |
size_t obj_track(enum TrackEvent event, const void *obj, size_t param); | |
#else | |
size_t obj_track(enum TrackEvent event, const void *obj, size_t param) { return 1; } | |
#endif | |
#define mat_use_check(M) do { \ | |
/* A view of a view must still refer directly to the shown matrix. */ \ | |
assert(!M->shown || !M->shown->shown); \ | |
assert(obj_track(TE_USE, mat_shown(M), 0)); \ | |
} while (0) | |
struct Matrix { | |
struct Matrix *shown; // a matrix being viewed, if any | |
float *ptr; | |
int n; // rows and columns in this range | |
int row_stride; // distance between beginnings of rows (units: elements) | |
int16_t tmp_count; // number of active temporary uses of this object | |
} typedef Matrix; | |
int strassen_entry_count; | |
//! Returns the matrix being shown if M is a view, otherwise return M itself | |
inline const Matrix *mat_shown(const Matrix *M) { | |
return M->shown ? M->shown : M; | |
} | |
Matrix *mat_create(int n); | |
void free_all(Matrix **M1, ...); | |
void free_temp(Matrix *M); | |
void free_all_temp(Matrix *M1, ...); | |
static bool mat_same_layouts(const Matrix *A, const Matrix *B); | |
void mat_print(const Matrix *M); | |
Matrix *mat_transpose(Matrix *M); | |
Matrix mat_block_view(const Matrix *matrix, int i, int j, int nbl); | |
Matrix mat_linear_block_view(const Matrix *matrix, int i, int j, int nbl); | |
Matrix *mat_add_to(Matrix *C, Matrix *A); | |
Matrix *mat_sub_from(Matrix *C, Matrix *A); | |
Matrix *mat_sum_to(Matrix *C, Matrix *A, Matrix *B); | |
Matrix *mat_diff_to(Matrix *C, Matrix *A, Matrix *B); | |
// | |
// Multiplication | |
// | |
static void mat_mul_impl_(float *restrict C, const float *restrict A, const float *restrict B, | |
int C_row_stride, int A_row_stride, int B_row_stride); | |
Matrix *mat_strassen_mul_impl_(Matrix *C, Matrix *A, Matrix *B, const Matrix *T) { | |
++ strassen_entry_count; | |
mat_use_check(C); | |
mat_use_check(A); | |
mat_use_check(B); | |
if (T) mat_use_check(T); | |
int const N = C->n; | |
assert(N >= KERNEL_SIZE && N == A->n && N == B->n && (!T || N <= T->n)); | |
if (N == KERNEL_SIZE) { | |
mat_mul_impl_(C->ptr, A->ptr, B->ptr, C->row_stride, A->row_stride, B->row_stride); | |
} else { | |
Matrix A11 = mat_block_view(A, 0, 0, 2); | |
Matrix A12 = mat_block_view(A, 0, 1, 2); | |
Matrix A21 = mat_block_view(A, 1, 0, 2); | |
Matrix A22 = mat_block_view(A, 1, 1, 2); | |
Matrix B11 = mat_block_view(B, 0, 0, 2); | |
#if TRANSPOSE_B | |
Matrix B12 = mat_block_view(B, 1, 0, 2); // transposed | |
Matrix B21 = mat_block_view(B, 0, 1, 2); // transposed | |
#else | |
Matrix B12 = mat_block_view(B, 0, 1, 2); | |
Matrix B21 = mat_block_view(B, 1, 0, 2); | |
#endif | |
Matrix B22 = mat_block_view(B, 1, 1, 2); | |
Matrix C11 = mat_block_view(C, 0, 0, 2); | |
Matrix C12 = mat_block_view(C, 0, 1, 2); | |
Matrix C21 = mat_block_view(C, 1, 0, 2); | |
Matrix C22 = mat_block_view(C, 1, 1, 2); | |
// T1 == C12, T2 == C21, T3,T4,T5 : new | |
// C11 = (M7) = (A12-A22) * (B21+B22) // lease T3 | |
// C22 = (M6) = (A21-A11) * (B11+B12) // lease T3 | |
// T4 = (M1) = (A11+A22) * (B11+B22) // lease T3 | |
// C11 = M7 + M1 | |
// C22 = M6 + M1 | |
// C12 = (M5) = (A11+A12) * B22 // lease T3 | |
// C11 = M7 + M1 - M5 | |
// C21 = (M2) = (A21+A22) * B11 // lease T3 | |
// C22 = M6 + M1 - M2 | |
// T4 = (M3) = A11 * (B12-B22) // lease T3 | |
// C12 = M5 + M3 | |
// C22 = M6 + M1 - M2 + M3 | |
// T4 = (M4) = A22 * (B21-B11) // lease T3 | |
// C11 = M7 + M1 - M5 + M4 | |
// C21 = M2 + M4 | |
Matrix T3_, T4_, T5_, *T3 = NULL, *T4 = NULL, *T5 = NULL; | |
if (T) { | |
T3_ = mat_linear_block_view(T, 0, 0, 2); | |
T4_ = mat_linear_block_view(T, 0, 1, 2); | |
T5_ = mat_linear_block_view(T, 1, 0, 2); | |
T3 = &T3_; | |
T4 = &T4_; | |
T5 = &T5_; | |
} else { | |
T3 = mat_create(A11.n); | |
T4 = mat_create(A11.n); | |
T5 = mat_create(A11.n); | |
} | |
{ | |
Matrix *M1 = &C12; | |
/*M7*/ mat_strassen_mul_impl_(&C11, mat_diff_to(T4, &A12, &A22), mat_sum_to(T5, &B21, &B22), T3); | |
/*M6*/ mat_strassen_mul_impl_(&C22, mat_diff_to(T4, &A21, &A11), mat_sum_to(T5, &B11, &B12), T3); | |
/*M1*/ mat_strassen_mul_impl_(M1, mat_sum_to(T4, &A11, &A22), mat_sum_to(T5, &B11, &B22), T3); | |
mat_add_to(&C11, M1); | |
mat_add_to(&C22, M1); | |
} | |
{ | |
Matrix *M5 = mat_strassen_mul_impl_(&C12, mat_sum_to(T5, &A11, &A12), &B22, T3); | |
mat_sub_from(&C11, M5); | |
Matrix *M2 = mat_strassen_mul_impl_(&C21, mat_sum_to(T5, &A21, &A22), &B11, T3); | |
mat_sub_from(&C22, M2); | |
} | |
{ | |
Matrix *M3 = mat_strassen_mul_impl_(T4, &A11, mat_diff_to(T5, &B12, &B22), T3); | |
mat_add_to(&C12, M3); | |
mat_add_to(&C22, M3); | |
} | |
{ | |
Matrix *M4 = mat_strassen_mul_impl_(T4, &A22, mat_diff_to(T5, &B21, &B11), T3); | |
mat_add_to(&C11, M4); | |
mat_add_to(&C21, M4); | |
} | |
free_all(&T3, &T4, &T5, NULL); | |
} | |
free_all_temp(A, B, T, NULL); | |
return C; | |
} | |
static void unpack_row_major(float *const restrict B, const float *const restrict A, int const Ars); | |
static void unpack_col_major(float *const restrict B, const float *const restrict A, int const Ars); | |
#if 0 | |
static void pack_row_major(float *const restrict B, const float *const restrict A, int const Brs); | |
static void pack_col_major(float *const restrict B, const float *const restrict A, int const Brs); | |
#endif | |
static void mat_mul_impl_(float *restrict C, const float *restrict A, const float *restrict B, | |
int C_row_stride, int A_row_stride, int B_row_stride) | |
{ | |
enum { N = KERNEL_SIZE }; | |
float AA[N*N], BB[N*N]; | |
unpack_row_major(AA, A, A_row_stride); | |
if (TRANSPOSE_B) | |
unpack_row_major(BB, B, B_row_stride); | |
else | |
unpack_col_major(BB, B, B_row_stride); | |
for (int i = 0; i < N; ++i) { | |
for (int j = 0; j < N; ++j) {//00 | |
float accum = 0; | |
for (int k = 0; k < N; ++k) { | |
accum += AA[i*N+k] * BB[j*N+k]; | |
} | |
C[i*C_row_stride+j] = accum; | |
} | |
} | |
} | |
static void unpack_row_major(float *const restrict B, const float *const restrict A, int const Ars) | |
{ | |
const int N = KERNEL_SIZE; | |
for (int i = 0; i < N; ++i) | |
for (int j = 0; j < N; ++j) | |
B[i*N+j] = A[i*Ars+j]; | |
} | |
static void unpack_col_major(float *const restrict B, const float *const restrict A, int const Ars) | |
{ | |
const int N = KERNEL_SIZE; | |
for (int i = 0; i < N; ++i) | |
for (int j = 0; j < N; ++j) | |
B[i*N+j] = A[j*Ars+i]; | |
} | |
#if 0 | |
static void pack_row_major(float *const restrict B, const float *const restrict A, int const Brs) | |
{ | |
const int N = KERNEL_SIZE; | |
for (int i = 0; i < N; ++i) | |
for (int j = 0; j < N; ++j) | |
B[i*Brs+j] = A[i*N+j]; | |
} | |
static void pack_col_major(float *const restrict B, const float *const restrict A, int const Brs) | |
{ | |
const int N = KERNEL_SIZE; | |
for (int i = 0; i < N; ++i) | |
for (int j = 0; j < N; ++j) | |
B[j*Brs+i] = A[i*N+j]; | |
} | |
#endif | |
Matrix *mat_strassen_mul_to(Matrix *C, Matrix *A, Matrix *B) { | |
mat_use_check(C); | |
mat_use_check(A); | |
mat_use_check(B); | |
assert(C->n == A->n && C->n == B->n); | |
assert(C->n >= KERNEL_SIZE); | |
if (TRANSPOSE_B) | |
mat_transpose(B); | |
if (C->n <= 64) { | |
printf("A\n"); | |
mat_print(A); | |
printf("B\n"); | |
mat_print(B); | |
} | |
mat_strassen_mul_impl_(C, A, B, NULL); | |
if (C->n <= 64) { | |
printf("C\n"); | |
mat_print(C); | |
} | |
if (TRANSPOSE_B) | |
mat_transpose(B); | |
return C; | |
} | |
_nodiscard_ Matrix *mat_strassen_mul(Matrix *A, Matrix *B) { | |
mat_use_check(A); | |
mat_use_check(B); | |
Matrix *C = mat_create(A->n); | |
mat_strassen_mul_to(C, A, B); | |
return C; | |
} | |
// | |
// Addition/subtraction | |
// | |
Matrix *mat_sum_to(Matrix *C, Matrix *A, Matrix *B) { | |
mat_use_check(C); | |
mat_use_check(A); | |
mat_use_check(B); | |
assert(C->n == A->n && C->n == B->n); | |
float *restrict c = C->ptr, * restrict a = A->ptr, * restrict b = B->ptr; | |
int const N = A->n; | |
int const Ars = A->row_stride, Brs = B->row_stride, Crs = C->row_stride; | |
for (int i = 0; i < N; ++i) { | |
for (int j = 0; j < N; ++j) | |
c[i*Crs+j] = a[i*Ars+j] + b[i*Brs+j]; | |
} | |
free_all_temp(A, B, NULL); | |
return C; | |
} | |
_nodiscard_ Matrix *mat_sum(Matrix *A, Matrix *B) { | |
return mat_sum_to(mat_create(A->n), A, B); | |
} | |
Matrix *mat_add_to(Matrix *C, Matrix *B) { | |
mat_use_check(C); | |
mat_use_check(B); | |
assert(C->n == B->n); | |
float *restrict c = C->ptr, *restrict b = B->ptr; | |
int const N = C->n; | |
int const Brs = B->row_stride, Crs = C->row_stride; | |
for (int i = 0; i < N; ++i) | |
for (int j = 0; j < N; ++j) | |
c[i*Crs+j] += b[i*Brs+j]; | |
free_temp(B); | |
return C; | |
} | |
Matrix *mat_diff_to(Matrix *C, Matrix *A, Matrix *B) { | |
mat_use_check(C); | |
mat_use_check(A); | |
mat_use_check(B); | |
assert(C->n == A->n && C->n == B->n); | |
int const N = A->n, Ars = A->row_stride, Brs = B->row_stride, Crs = C->row_stride; | |
float *restrict c = C->ptr, *restrict a = A->ptr, *restrict b = B->ptr; | |
for (int i = 0; i < N; ++i) | |
for (int j = 0; j < N; ++j) | |
c[i*Crs+j] = a[i*Ars+j] - b[i*Brs+j]; | |
free_all_temp(A, B, NULL); | |
return C; | |
} | |
_nodiscard_ Matrix *mat_diff(Matrix *A, Matrix *B) { | |
return mat_diff_to(mat_create(A->n), A, B); | |
} | |
Matrix *mat_sub_from(Matrix *C, Matrix *B) { | |
mat_use_check(C); | |
mat_use_check(B); | |
assert(C->n == B->n); | |
float *restrict c = C->ptr, *restrict b = B->ptr; | |
int const N = C->n, Brs = B->row_stride, Crs = C->row_stride; | |
for (int i = 0; i < N; ++i) | |
for (int j = 0; j < N; ++j) | |
c[i*Crs+j] -= b[i*Brs+j]; | |
free_temp(B); | |
return C; | |
} | |
// | |
// Misc Value Setting | |
// | |
_nodiscard_ size_t mat_num_bytes(const Matrix *A) { | |
mat_use_check(A); | |
return A ? sizeof(*A) + sizeof(float) * A->n * A->row_stride : 0; | |
} | |
Matrix *mat_zero(Matrix *M) { | |
mat_use_check(M); | |
int const N = M->n; | |
float *restrict m = M->ptr; | |
for (int i = 0; i < N; ++i) { | |
memset(m, 0, sizeof(float) * N); | |
m += M->row_stride; | |
} | |
return M; | |
} | |
_nodiscard_ Matrix *mat_zeroed(int n) { return mat_zero(mat_create(n)); } | |
Matrix *mat_randomize(Matrix *M) { | |
mat_use_check(M); | |
float *restrict m = M->ptr; | |
const int N = M->n, Mrs = M->row_stride; | |
for (int i = 0; i < N; ++i) { | |
for (int j = 0; j < N; ++j) | |
m[i*Mrs+j] = -1. + 2.*((float)rand())/RAND_MAX; | |
} | |
return M; | |
} | |
_nodiscard_ Matrix *mat_randomized(int n) { return mat_randomize(mat_create(n)); } | |
Matrix *mat_row_seq(Matrix *M) { | |
mat_use_check(M); | |
mat_zero(M); | |
float *restrict m = M->ptr; | |
const int N = M->n; | |
for (int i = 0; i < N; ++i) | |
m[i] = i; | |
return M; | |
} | |
Matrix *mat_col_seq(Matrix *M) { | |
mat_use_check(M); | |
mat_zero(M); | |
float *restrict m = M->ptr; | |
const int N = M->n, Mrs = M->row_stride; | |
for (int i = 0; i < N; ++i) | |
m[i*Mrs] = i; | |
return M; | |
} | |
Matrix *mat_transpose(Matrix *M) { | |
mat_use_check(M); | |
const int N = M->n, Mrs = M->row_stride; | |
float *const restrict m = M->ptr; | |
for (int i = 0; i < N; ++i) { | |
for (int j = i+1; j < N; ++j) { | |
float a = m[i*Mrs+j]; | |
m[i*Mrs+j] = m[j*Mrs+i]; | |
m[j*Mrs+i] = a; | |
} | |
} | |
return M; | |
} | |
Matrix *mat_copy_to(Matrix *M, Matrix *A) { | |
mat_use_check(M); | |
mat_use_check(A); | |
assert(M->n == A->n); | |
if (mat_same_layouts(M, A)) { | |
memcpy(M->ptr, A->ptr, mat_num_bytes(M)); | |
} else { | |
float *restrict m = M->ptr, *restrict a = A->ptr; | |
int const N = M->n, Ars = A->row_stride, Mrs = M->row_stride; | |
for (int i = 0; i < N; ++i) | |
for (int j = 0; j < N; ++j) | |
m[i*Mrs+j] = a[i*Ars+j]; | |
} | |
free_temp(A); | |
return M; | |
} | |
// | |
// Matrix Creation/Destruction | |
// | |
//! A modifier used to pass a temporary matrix as a matrix argument - the | |
//! called function will treat this matrix as a temporary one and free it when | |
//! it returns. | |
Matrix *temp(Matrix *M) { | |
mat_use_check(M); | |
assert(M->tmp_count >= 0); | |
M->tmp_count ++; | |
return M; | |
} | |
inline size_t mat_alloc_size(const int n) { | |
return sizeof(Matrix) + sizeof(float) * n * n; | |
} | |
__attribute__((noreturn)) static void out_of_memory(void) { | |
fprintf(stderr, "Out of memory\n"); | |
fflush(stderr); | |
abort(); | |
} | |
_nodiscard_ Matrix *mat_create(int const n) { | |
size_t const bytes = mat_alloc_size(n); | |
Matrix *const M = malloc(bytes); | |
if (TRACK_ALLOCS) { | |
printf("Allocating (%ld) %dx%d %p\n", obj_track(TE_COUNT, NULL, 0), n, n, M); | |
fflush(stdout); | |
} | |
if (!M) out_of_memory(); | |
bool ok = obj_track(TE_CREATE, M, bytes); | |
assert(ok); | |
M->shown = NULL; | |
M->ptr = (void*)(M+1); | |
M->n = n; | |
M->row_stride = n; | |
M->tmp_count = 0; | |
return M; | |
} | |
void mat_free(Matrix **M) { | |
Matrix *mp = *M; | |
if (!mp || mp->shown) return; | |
mat_use_check(mp); | |
const size_t bytes = mat_alloc_size(mat_shown(mp)->n); | |
if (TRACK_ALLOCS) { | |
printf("Freeing %s (%ld) %dx%d %p\n", | |
mp->shown ? "View" : "Matrix", obj_track(TE_COUNT, NULL, 0), | |
mp->n, mp->n, mp); | |
fflush(stdout); | |
} | |
free(mp); | |
bool ok = obj_track(TE_DESTROY, mp, bytes); | |
assert(ok); | |
*M = mp = NULL; | |
} | |
void free_all(Matrix **M, ...) { | |
va_list args; | |
va_start(args, M); | |
while (M) { | |
mat_free(M); | |
M = va_arg(args, Matrix **); | |
} | |
va_end(args); | |
} | |
void free_temp(Matrix *M) { | |
if (!M) return; | |
if (!M->tmp_count) return; | |
assert(M->tmp_count > 0); | |
if (!--M->tmp_count) | |
mat_free(&M); | |
} | |
void free_all_temp(Matrix *M, ...) { | |
va_list args; | |
va_start(args, M); | |
while(M) { | |
free_temp(M); | |
M = va_arg(args, Matrix *); | |
} | |
va_end(args); | |
} | |
// | |
// Matrix Query and Output | |
// | |
static _nodiscard_ bool mat_same_layouts(const Matrix *A, const Matrix *B) { | |
mat_use_check(A); | |
mat_use_check(B); | |
return A->n == B->n && A->row_stride == B->row_stride; | |
} | |
void mat_print(const Matrix *M) { | |
mat_use_check(M); | |
float *m = M->ptr; | |
for (int i = 0; i < M->n; ++i) { | |
for (int j = 0; j < M->n; ++j) printf("%.0f ", m[j]); | |
printf("\n"); | |
m += M->row_stride; | |
} | |
fflush(stdout); | |
} | |
void mat_dump(const Matrix *M) { | |
mat_use_check(M); | |
if (!TRACK_DUMPS) return; | |
if (!M) { | |
printf("Null\n"); | |
} else { | |
const char *kind = !M->shown ? "Matrix" : "View"; | |
printf("%s %dx%d <->%d %p", kind, M->n, M->n, M->row_stride, M->ptr); | |
if (M->shown) printf(" ..%p", M->shown->ptr); | |
printf("\n"); | |
} | |
fflush(stdout); | |
} | |
// | |
// Views of a Matrix | |
// | |
static void track_view(const Matrix *V, const char *kind) { | |
if (TRACK_VIEWS) { | |
printf("New %s %dx%d <->%d %p\n", kind, V->n, V->n, V->row_stride, V->ptr); | |
fflush(stdout); | |
} | |
} | |
//! Returns a sub-block *view* of a given matrix. The block's index is i,j (0-based), | |
//! out of an nxn square of blocks. Thew view is by-value and is meant to be | |
//! kept by value. It doesn't allocate. | |
_nodiscard_ Matrix mat_block_view(const Matrix *M, int i, int j, int nbl) { | |
mat_use_check(M); | |
const Matrix *shown = mat_shown(M); | |
Matrix view = { .shown = (Matrix*)shown }; | |
view.n = M->n / nbl; | |
view.row_stride = M->row_stride; | |
view.ptr = M->ptr + ((size_t)i * view.n * view.row_stride) + ((size_t)j * view.n); | |
track_view(&view, "View"); | |
return view; | |
} | |
//! Returns a sub-block linearized view of a given matrix, i.e. the sub-blocks | |
//! have the smallest possible row_stride. Useful for cache locality when reusing | |
//! temporary allocations. The source matrix must be contiguous, i.e. it can't be | |
//! a mat_block_view. | |
_nodiscard_ Matrix mat_linear_block_view(const Matrix *M, int i, int j, int nbl) { | |
mat_use_check(M); | |
assert(M->row_stride == M->n); | |
const Matrix *shown = mat_shown(M); | |
Matrix view = { .shown = (Matrix*)shown }; | |
view.n = M->n / nbl; | |
view.row_stride = view.n; | |
view.ptr = M->ptr + ((size_t)i * nbl + (size_t)j) * view.n * view.n; | |
track_view(&view, "Linear View"); | |
return view; | |
} | |
// | |
// Example/Test | |
// | |
typedef struct timeval timeval; | |
_nodiscard_ timeval get_time(void) { | |
timeval result; | |
gettimeofday(&result, 0); | |
return result; | |
} | |
_nodiscard_ double time_delta(const timeval *start, const timeval *end) { | |
double const t1 = start->tv_sec + start->tv_usec/1e6; | |
double const t2 = end->tv_sec + end->tv_usec/1e6; | |
return t2-t1; | |
} | |
int main() | |
{ | |
size_t const N = 2048; | |
#if 1 | |
Matrix *A = mat_randomize(mat_create(N)); | |
Matrix *B = mat_randomize(mat_create(N)); | |
#else | |
Matrix *A = mat_row_seq(mat_create(N)); | |
Matrix *B = mat_col_seq(mat_create(N)); | |
#endif | |
Matrix *C = mat_create(N); | |
printf("Memory used for A,B,C matrices: %lu\n", obj_track(TE_ALLOC, NULL, 0)); | |
timeval start = get_time(); | |
mat_strassen_mul_to(C, A, B); | |
timeval end = get_time(); | |
free_all(&C, &B, &A, NULL); | |
printf("Number of entries to Strassen multiplication: %d\n", strassen_entry_count); | |
printf("Total wall time: %gs\n", time_delta(&start, &end)); | |
printf("Maximum allocated matrices/memory: %lu / %lu\n", | |
obj_track(TE_MAX_COUNT, NULL, 0), obj_track(TE_MAX_ALLOC, NULL, 0)); | |
printf("Matrices left: %lu\n", obj_track(TE_COUNT, NULL, 0)); | |
assert(obj_track(TE_ISEMPTY, NULL, 0)); | |
} | |
// | |
// Diagnostic Object Tracking | |
// | |
#if TRACK_MEMORY_USE || TRACK_LIFETIME | |
struct { | |
const void *ptr; | |
} typedef ObjEntry; | |
struct { | |
ObjEntry *objects; | |
size_t capacity; | |
size_t count; | |
size_t alloc; | |
size_t max_alloc; | |
size_t max_count; | |
bool is_sorted; | |
} typedef ObjState; | |
bool obj_count(const void *obj, size_t size, ObjState *ost); | |
bool obj_uncount(const void *obj, size_t size, ObjState *ost); | |
bool obj_push_back(const void *obj, size_t size, ObjState *ost); | |
bool obj_find(const void *obj, ObjState *ost); | |
bool obj_remove(const void *obj, size_t size, ObjState *ost); | |
size_t obj_track(enum TrackEvent event, const void *obj, size_t param) { | |
static ObjState ost; | |
switch (event) { | |
#if TRACK_MEMORY_USE && !TRACK_LIFETIME | |
case TE_CREATE: | |
return obj_count(obj, param, &ost); | |
case TE_DESTROY: | |
return obj_uncount(obj, param, &ost); | |
case TE_USE: | |
return !!obj; | |
#else | |
case TE_CREATE: | |
return !obj_find(obj, &ost) && obj_push_back(obj, param, &ost); | |
case TE_USE: | |
return obj_find(obj, &ost); | |
case TE_DESTROY: | |
return obj_remove(obj, param, &ost); | |
#endif | |
case TE_ISEMPTY: | |
return !ost.count; | |
case TE_COUNT: | |
return ost.count; | |
case TE_ALLOC: | |
return ost.alloc; | |
case TE_MAX_COUNT: | |
return ost.max_count; | |
case TE_MAX_ALLOC: | |
return ost.max_alloc; | |
default: | |
return false; | |
} | |
} | |
bool obj_count(const void *obj, size_t size, ObjState *ost) { | |
if (!obj || !size) return false; | |
++ost->count; | |
ost->alloc += size; | |
if (ost->count > ost->max_count) ost->max_count = ost->count; | |
if (ost->alloc > ost->max_alloc) ost->max_alloc = ost->alloc; | |
return true; | |
} | |
bool obj_uncount(const void *obj, size_t size, ObjState *ost) { | |
if (!obj || !size) return false; | |
--ost->count; | |
ost->alloc -= size; | |
return true; | |
} | |
bool obj_push_back(const void *obj, size_t size, ObjState *ost) { | |
if (!ost->capacity) { | |
ost->capacity = 32; | |
ost->objects = malloc(sizeof(ObjEntry) * ost->capacity); | |
} | |
else if (ost->capacity == ost->count) { | |
ost->capacity *= 2; | |
ost->objects = realloc(ost->objects, sizeof(ObjEntry) * ost->capacity); | |
if (!ost->objects) out_of_memory(); | |
} | |
if (!obj_count(obj, size, ost)) | |
return false; | |
ost->objects[ost->count-1] = (ObjEntry){ .ptr = obj }; | |
if (ost->count == 1) { | |
ost->is_sorted = true; | |
} else { | |
ObjEntry *second_to_last = &(ost->objects[ost->count - 2]); | |
ost->is_sorted = ost->is_sorted && obj > second_to_last->ptr; | |
} | |
return true; | |
} | |
static int ptr_comp(const void *a, const void *b) { | |
const ObjEntry *obj1 = a; | |
const ObjEntry *obj2 = b; | |
ssize_t diff = obj1->ptr - obj2->ptr; | |
return diff < 0 ? - 1 : diff > 0 ? 1 : 0; | |
} | |
int obj_lookup(const void *obj, ObjState *ost) { | |
if (!ost->is_sorted) { | |
qsort(ost->objects, ost->count, sizeof(ObjEntry), ptr_comp); | |
ost->is_sorted = true; | |
} | |
if (ost->count > 1) { | |
// Sanity check: the first two objects must be sorted, at least. | |
assert(ost->objects[0].ptr < ost->objects[1].ptr); | |
} | |
const ObjEntry *found = | |
bsearch(&obj, ost->objects, ost->count, sizeof(ObjEntry), ptr_comp); | |
return (!found) ? -1 : (found - ost->objects); | |
} | |
bool obj_find(const void *obj, ObjState *ost) { return obj_lookup(obj, ost) >= 0; } | |
bool obj_erase(int pos, size_t size, ObjState *ost) { | |
assert(pos >= -1 && pos < ost->count); | |
if (pos == -1) return false; | |
if (!obj_uncount(ost->objects[pos].ptr, size, ost)) | |
return false; | |
if (pos < (ost->count)) | |
memmove(ost->objects + pos, ost->objects + pos + 1, | |
sizeof(ObjEntry) * (ost->count - pos)); | |
return true; | |
} | |
bool obj_remove(const void *obj, size_t size, ObjState *ost) { | |
int index = obj_lookup(obj, ost); | |
return obj_erase(index, size, ost); | |
} | |
#endif // TRACK_MEMORY_USE || TRACK_LIFETIME | |
// complete compileable example ends |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment