Skip to content

Instantly share code, notes, and snippets.

@philipturner
Last active June 28, 2023 13:38
Show Gist options
  • Select an option

  • Save philipturner/2a4077561c3fd6c00a3cc63fefa7a679 to your computer and use it in GitHub Desktop.

Select an option

Save philipturner/2a4077561c3fd6c00a3cc63fefa7a679 to your computer and use it in GitHub Desktop.
__attribute__((__always_inline__))
inline static double _kmeans1d_cost(double* cumsum, double* cumsum2, int i, int j)
{
if (j < i)
return 0;
double mu = (cumsum[j + 1] - cumsum[i]) / (j - i + 1);
double result = cumsum2[j + 1] - cumsum2[i];
result += (j - i + 1) * (mu * mu);
result -= (2 * mu) * (cumsum[j + 1] - cumsum[i]);
return result;
}
__attribute__((__always_inline__))
inline static double _kmeans1d_lookup(double* D, double* cumsum, double* cumsum2, int i, int j)
{
const int i_minus_j_plus_1 = i - j + 1;
const int col = i_minus_j_plus_1 < 0 ? i : j - 1;
double result = (col >= 0 ? D[col] : 0);
if (i_minus_j_plus_1 < 1)
return result;
double temp = (cumsum[i + 1] - cumsum[j]);
double mu = temp / i_minus_j_plus_1;
double result_alt = result + cumsum2[i + 1] - cumsum2[j];
result_alt += i_minus_j_plus_1 * (mu * mu);
result_alt -= (2 * mu) * temp;
return result_alt;
}
__attribute__((__always_inline__))
inline static bool _kmeans1d_lookup2(double* D, double* cumsum, double* cumsum2, int i, int j1, int j2)
{
// Uses 2-wide SIMD or instruction-level parallelism.
const int i_minus_j1_plus_1 = i - j1 + 1;
const int i_minus_j2_plus_1 = i - j2 + 1;
const int col1 = i_minus_j1_plus_1 < 0 ? i : j1 - 1;
const int col2 = i_minus_j2_plus_1 < 0 ? i : j2 - 1;
double result1 = (col1 >= 0 ? D[col1] : 0);
double result2 = (col2 >= 0 ? D[col2] : 0);
double cumsum_i_1 = cumsum[i + 1];
double cumsum2_i_1 = cumsum2[i + 1];
double temp1 = (cumsum_i_1 - cumsum[j1]);
double temp2 = (cumsum_i_1 - cumsum[j2]);
double mu1 = temp1 / i_minus_j1_plus_1;
double mu2 = temp2 / i_minus_j2_plus_1;
double result1_alt = result1 + cumsum2_i_1 - cumsum2[j1];
double result2_alt = result2 + cumsum2_i_1 - cumsum2[j2];
result1_alt += i_minus_j1_plus_1 * (mu1 * mu1);
result2_alt += i_minus_j2_plus_1 * (mu2 * mu2);
result1_alt -= (2 * mu1) * temp1;
result2_alt -= (2 * mu2) * temp2;
result1 = (i_minus_j1_plus_1 < 1) ? result1 : result1_alt;
result2 = (i_minus_j2_plus_1 < 1) ? result2 : result2_alt;
return result1 >= result2;
}
static void _smawk(int row_start, int row_stride, int row_size, int* cols, int col_size, int* reserved, double* D, double* cumsum, double* cumsum2, int* result)
{
if (row_size == 0)
return;
int* _cols = cols + col_size;
int _col_size = 0;
int i;
for (i = 0; i < col_size; i++)
{
const int col = cols[i];
for (;;)
{
if (_col_size == 0)
break;
const int row = row_start + row_stride * (_col_size - 1);
if (_kmeans1d_lookup2(D, cumsum, cumsum2, row, col, _cols[_col_size - 1]))
break;
--_col_size;
}
if (_col_size < row_size)
{
_cols[_col_size] = col;
++_col_size;
}
}
_smawk(row_start + row_stride, row_stride * 2, row_size / 2, _cols, _col_size, reserved, D, cumsum, cumsum2, result);
// Build the reverse lookup table.
for (i = 0; i < _col_size; i++)
reserved[_cols[i]] = i;
int start = 0;
for (i = 0; i < row_size; i += 2) {
const int row = row_start + i * row_stride;
int stop = _col_size - 1;
if (i < row_size - 1)
{
const int argmin = result[row_start + (i + 1) * row_stride];
stop = reserved[argmin];
}
int argmin = _cols[start];
double min = _kmeans1d_lookup(D, cumsum, cumsum2, row, argmin);
int c;
for (c = start + 1; c <= stop; c++)
{
double value = _kmeans1d_lookup(D, cumsum, cumsum2, row, _cols[c]);
if (c == start || value < min) {
argmin = _cols[c];
min = value;
}
}
result[row] = argmin;
start = stop;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment