Last active
June 28, 2023 13:38
-
-
Save philipturner/2a4077561c3fd6c00a3cc63fefa7a679 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| __attribute__((__always_inline__)) | |
| inline static double _kmeans1d_cost(double* cumsum, double* cumsum2, int i, int j) | |
| { | |
| if (j < i) | |
| return 0; | |
| double mu = (cumsum[j + 1] - cumsum[i]) / (j - i + 1); | |
| double result = cumsum2[j + 1] - cumsum2[i]; | |
| result += (j - i + 1) * (mu * mu); | |
| result -= (2 * mu) * (cumsum[j + 1] - cumsum[i]); | |
| return result; | |
| } | |
| __attribute__((__always_inline__)) | |
| inline static double _kmeans1d_lookup(double* D, double* cumsum, double* cumsum2, int i, int j) | |
| { | |
| const int i_minus_j_plus_1 = i - j + 1; | |
| const int col = i_minus_j_plus_1 < 0 ? i : j - 1; | |
| double result = (col >= 0 ? D[col] : 0); | |
| if (i_minus_j_plus_1 < 1) | |
| return result; | |
| double temp = (cumsum[i + 1] - cumsum[j]); | |
| double mu = temp / i_minus_j_plus_1; | |
| double result_alt = result + cumsum2[i + 1] - cumsum2[j]; | |
| result_alt += i_minus_j_plus_1 * (mu * mu); | |
| result_alt -= (2 * mu) * temp; | |
| return result_alt; | |
| } | |
| __attribute__((__always_inline__)) | |
| inline static bool _kmeans1d_lookup2(double* D, double* cumsum, double* cumsum2, int i, int j1, int j2) | |
| { | |
| // Uses 2-wide SIMD or instruction-level parallelism. | |
| const int i_minus_j1_plus_1 = i - j1 + 1; | |
| const int i_minus_j2_plus_1 = i - j2 + 1; | |
| const int col1 = i_minus_j1_plus_1 < 0 ? i : j1 - 1; | |
| const int col2 = i_minus_j2_plus_1 < 0 ? i : j2 - 1; | |
| double result1 = (col1 >= 0 ? D[col1] : 0); | |
| double result2 = (col2 >= 0 ? D[col2] : 0); | |
| double cumsum_i_1 = cumsum[i + 1]; | |
| double cumsum2_i_1 = cumsum2[i + 1]; | |
| double temp1 = (cumsum_i_1 - cumsum[j1]); | |
| double temp2 = (cumsum_i_1 - cumsum[j2]); | |
| double mu1 = temp1 / i_minus_j1_plus_1; | |
| double mu2 = temp2 / i_minus_j2_plus_1; | |
| double result1_alt = result1 + cumsum2_i_1 - cumsum2[j1]; | |
| double result2_alt = result2 + cumsum2_i_1 - cumsum2[j2]; | |
| result1_alt += i_minus_j1_plus_1 * (mu1 * mu1); | |
| result2_alt += i_minus_j2_plus_1 * (mu2 * mu2); | |
| result1_alt -= (2 * mu1) * temp1; | |
| result2_alt -= (2 * mu2) * temp2; | |
| result1 = (i_minus_j1_plus_1 < 1) ? result1 : result1_alt; | |
| result2 = (i_minus_j2_plus_1 < 1) ? result2 : result2_alt; | |
| return result1 >= result2; | |
| } | |
| static void _smawk(int row_start, int row_stride, int row_size, int* cols, int col_size, int* reserved, double* D, double* cumsum, double* cumsum2, int* result) | |
| { | |
| if (row_size == 0) | |
| return; | |
| int* _cols = cols + col_size; | |
| int _col_size = 0; | |
| int i; | |
| for (i = 0; i < col_size; i++) | |
| { | |
| const int col = cols[i]; | |
| for (;;) | |
| { | |
| if (_col_size == 0) | |
| break; | |
| const int row = row_start + row_stride * (_col_size - 1); | |
| if (_kmeans1d_lookup2(D, cumsum, cumsum2, row, col, _cols[_col_size - 1])) | |
| break; | |
| --_col_size; | |
| } | |
| if (_col_size < row_size) | |
| { | |
| _cols[_col_size] = col; | |
| ++_col_size; | |
| } | |
| } | |
| _smawk(row_start + row_stride, row_stride * 2, row_size / 2, _cols, _col_size, reserved, D, cumsum, cumsum2, result); | |
| // Build the reverse lookup table. | |
| for (i = 0; i < _col_size; i++) | |
| reserved[_cols[i]] = i; | |
| int start = 0; | |
| for (i = 0; i < row_size; i += 2) { | |
| const int row = row_start + i * row_stride; | |
| int stop = _col_size - 1; | |
| if (i < row_size - 1) | |
| { | |
| const int argmin = result[row_start + (i + 1) * row_stride]; | |
| stop = reserved[argmin]; | |
| } | |
| int argmin = _cols[start]; | |
| double min = _kmeans1d_lookup(D, cumsum, cumsum2, row, argmin); | |
| int c; | |
| for (c = start + 1; c <= stop; c++) | |
| { | |
| double value = _kmeans1d_lookup(D, cumsum, cumsum2, row, _cols[c]); | |
| if (c == start || value < min) { | |
| argmin = _cols[c]; | |
| min = value; | |
| } | |
| } | |
| result[row] = argmin; | |
| start = stop; | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment