Created
August 28, 2021 14:47
-
-
Save Eleobert/a80de036f87d853e84c59bc977213c14 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <algorithm> | |
#include <cassert> | |
#include <vector> | |
#include <numeric> | |
#include <cmath> | |
#include <limits> | |
template<typename T> | |
auto print(const T& c) | |
{ | |
for(auto x: c) | |
{ | |
std::cout << x << ' '; | |
} | |
std::cout << std::endl; | |
} | |
namespace rank | |
{ | |
enum class nan_policy | |
{ | |
none, first, last | |
}; | |
namespace details | |
{ | |
template<typename RandomAcess> | |
auto index_sort(const RandomAcess& rng) | |
{ | |
auto indices = std::vector<size_t>(rng.size()); | |
std::iota(indices.begin(), indices.end(), 0ul); | |
std::sort(indices.begin(), indices.end(), [&rng](auto i, auto j) | |
{ | |
return rng[i] < rng[j]; | |
}); | |
return indices; | |
} | |
template<typename RandomAcess> | |
auto sanitize_nans(RandomAcess& rng, nan_policy policy) | |
{ | |
using value_type = typename RandomAcess::value_type; | |
if (std::is_integral_v<value_type> || policy == nan_policy::none) | |
{ | |
return; | |
} | |
// If you want nans with the same rank just replace them with | |
// numeric limits max or min. | |
constexpr auto max = std::numeric_limits<value_type>::max(); | |
constexpr auto epsilon = std::numeric_limits<value_type>::epsilon(); | |
auto nan = (policy == nan_policy::last)? max: -max; | |
// This assigns a different rank for each nan (R like) | |
for(int i = rng.size() - 1; i >= 0; i--) | |
{ | |
if(std::isnan(rng[i])) | |
{ | |
rng[i] = nan; | |
nan = std::nextafter(nan, -nan); | |
} | |
} | |
} | |
} | |
template<typename OutRandomAcess = std::vector<int>, typename RandomAcess> | |
auto dense(RandomAcess vec, nan_policy policy = nan_policy::none) -> OutRandomAcess | |
{ | |
assert(vec.empty() == false); | |
details::sanitize_nans(vec, policy); | |
const auto n = vec.size(); | |
const auto indices = details::index_sort(vec); | |
auto res = OutRandomAcess(n); | |
auto rank = 1; | |
res[indices[0]] = rank; | |
for(auto i = 1ul; i < n; i++) | |
{ | |
rank += vec[indices[i]] != vec[indices[i - 1]]; | |
res[indices[i]] = rank; | |
} | |
return res; | |
} | |
template<typename OutRandomAcess = std::vector<float>, typename RandomAcess> | |
auto average(RandomAcess vec, nan_policy policy = nan_policy::none | |
) -> OutRandomAcess | |
{ | |
static_assert(std::is_integral_v<typename OutRandomAcess::value_type> == false); | |
details::sanitize_nans(vec, policy); | |
assert(vec.empty() == false); | |
const auto n = vec.size(); | |
const auto indices = details::index_sort(vec); | |
auto res = OutRandomAcess(n); | |
auto start = 0; | |
auto end = 0; | |
do | |
{ | |
end = start + 1; | |
// find the end of the current tie | |
while(end < vec.size() && vec[indices[start]] == vec[indices[end]]) | |
{ | |
end++; | |
} | |
// fill the ties with rank | |
for(auto i = start; i < end; i++) | |
{ | |
res[indices[i]] = start + (end - start + 1) / 2.0; | |
} | |
start = end; | |
}while(end < vec.size()); | |
return res; | |
} | |
} // namespace rank | |
int main() | |
{ | |
auto nan = std::numeric_limits<float>::quiet_NaN(); | |
auto vec = std::vector<float>{nan, 5, nan, 5, nan, 3, 5}; | |
print(rank::dense(vec, rank::nan_policy::first)); | |
print(rank::average(vec, rank::nan_policy::first)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment