Last active
July 18, 2017 00:47
-
-
Save ShigekiKarita/421015f38d5332d971e1c8d09a1c52d4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> | |
#include <thrust/host_vector.h> | |
#include <thrust/device_vector.h> | |
#include <algorithm> | |
#include <iostream> | |
#include <limits> | |
#include <cmath> | |
#include <utility> | |
const static auto inf = std::numeric_limits<float>::infinity(); | |
template <size_t N> | |
struct MultiIndex { | |
const size_t size_list[N]; | |
template <typename ... Ts> | |
__host__ __device__ | |
size_t operator()(const Ts ... indices) const { | |
constexpr size_t M = sizeof...(indices); | |
static_assert(M == N, "fancy indexing is not supported yet"); | |
const size_t indice_list[M] = { indices ... }; | |
size_t ret = 0; | |
for (size_t m = 0; m < M; ++m) { | |
assert(indice_list[m] < size_list[m]); | |
ret += (m > 0 ? size_list[m-1] : 1) * indice_list[m]; | |
} | |
return ret; | |
} | |
}; | |
template <typename ... Ts> | |
MultiIndex<sizeof...(Ts)> make_index(Ts ... sizes) { | |
return {{sizes ...}}; | |
} | |
struct L1 { | |
template <typename T> | |
T operator()(T a, T b) { | |
return std::abs(a - b); | |
} | |
}; | |
struct L2 { | |
template <typename T> | |
T operator()(T a, T b) { | |
return std::sqrt(a * a + b * b); | |
} | |
}; | |
template <typename Dist, typename Vec> | |
auto dtw(const Vec& a, const Vec& b) -> decltype(a[0]) { | |
size_t alen = a.size() + 1; | |
size_t blen = b.size() + 1; | |
auto dpmat = Vec(alen * blen); | |
auto at = make_index(alen, blen); | |
auto dist = Dist(); | |
for (size_t i = 1; i < alen; ++i) { | |
dpmat[at(i, 0ul)] = inf; | |
} | |
for (size_t j = 1; j < blen; ++j) { | |
dpmat[at(0ul, j)] = inf; | |
} | |
dpmat[at(0ul, 0ul)] = 0; | |
for (size_t i = 1; i < alen; ++i) { | |
for (size_t j = 1; j < blen; ++j) { | |
auto cost = dist(a[i], b[j]); | |
dpmat[at(i, j)] = cost + std::min({ | |
dpmat[at(i-1, j)], | |
dpmat[at(i-1, j-1)], | |
dpmat[at(i, j-1)]}); | |
} | |
} | |
return dpmat[at(alen-1, blen-1)]; | |
} | |
template <typename Storage, typename Indexer> | |
__global__ void dtw_cuda(const Storage a, const Storage b, Storage dpmat, Indexer at) { | |
const auto alen = at.size_list[0]; | |
const auto blen = at.size_list[1]; | |
for (size_t i = 1; i < alen; ++i) { | |
dpmat[at(i, 0ul)] = inf; | |
} | |
for (size_t j = 1; j < blen; ++j) { | |
dpmat[at(0ul, j)] = inf; | |
} | |
dpmat[at(0ul, 0ul)] = 0; | |
for (size_t i = 1; i < alen; ++i) { | |
for (size_t j = 1; j < blen; ++j) { | |
auto cost = fabs(a[i] - b[j]); | |
dpmat[at(i, j)] = cost + fminf(fminf( | |
dpmat[at(i-1, j)], | |
dpmat[at(i-1, j-1)]), | |
dpmat[at(i, j-1)]); | |
} | |
} | |
} | |
template <typename Storage> | |
struct PackedSequence { | |
Storage storage; | |
size_t* batch_sizes; | |
}; | |
template <typename Iter> | |
std::vector<size_t> batch_to_length(const Iter batch_begin, const Iter batch_end) { | |
std::vector<size_t> length_list(*batch_begin); // must be sorted with a descend order | |
for (Iter b = batch_begin; b != batch_end; ++b) { | |
for (size_t i = 0; i < *b; ++i) { | |
++length_list[i]; | |
} | |
} | |
return std::move(length_list); | |
} | |
void test_packed_sequence() { | |
std::vector<float> padded = { | |
0, 1, 2, 3, | |
4, 5, 6, 7, | |
8, 9, 10, 110 | |
}; | |
std::vector<size_t> lens = {3, 2, 2, 1}; | |
std::vector<float> packed = { | |
0, 1, 2, 3, | |
4, 5, 6, | |
8 | |
}; | |
std::vector<size_t> batchs = {4, 3, 1}; | |
PackedSequence<float*> p{packed.data(), batchs.data()} ; | |
const auto length_b = batch_to_length(batchs.cbegin(), batchs.cend()); | |
for (size_t i = 0; i < length_b.size(); ++i) { | |
assert(lens[i] == length_b[i]); | |
} | |
const auto batchs_l = batch_to_length(lens.cbegin(), lens.cend()); | |
for (size_t i = 0; i < batchs_l.size(); ++i) { | |
assert(batchs[i] == batchs_l[i]); | |
} | |
} | |
int main() { | |
const float ra[] = {1,1,1,1,1}; | |
const float rb[] = {1,1,2,2,1,3,3}; | |
thrust::host_vector<float> a(ra, ra + 5); | |
thrust::host_vector<float> b(rb, rb + 7); | |
thrust::device_vector<float> da = a; | |
thrust::device_vector<float> db = b; | |
size_t alen = a.size() + 1; | |
size_t blen = b.size() + 1; | |
thrust::device_vector<float> dpmat(alen, blen); | |
const auto at = make_index(alen, blen); | |
dtw_cuda<<<1, 1>>>(thrust::raw_pointer_cast(da.data()), | |
thrust::raw_pointer_cast(db.data()), | |
thrust::raw_pointer_cast(dpmat.data()), | |
at); | |
thrust::host_vector<float> result(dpmat.size()); | |
thrust::copy(dpmat.begin(), dpmat.end(), result.begin()); | |
std::cout << result[at(alen-1, blen-1)] << std::endl;; | |
for (size_t i = 0; i < alen; ++i) { | |
for (size_t j = 0; j < blen; ++j) { | |
std::cout << result[at(i, j)] << ", "; | |
} | |
std::cout << std::endl; | |
} | |
std::cout << std::endl; | |
std::cout << dtw<L1>(a, b) << " == 6 ?" << std::endl; | |
std::cout << dtw<L2>(a, b) << " == 13.3006 ?" << std::endl; | |
test_packed_sequence(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment