-
Testing with python, indexing seems to be 4-5x slower on dev with the same index size.
5000^2 indices release-1.7.7:
(vvrel) $ time python c2.py num cpus: 8 --- starting read --- [8653986 2684847 5359819 6371280 2575670 6294078 6491145 8731866 9526938 8170667] real 1m15.728s user 6m22.080s sys 0m46.743s
dev:
(vv) [ec2-user@ip-172-31-3-82 ~]$ time python c2.py num cpus: 8 --- starting read --- [ 578693 2524771 5801837 2139416 8007168 9360407 3173001 669253 8497520 8872297] real 4m44.506s user 33m56.111s sys 0m41.129s
-
test_incomplete.cc
- query.est_result_size returns 1 byte
- query only returns 8 results per submit, unless the buffers are overallocated 8x
- duplicate results for duplicate column index 308. not sure if this is by design, but I was surprised by the behavior (2 extra results, so the query results do not match the input size)
-
test_main.cc
- set
index_size
to 200_000 to reproduce a std::bad_alloc
error - for performance testing, set
index_size = 2000
- observe the following (m5.4xlarge instance)
- the first
query.est_result_size
returns 1 - the second one (after first incomplete) hangs for roughly the same amount of time as the first read
- subsequently, if I use the "known" allocation size (2000 elements) then each incomplete read will only return 6-8 elements per iteration
- if I use aggressive reallocation (doubling) then only 4 retries to finish
and the incomplete returns
are fast after the initial pause, but the
est_result_size
is very low (e.g. with index_size=200
the est_result_size
is 7 per iteration) - however,
- the first
- observe the following (m5.4xlarge instance)
- set
Last active
April 21, 2020 01:54
-
-
Save ihnorton/45034e5e230ce6bb45dc5256783461b5 to your computer and use it in GitHub Desktop.
Debugging code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build two TileDB test executables: test_incomplete and test_main.
# FIX: cmake_minimum_required() must be the first command in the top-level
# CMakeLists (it establishes policy behavior before project() runs).
cmake_minimum_required(VERSION 3.12)
project(foo)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_BUILD_TYPE Debug)

###
set(SRCS test_incomplete test_main)
###

# Locate the TileDB package config shipped in the dist tree
# (pass -DTileDB_DIST=<path> when configuring).
set(TileDB_DIR "${TileDB_DIST}/lib64/cmake/TileDB")
find_package(TileDB REQUIRED)

# One executable per source, each linked against the shared TileDB library.
foreach (SRC ${SRCS})
  add_executable(${SRC} ${SRC}.cc)
  target_link_libraries(${SRC} PRIVATE TileDB::tiledb_shared)
endforeach()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <tiledb/tiledb> | |
#include <cmath> | |
#include <vector> | |
using namespace tiledb; | |
// Dimension parameters for the 2-D sparse test array.
const uint64_t dim_base = 3163;
// BUG FIX: the original wrote `dim_base ^ 2`, which is bitwise XOR (== 3161),
// not exponentiation -- the sibling test_main.cc uses pow(dim_base, 2),
// confirming a square was intended. Use an exact integer square.
const uint64_t dim_max = dim_base * dim_base;
// Number of (row, col, a) cells written and read back by this test.
const uint64_t index_size = 50;
// Tile extent for both dimensions.
const uint64_t tile_size = 3000;
bool debug = false;
// Create a 2-D sparse array at `path`: int64 dimensions "d1"/"d2" over
// [1, dim_max] with tile extent `tile_size`, plus one int64 attribute "a".
void create_array(Context ctx, std::string path) {
  // Build both dimensions up front instead of chaining add_dimension calls.
  auto d1 = Dimension::create<int64_t>(ctx, "d1", {{1, dim_max}}, tile_size);
  auto d2 = Dimension::create<int64_t>(ctx, "d2", {{1, dim_max}}, tile_size);

  Domain dom(ctx);
  dom.add_dimension(d1);
  dom.add_dimension(d2);

  // Row-major sparse schema with a single fixed-size attribute.
  ArraySchema sparse_schema(ctx, TILEDB_SPARSE);
  sparse_schema.set_domain(dom);
  sparse_schema.set_order({{TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR}});
  sparse_schema.add_attribute(Attribute::create<int64_t>(ctx, "a"));

  tiledb::Array::create(path, sparse_schema);
}
// Repro harness for incomplete sparse reads: writes 50 (row, col, a) cells to
// a temporary array, then reads them back with one single-cell range per
// input coordinate, resubmitting while the query reports INCOMPLETE.
int main() {
  // print tiledb version to be safe
  auto [major, minor, rev] = tiledb::version();
  std::cout << "tiledb_version: " << major << " " << minor << " " << rev
            << std::endl;
  // create a temp array name
  srand(time(NULL));
  std::stringstream ptmp;
  ptmp << "/tmp/py296rpr2_" << rand();
  std::string path = ptmp.str();
  std::cout << "[path]: " << path << std::endl;
  auto cfg = tiledb::Config();
  // cfg["sm.num_tbb_threads"] = 1;
  // Oversized memory budgets (~2 PB) so the budget never limits the read.
  cfg["sm.memory_budget"] = (uint64_t)(pow(1024, 2) * 2000000);
  cfg["sm.memory_budget_var"] = (uint64_t)(pow(1024, 2) * 2000000);
  auto ctx = tiledb::Context(cfg);
  create_array(ctx, path);
  // Fixed 50-element test data. NOTE(review): column value 308 appears twice
  // (positions 17 and 30) -- see the gist notes about duplicate results.
  std::vector<int64_t> rows = {
      2909, 2799, 1325, 2016, 6,    3003, 2348, 2148, 1574, 2245,
      2065, 2792, 1765, 2811, 1186, 1298, 1204, 312,  1195, 1619,
      1049, 2650, 2525, 3122, 2427, 1199, 2056, 2410, 1198, 2988,
      2441, 1680, 897,  2502, 3149, 1063, 1628, 2631, 451,  2287,
      51,   1589, 1043, 2394, 643,  817,  869,  1381, 202,  1498};
  std::vector<int64_t> cols = {
      458,  155,  2530, 2632, 130,  1365, 772,  172,  781,  2195,
      1290, 2191, 1728, 521,  2810, 780,  1789, 308,  1280, 409,
      840,  1385, 2673, 1114, 1396, 183,  109,  3112, 17,   454,
      308,  2385, 1954, 1529, 1237, 2921, 1732, 1149, 2495, 2502,
      1700, 266,  296,  834,  3159, 939,  309,  1597, 1071, 284};
  std::vector<int64_t> a = {
      459,  515,  878,  107,  779,  1189, 2160, 1355, 1796, 2198,
      1576, 2362, 226,  2059, 2078, 2694, 1824, 1320, 1908, 2968,
      1547, 681,  640,  1082, 2115, 2937, 965,  2690, 1572, 701,
      596,  224,  1439, 799,  2358, 241,  743,  2370, 2188, 1713,
      1114, 3107, 783,  922,  1124, 2934, 1928, 3007, 2894, 1687};
  // still need zipped coords, w/out duplicates
  // no-op here but was used in bigger example
  // Zip rows/cols into interleaved (d1, d2) pairs, skipping any exact
  // duplicate coordinate pair already copied (O(n^2) scan over num_g pairs).
  // rows.size() + cols.size() == 2 * rows.size(), the worst-case zipped size.
  std::vector<int64_t> coords(rows.size() + cols.size());
  uint64_t i = 0, num_g = 0;
  bool skip = false;
  for (; i < rows.size(); i++) {
    for (uint64_t j = 0; j < num_g; j++) {
      if (coords[2 * j] == rows[i] && coords[2 * j + 1] == cols[i]) {
        skip = true;
        break;
      }
    }
    if (skip) {
      // Reset the flag so the next candidate is evaluated fresh.
      skip = false;
      continue;
    }
    coords[2 * num_g] = rows[i];
    coords[2 * num_g + 1] = cols[i];
    num_g++;
  }
  // Trim to the number of unique pairs actually kept.
  coords.resize(2 * num_g);
  std::cout << "coords.size: " << coords.size() << std::endl;
  // write array
  {
    auto array = tiledb::Array(ctx, path, TILEDB_WRITE);
    Query query(ctx, array);
    query.set_layout(TILEDB_UNORDERED)
        .set_buffer("a", a)
        .set_coordinates(coords);
    query.submit();
    query.finalize();
    array.close();
  }
  // read array
  {
    // Buffers sized to the "known" result count (one cell per input pair).
    std::vector<int64_t> a_back(rows.size());
    std::vector<int64_t> coords_back(2 * rows.size());
    auto array = tiledb::Array(ctx, path, TILEDB_READ);
    Query query(ctx, array, TILEDB_READ);
    query.set_layout(TILEDB_UNORDERED);
    // One single-cell range per (row, col) pair on dims 0 and 1.
    for (uint64_t i = 0; i < cols.size(); i++) {
      query.add_range(0, rows[i], rows[i]);
      query.add_range(1, cols[i], cols[i]);
      // std::cout << "adding: " << i << " " << rows[i] << " " << cols[i] << "
      // (data: " << a[i] << ")" << std::endl;
    }
    // NOTE(review): est_result_size here reportedly returns 1 byte -- far
    // below sizeof(int64_t) * a_back.size(); that is the bug under study.
    uint64_t est_bytes = query.est_result_size("a");
    size_t est_elements = est_bytes / sizeof(int64_t);
    std::cout << "estimated query result bytes: " << est_bytes << std::endl;
    std::cout << "a_back computed size: " << sizeof(int64_t) * a_back.size()
              << std::endl;
    query.set_buffer("a", a_back).set_buffer(TILEDB_COORDS, coords_back);
    // using the lines below (overallocating 8x) allows to return all the
    // expected results
    //.set_buffer(TILEDB_COORDS, (void*)coords_back.data(), 800)
    //.set_buffer("a", (void*)a_back.data(), 400);
    std::cout << "initial query status: " << query.query_status() << std::endl;
    std::cout << "submitting read query" << std::endl;
    query.submit();
    // Incomplete-read loop: after each submit, advance the buffer offsets by
    // the number of elements returned and resubmit with the remaining space.
    size_t a_read = 0;
    size_t c_read = 0;
    size_t retries = 0;
    while (query.query_status() == Query::Status::INCOMPLETE) {
      auto res_sizes = query.result_buffer_elements();
      // std::cout << " | rsz a: " << res_sizes["a"].second << " " <<
      // res_sizes[TILEDB_COORDS].second;
      a_read += res_sizes["a"].second;
      c_read += res_sizes[TILEDB_COORDS].second;
      query.set_buffer("a", a_back.data() + a_read, a_back.size() - a_read);
      query.set_buffer(TILEDB_COORDS, coords_back.data() + c_read,
                       coords_back.size() - c_read);
      // Only count a retry once the coords buffer is completely full;
      // bail out if the query stays INCOMPLETE with no space left.
      retries += (coords_back.size() == c_read) ? 1 : 0;
      if (retries > 3)
        break;
      query.submit();
    }
    std::cout << "query status: " << query.query_status() << std::endl;
    query.finalize();
    std::cout << "a_back.size after: " << a_back.size()
              << " coords_back.size after: " << coords_back.size() << std::endl;
    std::cout << "a_back:" << std::endl;
    // NOTE(review): `int kj` vs size_t a_back.size() is a signed/unsigned
    // comparison, and this prints the full buffer even when fewer than
    // a_back.size() elements were actually read (tail shows stale zeros).
    for (int kj = 0; kj < a_back.size(); kj++) {
      std::cout << "idx: " << kj << " coord: " << coords_back[2 * kj] << " "
                << coords_back[2 * kj + 1] << " -- value: " << a_back[kj]
                << std::endl;
    }
    array.close();
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <tiledb/tiledb> | |
#include <chrono> | |
#include <cmath> | |
#include <ostream> | |
#include <random> | |
#include <set> | |
#include <vector> | |
using namespace tiledb; | |
// change this parameter to control the number of indexes per dimension | |
// or use command line | |
// - 3000 to 5000 is good for perf regression testing | |
// - 200_000 will reproduce the bad_alloc on m5.2xlarge (may need larger if more | |
// RAM available) | |
// Number of indexes per dimension; override via argv[1].
// - 3000 to 5000 is good for perf regression testing
// - 200_000 will reproduce the bad_alloc on m5.2xlarge (may need larger if
//   more RAM available)
uint64_t index_size = 200;

// other parameters
const uint64_t dim_base = 3163;
// Exact integer square (avoids the double round-trip of pow()).
const uint64_t dim_max = dim_base * dim_base;
const uint64_t tile_size = 3000;

// incomplete query parameters
// BUG FIX: the original wrote `(1024 ^ 2)`, which is bitwise XOR (== 1026),
// so the "4 GB"/"2 GB" thresholds were actually ~4 MB / ~2 MB. Use a real
// megabyte multiplier.
// - this parameter controls when we switch from doubling allocation to linear
const uint64_t linear_alloc_threshold = 1024ULL * 1024 * 4000;  // 4 GB
// - this parameter controls the additional bytes per linear allocation
const uint64_t linear_alloc_bytes = 1024ULL * 1024 * 2000;  // 2 GB

// rng -- the output is deterministic by default (default-constructed engine)
// static std::default_random_engine g_rng;
std::default_random_engine g_rng;

// sizeof of a container's element type
#define vtype(x) sizeof(decltype(x)::value_type)
// Build the 2-D sparse test array at `path`: dimensions "d1" and "d2" are
// int64 over [1, dim_max] with extent `tile_size`; attribute "a" is int64.
void create_array(Context ctx, std::string path) {
  Domain dom(ctx);
  // Add each dimension with its own statement rather than chaining.
  dom.add_dimension(
      Dimension::create<int64_t>(ctx, "d1", {{1, dim_max}}, tile_size));
  dom.add_dimension(
      Dimension::create<int64_t>(ctx, "d2", {{1, dim_max}}, tile_size));

  // Sparse, row-major in both tile and cell order.
  ArraySchema schema(ctx, TILEDB_SPARSE);
  schema.set_domain(dom);
  schema.set_order({{TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR}});

  auto attr_a = Attribute::create<int64_t>(ctx, "a");
  schema.add_attribute(attr_a);

  tiledb::Array::create(path, schema);
}
// Print a short preview of `v`: its size plus the first and last (up to) 10
// elements, eliding the middle with " ... " for long vectors. Vectors with
// 20 or fewer elements are printed in full.
// BUG FIX: the original took the vector BY VALUE (a full copy on every
// print) and unconditionally indexed v[0..9] and v[size-10..size-1], which
// is out-of-bounds UB for vectors shorter than 10 elements (and printed
// overlapping duplicates for sizes 10..20).
std::ostream &operator<<(std::ostream &os, const std::vector<int64_t> &v) {
  os << "| vector size: " << v.size() << " |" << std::endl;
  os << "[ ";
  const size_t head = std::min<size_t>(10, v.size());
  for (size_t i = 0; i < head; i++)
    os << v[i] << ", ";
  if (v.size() > 20) {
    // Head and tail are disjoint; elide the middle.
    os << " ... ";
    for (size_t i = v.size() - 10; i < v.size(); i++)
      os << v[i] << ", ";
  } else {
    // Short vector: print any remaining elements without an ellipsis.
    for (size_t i = head; i < v.size(); i++)
      os << v[i] << ", ";
  }
  os << " ]";
  return os;
}
// Return one random integer drawn uniformly from [1, dim_max], using the
// file-level engine `g_rng` (deterministic sequence unless reseeded).
int64_t one_randint() {
  // static: construct the distribution once. uniform_int_distribution<>
  // defaults its result type to `int`; dim_max (~1e7) still fits -- TODO
  // confirm if dim_max is ever raised beyond INT_MAX.
  static std::uniform_int_distribution<> r(1, dim_max);
  // std::default_random_engine e;
  // std::uniform_int_distribution<> r(1, dim_max);
  return r(g_rng);
}
std::vector<int64_t> fill_randints(size_t size, bool unique = true) { | |
std::set<int64_t> intset; | |
std::vector<int64_t> output; | |
output.resize(size); | |
for (size_t i = 0; i < size; i++) { | |
auto new_int = one_randint(); | |
output[i] = new_int; | |
} | |
// shuffle the vector | |
// std::copy(intset.begin(), intset.end(), std::back_inserter(output)); | |
// std::shuffle(std::begin(output), std::end(output), g_rng); | |
return output; | |
} | |
// Performance/regression harness: writes `index_size` random cells to a
// temporary sparse array, reads them back with one single-cell range per
// coordinate, and drives the incomplete-query loop with a growing buffer
// (doubling, then linear) while timing each phase and dumping stats.
int main(int argc, char **argv) {
  // Optional override of the per-dimension index count.
  if (argc > 1) {
    index_size = atoi(argv[1]);
  }
  // are we running against libtiledb dev branch?
  // dev (2.x) uses per-dimension buffers; 1.x uses zipped TILEDB_COORDS.
  bool dev = true;
  {
    auto [major, minor, rev] = tiledb::version();
    std::cout << "tiledb_version: " << major << " " << minor << " " << rev
              << std::endl;
    dev = (major == 2) ? true : false;
  }
  // generate unique array path
  srand(time(NULL));
  std::string path = std::string("/tmp/py296rpr_") + std::to_string(rand());
  std::cout << "-- array base path: " << path << std::endl;
  // config options
  auto cfg = tiledb::Config();
  cfg["sm.dedup_coords"] = "false";
  cfg["sm.check_coord_dups"] = "false";
  // Oversized memory budgets (~2 PB) so the budget never limits the read.
  cfg["sm.memory_budget"] = (uint64_t)(pow(1024, 2) * 2000000);
  cfg["sm.memory_budget_var"] = (uint64_t)(pow(1024, 2) * 2000000);
  cfg["sm.num_tbb_threads"] = 8;
  cfg["sm.num_reader_threads"] = 8;
  auto ctx = tiledb::Context(cfg);
  // create the array
  create_array(ctx, path);
  // create data
  std::vector<int64_t> rows(index_size);
  std::vector<int64_t> cols(index_size);
  std::vector<int64_t> a(index_size);
  std::vector<int64_t> coords;
  // create random data
  rows = fill_randints(index_size);
  cols = fill_randints(index_size);
  a = fill_randints(index_size);
  // zip the coordinates (legacy 1.x path only; dev writes d1/d2 directly)
  if (!dev) {
    coords.resize(2 * index_size);
    for (size_t i = 0; i < rows.size(); i++) {
      coords[2 * i] = rows[i];
      coords[2 * i + 1] = cols[i];
    }
  }
  std::cout << "preparing write query" << std::endl;
  {
    auto array = tiledb::Array(ctx, path, TILEDB_WRITE);
    Query query(ctx, array);
    query.set_layout(TILEDB_UNORDERED).set_buffer("a", a.data(), a.size());
    if (dev) {
      query.set_buffer("d1", rows.data(), rows.size())
          .set_buffer("d2", cols.data(), cols.size());
    } else {
      query.set_coordinates(coords);
    }
    query.submit();
    query.finalize();
  }
  // Read-back buffers, initially sized to the known result count.
  std::vector<int64_t> a_back(a.size());
  std::vector<int64_t> coords_back;
  std::vector<int64_t> coords_back_d1;
  std::vector<int64_t> coords_back_d2;
  if (dev) {
    coords_back_d1.resize(rows.size());
    coords_back_d2.resize(rows.size());
  } else {
    coords_back.resize(rows.size() * 2);
  }
  // Running element counts of what has been returned so far.
  size_t a_read = 0;
  size_t coords_read = 0;
  tiledb::Stats::enable();
  std::cout << "preparing read query" << std::endl;
  // read array
  {
    auto array = tiledb::Array(ctx, path, TILEDB_READ);
    Query query(ctx, array, TILEDB_READ);
    // One single-cell range per (row, col) pair on dims 0 and 1.
    for (size_t i = 0; i < rows.size(); i++) {
      query.add_range(0, rows[i], rows[i]).add_range(1, cols[i], cols[i]);
    }
    std::cout << "-- setting buffers --" << std::endl;
    query.set_layout(TILEDB_UNORDERED).set_buffer("a", a_back);
    if (dev) {
      query.set_buffer("d1", coords_back_d1).set_buffer("d2", coords_back_d2);
    } else {
      query.set_buffer(TILEDB_COORDS, coords_back);
    }
    // Time est_result_size separately -- the gist notes it both returns a
    // far-too-low estimate and can stall for the duration of a read.
    std::cout << "-- calling est_result_size --" << std::endl;
    auto start = std::chrono::high_resolution_clock::now();
    std::cout << "estimated query result bytes: " << query.est_result_size("a")
              << std::endl;
    std::chrono::duration<double> elapsed =
        std::chrono::high_resolution_clock::now() - start;
    std::cout << " -- elapsed " << elapsed.count() << std::endl;
    start = std::chrono::high_resolution_clock::now();
    /******************************************************************/
    std::cout << "submitting read query" << std::endl;
    query.submit();
    std::cout << "query status: " << query.query_status() << std::endl;
    /******************************************************************/
    // Incomplete-read loop: tally returned elements, grow the buffers
    // (doubling until linear_alloc_threshold, then linear), advance the
    // buffer offsets, and resubmit.
    size_t retries = 0;
    uint64_t est_bytes = 0;
    while (query.query_status() == Query::Status::INCOMPLETE) {
      if (retries > 100) {
        std::cout << ".. !!! exceeded retries, breaking..." << std::endl;
        std::cout << ".. !!! final query status: " << query.query_status()
                  << std::endl;
        break;
      }
      std::cout << "..retrying incomplete..." << std::endl;
      auto res_sizes = query.result_buffer_elements();
      a_read += res_sizes["a"].second;
      if (dev) {
        coords_read += res_sizes["d1"].second;
      } else {
        coords_read += res_sizes[TILEDB_COORDS].second;
      }
      est_bytes = query.est_result_size("a");
      std::cout << "current coords_read: " << coords_read
                << " | estimated query result bytes: " << est_bytes
                << std::endl;
      // reallocate before resubmitting incomplete queries
      if (true) {
        // NOTE(review): a_new_elems is only assigned in the two branches
        // below -- if the buffer is already >= linear_alloc_threshold AND
        // >= 2*est_bytes, the resize() calls read it UNINITIALIZED.
        // NOTE(review): `retries` only increments in the linear branch, so
        // the doubling path alone can never trip the retry guard above.
        size_t a_new_elems;
        if ((a_back.size() * sizeof(int64_t)) < linear_alloc_threshold) {
          // reallocate by doubling until hitting `linear_alloc_threshold`
          std::cout << "  reallocating 2x!" << std::endl;
          a_new_elems = a_back.size() * 2;
        } else if ((a_back.size() * sizeof(int64_t)) < (2 * est_bytes)) {
          // linear reallocation if we are under 2x the estimate
          auto a_new_bytes =
              (a_back.size() * sizeof(vtype(a_back))) + linear_alloc_bytes;
          a_new_elems = a_new_bytes / sizeof(vtype(a_back));
          std::cout << "  reallocating linear! elems: " << a_new_elems
                    << std::endl;
          retries += 1;
        }
        a_back.resize(a_new_elems);
        if (dev) {
          coords_back_d1.resize(a_new_elems);
          coords_back_d2.resize(a_new_elems);
        } else {
          // Zipped coords hold two values per cell.
          coords_back.resize(a_new_elems * 2);
        }
      }
      // Point the query at the unused tail of each (possibly regrown) buffer.
      query.set_buffer("a", a_back.data() + a_read, a_back.size() - a_read);
      if (dev) {
        query
            .set_buffer("d1", coords_back_d1.data() + coords_read,
                        coords_back_d1.size() - coords_read)
            .set_buffer("d2", coords_back_d2.data() + coords_read,
                        coords_back_d2.size() - coords_read);
      } else {
        query.set_buffer(TILEDB_COORDS, coords_back.data() + (coords_read),
                         coords_back.size() - (coords_read));
      }
      query.submit();
    }
    std::cout << "query status: " << query.query_status() << std::endl;
    elapsed = std::chrono::high_resolution_clock::now() - start;
    std::cout << " READ elapsed " << elapsed.count() << std::endl;
    // Fold in the elements returned by the final (COMPLETE) submit.
    auto res_sizes = query.result_buffer_elements();
    a_read += res_sizes["a"].second;
    if (dev) {
      coords_read += res_sizes["d1"].second;
    } else {
      coords_read += res_sizes[TILEDB_COORDS].second;
    }
  }
  // Shrink buffers to what was actually read before the data check.
  a_back.resize(a_read);
  if (dev) {
    coords_back_d1.resize(coords_read);
    coords_back_d2.resize(coords_read);
  } else {
    coords_back.resize(coords_read);
  }
  std::string stout;
  tiledb::Stats::dump(&stout);
  std::cout << "STATS: " << std::endl;
  std::cout << stout << std::endl;
  std::cout << "----------------------------------------------------------";
  std::cout << "final coords_read: " << coords_read << std::endl;
  std::cout << "final a_read: " << a_read << std::endl;
  std::cout << "final a_back.size(): " << a_back.size() << std::endl
            << std::endl;
  std::cout << "----------------------------------------------------------"
            << std::endl;
  std::cout << "Data check: printing first and last 10 elements of each vector"
            << std::endl
            << std::endl;
  std::cout << "Input data:" << std::endl;
  std::cout << ".. rows: " << rows << std::endl;
  std::cout << ".. cols: " << cols << std::endl;
  std::cout << ".. data 'a': " << a << std::endl << std::endl;
  std::cout << "Query result data:" << std::endl;
  std::cout << "data 'a' returned: " << a_back << std::endl;
  // Order-insensitive comparison via sets (the read layout is UNORDERED).
  // NOTE(review): sets also collapse duplicates, so a dropped value paired
  // with a duplicated one would not be detected here.
  auto start = std::chrono::high_resolution_clock::now();
  auto a_set = std::set<int64_t>(a.begin(), a.end());
  auto a_back_set = std::set<int64_t>(a_back.begin(), a_back.end());
  std::cout << "'a == a_back': " << std::boolalpha << (a_set == a_back_set)
            << std::endl;
  if (dev) {
    std::cout << "coords_d1 returned: " << coords_back_d1 << std::endl;
    std::cout << "coords_d2 returned: " << coords_back_d2 << std::endl;
    auto cback1_set =
        std::set<int64_t>(coords_back_d1.begin(), coords_back_d1.end());
    auto cback2_set =
        std::set<int64_t>(coords_back_d2.begin(), coords_back_d2.end());
    auto rows_set = std::set<int64_t>(rows.begin(), rows.end());
    auto cols_set = std::set<int64_t>(cols.begin(), cols.end());
    std::cout << "'coords_d1 == rows': " << std::boolalpha
              << (cback1_set == rows_set) << std::endl;
    std::cout << "'coords_d2 == cols': " << std::boolalpha
              << (cback2_set == cols_set) << std::endl;
  } else {
    auto coords_set = std::set<int64_t>(coords.begin(), coords.end());
    auto cback_set = std::set<int64_t>(coords_back.begin(), coords_back.end());
    std::cout << "(ZIPPED) coords returned: " << coords << std::endl;
    std::cout << "'coords_back == coords': " << std::boolalpha
              << (coords_set == cback_set) << std::endl;
  }
  std::chrono::duration<double> elapsed =
      std::chrono::high_resolution_clock::now() - start;
  std::cout << "data check elapsed " << elapsed.count() << std::endl;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment