Skip to content

Instantly share code, notes, and snippets.

@ihnorton
Last active April 21, 2020 01:54
Show Gist options
  • Save ihnorton/45034e5e230ce6bb45dc5256783461b5 to your computer and use it in GitHub Desktop.
Save ihnorton/45034e5e230ce6bb45dc5256783461b5 to your computer and use it in GitHub Desktop.
Debugging code
# cmake_minimum_required() must come before project() so that version
# policies are in effect when the project is configured.
cmake_minimum_required(VERSION 3.12)
project(foo)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_BUILD_TYPE Debug)

### Test drivers; each entry corresponds to <name>.cc in this directory.
set(SRCS test_incomplete test_main)
###

# Locate the TileDB CMake package inside the distribution tree.
# NOTE: the lib64 path assumes a RedHat-style layout — TODO confirm on other distros.
set(TileDB_DIR "${TileDB_DIST}/lib64/cmake/TileDB")
find_package(TileDB REQUIRED)

foreach (SRC ${SRCS})
  add_executable(${SRC} ${SRC}.cc)
  target_link_libraries(${SRC} PRIVATE TileDB::tiledb_shared)
endforeach()
  • Testing with python, indexing seems to be 4-5x slower on dev with the same index size.

    5000^2 indices release-1.7.7:

    (vvrel) $ time python c2.py
    num cpus:  8
    --- starting read ---
    [8653986 2684847 5359819 6371280 2575670 6294078 6491145 8731866 9526938
     8170667]
    
    real    1m15.728s
    user    6m22.080s
    sys     0m46.743s
    

    dev:

    (vv) [ec2-user@ip-172-31-3-82 ~]$ time python c2.py
    num cpus:  8
    --- starting read ---
    [ 578693 2524771 5801837 2139416 8007168 9360407 3173001  669253 8497520
     8872297]
    
    real    4m44.506s
    user    33m56.111s
    sys     0m41.129s
    
  • test_incomplete.cc

    • query.est_result_size returns 1 byte
    • query only returns 8 results per submit, unless the buffers are overallocated 8x
    • duplicate results for the duplicate column index 308. Not sure if this is by design, but I was surprised by the behavior (2 extra results, so the query returns do not match the input size)
  • test_main.cc

    • set index_size to 200_000 to reproduce std::bad_alloc error
    • for performance testing, set index_size = 2000
      • observe the following (m5.4xlarge instance)
        • the first query.est_result_size returns 1
        • the second one (after first incomplete) hangs for roughly the same amount of time as the first read
        • subsequently, if I use the "known" allocation size (2000 elements) then each incomplete read will only return 6-8 elements per iteration
        • if I use aggressive reallocation (doubling) then only 4 retries are needed to finish and the incomplete returns are fast after the initial pause, but the est_result_size is very low (e.g. with index_size=200 the est_result_size is 7 per iteration)
        • however,
#include <tiledb/tiledb>
#include <cmath>
#include <vector>
using namespace tiledb;
// Dimension domain parameters: the domain extent is dim_base squared.
// FIX: the original wrote `dim_base ^ 2`, which is bitwise XOR
// (3163 ^ 2 == 3161), not exponentiation — the companion test_main.cc
// uses pow(dim_base, 2) for the same constant, confirming the intent.
const uint64_t dim_base = 3163;
const uint64_t dim_max = dim_base * dim_base;
// Number of (row, col, value) cells written to the array.
const uint64_t index_size = 50;
// Space tile extent used for both dimensions.
const uint64_t tile_size = 3000;
// Toggle for extra diagnostic output.
bool debug = false;
// Create a 2-D sparse array at `path`: int64 dimensions "d1"/"d2" over
// [1, dim_max] with tile extent tile_size, and one int64 attribute "a",
// row-major tile and cell order.
void create_array(Context ctx, std::string path) {
  auto d1 = Dimension::create<int64_t>(ctx, "d1", {{1, dim_max}}, tile_size);
  auto d2 = Dimension::create<int64_t>(ctx, "d2", {{1, dim_max}}, tile_size);

  Domain dom(ctx);
  dom.add_dimension(d1);
  dom.add_dimension(d2);

  ArraySchema schema(ctx, TILEDB_SPARSE);
  schema.set_domain(dom);
  schema.set_order({{TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR}});
  schema.add_attribute(Attribute::create<int64_t>(ctx, "a"));

  tiledb::Array::create(path, schema);
}
// Repro driver for incomplete-query behavior on a small fixed dataset (see
// gist notes): writes 50 (row, col) -> value cells, then reads them back with
// one single-cell range per input coordinate, resubmitting while the query
// reports INCOMPLETE.
// Fixes from review: removed the unused local `est_elements`; the result
// print loop now uses size_t instead of a signed/unsigned `int` comparison.
int main() {
  // print tiledb version to be safe
  auto [major, minor, rev] = tiledb::version();
  std::cout << "tiledb_version: " << major << " " << minor << " " << rev
            << std::endl;

  // create a temp array name
  srand(time(NULL));
  std::stringstream ptmp;
  ptmp << "/tmp/py296rpr2_" << rand();
  std::string path = ptmp.str();
  std::cout << "[path]: " << path << std::endl;

  auto cfg = tiledb::Config();
  // cfg["sm.num_tbb_threads"] = 1;
  cfg["sm.memory_budget"] = (uint64_t)(pow(1024, 2) * 2000000);
  cfg["sm.memory_budget_var"] = (uint64_t)(pow(1024, 2) * 2000000);
  auto ctx = tiledb::Context(cfg);

  create_array(ctx, path);

  // Fixed input data. Note `cols` contains the value 308 twice (indices 17
  // and 30) — see the gist notes on duplicate results.
  std::vector<int64_t> rows = {
      2909, 2799, 1325, 2016, 6,    3003, 2348, 2148, 1574, 2245,
      2065, 2792, 1765, 2811, 1186, 1298, 1204, 312,  1195, 1619,
      1049, 2650, 2525, 3122, 2427, 1199, 2056, 2410, 1198, 2988,
      2441, 1680, 897,  2502, 3149, 1063, 1628, 2631, 451,  2287,
      51,   1589, 1043, 2394, 643,  817,  869,  1381, 202,  1498};
  std::vector<int64_t> cols = {
      458,  155,  2530, 2632, 130,  1365, 772,  172,  781,  2195,
      1290, 2191, 1728, 521,  2810, 780,  1789, 308,  1280, 409,
      840,  1385, 2673, 1114, 1396, 183,  109,  3112, 17,   454,
      308,  2385, 1954, 1529, 1237, 2921, 1732, 1149, 2495, 2502,
      1700, 266,  296,  834,  3159, 939,  309,  1597, 1071, 284};
  std::vector<int64_t> a = {
      459,  515,  878,  107,  779,  1189, 2160, 1355, 1796, 2198,
      1576, 2362, 226,  2059, 2078, 2694, 1824, 1320, 1908, 2968,
      1547, 681,  640,  1082, 2115, 2937, 965,  2690, 1572, 701,
      596,  224,  1439, 799,  2358, 241,  743,  2370, 2188, 1713,
      1114, 3107, 783,  922,  1124, 2934, 1928, 3007, 2894, 1687};

  // still need zipped coords, w/out duplicates
  // no-op here but was used in bigger example
  std::vector<int64_t> coords(rows.size() + cols.size());
  uint64_t i = 0, num_g = 0;
  bool skip = false;
  for (; i < rows.size(); i++) {
    // linear scan over pairs already kept, dropping exact (row, col) dups
    for (uint64_t j = 0; j < num_g; j++) {
      if (coords[2 * j] == rows[i] && coords[2 * j + 1] == cols[i]) {
        skip = true;
        break;
      }
    }
    if (skip) {
      skip = false;
      continue;
    }
    coords[2 * num_g] = rows[i];
    coords[2 * num_g + 1] = cols[i];
    num_g++;
  }
  coords.resize(2 * num_g);
  std::cout << "coords.size: " << coords.size() << std::endl;

  // write array
  {
    auto array = tiledb::Array(ctx, path, TILEDB_WRITE);
    Query query(ctx, array);
    query.set_layout(TILEDB_UNORDERED)
        .set_buffer("a", a)
        .set_coordinates(coords);
    query.submit();
    query.finalize();
    array.close();
  }

  // read array
  {
    std::vector<int64_t> a_back(rows.size());
    std::vector<int64_t> coords_back(2 * rows.size());
    auto array = tiledb::Array(ctx, path, TILEDB_READ);
    Query query(ctx, array, TILEDB_READ);
    query.set_layout(TILEDB_UNORDERED);
    // one single-cell range per written coordinate
    for (uint64_t i = 0; i < cols.size(); i++) {
      query.add_range(0, rows[i], rows[i]);
      query.add_range(1, cols[i], cols[i]);
      // std::cout << "adding: " << i << " " << rows[i] << " " << cols[i] << "
      // (data: " << a[i] << ")" << std::endl;
    }

    uint64_t est_bytes = query.est_result_size("a");
    std::cout << "estimated query result bytes: " << est_bytes << std::endl;
    std::cout << "a_back computed size: " << sizeof(int64_t) * a_back.size()
              << std::endl;

    query.set_buffer("a", a_back).set_buffer(TILEDB_COORDS, coords_back);
    // using the lines below (overallocating 8x) allows to return all the
    // expected results
    //.set_buffer(TILEDB_COORDS, (void*)coords_back.data(), 800)
    //.set_buffer("a", (void*)a_back.data(), 400);

    std::cout << "initial query status: " << query.query_status() << std::endl;
    std::cout << "submitting read query" << std::endl;
    query.submit();

    size_t a_read = 0;
    size_t c_read = 0;
    size_t retries = 0;
    while (query.query_status() == Query::Status::INCOMPLETE) {
      auto res_sizes = query.result_buffer_elements();
      // std::cout << " | rsz a: " << res_sizes["a"].second << " " <<
      // res_sizes[TILEDB_COORDS].second;
      a_read += res_sizes["a"].second;
      c_read += res_sizes[TILEDB_COORDS].second;
      // re-point the buffers past what has already been returned
      query.set_buffer("a", a_back.data() + a_read, a_back.size() - a_read);
      query.set_buffer(TILEDB_COORDS, coords_back.data() + c_read,
                       coords_back.size() - c_read);
      // only count a retry when the coords buffer was completely filled
      retries += (coords_back.size() == c_read) ? 1 : 0;
      if (retries > 3)
        break;
      query.submit();
    }
    std::cout << "query status: " << query.query_status() << std::endl;
    query.finalize();

    std::cout << "a_back.size after: " << a_back.size()
              << " coords_back.size after: " << coords_back.size() << std::endl;
    std::cout << "a_back:" << std::endl;
    // NOTE: this dumps the full preallocated buffers; entries past the number
    // of results actually returned are zero-initialized filler.
    for (size_t kj = 0; kj < a_back.size(); kj++) {
      std::cout << "idx: " << kj << " coord: " << coords_back[2 * kj] << " "
                << coords_back[2 * kj + 1] << " -- value: " << a_back[kj]
                << std::endl;
    }
    array.close();
  }
}
#include <tiledb/tiledb>
#include <chrono>
#include <cmath>
#include <ostream>
#include <random>
#include <set>
#include <vector>
using namespace tiledb;
// change this parameter to control the number of indexes per dimension
// or use command line
// - 3000 to 5000 is good for perf regression testing
// - 200_000 will reproduce the bad_alloc on m5.2xlarge (may need larger if more
// RAM available)
uint64_t index_size = 200;
// other parameters
const uint64_t dim_base = 3163;
// Full domain extent per dimension (dim_base squared). Integer multiply keeps
// this exact; pow() round-trips through double.
const uint64_t dim_max = dim_base * dim_base;
const uint64_t tile_size = 3000;
// incomplete query parameters
// - this parameter controls when we switch from doubling allocation to linear
// FIX: the original wrote `(1024 ^ 2)`, which is bitwise XOR (== 1026), not
// 1024 squared — it made these thresholds ~4 MB / ~2 MB instead of the
// documented GB values.
const uint64_t linear_alloc_threshold = 1024ULL * 1024 * 4000; // 4 GB
// - this parameter controls the additional bytes per linear allocation
const uint64_t linear_alloc_bytes = 1024ULL * 1024 * 2000; // 2 GB
// rng -- the output is deterministic by default
// static std::default_random_engine g_rng;
std::default_random_engine g_rng;
// CAUTION: this macro already contains a sizeof, so `sizeof(vtype(x))` below
// is a double sizeof — it only yields the right value because
// sizeof(size_t) == sizeof(int64_t) on LP64; verify before reusing.
#define vtype(x) sizeof(decltype(x)::value_type)
// Build a 2-D sparse TileDB array at `path`. Both dimensions ("d1", "d2")
// are int64 over [1, dim_max] with tile extent tile_size; a single int64
// attribute "a" holds the cell values; tile and cell order are row-major.
void create_array(Context ctx, std::string path) {
  Domain dom(ctx);
  dom.add_dimension(
      Dimension::create<int64_t>(ctx, "d1", {{1, dim_max}}, tile_size));
  dom.add_dimension(
      Dimension::create<int64_t>(ctx, "d2", {{1, dim_max}}, tile_size));

  ArraySchema schema(ctx, TILEDB_SPARSE);
  schema.set_domain(dom);
  schema.set_order({{TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR}});
  schema.add_attribute(Attribute::create<int64_t>(ctx, "a"));

  tiledb::Array::create(path, schema);
}
// Print a preview of `v`: its size, then the first 10 and last 10 elements.
// Fixes from review:
//  - takes the vector by const reference (was by value: full copy per print)
//  - vectors with <= 20 elements are printed in full; the original indexed
//    v[0..9] unconditionally (out of bounds for size < 10) and computed
//    v.size() - 10, which wraps around for small sizes (size_t underflow),
//    and printed overlapping elements twice for 10 <= size <= 20.
std::ostream &operator<<(std::ostream &os, const std::vector<int64_t> &v) {
  os << "| vector size: " << v.size() << " |" << std::endl;
  os << "[ ";
  if (v.size() <= 20) {
    for (size_t i = 0; i < v.size(); i++)
      os << v[i] << ", ";
  } else {
    for (size_t i = 0; i < 10; i++)
      os << v[i] << ", ";
    os << " ... ";
    for (size_t i = v.size() - 10; i < v.size(); i++)
      os << v[i] << ", ";
  }
  os << " ]";
  return os;
}
// Draw one value uniformly from [1, dim_max] using the shared deterministic
// engine g_rng. The distribution object is function-local static so its
// parameters are set up exactly once.
int64_t one_randint() {
  static std::uniform_int_distribution<> pick(1, dim_max);
  return pick(g_rng);
}
// Return a vector of `size` values drawn from one_randint().
// NOTE: the `unique` flag is currently ignored (kept for interface
// compatibility), so duplicates may appear. The original body kept a dead
// std::set local and commented-out shuffle code for a uniqueness pass that
// was never enabled — removed here.
std::vector<int64_t> fill_randints(size_t size, bool unique = true) {
  (void)unique; // uniqueness pass not implemented; see NOTE above
  std::vector<int64_t> output(size);
  for (auto &v : output)
    v = one_randint();
  return output;
}
// End-to-end perf/repro driver: writes `index_size` random cells to a fresh
// 2-D sparse array, then reads them back through the incomplete-query loop,
// growing the result buffers (2x doubling, then linear) until the query
// completes. Prints TileDB stats and an order-insensitive data check.
// Fixes from review:
//  - `a_new_elems` was uninitialized; if neither growth branch fired,
//    a_back.resize(a_new_elems) read an indeterminate value (UB). It now
//    defaults to the current size (no-op resize).
//  - `sizeof(vtype(a_back))` was a double sizeof (the macro already contains
//    one); replaced with sizeof(int64_t), which is the value it accidentally
//    produced on LP64.
int main(int argc, char **argv) {
  if (argc > 1) {
    // NOTE: atoi truncates to int; adequate for the documented test sizes.
    index_size = atoi(argv[1]);
  }

  // are we running against libtiledb dev branch?
  bool dev = true;
  {
    auto [major, minor, rev] = tiledb::version();
    std::cout << "tiledb_version: " << major << " " << minor << " " << rev
              << std::endl;
    dev = (major == 2) ? true : false;
  }

  // generate unique array path
  srand(time(NULL));
  std::string path = std::string("/tmp/py296rpr_") + std::to_string(rand());
  std::cout << "-- array base path: " << path << std::endl;

  // config options
  auto cfg = tiledb::Config();
  cfg["sm.dedup_coords"] = "false";
  cfg["sm.check_coord_dups"] = "false";
  cfg["sm.memory_budget"] = (uint64_t)(pow(1024, 2) * 2000000);
  cfg["sm.memory_budget_var"] = (uint64_t)(pow(1024, 2) * 2000000);
  cfg["sm.num_tbb_threads"] = 8;
  cfg["sm.num_reader_threads"] = 8;
  auto ctx = tiledb::Context(cfg);

  // create the array
  create_array(ctx, path);

  // create data
  std::vector<int64_t> rows(index_size);
  std::vector<int64_t> cols(index_size);
  std::vector<int64_t> a(index_size);
  std::vector<int64_t> coords;

  // create random data
  rows = fill_randints(index_size);
  cols = fill_randints(index_size);
  a = fill_randints(index_size);

  // zip the coordinates (pre-dev API takes one interleaved coords buffer)
  if (!dev) {
    coords.resize(2 * index_size);
    for (size_t i = 0; i < rows.size(); i++) {
      coords[2 * i] = rows[i];
      coords[2 * i + 1] = cols[i];
    }
  }

  std::cout << "preparing write query" << std::endl;
  {
    auto array = tiledb::Array(ctx, path, TILEDB_WRITE);
    Query query(ctx, array);
    query.set_layout(TILEDB_UNORDERED).set_buffer("a", a.data(), a.size());
    if (dev) {
      // dev (2.x) API: one buffer per dimension
      query.set_buffer("d1", rows.data(), rows.size())
          .set_buffer("d2", cols.data(), cols.size());
    } else {
      query.set_coordinates(coords);
    }
    query.submit();
    query.finalize();
  }

  std::vector<int64_t> a_back(a.size());
  std::vector<int64_t> coords_back;
  std::vector<int64_t> coords_back_d1;
  std::vector<int64_t> coords_back_d2;
  if (dev) {
    coords_back_d1.resize(rows.size());
    coords_back_d2.resize(rows.size());
  } else {
    coords_back.resize(rows.size() * 2);
  }

  size_t a_read = 0;
  size_t coords_read = 0;
  tiledb::Stats::enable();

  std::cout << "preparing read query" << std::endl;
  // read array
  {
    auto array = tiledb::Array(ctx, path, TILEDB_READ);
    Query query(ctx, array, TILEDB_READ);
    // one single-cell range per written coordinate
    for (size_t i = 0; i < rows.size(); i++) {
      query.add_range(0, rows[i], rows[i]).add_range(1, cols[i], cols[i]);
    }
    std::cout << "-- setting buffers --" << std::endl;
    query.set_layout(TILEDB_UNORDERED).set_buffer("a", a_back);
    if (dev) {
      query.set_buffer("d1", coords_back_d1).set_buffer("d2", coords_back_d2);
    } else {
      query.set_buffer(TILEDB_COORDS, coords_back);
    }

    std::cout << "-- calling est_result_size --" << std::endl;
    auto start = std::chrono::high_resolution_clock::now();
    std::cout << "estimated query result bytes: " << query.est_result_size("a")
              << std::endl;
    std::chrono::duration<double> elapsed =
        std::chrono::high_resolution_clock::now() - start;
    std::cout << " -- elapsed " << elapsed.count() << std::endl;

    start = std::chrono::high_resolution_clock::now();
    /******************************************************************/
    std::cout << "submitting read query" << std::endl;
    query.submit();
    std::cout << "query status: " << query.query_status() << std::endl;
    /******************************************************************/

    size_t retries = 0;
    uint64_t est_bytes = 0;
    while (query.query_status() == Query::Status::INCOMPLETE) {
      if (retries > 100) {
        std::cout << ".. !!! exceeded retries, breaking..." << std::endl;
        std::cout << ".. !!! final query status: " << query.query_status()
                  << std::endl;
        break;
      }
      std::cout << "..retrying incomplete..." << std::endl;

      // account for what the previous submit returned
      auto res_sizes = query.result_buffer_elements();
      a_read += res_sizes["a"].second;
      if (dev) {
        coords_read += res_sizes["d1"].second;
      } else {
        coords_read += res_sizes[TILEDB_COORDS].second;
      }
      est_bytes = query.est_result_size("a");
      std::cout << "current coords_read: " << coords_read
                << " | estimated query result bytes: " << est_bytes
                << std::endl;

      // reallocate before resubmitting incomplete queries
      if (true) {
        // default: keep the current size (the resize below is then a no-op)
        size_t a_new_elems = a_back.size();
        if ((a_back.size() * sizeof(int64_t)) < linear_alloc_threshold) {
          // reallocate by doubling until hitting `linear_alloc_threshold`
          std::cout << " reallocating 2x!" << std::endl;
          a_new_elems = a_back.size() * 2;
        } else if ((a_back.size() * sizeof(int64_t)) < (2 * est_bytes)) {
          // linear reallocation if we are under 2x the estimate
          auto a_new_bytes =
              (a_back.size() * sizeof(int64_t)) + linear_alloc_bytes;
          a_new_elems = a_new_bytes / sizeof(int64_t);
          std::cout << " reallocating linear! elems: " << a_new_elems
                    << std::endl;
          // NOTE(review): only linear reallocations count toward the retry
          // limit — doubling rounds never trip it. Presumably intentional;
          // confirm if the loop can spin on the doubling path.
          retries += 1;
        }
        a_back.resize(a_new_elems);
        if (dev) {
          coords_back_d1.resize(a_new_elems);
          coords_back_d2.resize(a_new_elems);
        } else {
          coords_back.resize(a_new_elems * 2); // zipped: 2 values per cell
        }
      }

      // re-point the buffers past the data already returned
      query.set_buffer("a", a_back.data() + a_read, a_back.size() - a_read);
      if (dev) {
        query
            .set_buffer("d1", coords_back_d1.data() + coords_read,
                        coords_back_d1.size() - coords_read)
            .set_buffer("d2", coords_back_d2.data() + coords_read,
                        coords_back_d2.size() - coords_read);
      } else {
        query.set_buffer(TILEDB_COORDS, coords_back.data() + (coords_read),
                         coords_back.size() - (coords_read));
      }
      query.submit();
    }
    std::cout << "query status: " << query.query_status() << std::endl;

    elapsed = std::chrono::high_resolution_clock::now() - start;
    std::cout << " READ elapsed " << elapsed.count() << std::endl;

    // fold in the results of the final (completed) submit
    auto res_sizes = query.result_buffer_elements();
    a_read += res_sizes["a"].second;
    if (dev) {
      coords_read += res_sizes["d1"].second;
    } else {
      coords_read += res_sizes[TILEDB_COORDS].second;
    }
  }

  // shrink result buffers to the element counts actually returned
  a_back.resize(a_read);
  if (dev) {
    coords_back_d1.resize(coords_read);
    coords_back_d2.resize(coords_read);
  } else {
    coords_back.resize(coords_read);
  }

  std::string stout;
  tiledb::Stats::dump(&stout);
  std::cout << "STATS: " << std::endl;
  std::cout << stout << std::endl;

  std::cout << "----------------------------------------------------------";
  std::cout << "final coords_read: " << coords_read << std::endl;
  std::cout << "final a_read: " << a_read << std::endl;
  std::cout << "final a_back.size(): " << a_back.size() << std::endl
            << std::endl;
  std::cout << "----------------------------------------------------------"
            << std::endl;
  std::cout << "Data check: printing first and last 10 elements of each vector"
            << std::endl
            << std::endl;
  std::cout << "Input data:" << std::endl;
  std::cout << ".. rows: " << rows << std::endl;
  std::cout << ".. cols: " << cols << std::endl;
  std::cout << ".. data 'a': " << a << std::endl << std::endl;
  std::cout << "Query result data:" << std::endl;
  std::cout << "data 'a' returned: " << a_back << std::endl;

  auto start = std::chrono::high_resolution_clock::now();
  // set-compare: order-insensitive check that the same values came back
  auto a_set = std::set<int64_t>(a.begin(), a.end());
  auto a_back_set = std::set<int64_t>(a_back.begin(), a_back.end());
  std::cout << "'a == a_back': " << std::boolalpha << (a_set == a_back_set)
            << std::endl;
  if (dev) {
    std::cout << "coords_d1 returned: " << coords_back_d1 << std::endl;
    std::cout << "coords_d2 returned: " << coords_back_d2 << std::endl;
    auto cback1_set =
        std::set<int64_t>(coords_back_d1.begin(), coords_back_d1.end());
    auto cback2_set =
        std::set<int64_t>(coords_back_d2.begin(), coords_back_d2.end());
    auto rows_set = std::set<int64_t>(rows.begin(), rows.end());
    auto cols_set = std::set<int64_t>(cols.begin(), cols.end());
    std::cout << "'coords_d1 == rows': " << std::boolalpha
              << (cback1_set == rows_set) << std::endl;
    std::cout << "'coords_d2 == cols': " << std::boolalpha
              << (cback2_set == cols_set) << std::endl;
  } else {
    auto coords_set = std::set<int64_t>(coords.begin(), coords.end());
    auto cback_set = std::set<int64_t>(coords_back.begin(), coords_back.end());
    std::cout << "(ZIPPED) coords returned: " << coords << std::endl;
    std::cout << "'coords_back == coords': " << std::boolalpha
              << (coords_set == cback_set) << std::endl;
  }
  std::chrono::duration<double> elapsed =
      std::chrono::high_resolution_clock::now() - start;
  std::cout << "data check elapsed " << elapsed.count() << std::endl;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment