-
Testing with python, indexing seems to be 4-5x slower on dev with the same index size.
5000^2 indices release-1.7.7:
(vvrel) $ time python c2.py num cpus: 8 --- starting read --- [8653986 2684847 5359819 6371280 2575670 6294078 6491145 8731866 9526938 8170667] real 1m15.728s user 6m22.080s sys 0m46.743s
dev:
(vv) [ec2-user@ip-172-31-3-82 ~]$ time python c2.py num cpus: 8 --- starting read --- [ 578693 2524771 5801837 2139416 8007168 9360407 3173001 669253 8497520 8872297] real 4m44.506s user 33m56.111s sys 0m41.129s
-
test_incomplete.cc
- query.est_result_size returns 1 byte
- query only returns 8 results per submit, unless the buffers are overallocated 8x
- duplicate results for duplicate column index 308. not sure if this is by design, but I was surprised by the behavior (2 extra results, so the query results do not match the input size)
-
test_main.cc
- set
index_size
to 200_000 to reproduce a std::bad_alloc
error - for performance testing, set
index_size = 2000
- observe the following (m5.4xlarge instance)
- the first
query.est_result_size
returns 1 - the second one (after first incomplete) hangs for roughly the same amount of time as the first read
- subsequently, if I use the "known" allocation size (2000 elements) then each incomplete read will only return 6-8 elements per iteration
- if I use aggressive reallocation (doubling) then only 4 retries to finish
and the incomplete returns
are fast after the initial pause, but the
est_result_size
is very low (e.g. with index_size=200
the est_result_size
is 7 per iteration) - however,
- the first
- observe the following (m5.4xlarge instance)
- set
Last active
April 21, 2020 01:54
-
-
Save ihnorton/45034e5e230ce6bb45dc5256783461b5 to your computer and use it in GitHub Desktop.
Debugging code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build two TileDB test executables: test_incomplete and test_main.
# FIX: cmake_minimum_required() must be the first command in the top-level
# CMakeLists (it establishes policy behavior before project() runs).
cmake_minimum_required(VERSION 3.12)
project(foo)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_BUILD_TYPE Debug)

###
set(SRCS test_incomplete test_main)
###

# Locate the TileDB package config shipped in the dist tree
# (pass -DTileDB_DIST=<path> when configuring).
set(TileDB_DIR "${TileDB_DIST}/lib64/cmake/TileDB")
find_package(TileDB REQUIRED)

# One executable per source, each linked against the shared TileDB library.
foreach (SRC ${SRCS})
  add_executable(${SRC} ${SRC}.cc)
  target_link_libraries(${SRC} PRIVATE TileDB::tiledb_shared)
endforeach()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <tiledb/tiledb> | |
#include <cmath> | |
#include <vector> | |
using namespace tiledb; | |
// Dimension parameters for the 2-D sparse test array.
const uint64_t dim_base = 3163;
// BUG FIX: the original wrote `dim_base ^ 2`, which is bitwise XOR (== 3161),
// not exponentiation -- the sibling test_main.cc uses pow(dim_base, 2),
// confirming a square was intended. Use an exact integer square.
const uint64_t dim_max = dim_base * dim_base;
// Number of (row, col, a) cells written and read back by this test.
const uint64_t index_size = 50;
// Tile extent for both dimensions.
const uint64_t tile_size = 3000;
bool debug = false;
// Create a 2-D sparse array at `path`: int64 dimensions "d1"/"d2" over
// [1, dim_max] with tile extent `tile_size`, plus one int64 attribute "a".
void create_array(Context ctx, std::string path) {
  // Build both dimensions up front instead of chaining add_dimension calls.
  auto d1 = Dimension::create<int64_t>(ctx, "d1", {{1, dim_max}}, tile_size);
  auto d2 = Dimension::create<int64_t>(ctx, "d2", {{1, dim_max}}, tile_size);

  Domain dom(ctx);
  dom.add_dimension(d1);
  dom.add_dimension(d2);

  // Row-major sparse schema with a single fixed-size attribute.
  ArraySchema sparse_schema(ctx, TILEDB_SPARSE);
  sparse_schema.set_domain(dom);
  sparse_schema.set_order({{TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR}});
  sparse_schema.add_attribute(Attribute::create<int64_t>(ctx, "a"));

  tiledb::Array::create(path, sparse_schema);
}
// Repro harness for incomplete sparse reads: writes 50 (row, col, a) cells to
// a temporary array, then reads them back with one single-cell range per
// input coordinate, resubmitting while the query reports INCOMPLETE.
int main() {
  // print tiledb version to be safe
  auto [major, minor, rev] = tiledb::version();
  std::cout << "tiledb_version: " << major << " " << minor << " " << rev
            << std::endl;
  // create a temp array name
  srand(time(NULL));
  std::stringstream ptmp;
  ptmp << "/tmp/py296rpr2_" << rand();
  std::string path = ptmp.str();
  std::cout << "[path]: " << path << std::endl;
  auto cfg = tiledb::Config();
  // cfg["sm.num_tbb_threads"] = 1;
  // Oversized memory budgets (~2 PB) so the budget never limits the read.
  cfg["sm.memory_budget"] = (uint64_t)(pow(1024, 2) * 2000000);
  cfg["sm.memory_budget_var"] = (uint64_t)(pow(1024, 2) * 2000000);
  auto ctx = tiledb::Context(cfg);
  create_array(ctx, path);
  // Fixed 50-element test data. NOTE(review): column value 308 appears twice
  // (positions 17 and 30) -- see the gist notes about duplicate results.
  std::vector<int64_t> rows = {
      2909, 2799, 1325, 2016, 6,    3003, 2348, 2148, 1574, 2245,
      2065, 2792, 1765, 2811, 1186, 1298, 1204, 312,  1195, 1619,
      1049, 2650, 2525, 3122, 2427, 1199, 2056, 2410, 1198, 2988,
      2441, 1680, 897,  2502, 3149, 1063, 1628, 2631, 451,  2287,
      51,   1589, 1043, 2394, 643,  817,  869,  1381, 202,  1498};
  std::vector<int64_t> cols = {
      458,  155,  2530, 2632, 130,  1365, 772,  172,  781,  2195,
      1290, 2191, 1728, 521,  2810, 780,  1789, 308,  1280, 409,
      840,  1385, 2673, 1114, 1396, 183,  109,  3112, 17,   454,
      308,  2385, 1954, 1529, 1237, 2921, 1732, 1149, 2495, 2502,
      1700, 266,  296,  834,  3159, 939,  309,  1597, 1071, 284};
  std::vector<int64_t> a = {
      459,  515,  878,  107,  779,  1189, 2160, 1355, 1796, 2198,
      1576, 2362, 226,  2059, 2078, 2694, 1824, 1320, 1908, 2968,
      1547, 681,  640,  1082, 2115, 2937, 965,  2690, 1572, 701,
      596,  224,  1439, 799,  2358, 241,  743,  2370, 2188, 1713,
      1114, 3107, 783,  922,  1124, 2934, 1928, 3007, 2894, 1687};
  // still need zipped coords, w/out duplicates
  // no-op here but was used in bigger example
  // Zip rows/cols into interleaved (d1, d2) pairs, skipping any exact
  // duplicate coordinate pair already copied (O(n^2) scan over num_g pairs).
  // rows.size() + cols.size() == 2 * rows.size(), the worst-case zipped size.
  std::vector<int64_t> coords(rows.size() + cols.size());
  uint64_t i = 0, num_g = 0;
  bool skip = false;
  for (; i < rows.size(); i++) {
    for (uint64_t j = 0; j < num_g; j++) {
      if (coords[2 * j] == rows[i] && coords[2 * j + 1] == cols[i]) {
        skip = true;
        break;
      }
    }
    if (skip) {
      // Reset the flag so the next candidate is evaluated fresh.
      skip = false;
      continue;
    }
    coords[2 * num_g] = rows[i];
    coords[2 * num_g + 1] = cols[i];
    num_g++;
  }
  // Trim to the number of unique pairs actually kept.
  coords.resize(2 * num_g);
  std::cout << "coords.size: " << coords.size() << std::endl;
  // write array
  {
    auto array = tiledb::Array(ctx, path, TILEDB_WRITE);
    Query query(ctx, array);
    query.set_layout(TILEDB_UNORDERED)
        .set_buffer("a", a)
        .set_coordinates(coords);
    query.submit();
    query.finalize();
    array.close();
  }
  // read array
  {
    // Buffers sized to the "known" result count (one cell per input pair).
    std::vector<int64_t> a_back(rows.size());
    std::vector<int64_t> coords_back(2 * rows.size());
    auto array = tiledb::Array(ctx, path, TILEDB_READ);
    Query query(ctx, array, TILEDB_READ);
    query.set_layout(TILEDB_UNORDERED);
    // One single-cell range per (row, col) pair on dims 0 and 1.
    for (uint64_t i = 0; i < cols.size(); i++) {
      query.add_range(0, rows[i], rows[i]);
      query.add_range(1, cols[i], cols[i]);
      // std::cout << "adding: " << i << " " << rows[i] << " " << cols[i] << "
      // (data: " << a[i] << ")" << std::endl;
    }
    // NOTE(review): est_result_size here reportedly returns 1 byte -- far
    // below sizeof(int64_t) * a_back.size(); that is the bug under study.
    uint64_t est_bytes = query.est_result_size("a");
    size_t est_elements = est_bytes / sizeof(int64_t);
    std::cout << "estimated query result bytes: " << est_bytes << std::endl;
    std::cout << "a_back computed size: " << sizeof(int64_t) * a_back.size()
              << std::endl;
    query.set_buffer("a", a_back).set_buffer(TILEDB_COORDS, coords_back);
    // using the lines below (overallocating 8x) allows to return all the
    // expected results
    //.set_buffer(TILEDB_COORDS, (void*)coords_back.data(), 800)
    //.set_buffer("a", (void*)a_back.data(), 400);
    std::cout << "initial query status: " << query.query_status() << std::endl;
    std::cout << "submitting read query" << std::endl;
    query.submit();
    // Incomplete-read loop: after each submit, advance the buffer offsets by
    // the number of elements returned and resubmit with the remaining space.
    size_t a_read = 0;
    size_t c_read = 0;
    size_t retries = 0;
    while (query.query_status() == Query::Status::INCOMPLETE) {
      auto res_sizes = query.result_buffer_elements();
      // std::cout << " | rsz a: " << res_sizes["a"].second << " " <<
      // res_sizes[TILEDB_COORDS].second;
      a_read += res_sizes["a"].second;
      c_read += res_sizes[TILEDB_COORDS].second;
      query.set_buffer("a", a_back.data() + a_read, a_back.size() - a_read);
      query.set_buffer(TILEDB_COORDS, coords_back.data() + c_read,
                       coords_back.size() - c_read);
      // Only count a retry once the coords buffer is completely full;
      // bail out if the query stays INCOMPLETE with no space left.
      retries += (coords_back.size() == c_read) ? 1 : 0;
      if (retries > 3)
        break;
      query.submit();
    }
    std::cout << "query status: " << query.query_status() << std::endl;
    query.finalize();
    std::cout << "a_back.size after: " << a_back.size()
              << " coords_back.size after: " << coords_back.size() << std::endl;
    std::cout << "a_back:" << std::endl;
    // NOTE(review): `int kj` vs size_t a_back.size() is a signed/unsigned
    // comparison, and this prints the full buffer even when fewer than
    // a_back.size() elements were actually read (tail shows stale zeros).
    for (int kj = 0; kj < a_back.size(); kj++) {
      std::cout << "idx: " << kj << " coord: " << coords_back[2 * kj] << " "
                << coords_back[2 * kj + 1] << " -- value: " << a_back[kj]
                << std::endl;
    }
    array.close();
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <tiledb/tiledb> | |
#include <chrono> | |
#include <cmath> | |
#include <ostream> | |
#include <random> | |
#include <set> | |
#include <vector> | |
using namespace tiledb; | |
// change this parameter to control the number of indexes per dimension | |
// or use command line | |
// - 3000 to 5000 is good for perf regression testing | |
// - 200_000 will reproduce the bad_alloc on m5.2xlarge (may need larger if more | |
// RAM available) | |
// Number of indexes per dimension; override via argv[1].
// - 3000 to 5000 is good for perf regression testing
// - 200_000 will reproduce the bad_alloc on m5.2xlarge (may need larger if
//   more RAM available)
uint64_t index_size = 200;

// other parameters
const uint64_t dim_base = 3163;
// Exact integer square (avoids the double round-trip of pow()).
const uint64_t dim_max = dim_base * dim_base;
const uint64_t tile_size = 3000;

// incomplete query parameters
// BUG FIX: the original wrote `(1024 ^ 2)`, which is bitwise XOR (== 1026),
// so the "4 GB"/"2 GB" thresholds were actually ~4 MB / ~2 MB. Use a real
// megabyte multiplier.
// - this parameter controls when we switch from doubling allocation to linear
const uint64_t linear_alloc_threshold = 1024ULL * 1024 * 4000;  // 4 GB
// - this parameter controls the additional bytes per linear allocation
const uint64_t linear_alloc_bytes = 1024ULL * 1024 * 2000;  // 2 GB

// rng -- the output is deterministic by default (default-constructed engine)
// static std::default_random_engine g_rng;
std::default_random_engine g_rng;

// sizeof of a container's element type
#define vtype(x) sizeof(decltype(x)::value_type)
// Build the 2-D sparse test array at `path`: dimensions "d1" and "d2" are
// int64 over [1, dim_max] with extent `tile_size`; attribute "a" is int64.
void create_array(Context ctx, std::string path) {
  Domain dom(ctx);
  // Add each dimension with its own statement rather than chaining.
  dom.add_dimension(
      Dimension::create<int64_t>(ctx, "d1", {{1, dim_max}}, tile_size));
  dom.add_dimension(
      Dimension::create<int64_t>(ctx, "d2", {{1, dim_max}}, tile_size));

  // Sparse, row-major in both tile and cell order.
  ArraySchema schema(ctx, TILEDB_SPARSE);
  schema.set_domain(dom);
  schema.set_order({{TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR}});

  auto attr_a = Attribute::create<int64_t>(ctx, "a");
  schema.add_attribute(attr_a);

  tiledb::Array::create(path, schema);
}
// Print a short preview of `v`: its size plus the first and last (up to) 10
// elements, eliding the middle with " ... " for long vectors. Vectors with
// 20 or fewer elements are printed in full.
// BUG FIX: the original took the vector BY VALUE (a full copy on every
// print) and unconditionally indexed v[0..9] and v[size-10..size-1], which
// is out-of-bounds UB for vectors shorter than 10 elements (and printed
// overlapping duplicates for sizes 10..20).
std::ostream &operator<<(std::ostream &os, const std::vector<int64_t> &v) {
  os << "| vector size: " << v.size() << " |" << std::endl;
  os << "[ ";
  const size_t head = std::min<size_t>(10, v.size());
  for (size_t i = 0; i < head; i++)
    os << v[i] << ", ";
  if (v.size() > 20) {
    // Head and tail are disjoint; elide the middle.
    os << " ... ";
    for (size_t i = v.size() - 10; i < v.size(); i++)
      os << v[i] << ", ";
  } else {
    // Short vector: print any remaining elements without an ellipsis.
    for (size_t i = head; i < v.size(); i++)
      os << v[i] << ", ";
  }
  os << " ]";
  return os;
}
// Return one random integer drawn uniformly from [1, dim_max], using the
// file-level engine `g_rng` (deterministic sequence unless reseeded).
int64_t one_randint() {
  // static: construct the distribution once. uniform_int_distribution<>
  // defaults its result type to `int`; dim_max (~1e7) still fits -- TODO
  // confirm if dim_max is ever raised beyond INT_MAX.
  static std::uniform_int_distribution<> r(1, dim_max);
  // std::default_random_engine e;
  // std::uniform_int_distribution<> r(1, dim_max);
  return r(g_rng);
}
std::vector<int64_t> fill_randints(size_t size, bool unique = true) { | |
std::set<int64_t> intset; | |
std::vector<int64_t> output; | |
output.resize(size); | |
for (size_t i = 0; i < size; i++) { | |
auto new_int = one_randint(); | |
output[i] = new_int; | |
} | |
// shuffle the vector | |
// std::copy(intset.begin(), intset.end(), std::back_inserter(output)); | |
// std::shuffle(std::begin(output), std::end(output), g_rng); | |
return output; | |
} | |
// Performance/regression harness: writes `index_size` random cells to a
// temporary sparse array, reads them back with one single-cell range per
// coordinate, and drives the incomplete-query loop with a growing buffer
// (doubling, then linear) while timing each phase and dumping stats.
int main(int argc, char **argv) {
  // Optional override of the per-dimension index count.
  if (argc > 1) {
    index_size = atoi(argv[1]);
  }
  // are we running against libtiledb dev branch?
  // dev (2.x) uses per-dimension buffers; 1.x uses zipped TILEDB_COORDS.
  bool dev = true;
  {
    auto [major, minor, rev] = tiledb::version();
    std::cout << "tiledb_version: " << major << " " << minor << " " << rev
              << std::endl;
    dev = (major == 2) ? true : false;
  }
  // generate unique array path
  srand(time(NULL));
  std::string path = std::string("/tmp/py296rpr_") + std::to_string(rand());
  std::cout << "-- array base path: " << path << std::endl;
  // config options
  auto cfg = tiledb::Config();
  cfg["sm.dedup_coords"] = "false";
  cfg["sm.check_coord_dups"] = "false";
  // Oversized memory budgets (~2 PB) so the budget never limits the read.
  cfg["sm.memory_budget"] = (uint64_t)(pow(1024, 2) * 2000000);
  cfg["sm.memory_budget_var"] = (uint64_t)(pow(1024, 2) * 2000000);
  cfg["sm.num_tbb_threads"] = 8;
  cfg["sm.num_reader_threads"] = 8;
  auto ctx = tiledb::Context(cfg);
  // create the array
  create_array(ctx, path);
  // create data
  std::vector<int64_t> rows(index_size);
  std::vector<int64_t> cols(index_size);
  std::vector<int64_t> a(index_size);
  std::vector<int64_t> coords;
  // create random data
  rows = fill_randints(index_size);
  cols = fill_randints(index_size);
  a = fill_randints(index_size);
  // zip the coordinates (legacy 1.x path only; dev writes d1/d2 directly)
  if (!dev) {
    coords.resize(2 * index_size);
    for (size_t i = 0; i < rows.size(); i++) {
      coords[2 * i] = rows[i];
      coords[2 * i + 1] = cols[i];
    }
  }
  std::cout << "preparing write query" << std::endl;
  {
    auto array = tiledb::Array(ctx, path, TILEDB_WRITE);
    Query query(ctx, array);
    query.set_layout(TILEDB_UNORDERED).set_buffer("a", a.data(), a.size());
    if (dev) {
      query.set_buffer("d1", rows.data(), rows.size())
          .set_buffer("d2", cols.data(), cols.size());
    } else {
      query.set_coordinates(coords);
    }
    query.submit();
    query.finalize();
  }
  // Read-back buffers, initially sized to the known result count.
  std::vector<int64_t> a_back(a.size());
  std::vector<int64_t> coords_back;
  std::vector<int64_t> coords_back_d1;
  std::vector<int64_t> coords_back_d2;
  if (dev) {
    coords_back_d1.resize(rows.size());
    coords_back_d2.resize(rows.size());
  } else {
    coords_back.resize(rows.size() * 2);
  }
  // Running element counts of what has been returned so far.
  size_t a_read = 0;
  size_t coords_read = 0;
  tiledb::Stats::enable();
  std::cout << "preparing read query" << std::endl;
  // read array
  {
    auto array = tiledb::Array(ctx, path, TILEDB_READ);
    Query query(ctx, array, TILEDB_READ);
    // One single-cell range per (row, col) pair on dims 0 and 1.
    for (size_t i = 0; i < rows.size(); i++) {
      query.add_range(0, rows[i], rows[i]).add_range(1, cols[i], cols[i]);
    }
    std::cout << "-- setting buffers --" << std::endl;
    query.set_layout(TILEDB_UNORDERED).set_buffer("a", a_back);
    if (dev) {
      query.set_buffer("d1", coords_back_d1).set_buffer("d2", coords_back_d2);
    } else {
      query.set_buffer(TILEDB_COORDS, coords_back);
    }
    // Time est_result_size separately -- the gist notes it both returns a
    // far-too-low estimate and can stall for the duration of a read.
    std::cout << "-- calling est_result_size --" << std::endl;
    auto start = std::chrono::high_resolution_clock::now();
    std::cout << "estimated query result bytes: " << query.est_result_size("a")
              << std::endl;
    std::chrono::duration<double> elapsed =
        std::chrono::high_resolution_clock::now() - start;
    std::cout << " -- elapsed " << elapsed.count() << std::endl;
    start = std::chrono::high_resolution_clock::now();
    /******************************************************************/
    std::cout << "submitting read query" << std::endl;
    query.submit();
    std::cout << "query status: " << query.query_status() << std::endl;
    /******************************************************************/
    // Incomplete-read loop: tally returned elements, grow the buffers
    // (doubling until linear_alloc_threshold, then linear), advance the
    // buffer offsets, and resubmit.
    size_t retries = 0;
    uint64_t est_bytes = 0;
    while (query.query_status() == Query::Status::INCOMPLETE) {
      if (retries > 100) {
        std::cout << ".. !!! exceeded retries, breaking..." << std::endl;
        std::cout << ".. !!! final query status: " << query.query_status()
                  << std::endl;
        break;
      }
      std::cout << "..retrying incomplete..." << std::endl;
      auto res_sizes = query.result_buffer_elements();
      a_read += res_sizes["a"].second;
      if (dev) {
        coords_read += res_sizes["d1"].second;
      } else {
        coords_read += res_sizes[TILEDB_COORDS].second;
      }
      est_bytes = query.est_result_size("a");
      std::cout << "current coords_read: " << coords_read
                << " | estimated query result bytes: " << est_bytes
                << std::endl;
      // reallocate before resubmitting incomplete queries
      if (true) {
        // NOTE(review): a_new_elems is only assigned in the two branches
        // below -- if the buffer is already >= linear_alloc_threshold AND
        // >= 2*est_bytes, the resize() calls read it UNINITIALIZED.
        // NOTE(review): `retries` only increments in the linear branch, so
        // the doubling path alone can never trip the retry guard above.
        size_t a_new_elems;
        if ((a_back.size() * sizeof(int64_t)) < linear_alloc_threshold) {
          // reallocate by doubling until hitting `linear_alloc_threshold`
          std::cout << "  reallocating 2x!" << std::endl;
          a_new_elems = a_back.size() * 2;
        } else if ((a_back.size() * sizeof(int64_t)) < (2 * est_bytes)) {
          // linear reallocation if we are under 2x the estimate
          auto a_new_bytes =
              (a_back.size() * sizeof(vtype(a_back))) + linear_alloc_bytes;
          a_new_elems = a_new_bytes / sizeof(vtype(a_back));
          std::cout << "  reallocating linear! elems: " << a_new_elems
                    << std::endl;
          retries += 1;
        }
        a_back.resize(a_new_elems);
        if (dev) {
          coords_back_d1.resize(a_new_elems);
          coords_back_d2.resize(a_new_elems);
        } else {
          // Zipped coords hold two values per cell.
          coords_back.resize(a_new_elems * 2);
        }
      }
      // Point the query at the unused tail of each (possibly regrown) buffer.
      query.set_buffer("a", a_back.data() + a_read, a_back.size() - a_read);
      if (dev) {
        query
            .set_buffer("d1", coords_back_d1.data() + coords_read,
                        coords_back_d1.size() - coords_read)
            .set_buffer("d2", coords_back_d2.data() + coords_read,
                        coords_back_d2.size() - coords_read);
      } else {
        query.set_buffer(TILEDB_COORDS, coords_back.data() + (coords_read),
                         coords_back.size() - (coords_read));
      }
      query.submit();
    }
    std::cout << "query status: " << query.query_status() << std::endl;
    elapsed = std::chrono::high_resolution_clock::now() - start;
    std::cout << " READ elapsed " << elapsed.count() << std::endl;
    // Fold in the elements returned by the final (COMPLETE) submit.
    auto res_sizes = query.result_buffer_elements();
    a_read += res_sizes["a"].second;
    if (dev) {
      coords_read += res_sizes["d1"].second;
    } else {
      coords_read += res_sizes[TILEDB_COORDS].second;
    }
  }
  // Shrink buffers to what was actually read before the data check.
  a_back.resize(a_read);
  if (dev) {
    coords_back_d1.resize(coords_read);
    coords_back_d2.resize(coords_read);
  } else {
    coords_back.resize(coords_read);
  }
  std::string stout;
  tiledb::Stats::dump(&stout);
  std::cout << "STATS: " << std::endl;
  std::cout << stout << std::endl;
  std::cout << "----------------------------------------------------------";
  std::cout << "final coords_read: " << coords_read << std::endl;
  std::cout << "final a_read: " << a_read << std::endl;
  std::cout << "final a_back.size(): " << a_back.size() << std::endl
            << std::endl;
  std::cout << "----------------------------------------------------------"
            << std::endl;
  std::cout << "Data check: printing first and last 10 elements of each vector"
            << std::endl
            << std::endl;
  std::cout << "Input data:" << std::endl;
  std::cout << ".. rows: " << rows << std::endl;
  std::cout << ".. cols: " << cols << std::endl;
  std::cout << ".. data 'a': " << a << std::endl << std::endl;
  std::cout << "Query result data:" << std::endl;
  std::cout << "data 'a' returned: " << a_back << std::endl;
  // Order-insensitive comparison via sets (the read layout is UNORDERED).
  // NOTE(review): sets also collapse duplicates, so a dropped value paired
  // with a duplicated one would not be detected here.
  auto start = std::chrono::high_resolution_clock::now();
  auto a_set = std::set<int64_t>(a.begin(), a.end());
  auto a_back_set = std::set<int64_t>(a_back.begin(), a_back.end());
  std::cout << "'a == a_back': " << std::boolalpha << (a_set == a_back_set)
            << std::endl;
  if (dev) {
    std::cout << "coords_d1 returned: " << coords_back_d1 << std::endl;
    std::cout << "coords_d2 returned: " << coords_back_d2 << std::endl;
    auto cback1_set =
        std::set<int64_t>(coords_back_d1.begin(), coords_back_d1.end());
    auto cback2_set =
        std::set<int64_t>(coords_back_d2.begin(), coords_back_d2.end());
    auto rows_set = std::set<int64_t>(rows.begin(), rows.end());
    auto cols_set = std::set<int64_t>(cols.begin(), cols.end());
    std::cout << "'coords_d1 == rows': " << std::boolalpha
              << (cback1_set == rows_set) << std::endl;
    std::cout << "'coords_d2 == cols': " << std::boolalpha
              << (cback2_set == cols_set) << std::endl;
  } else {
    auto coords_set = std::set<int64_t>(coords.begin(), coords.end());
    auto cback_set = std::set<int64_t>(coords_back.begin(), coords_back.end());
    std::cout << "(ZIPPED) coords returned: " << coords << std::endl;
    std::cout << "'coords_back == coords': " << std::boolalpha
              << (coords_set == cback_set) << std::endl;
  }
  std::chrono::duration<double> elapsed =
      std::chrono::high_resolution_clock::now() - start;
  std::cout << "data check elapsed " << elapsed.count() << std::endl;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment