Skip to content

Instantly share code, notes, and snippets.

@mpenick
Created January 9, 2019 15:47
Show Gist options
  • Save mpenick/87f42af22ae110095b07702fad4d1c07 to your computer and use it in GitHub Desktop.
Save mpenick/87f42af22ae110095b07702fad4d1c07 to your computer and use it in GitHub Desktop.
Vector vs HashMap for result metadata
#include <benchmark/benchmark.h>
#include <iostream>
#include <random>
#include "small_dense_hash_map.hpp"
#include "dense_hash_map.hpp"
#include "hash_table.hpp"
#include "string_ref.hpp"
using namespace cass;
struct ColumnDefinition1 : public HashTableEntry<ColumnDefinition1> {
ColumnDefinition1(StringRef name = StringRef())
: name(name) { }
StringRef name;
size_t extra;
};
struct ColumnDefinition2 {
ColumnDefinition2(StringRef name = StringRef())
: name(name) { }
bool operator==(const ColumnDefinition2& other) const {
return name.iequals(other.name);
}
StringRef name;
size_t extra;
};
template <class K, class V, class HashFunc, class EqualsFunc>
class LinkedHashMap {
struct Entry {
Entry(const V& value, size_t index)
: value(value)
, index(index)
, next(-1) { }
Entry(V&& value, size_t index)
: value(std::move(value))
, index(index)
, next(-1) { }
V value;
size_t index;
size_t next;
};
public:
struct Iterator {
V& operator*() { return entry->value; }
Iterator(const Entry* entry, const LinkedHashMap& map)
: entry(entry)
, map(map) { }
Iterator& operator++() {
entry = entry->next == -1 ? nullptr : map.entries_[entry->next];
return *this;
}
Iterator operator++(int) {
Entry* temp(entry);
entry = entry->next == -1 ? nullptr : map.entries_[entry->next];
return Iterator(temp, map);
}
bool operator==(Iterator other) const {
return entry == other.entry;
}
bool operator!=(Iterator other) const {
return !(*this == other);
}
const Entry* entry;
const LinkedHashMap& map;
};
public:
LinkedHashMap(size_t capacity = 16) {
mapping_.resize(capacity);
entries_.reserve(capacity);
}
void set_empty_key(const K& key) {
mapping_.set_empty_key(key);
}
void add(const K& key, const V& value) {
const size_t index = entries_.size();
entries_.push_back(Entry(value, index));
put(key, index);
}
void add(const K& key, V&& value) {
const size_t index = entries_.size();
entries_.emplace_back(Entry(std::move(value), index));
put(key, index);
}
Iterator find(const K& key) const {
typename Map::const_iterator it = mapping_.find(key);
if (it != mapping_.end()) {
return Iterator(&entries_[it->second], *this);
} else {
return Iterator(nullptr, *this);
}
}
Iterator end() const {
return Iterator(nullptr, *this);
}
private:
typedef SmallDenseHashMap<K, size_t, 24, HashFunc, EqualsFunc> Map;
typedef SmallVector<Entry, 16> Vec;
void put(const K& key, size_t index) {
typename Map::iterator it = mapping_.find(key);
if (it == mapping_.end()) {
mapping_.insert(std::make_pair(key, index));
} else {
Entry* entry(&entries_[it->second]);
while (entry->next != -1) {
entry = &entries_[entry->next];
}
entry->next = index;
}
}
private:
Map mapping_;
Vec entries_;
};
typedef LinkedHashMap<StringRef, ColumnDefinition2, StringRefIHash, StringRefIEquals> ColumnDefinitions2;
typedef SmallVector<ColumnDefinition2, 16> ColumnDefinitions3;
struct Finder {
Finder(StringRef name)
: name(name) { }
bool operator()(const ColumnDefinition2& def) const {
return def.name.iequals(name);
}
StringRef name;
};
struct CaseSensitiveFinder {
CaseSensitiveFinder(StringRef name)
: name(name) { }
bool operator()(const ColumnDefinition2& def) const {
return def.name.equals(name);
}
StringRef name;
};
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<size_t> column_name_lenght_dist(1, 100);
String generate_random(size_t size) {
String s;
s.reserve(size);
static const char* chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
std::uniform_int_distribution<> dist(0, sizeof(chars) - 1);
for (size_t i = 0; i < size; ++i) {
s.push_back(chars[dist(gen)]);
}
return s;
}
using Columns = Vector<String>;
Columns generate_columns(size_t count) {
Columns columns;
columns.reserve(count);
for (size_t i = 0; i < count; ++i) {
columns.push_back(generate_random(column_name_lenght_dist(gen)));
}
return columns;
}
Columns generate_to_find(const Columns& columns, size_t count) {
Columns to_find;
for (size_t i = 0; i < count; ++i) {
to_find.push_back(columns[i % columns.size()]);
}
return to_find;
}
Columns all_columns(generate_columns(4096));
static void BM_ResultMetadataHashTable(benchmark::State& state) {
Columns columns(all_columns.begin(), all_columns.begin() + state.range(0));
Columns to_find(generate_to_find(columns, state.range(1)));
for (auto _ : state) {
benchmark::DoNotOptimize(columns);
CaseInsensitiveHashTable<ColumnDefinition1> defs(columns.size());
for (auto column : columns) {
defs.add(ColumnDefinition1(column));
benchmark::DoNotOptimize(&defs);
}
for (auto column : to_find) {
IndexVec indices;
defs.get_indices(column, &indices);
benchmark::DoNotOptimize(&indices);
}
}
}
static void BM_ResultMetadataDenseHashMap(benchmark::State& state) {
Columns columns(all_columns.begin(), all_columns.begin() + state.range(0));
Columns to_find(generate_to_find(columns, state.range(1)));
for (auto _ : state) {
benchmark::DoNotOptimize(columns);
ColumnDefinitions2 defs(columns.size());
defs.set_empty_key(StringRef());
for (auto column : columns) {
defs.add(column, ColumnDefinition2(column));
benchmark::DoNotOptimize(&defs);
}
for (auto column : to_find) {
ColumnDefinitions2::Iterator it = defs.find(column);
benchmark::DoNotOptimize(it);
}
}
}
static void BM_ResultMetadataVector(benchmark::State& state) {
Columns columns(all_columns.begin(), all_columns.begin() + state.range(0));
Columns to_find(generate_to_find(columns, state.range(1)));
for (auto _ : state) {
benchmark::DoNotOptimize(columns);
ColumnDefinitions3 defs;
defs.reserve(columns.size());
for (auto column : columns) {
defs.emplace_back(ColumnDefinition2(column));
benchmark::DoNotOptimize(&defs);
}
for (auto column : to_find) {
StringRef name(column);
ColumnDefinitions3::const_iterator it;
if (name.size() > 0 && name.front() == '"' && name.back() == '"') {
name = name.substr(1, name.size() - 2);
it = std::find_if(defs.begin(), defs.end(), CaseSensitiveFinder(name));
} else {
it = std::find_if(defs.begin(), defs.end(), Finder(name));
}
benchmark::DoNotOptimize(it);
}
}
}
BENCHMARK(BM_ResultMetadataDenseHashMap)->Ranges({{1, 4096}, {1, 4096}});
BENCHMARK(BM_ResultMetadataHashTable)->Ranges({{1, 4096}, {1, 4096}});
BENCHMARK(BM_ResultMetadataVector)->Ranges({{1, 4096}, {1, 4096}});
BENCHMARK_MAIN();
-------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------------------
BM_ResultMetadataDenseHashMap/1/1 151 ns 151 ns 4415292
BM_ResultMetadataDenseHashMap/8/1 1659 ns 1659 ns 442730
BM_ResultMetadataDenseHashMap/64/1 12948 ns 12948 ns 53686
BM_ResultMetadataDenseHashMap/512/1 108033 ns 108031 ns 6344
BM_ResultMetadataDenseHashMap/4096/1 914680 ns 914673 ns 742
BM_ResultMetadataDenseHashMap/1/8 657 ns 657 ns 1064311
BM_ResultMetadataDenseHashMap/8/8 4231 ns 4230 ns 170891
BM_ResultMetadataDenseHashMap/64/8 15325 ns 15324 ns 45168
BM_ResultMetadataDenseHashMap/512/8 113724 ns 113721 ns 6317
BM_ResultMetadataDenseHashMap/4096/8 888678 ns 888671 ns 763
BM_ResultMetadataDenseHashMap/1/64 4714 ns 4714 ns 157221
BM_ResultMetadataDenseHashMap/8/64 20156 ns 20156 ns 34721
BM_ResultMetadataDenseHashMap/64/64 34016 ns 34015 ns 21788
BM_ResultMetadataDenseHashMap/512/64 126937 ns 126936 ns 5423
BM_ResultMetadataDenseHashMap/4096/64 913562 ns 913554 ns 747
BM_ResultMetadataDenseHashMap/1/512 36563 ns 36562 ns 17408
BM_ResultMetadataDenseHashMap/8/512 152773 ns 152770 ns 4598
BM_ResultMetadataDenseHashMap/64/512 169100 ns 169099 ns 3545
BM_ResultMetadataDenseHashMap/512/512 271164 ns 271163 ns 2569
BM_ResultMetadataDenseHashMap/4096/512 1087255 ns 1087240 ns 624
BM_ResultMetadataDenseHashMap/1/4096 285127 ns 285123 ns 2449
BM_ResultMetadataDenseHashMap/8/4096 1277325 ns 1277315 ns 573
BM_ResultMetadataDenseHashMap/64/4096 1270248 ns 1270238 ns 547
BM_ResultMetadataDenseHashMap/512/4096 1510374 ns 1510340 ns 484
BM_ResultMetadataDenseHashMap/4096/4096 2203775 ns 2203756 ns 314
BM_ResultMetadataHashTable/1/1 95 ns 95 ns 7314335
BM_ResultMetadataHashTable/8/1 930 ns 930 ns 551217
BM_ResultMetadataHashTable/64/1 7510 ns 7510 ns 90234
BM_ResultMetadataHashTable/512/1 66511 ns 66511 ns 10499
BM_ResultMetadataHashTable/4096/1 515914 ns 515910 ns 1291
BM_ResultMetadataHashTable/1/8 302 ns 302 ns 2373634
BM_ResultMetadataHashTable/8/8 3136 ns 3136 ns 204547
BM_ResultMetadataHashTable/64/8 9629 ns 9628 ns 70926
BM_ResultMetadataHashTable/512/8 71835 ns 71834 ns 10042
BM_ResultMetadataHashTable/4096/8 525207 ns 525203 ns 1277
BM_ResultMetadataHashTable/1/64 2010 ns 2010 ns 369017
BM_ResultMetadataHashTable/8/64 19175 ns 19174 ns 36802
BM_ResultMetadataHashTable/64/64 27996 ns 27996 ns 26483
BM_ResultMetadataHashTable/512/64 82546 ns 82546 ns 8152
BM_ResultMetadataHashTable/4096/64 535020 ns 535008 ns 1253
BM_ResultMetadataHashTable/1/512 15545 ns 15545 ns 45863
BM_ResultMetadataHashTable/8/512 146792 ns 146790 ns 4834
BM_ResultMetadataHashTable/64/512 165854 ns 165853 ns 4396
BM_ResultMetadataHashTable/512/512 225282 ns 225280 ns 3088
BM_ResultMetadataHashTable/4096/512 686897 ns 686888 ns 991
BM_ResultMetadataHashTable/1/4096 116952 ns 116952 ns 4847
BM_ResultMetadataHashTable/8/4096 1147010 ns 1147002 ns 580
BM_ResultMetadataHashTable/64/4096 1293699 ns 1271552 ns 566
BM_ResultMetadataHashTable/512/4096 1351422 ns 1351304 ns 517
BM_ResultMetadataHashTable/4096/4096 1945733 ns 1945316 ns 377
BM_ResultMetadataVector/1/1 48 ns 48 ns 13989769
BM_ResultMetadataVector/8/1 143 ns 143 ns 4675255
BM_ResultMetadataVector/64/1 1054 ns 1054 ns 652654
BM_ResultMetadataVector/512/1 7921 ns 7921 ns 84943
BM_ResultMetadataVector/4096/1 68654 ns 68652 ns 10067
BM_ResultMetadataVector/1/8 381 ns 381 ns 1857193
BM_ResultMetadataVector/8/8 1896 ns 1895 ns 384112
BM_ResultMetadataVector/64/8 2673 ns 2673 ns 257614
BM_ResultMetadataVector/512/8 9938 ns 9937 ns 71950
BM_ResultMetadataVector/4096/8 68749 ns 68744 ns 10368
BM_ResultMetadataVector/1/64 3064 ns 3064 ns 237585
BM_ResultMetadataVector/8/64 13975 ns 13975 ns 49999
BM_ResultMetadataVector/64/64 15837 ns 15836 ns 45379
BM_ResultMetadataVector/512/64 22311 ns 22310 ns 31679
BM_ResultMetadataVector/4096/64 81039 ns 81035 ns 8347
BM_ResultMetadataVector/1/512 23518 ns 23517 ns 26828
BM_ResultMetadataVector/8/512 108546 ns 108541 ns 6101
BM_ResultMetadataVector/64/512 122290 ns 122285 ns 5951
BM_ResultMetadataVector/512/512 136477 ns 136465 ns 5131
BM_ResultMetadataVector/4096/512 212444 ns 212388 ns 3454
BM_ResultMetadataVector/1/4096 196702 ns 196670 ns 3500
BM_ResultMetadataVector/8/4096 876835 ns 876795 ns 737
BM_ResultMetadataVector/64/4096 960212 ns 960159 ns 699
BM_ResultMetadataVector/512/4096 1047247 ns 1047172 ns 670
BM_ResultMetadataVector/4096/4096 1187273 ns 1187193 ns 615
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment