Created
July 30, 2018 16:57
-
-
Save malkia/15bd4936d29c9a40f55b4959c048ee88 to your computer and use it in GitHub Desktop.
reddit_cpp_speeding_up_string_view_string_split
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://www.reddit.com/r/cpp/comments/934r63/speeding_up_string_view_string_split/ | |
// StringViewTestss.cpp : performance experiments for string_view | |
// | |
#include <string_view> | |
#include <iostream> | |
#include <string> | |
#include <vector> | |
#include <algorithm> | |
#include <chrono> | |
#include <fstream> | |
#include <sstream> | |
// initialize this, as otherwise we'll have | |
// system effects here. | |
char global[1024*1024*1024] = {}; | |
size_t global_ptr; | |
static inline void* dumb_alloc(size_t size) | |
{ | |
void* p = global + global_ptr; | |
global_ptr = (global_ptr + size + 7) & ~7; | |
if (global_ptr >= sizeof(global)) { | |
printf("panic at the disco\n"); | |
abort(); | |
} | |
return p; | |
} | |
void* operator new(size_t size) | |
{ | |
return dumb_alloc(size); | |
} | |
void* operator new[](size_t size) | |
{ | |
return dumb_alloc(size); | |
} | |
void operator delete(void*) _NOEXCEPT | |
{ | |
} | |
void operator delete[](void*) _NOEXCEPT | |
{ | |
} | |
using namespace std::literals; | |
// code based on examples from https://tristanbrindle.com/posts/a-quicker-study-on-tokenising/ | |
// and from https://marcoarena.wordpress.com/2017/01/03/string_view-odi-et-amo/ | |
size_t numAllocations = 0; | |
size_t sizeAllocations = 0; | |
// uncomment to count allocations... | |
/*void* operator new(std::size_t sz) { | |
numAllocations++; | |
sizeAllocations += sz; | |
return std::malloc(sz); | |
}*/ | |
// uses string::find_first_of | |
std::vector<std::string> | |
split(const std::string& str, const std::string& delims = " ") | |
{ | |
std::vector<std::string> output; | |
//output.reserve(str.length() / 4); | |
size_t first = 0; | |
while (first < str.size()) | |
{ | |
const auto second = str.find_first_of(delims, first); | |
if (first != second) | |
{ | |
output.emplace_back(str.data() + first, str.data() + (second == std::string::npos ? str.size() : second)); | |
} | |
if (second == std::string::npos) | |
break; | |
first = second + 1; | |
} | |
return output; | |
} | |
// uses std::find_first_of | |
std::vector<std::string> | |
splitStd(const std::string& str, const std::string& delims = " ") | |
{ | |
std::vector<std::string> output; | |
//output.reserve(str.length() / 4); | |
auto first = std::cbegin(str); | |
while (first != std::cend(str)) | |
{ | |
const auto second = std::find_first_of(first, std::cend(str), | |
std::cbegin(delims), std::cend(delims)); | |
if (first != second) | |
{ | |
output.emplace_back(first, second); | |
} | |
if (second == std::cend(str)) | |
break; | |
first = std::next(second); | |
} | |
return output; | |
} | |
// strings, but works on pointers rather than iterators | |
// code by JFT | |
std::vector<std::string> splitPtr(const std::string& str, const std::string& delims = " ") | |
{ | |
std::vector<std::string> output; | |
//output.reserve(str.size() / 2); | |
for (auto first = str.data(), second = str.data(), last = first + str.size(); second != last && first != last; first = second + 1) { | |
second = std::find_first_of(first, last, std::cbegin(delims), std::cend(delims)); | |
if (first != second) | |
output.emplace_back(first, second); | |
} | |
return output; | |
} | |
// uses string_view::find_first_of | |
std::vector<std::string_view> | |
splitSV(std::string_view strv, std::string_view delims = " ") | |
{ | |
std::vector<std::string_view> output; | |
//output.reserve(strv.length() / 4); | |
size_t first = 0; | |
while (first < strv.size()) | |
{ | |
const auto second = strv.find_first_of(delims, first); | |
//std::cout << first << ", " << second << '\n'; | |
if (first != second) | |
{ | |
output.emplace_back(strv.substr(first, second-first)); | |
} | |
if (second == std::string_view::npos) | |
break; | |
first = second + 1; | |
} | |
return output; | |
} | |
// uses std::find_first_of rather than string_view::find_first_of | |
std::vector<std::string_view> | |
splitSVStd(std::string_view strv, std::string_view delims = " ") | |
{ | |
std::vector<std::string_view> output; | |
//output.reserve(strv.length() / 4); | |
auto first = strv.begin(); | |
while (first != strv.end()) | |
{ | |
const auto second = std::find_first_of(first, std::cend(strv), | |
std::cbegin(delims), std::cend(delims)); | |
//std::cout << first << ", " << second << '\n'; | |
if (first != second) | |
{ | |
output.emplace_back(strv.substr(std::distance(strv.begin(), first), std::distance(first, second))); | |
} | |
if (second == strv.end()) | |
break; | |
first = std::next(second); | |
} | |
return output; | |
} | |
// based on the JFT's comment: | |
std::vector<std::string_view> splitSVPtr(std::string_view str, std::string_view delims = " ") | |
{ | |
std::vector<std::string_view> output; | |
//output.reserve(str.size() / 2); | |
for (auto first = str.data(), second = str.data(), last = first + str.size(); second != last && first != last; first = second + 1) { | |
second = std::find_first_of(first, last, std::cbegin(delims), std::cend(delims)); | |
if (first != second) | |
output.emplace_back(first, second - first); | |
} | |
return output; | |
} | |
const std::string_view LoremIpsumStrv{ "Lorem ipsum dolor sit amet, consectetur adipiscing elit, " | |
"sed do eiusmod tempor incididuntsuperlongwordsuper ut labore et dolore magna aliqua. Ut enim ad minim veniam, " | |
"quis nostrud exercitation ullamco laboris nisi ut aliquipsuperlongword ex ea commodo consequat. Duis aute " | |
"irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. " | |
"Excepteur sint occaecat cupidatatsuperlongword non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." }; | |
template <typename TFunc> void RunAndMeasure(const char* title, TFunc func) | |
{ | |
numAllocations = 0; | |
sizeAllocations = 0; | |
const auto start = std::chrono::steady_clock::now(); | |
auto ret = func(); | |
const auto end = std::chrono::steady_clock::now(); | |
std::cout << title << ": " << | |
std::chrono::duration <double, std::milli>(end - start).count() | |
<< " ms, res " << ret << " Allocation count: " << numAllocations << ", size " << sizeAllocations << "\n"; | |
global_ptr = 0; | |
} | |
int main(int argc, const char** argv) | |
{ | |
std::cout << sizeof(std::string_view) << '\n'; | |
std::cout << sizeof(std::string) << '\n'; | |
std::string testString{ LoremIpsumStrv }; | |
if (argc > 1 && "nofile"s != argv[1]) | |
{ | |
std::ifstream inFile(argv[1]); | |
std::stringstream strStream; | |
strStream << inFile.rdbuf(); | |
testString = strStream.str(); | |
} | |
std::cout << "string length: " << testString.length() << '\n'; | |
const int ITERS = argc > 2 ? atoi(argv[2]) : 10000; | |
std::cout << "test iterations: " << ITERS << '\n'; | |
RunAndMeasure("string split", [ITERS, &testString]() | |
{ | |
std::size_t sizes = 0; | |
for (int i = 0; i < ITERS; ++i) | |
{ | |
auto v = split(testString); | |
sizes += v.size(); | |
global_ptr = 0; | |
} | |
return sizes; | |
}); | |
RunAndMeasure("string split std", [ITERS, &testString]() | |
{ | |
std::size_t sizes = 0; | |
for (int i = 0; i < ITERS; ++i) | |
{ | |
auto v = splitStd(testString); | |
sizes += v.size(); | |
global_ptr = 0; | |
} | |
return sizes; | |
}); | |
RunAndMeasure("string split ptr", [ITERS, &testString]() | |
{ | |
std::size_t sizes = 0; | |
for (int i = 0; i < ITERS; ++i) | |
{ | |
auto v = splitPtr(testString); | |
sizes += v.size(); | |
global_ptr = 0; | |
} | |
return sizes; | |
}); | |
RunAndMeasure("string_view split", [ITERS, &testString]() | |
{ | |
std::size_t sizes = 0; | |
for (int i = 0; i < ITERS; ++i) | |
{ | |
auto v = splitSV(testString); | |
sizes += v.size(); | |
global_ptr = 0; | |
} | |
return sizes; | |
}); | |
RunAndMeasure("string_view split std", [ITERS, &testString]() | |
{ | |
std::size_t sizes = 0; | |
for (int i = 0; i < ITERS; ++i) | |
{ | |
auto v = splitSVStd(testString); | |
sizes += v.size(); | |
global_ptr = 0; | |
} | |
return sizes; | |
}); | |
RunAndMeasure("string_view split ptr", [ITERS, &testString]() | |
{ | |
std::size_t sizes = 0; | |
for (int i = 0; i < ITERS; ++i) | |
{ | |
auto v = splitSVStd(testString); | |
sizes += v.size(); | |
global_ptr = 0; | |
} | |
return sizes; | |
}); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Results after using "StackAllocator" and forcing it to "deallocate" all at end of each ITER cycle - e.g. global_ptr = 0.
`
clang++ -O3 -std=c++17 -stdlib=libc++ StringViewTest.cpp
(bionic)malkia@localhost:~/p/StringViewTests$ time -p operf -g -l ./a.out nofile 10000000
Kernel profiling is not possible with current system config.
Set /proc/sys/kernel/kptr_restrict to 0 to collect kernel samples.
operf: Profiler started
16
24
string length: 489
test iterations: 10000000
string split: 5759.3 ms, res 10000077 Allocation count: 0, size 0
string split std: 6059.83 ms, res 10000000 Allocation count: 0, size 0
string split ptr: 7337.7 ms, res 10000000 Allocation count: 0, size 0
string_view split: 5700.79 ms, res 10000000 Allocation count: 0, size 0
string_view split std: 5769.85 ms, res 10000000 Allocation count: 0, size 0
string_view split ptr: 4813.97 ms, res 10000000 Allocation count: 0, size 0
Profiling done.
`
actual oprofile info:
`
(bionic)malkia@localhost:~/p/StringViewTests$ opreport -g -l
Using /home/malkia/p/StringViewTests/oprofile_data/samples/ for samples directory.
WARNING: Lost samples detected! See /home/malkia/p/StringViewTests/oprofile_data/samples/operf.log for details.
CPU: Intel Skylake microarchitecture, speed 3600 MHz (estimated)
Counted cpu_clk_unhalted events () with a unit mask of 0x00 (Core cycles when at least one thread on the physical core is not in halt state) count 30000045
samples % linenr info image name symbol name
214 35.6073 (no location information) a.out splitStd(std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > const&, std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > const&)
176 29.2845 (no location information) a.out splitPtr(std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > const&, std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > const&)
172 28.6190 (no location information) a.out split(std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > const&, std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > const&)
14 2.3295 (no location information) a.out splitSV(std::__1::basic_string_view<char, std::__1::char_traits >, std::__1::basic_string_view<char, std::__1::char_traits >)
14 2.3295 (no location information) a.out splitSVStd(std::__1::basic_string_view<char, std::__1::char_traits >, std::__1::basic_string_view<char, std::__1::char_traits >)
11 1.8303 (no location information) no-vmlinux /no-vmlinux
`
e.g. the goal is to eliminate new/delete/malloc/free from the picture.
For previous results with them: https://www.reddit.com/r/cpp/comments/934r63/speeding_up_string_view_string_split/e3ao6e2/
Roughly - x2, x3 speed loss due to malloc/free - without going into details (like we have 6 individual benchmarks here, and they use quite differently malloc/free/new/delete).