Last active
July 2, 2020 19:08
-
-
Save anhnguyen1618/82bed12b8599f2f1c44e738982a6b412 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <chrono> | |
#include <codecvt> | |
#include <iostream> | |
#include <locale> | |
#include <memory> | |
#include <stdexcept> | |
#include <string> | |
// File-local scratch string. No live code in this file reads or writes it;
// presumably it was meant as a sink for benchmark results (TODO confirm or
// remove).
static std::string dummy{};
// Standard-library conversions built on std::wstring_convert
// Converts a UTF-8 byte string into UTF-16 code units using the standard
// library's std::wstring_convert with the std::codecvt_utf8_utf16 facet.
//
// string: UTF-8 encoded input.
// Returns the UTF-16 encoding of `string`.
// A fresh converter is constructed on every call, so no conversion state is
// shared between calls.
inline std::u16string convertUTF8ToUTF16(const std::string &string) {
  using Utf8Utf16Converter =
      std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;
  Utf8Utf16Converter converter;
  return converter.from_bytes(string);
}
// Converts UTF-16 code units into a UTF-8 byte string using the standard
// library's std::wstring_convert with the std::codecvt_utf8_utf16 facet.
//
// string: UTF-16 encoded input.
// Returns the UTF-8 encoding of `string`.
// A fresh converter is constructed on every call, so no conversion state is
// shared between calls.
inline std::string convertUTF16ToUTF8(const std::u16string &string) {
  using Utf8Utf16Converter =
      std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;
  Utf8Utf16Converter converter;
  return converter.to_bytes(string);
}
// Djinni: https://github.com/dropbox/djinni/blob/master/support-lib/jni/djinni_support.cpp#L534
// Returns true when `c` lies in the high (leading) surrogate range
// 0xD800..0xDBFF. Every value in that range shares the same top six bits, so
// a single mask-and-compare covers it.
static inline bool is_high_surrogate(char16_t c) {
  return (c & 0xFC00) == 0xD800;
}
// Returns true when `c` lies in the low (trailing) surrogate range
// 0xDC00..0xDFFF. Every value in that range shares the same top six bits, so
// a single mask-and-compare covers it.
static inline bool is_low_surrogate(char16_t c) {
  return (c & 0xFC00) == 0xDC00;
}
// Result of decoding one code point from a UTF-16 sequence.
struct offset_pt {
  // Number of UTF-16 code units the code point occupied (1 or 2 for the
  // decoder in this file); a negative value marks a failed decode.
  int offset;
  // The decoded Unicode code point.
  char32_t pt;
};

// Sentinel returned by the decoder when it meets an unpaired surrogate.
static constexpr offset_pt invalid_pt{-1, 0};
// Decodes the code point that starts at str[i] without advancing any index.
//
// str: UTF-16 code units.
// i:   index of the first code unit to decode.
// Returns {units consumed, code point} on success, or invalid_pt when str[i]
// is an unpaired surrogate (a high surrogate not followed by a low one, or a
// lone low surrogate).
//
// When str[i] is a high surrogate, str[i + 1] is read as well, so the caller
// must guarantee that element is readable (for example a terminating null
// after the last code unit).
static offset_pt utf16_decode_check(const char16_t *str,
                                    std::u16string::size_type i) {
  const char16_t lead = str[i];

  if (!is_high_surrogate(lead)) {
    // Either an ordinary BMP code unit or a stray low surrogate.
    if (is_low_surrogate(lead)) {
      return invalid_pt;
    }
    return {1, lead};
  }

  const char16_t trail = str[i + 1];
  if (!is_low_surrogate(trail)) {
    // High surrogate without its partner.
    return invalid_pt;
  }

  // Combine the 10 payload bits of each half and add the supplementary-plane
  // base.
  const char32_t pt = (((lead - 0xD800) << 10) | (trail - 0xDC00)) + 0x10000;
  return {2, pt};
}
static char32_t utf16_decode(const char16_t *str, | |
std::u16string::size_type &i) { | |
offset_pt res = utf16_decode_check(str, i); | |
if (res.offset < 0) { | |
i += 1; | |
return 0xFFFD; | |
} else { | |
i += res.offset; | |
return res.pt; | |
} | |
} | |
// Appends the UTF-8 encoding of code point `pt` to `out`.
//
// pt:  code point to encode.
// out: destination string; the encoded bytes (1 to 4) are appended to it.
// Values of 0x110000 and above are outside the Unicode range and are written
// as U+FFFD (EF BF BD). Surrogate values are not special-cased here.
static void utf8_encode(char32_t pt, std::string &out) {
  // Low six bits of (pt >> shift) tagged as a UTF-8 continuation byte.
  const auto continuation = [pt](unsigned shift) {
    return static_cast<char>(((pt >> shift) & 0x3F) | 0x80);
  };

  char bytes[4];
  std::string::size_type count = 0;

  if (pt < 0x80) {
    bytes[count++] = static_cast<char>(pt);
  } else if (pt < 0x800) {
    bytes[count++] = static_cast<char>((pt >> 6) | 0xC0);
    bytes[count++] = continuation(0);
  } else if (pt < 0x10000) {
    bytes[count++] = static_cast<char>((pt >> 12) | 0xE0);
    bytes[count++] = continuation(6);
    bytes[count++] = continuation(0);
  } else if (pt < 0x110000) {
    bytes[count++] = static_cast<char>((pt >> 18) | 0xF0);
    bytes[count++] = continuation(12);
    bytes[count++] = continuation(6);
    bytes[count++] = continuation(0);
  } else {
    // U+FFFD
    bytes[count++] = static_cast<char>(0xEF);
    bytes[count++] = static_cast<char>(0xBF);
    bytes[count++] = static_cast<char>(0xBD);
  }

  // One append per code point, as in a single `out += {...}`.
  out.append(bytes, count);
}
// Converts a UTF-16 string to UTF-8 with the hand-written decoder/encoder
// pair (utf16_decode + utf8_encode) instead of std::wstring_convert.
//
// str: UTF-16 input.
// Returns the UTF-8 encoding of `str`; unpaired surrogates come out as U+FFFD.
std::string convert_dropbox(const std::u16string &str) {
  const char16_t *const units = str.data();
  const std::u16string::size_type unit_count = str.length();

  std::string utf8;
  // Estimate of 1.5 output bytes per code unit; the string still grows on
  // demand if the input needs more.
  utf8.reserve(unit_count * 3 / 2);

  // utf16_decode advances `pos` by one or two code units per call.
  std::u16string::size_type pos = 0;
  while (pos < unit_count) {
    utf8_encode(utf16_decode(units, pos), utf8);
  }
  return utf8;
}
// Smallest benchmark input: ASCII-only filler text, so every code unit takes
// the one-byte UTF-8 path.
std::u16string small_str{
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"ever "};
// Medium benchmark input: ASCII-only filler text, so every code unit takes
// the one-byte UTF-8 path.
std::u16string medium_str{
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"industry. Lorem Ipsum has been the industry's standard dummy text "
    u"ever "};
// Large benchmark input: ASCII-only filler text, so every code unit takes
// the one-byte UTF-8 path.
std::u16string large_str{
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
    u"since the 1500s, when an unknown printer took a galley of type and "
    u"scrambled it to make a type specimen book. It has survived not only "
    u"five centuries, but also the leap into electronic typesetting, "
    u"remaining essentially unchanged. It was popularised in the 1960s with "
    u"the release of Letraset sheets containing Lorem Ipsum passages, and "
    u"more recently with desktop publishing software like Aldus PageMaker "
    u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
    u"the printing and typesetting industry. Lorem Ipsum has been the "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"printer took a galley of type and scrambled it to make a type specimen "
    u"book. It has survived not only five centuries, but also the leap into "
    u"electronic typesetting, remaining essentially unchanged. It was "
    u"popularised in the 1960s with the release of Letraset sheets "
    u"containing Lorem Ipsum passages, and more recently with desktop "
    u"publishing software like Aldus PageMaker including versions of Lorem "
    u"Ipsum.Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
    u"since the 1500s, when an unknown printer took a galley of type and "
    u"scrambled it to make a type specimen book. It has survived not only "
    u"five centuries, but also the leap into electronic typesetting, "
    u"remaining essentially unchanged. It was popularised in the 1960s with "
    u"the release of Letraset sheets containing Lorem Ipsum passages, and "
    u"more recently with desktop publishing software like Aldus PageMaker "
    u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
    u"the printing and typesetting industry. Lorem Ipsum has been the "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"printer took a galley of type and scrambled it to make a type specimen "
    u"book. It has survived not only five centuries, but also the leap into "
    u"electronic typesetting, remaining essentially unchanged. It was "
    u"popularised in the 1960s with the release of Letraset sheets "
    u"containing Lorem Ipsum passages, and more recently with desktop "
    u"publishing software like Aldus PageMaker including versions of Lorem "
    u"Ipsum.Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
    u"since the 1500s, when an unknown printer took a galley of type and "
    u"scrambled it to make a type specimen book. It has survived not only "
    u"five centuries, but also the leap into electronic typesetting, "
    u"remaining essentially unchanged. It was popularised in the 1960s with "
    u"the release of Letraset sheets containing Lorem Ipsum passages, and "
    u"more recently with desktop publishing software like Aldus PageMaker "
    u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
    u"the printing and typesetting industry. Lorem Ipsum has been the "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"printer took a galley of type and scrambled it to make a type specimen "
    u"book. It has survived not only five centuries, but also the leap into "
    u"electronic typesetting, remaining essentially unchanged. It was "
    u"popularised in the 1960s with the release of Letraset sheets "
    u"containing Lorem Ipsum passages, and more recently with desktop "
    u"publishing software like Aldus PageMaker including versions of Lorem "
    u"Ipsum.Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
    u"since the 1500s, when an unknown printer took a galley of type and "
    u"scrambled it to make a type specimen book. It has survived not only "
    u"five centuries, but also the leap into electronic typesetting, "
    u"remaining essentially unchanged. It was popularised in the 1960s with "
    u"the release of Letraset sheets containing Lorem Ipsum passages, and "
    u"more recently with desktop publishing software like Aldus PageMaker "
    u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
    u"the printing and typesetting industry. Lorem Ipsum has been the "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"printer took a galley of type and scrambled it to make a type specimen "
    u"book. It has survived not only five centuries, but also the leap into "
    u"electronic typesetting, remaining essentially unchanged. It was "
    u"popularised in the 1960s with the release of Letraset sheets "
    u"containing Lorem Ipsum passages, and more recently with desktop "
    u"publishing software like Aldus PageMaker including versions of Lorem "
    u"Ipsum."};
// Largest benchmark input: ASCII-only filler text, so every code unit takes
// the one-byte UTF-8 path.
std::u16string super_large_str{
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
    u"since the 1500s, when an unknown printer took a galley of type and "
    u"scrambled it to make a type specimen book. It has survived not only "
    u"five centuries, but also the leap into electronic typesetting, "
    u"remaining essentially unchanged. It was popularised in the 1960s with "
    u"the release of Letraset sheets containing Lorem Ipsum passages, and "
    u"more recently with desktop publishing software like Aldus PageMaker "
    u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
    u"the printing and typesetting industry. Lorem Ipsum has been the "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"printer took a galley of type and scrambled it to make a type specimen "
    u"book. It has survived not only five centuries, but also the leap into "
    u"electronic typesetting, remaining essentially unchanged. It was "
    u"popularised in the 1960s with the release of Letraset sheets "
    u"containing Lorem Ipsum passages, and more recently with desktop "
    u"publishing software like Aldus PageMaker including versions of Lorem "
    u"Ipsum.Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
    u"since the 1500s, when an unknown printer took a galley of type and "
    u"scrambled it to make a type specimen book. It has survived not only "
    u"five centuries, but also the leap into electronic typesetting, "
    u"remaining essentially unchanged. It was popularised in the 1960s with "
    u"the release of Letraset sheets containing Lorem Ipsum passages, and "
    u"more recently with desktop publishing software like Aldus PageMaker "
    u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
    u"the printing and typesetting industry. Lorem Ipsum has been the "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"printer took a galley of type and scrambled it to make a type specimen "
    u"book. It has survived not only five centuries, but also the leap into "
    u"electronic typesetting, remaining essentially unchanged. It was "
    u"popularised in the 1960s with the release of Letraset sheets "
    u"containing Lorem Ipsum passages, and more recently with desktop "
    u"publishing software like Aldus PageMaker including versions of Lorem "
    u"Ipsum.Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
    u"since the 1500s, when an unknown printer took a galley of type and "
    u"scrambled it to make a type specimen book. It has survived not only "
    u"five centuries, but also the leap into electronic typesetting, "
    u"remaining essentially unchanged. It was popularised in the 1960s with "
    u"the release of Letraset sheets containing Lorem Ipsum passages, and "
    u"more recently with desktop publishing software like Aldus PageMaker "
    u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
    u"the printing and typesetting industry. Lorem Ipsum has been the "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"printer took a galley of type and scrambled it to make a type specimen "
    u"book. It has survived not only five centuries, but also the leap into "
    u"electronic typesetting, remaining essentially unchanged. It was "
    u"popularised in the 1960s with the release of Letraset sheets "
    u"containing Lorem Ipsum passages, and more recently with desktop "
    u"publishing software like Aldus PageMaker including versions of Lorem "
    u"Ipsum.Lorem Ipsum is simply dummy text of the printing and typesetting "
    u"industry. Lorem Ipsum has been the industry's standard dummy text ever "
    u"since the 1500s, when an unknown printer took a galley of type and "
    u"scrambled it to make a type specimen book. It has survived not only "
    u"five centuries, but also the leap into electronic typesetting, "
    u"remaining essentially unchanged. It was popularised in the 1960s with "
    u"the release of Letraset sheets containing Lorem Ipsum passages, and "
    u"more recently with desktop publishing software like Aldus PageMaker "
    u"including versions of Lorem Ipsum.Lorem Ipsum is simply dummy text of "
    u"the printing and typesetting industry. Lorem Ipsum has been the "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"industry's standard dummy text ever since the 1500s, when an unknown "
    u"printer took a galley of type and scrambled it to make a type specimen "
    u"book. It has survived not only five centuries, but also the leap into "
    u"electronic typesetting, remaining essentially unchanged. It was "
    u"popularised in the 1960s with the release of Letraset sheets "
    u"containing Lorem Ipsum passages, and more recently with desktop "
    u"publishing software like Aldus PageMaker including versions of Lorem "
    u"Ipsum."};
int main() { | |
int num_loop = 1000000; | |
// benchmark djini | |
auto started = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < num_loop; ++i) { | |
convert_dropbox(small_str); | |
} | |
auto done = std::chrono::high_resolution_clock::now(); | |
auto duration = | |
std::chrono::duration_cast<std::chrono::nanoseconds>(done - started) | |
.count() / | |
num_loop; | |
std::cout << "[Benchmark djini small string] time elapsed: " << duration | |
<< " ns" << std::endl; | |
// Benchmark bindgen | |
started = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < num_loop; ++i) { | |
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>() | |
.to_bytes(small_str); | |
} | |
done = std::chrono::high_resolution_clock::now(); | |
duration = | |
std::chrono::duration_cast<std::chrono::nanoseconds>(done - started) | |
.count() / | |
num_loop; | |
// std::cout << dummy << std::endl; | |
std::cout << "[Benchmark bindgen small string] time elapsed: " << duration | |
<< " ns" << std::endl; | |
started = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < num_loop; ++i) { | |
convert_dropbox(medium_str); | |
} | |
done = std::chrono::high_resolution_clock::now(); | |
duration = | |
std::chrono::duration_cast<std::chrono::nanoseconds>(done - started) | |
.count() / | |
num_loop; | |
// std::cout << dummy << std::endl; | |
std::cout << "[Benchmark djini medium string] time elapsed: " << duration | |
<< " ns" << std::endl; | |
started = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < num_loop; ++i) { | |
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>() | |
.to_bytes(medium_str); | |
} | |
done = std::chrono::high_resolution_clock::now(); | |
duration = | |
std::chrono::duration_cast<std::chrono::nanoseconds>(done - started) | |
.count() / | |
num_loop; | |
// std::cout << dummy << std::endl; | |
std::cout << "[Benchmark bindgen medium string] time elapsed: " << duration | |
<< " ns" << std::endl; | |
started = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < num_loop; ++i) { | |
convert_dropbox(large_str); | |
} | |
done = std::chrono::high_resolution_clock::now(); | |
duration = | |
std::chrono::duration_cast<std::chrono::nanoseconds>(done - started) | |
.count() / | |
num_loop; | |
// std::cout << dummy << std::endl; | |
std::cout << "[Benchmark djini large string] time elapsed: " << duration | |
<< " ns" << std::endl; | |
started = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < num_loop; ++i) { | |
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>() | |
.to_bytes(large_str); | |
} | |
done = std::chrono::high_resolution_clock::now(); | |
duration = | |
std::chrono::duration_cast<std::chrono::nanoseconds>(done - started) | |
.count() / | |
num_loop; | |
// std::cout << dummy << std::endl; | |
std::cout << "[Benchmark bindgen large string] time elapsed: " << duration | |
<< " ns" << std::endl; | |
started = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < num_loop; ++i) { | |
convert_dropbox(super_large_str); | |
} | |
done = std::chrono::high_resolution_clock::now(); | |
duration = | |
std::chrono::duration_cast<std::chrono::nanoseconds>(done - started) | |
.count() / | |
num_loop; | |
// std::cout << dummy << std::endl; | |
std::cout << "[Benchmark djini super large string] time elapsed: " << duration | |
<< " ns" << std::endl; | |
started = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < num_loop; ++i) { | |
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>() | |
.to_bytes(super_large_str); | |
} | |
done = std::chrono::high_resolution_clock::now(); | |
duration = | |
std::chrono::duration_cast<std::chrono::nanoseconds>(done - started) | |
.count() / | |
num_loop; | |
// std::cout << dummy << std::endl; | |
std::cout << "[Benchmark bindgen super large string] time elapsed: " | |
<< duration << " ns" << std::endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment