Created
July 25, 2022 17:58
-
-
Save jcelerier/74dfd473bccec8f1bd5d78be5aada569 to your computer and use it in GitHub Desktop.
counting words in c++
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm> | |
#include <boost/container/small_vector.hpp> | |
#include <fmt/format.h> | |
#include <robin_hood.h> | |
static constexpr inline char to_lower(char c) noexcept { | |
return ((c >= 'A' && c <= 'Z') ? c + ('a' - 'A') : c); | |
} | |
static constexpr inline bool is_space(char c) noexcept { return c <= ' '; } | |
namespace robin_hood { | |
template <typename T, std::size_t N> | |
struct hash<boost::container::small_vector<T, N>> { | |
constexpr size_t operator()(const auto &str) const noexcept { | |
return robin_hood::hash_bytes(str.data(), sizeof(T) * str.size()); | |
} | |
}; | |
} // namespace robin_hood | |
int main() { | |
using string = boost::container::small_vector<char, 32>; | |
using map = robin_hood::unordered_flat_map<string, int>; | |
string word; | |
robin_hood::unordered_flat_map<string, int> counts; | |
counts.reserve(1e5); | |
constexpr int bufsize = 1024 * 1024; | |
alignas(64) char buf[bufsize]; | |
for (ssize_t n = 0; (n = read(STDIN_FILENO, buf, bufsize)) && n >= 0;) { | |
for (int i = 0; i < n; i++) { | |
if (is_space(buf[i])) { | |
if (!word.empty()) { | |
counts[word]++; | |
word.clear(); | |
} | |
} else { | |
word.push_back(to_lower(buf[i])); | |
} | |
} | |
} | |
{ | |
using elem_type = map::value_type; | |
auto ptr = std::make_unique_for_overwrite<elem_type *[]>(counts.size()); | |
auto it = counts.begin(); | |
auto dit = ptr.get(); | |
while (it != counts.end()) | |
*dit++ = &*it++; | |
std::sort(ptr.get(), dit, [](const auto &lhs, const auto &rhs) noexcept { | |
return lhs->second > rhs->second; | |
}); | |
for (auto it = ptr.get(); it != dit; ++it) { | |
auto &count = **it; | |
fmt::print(stdout, "{} {}\n", | |
std::string_view(count.first.data(), count.first.size()), | |
count.second); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment