Skip to content

Instantly share code, notes, and snippets.

@arrieta
Created April 25, 2019 18:48
Show Gist options
  • Save arrieta/fa333c34beee245424e0fbb0dc34e5f8 to your computer and use it in GitHub Desktop.
Save arrieta/fa333c34beee245424e0fbb0dc34e5f8 to your computer and use it in GitHub Desktop.
Transform strings into a canonical representation
/// @file make_canonical.hpp
/// @brief Transform strings into a canonical representation.
/// @author J. Arrieta <[email protected]>
/// @copyright (C) 2019 Nabla Zero Labs
/// @license MIT
#pragma once
#include <algorithm>
#include <cctype>
#include <cwctype>
#include <string>
#include <string_view>
namespace {
/// @brief Predicate: true when @p c is not classified as whitespace.
/// @tparam T Character type; only `char` and `wchar_t` are specialized below.
/// @param c Character to classify.
/// @return `true` if @p c is not whitespace, `false` otherwise.
template <typename T>
inline bool is_not_space(T c);

/// @brief Convert a character to its lowercase equivalent.
/// @tparam T Character type; only `char` and `wchar_t` are specialized below.
/// @param c Character to convert.
/// @return The lowercase form of @p c, or @p c itself when it has none.
template <typename T>
inline T downcase(T c);

/// @brief Narrow-character specialization, backed by `std::isspace`.
template <>
inline bool is_not_space(char c) {
  // The <cctype> classifiers require an argument representable as
  // `unsigned char` (or EOF). A plain `char` may be negative, which would be
  // undefined behavior, so convert before the call.
  return std::isspace(static_cast<unsigned char>(c)) == 0;
}

/// @brief Wide-character specialization, backed by `std::iswspace`.
template <>
inline bool is_not_space(wchar_t c) {
  return std::iswspace(static_cast<std::wint_t>(c)) == 0;
}

/// @brief Narrow-character specialization, backed by `std::tolower`.
template <>
inline char downcase(char c) {
  // Same `unsigned char` requirement as above; `std::tolower` returns an int,
  // so narrow the result back to `char` explicitly.
  return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
}

/// @brief Wide-character specialization, backed by `std::towlower`.
template <>
inline wchar_t downcase(wchar_t c) {
  return static_cast<wchar_t>(std::towlower(static_cast<std::wint_t>(c)));
}
}  // anonymous namespace
namespace nzl {
/// @brief Return a canonical string representation.
/// @tparam CharT Character type of the input and of the returned string.
/// @tparam Traits Character traits shared by the view and the returned string.
/// @tparam Allocator Allocator used by the returned string.
/// @param value String to canonicalize.
/// @return A new `std::basic_string<CharT, Traits, Allocator>` holding the
/// canonical representation of @p value.
/// @note The canonical representation of a string is obtained when all leading
/// and trailing whitespace is removed and all characters are downcased.
/// @note This implementation only supports ASCII encoding.
template <typename CharT, typename Traits = std::char_traits<CharT>,
          typename Allocator = std::allocator<CharT>>
inline auto make_canonical(std::basic_string_view<CharT, Traits> value) {
  using size_type = typename std::basic_string_view<CharT, Traits>::size_type;

  // Trim on the non-owning view first so that only the characters that
  // survive are copied into the result (no erase-from-front shifting).
  const auto first =
      std::find_if(value.begin(), value.end(), is_not_space<CharT>);
  value.remove_prefix(static_cast<size_type>(first - value.begin()));

  // Searching in reverse yields the last non-space character; `base()` turns
  // that into the one-past-the-end position of the trimmed range.
  const auto last =
      std::find_if(value.rbegin(), value.rend(), is_not_space<CharT>).base();
  value.remove_suffix(static_cast<size_type>(value.end() - last));

  // Copy the trimmed range, then downcase it in place.
  std::basic_string<CharT, Traits, Allocator> s(value);
  std::transform(s.begin(), s.end(), s.begin(), downcase<CharT>);
  return s;
}

/// @brief Canonicalize a null-terminated narrow string.
/// @param c Pointer to a null-terminated string; a null pointer is treated as
/// the empty string.
/// @return The canonical representation as a `std::string`.
inline auto make_canonical(const char* c) {
  // Building a string_view from a null pointer is undefined behavior, so map
  // null to an empty view instead.
  return make_canonical(c != nullptr ? std::basic_string_view<char>(c)
                                     : std::basic_string_view<char>());
}

/// @brief Canonicalize a null-terminated wide string.
/// @param c Pointer to a null-terminated wide string; a null pointer is
/// treated as the empty string.
/// @return The canonical representation as a `std::wstring`.
inline auto make_canonical(const wchar_t* c) {
  return make_canonical(c != nullptr ? std::basic_string_view<wchar_t>(c)
                                     : std::basic_string_view<wchar_t>());
}

/// @brief Canonicalize a `std::string`.
/// @param c String to canonicalize; it is not modified.
/// @return The canonical representation as a `std::string`.
inline auto make_canonical(const std::string& c) {
  return make_canonical(std::basic_string_view<char>(c));
}

/// @brief Canonicalize a `std::wstring`.
/// @param c String to canonicalize; it is not modified.
/// @return The canonical representation as a `std::wstring`.
inline auto make_canonical(const std::wstring& c) {
  return make_canonical(std::basic_string_view<wchar_t>(c));
}
}  // namespace nzl
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment