Created
April 25, 2019 18:48
-
-
Save arrieta/fa333c34beee245424e0fbb0dc34e5f8 to your computer and use it in GitHub Desktop.
Transform strings into a canonical representation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// @file make_canonical.hpp | |
/// @brief Transform strings into a canonical representation. | |
/// @author J. Arrieta <[email protected]> | |
/// @copyright (C) 2019 Nabla Zero Labs | |
/// @license MIT | |
#pragma once | |
#include <algorithm>
#include <cctype>
#include <cwctype>
#include <memory>
#include <string>
#include <string_view>
namespace nzl {

/// @brief Implementation helpers for make_canonical(); not part of the API.
/// @note These are ordinary inline functions in a named namespace (rather than
/// an anonymous namespace) so that every translation unit including this
/// header refers to the same entities from the inline make_canonical template.
namespace detail {

/// @brief Return true when the narrow character @p c is not whitespace.
/// @note The value is converted to `unsigned char` before classification:
/// handing std::isspace a negative `char` (any byte >= 0x80 on platforms where
/// `char` is signed) is undefined behavior.
inline bool is_not_space(char c) {
  return std::isspace(static_cast<unsigned char>(c)) == 0;
}

/// @brief Return true when the wide character @p c is not whitespace.
inline bool is_not_space(wchar_t c) {
  return std::iswspace(static_cast<std::wint_t>(c)) == 0;
}

/// @brief Return the lowercase form of the narrow character @p c.
/// @note Converted through `unsigned char` for the same reason as
/// is_not_space(char); the `int` result of std::tolower is narrowed back
/// explicitly.
inline char downcase(char c) {
  return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
}

/// @brief Return the lowercase form of the wide character @p c.
inline wchar_t downcase(wchar_t c) {
  return static_cast<wchar_t>(std::towlower(static_cast<std::wint_t>(c)));
}

}  // namespace detail

/// @brief Return a canonical string representation.
/// @tparam CharT Character type; helper overloads exist for `char` and
/// `wchar_t` only.
/// @tparam Traits Character traits of the view and of the returned string.
/// @tparam Allocator Allocator of the returned string.
/// @param value String to canonicalize.
/// @return A newly allocated string holding the canonical form of @p value.
/// @note The canonical representation of a string is obtained when all leading
/// and trailing whitespace is removed and all characters are downcased.
/// Interior whitespace is left untouched.
/// @note Classification and case mapping use the C library character
/// functions (std::isspace / std::tolower and their wide counterparts), so
/// this implementation only supports ASCII encoding reliably.
template <typename CharT, typename Traits = std::char_traits<CharT>,
          typename Allocator = std::allocator<CharT>>
[[nodiscard]] inline std::basic_string<CharT, Traits, Allocator>
make_canonical(std::basic_string_view<CharT, Traits> value) {
  using String = std::basic_string<CharT, Traits, Allocator>;

  const auto not_space = [](CharT c) { return detail::is_not_space(c); };

  // Locate the trimmed range on the view itself so that only the characters
  // that are kept get copied (no erase-from-front shifting afterwards).
  const auto first = std::find_if(value.begin(), value.end(), not_space);
  const auto last = std::find_if(value.rbegin(), value.rend(), not_space).base();

  // Empty or all-whitespace input: the forward scan reaches end() while the
  // reverse scan collapses to begin(), so the range is empty or inverted.
  if (first >= last) {
    return String{};
  }

  String result(first, last);
  std::transform(result.begin(), result.end(), result.begin(),
                 [](CharT c) { return detail::downcase(c); });
  return result;
}

/// @brief Canonicalize a NUL-terminated narrow string.
/// @param c Pointer to a NUL-terminated string; a null pointer is treated as
/// the empty string.
/// @return The canonical form of @p c as a std::string.
[[nodiscard]] inline std::string make_canonical(const char* c) {
  // Constructing a string_view from a null pointer is undefined behavior.
  return make_canonical(c != nullptr ? std::string_view(c)
                                     : std::string_view{});
}

/// @brief Canonicalize a NUL-terminated wide string.
/// @param c Pointer to a NUL-terminated wide string; a null pointer is treated
/// as the empty string.
/// @return The canonical form of @p c as a std::wstring.
[[nodiscard]] inline std::wstring make_canonical(const wchar_t* c) {
  // Constructing a wstring_view from a null pointer is undefined behavior.
  return make_canonical(c != nullptr ? std::wstring_view(c)
                                     : std::wstring_view{});
}

/// @brief Canonicalize a std::string.
/// @param c String to canonicalize; it is not modified.
/// @return The canonical form of @p c as a new std::string.
[[nodiscard]] inline std::string make_canonical(const std::string& c) {
  return make_canonical(std::string_view(c));
}

/// @brief Canonicalize a std::wstring.
/// @param c Wide string to canonicalize; it is not modified.
/// @return The canonical form of @p c as a new std::wstring.
[[nodiscard]] inline std::wstring make_canonical(const std::wstring& c) {
  return make_canonical(std::wstring_view(c));
}

}  // namespace nzl
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment