fdwr · May 23, 2025 01:38
diff --git a/StringHelpers.ixx b/StringHelpers.ixx
 // Miscellaneous helpers for strings that core C++ is missing.
 module;

 #include "precomp.h"
 #include <stdint.h>
 #include <string>
 #include <string_view>
 #include <span>
 #include <optional>
 #include <type_traits>

 export module StringHelpers;


 namespace
 {
    constexpr static char8_t g_utf8bom[] = {char8_t(0xEF), char8_t(0xBB), char8_t(0xBF)};
    constexpr auto g_utf8bomView = std::u8string_view(std::data(g_utf8bom), std::size(g_utf8bom));
 }


 export namespace StringHelpers
 {

 // Named Unicode code point values and range limits referenced by the helpers below.
 // This is an unscoped enum, so the enumerators convert implicitly to integer/char32_t.
 enum UnicodeCodePoint
 {
    UnicodeSpace = 0x000020,                  // U+0020
    UnicodeNbsp = 0x0000A0,                   // U+00A0
    UnicodeSoftHyphen = 0x0000AD,             // U+00AD
    UnicodeEnQuadSpace = 0x002000,            // U+2000
    UnicodeZeroWidthSpace = 0x00200B,         // U+200B
    UnicodeDottedCircle = 0x0025CC,           // U+25CC
    UnicodeIdeographicSpace = 0x003000,       // U+3000
    UnicodeInlineObject = 0x00FFFC,   // for embedded objects
    UnicodeReplacementCharacter = 0x00FFFD,   // for invalid sequences
    UnicodeMax = 0x10FFFF,                    // Highest code point value.
    UnicodeTotal = 0x110000,                  // One past UnicodeMax, i.e. the count of code point values.
 };


 // Casting helpers between the byte-sized character types (char, unsigned char, char8_t, std::byte).
 // Each overload only changes the static type; no data is copied or converted.

 // Single values and pointers to char.
 inline char AsChar(std::byte c) { return static_cast<char>(c); }
 inline char* AsChar(std::byte* p) { return reinterpret_cast<char*>(p); }
 inline char const* AsChar(std::byte const* p) { return reinterpret_cast<char const*>(p); }
 inline char AsChar(char8_t c) { return static_cast<char>(c); }
 inline char* AsChar(char8_t* p) { return reinterpret_cast<char*>(p); }
 inline char const* AsChar(char8_t const* p) { return reinterpret_cast<char const*>(p); }
 inline char* AsChar(char* p) { return p; } // Identity operation for general templates
 inline char const* AsChar(char const* p) { return p; } // Identity operation for general templates
 // Pointer-to-pointer forms (e.g. arrays of strings).
 inline char** AsChar(char8_t** p) { return reinterpret_cast<char**>(p); }
 inline char* const* AsChar(char8_t* const* const p) { return reinterpret_cast<char* const* const>(p); }
 inline char const* const* AsChar(char8_t const* const* const p) { return reinterpret_cast<char const* const* const>(p); }
 // Pointers to unsigned char.
 inline unsigned char* AsUChar(char* p) { return reinterpret_cast<unsigned char*>(p); }
 inline unsigned char* AsUChar(char8_t* p) { return reinterpret_cast<unsigned char*>(p); }
 inline unsigned char* AsUChar(std::byte* p) { return reinterpret_cast<unsigned char*>(p); }
 // Pointers to char8_t.
 inline char8_t* AsUtf8Char(char* p) { return reinterpret_cast<char8_t*>(p); }
 inline char8_t const* AsUtf8Char(char const* p) { return reinterpret_cast<char8_t const*>(p); }
 // Views: a new view over the same characters with the other element type.
 inline std::u8string_view AsUtf8Char(std::string_view s) { return std::u8string_view(reinterpret_cast<char8_t const*>(s.data()), s.size()); }
 inline std::string_view AsChar(std::u8string_view s) { return std::string_view(reinterpret_cast<char const*>(s.data()), s.size()); }
 // Owning strings: the returned reference aliases the same object under the other string type.
 // NOTE(review): std::string and std::u8string are distinct types; this presumably relies on
 // both having identical layout in the target standard library - confirm for each toolchain.
 inline std::u8string& AsUtf8Char(std::string& s) { return reinterpret_cast<std::u8string&>(s); }
 inline std::string& AsChar(std::u8string& s) { return reinterpret_cast<std::string&>(s); }
 inline std::u8string const& AsUtf8Char(std::string const& s) { return reinterpret_cast<std::u8string const&>(s); }
 inline std::string const& AsChar(std::u8string const& s) { return reinterpret_cast<std::string const&>(s); }


 // std::span::subspan does not clamp its arguments, so an out-of-range offset or count
 // is dangerous. This helper clamps both so the result always stays within 'a':
 // - 'index' past the end yields an empty span at the end.
 // - 'count' is reduced to however many elements remain after 'index'.
 template <typename T>
 std::span<T> ClampedSubspan(std::span<T> a, size_t index, size_t count)
 {
    size_t const total = a.size();
    size_t const start = (index < total) ? index : total;
    size_t const available = total - start;
    size_t const length = (count < available) ? count : available;
    return a.subspan(start, length);
 }


 // Returns true if 'ch' is any surrogate code point: 0xD800 <= ch <= 0xDFFF.
 inline bool IsSurrogate(char32_t ch) noexcept
 {
    // Compare every bit above the low 11. Masking with only 0xF800 would ignore the
    // upper bits of the 32-bit value and misreport code points such as U+1D800.
    return (ch & 0xFFFFF800u) == 0xD800u;
 }


 // Returns true if 'ch' is a leading (high) surrogate: 0xD800 <= ch <= 0xDBFF.
 inline bool IsLeadingSurrogate(char32_t ch) noexcept
 {
    // Compare every bit above the low 10, so that supplementary plane code points
    // whose low 16 bits happen to look like a surrogate (e.g. U+1D800) are rejected.
    return (ch & 0xFFFFFC00u) == 0xD800u;
 }


 // Returns true if 'ch' is a trailing (low) surrogate: 0xDC00 <= ch <= 0xDFFF.
 inline bool IsTrailingSurrogate(char32_t ch) noexcept
 {
    // Compare every bit above the low 10, so that supplementary plane code points
    // whose low 16 bits happen to look like a surrogate (e.g. U+1DC00) are rejected.
    return (ch & 0xFFFFFC00u) == 0xDC00u;
 }


 // Returns true if 'ch' lies above the basic multilingual plane (greater than U+FFFF),
 // meaning it needs a surrogate pair in UTF-16.
 inline bool IsCharacterBeyondBmp(char32_t ch) noexcept
 {
    return ch > 0xFFFF;
 }


 // Combines a leading ('high') and trailing ('low') surrogate into one code point.
 // Only the low 10 bits of each argument are used; the arguments are not validated.
 inline char32_t MakeUnicodeCodePoint(char32_t high, char32_t low) noexcept
 {
    char32_t const upperTenBits = (high & 0x03FF) << 10;
    char32_t const lowerTenBits = low & 0x03FF;
    return 0x10000 + (upperTenBits | lowerTenBits);
 }


 // Split a code point into leading and trailing surrogates.
 // From http://unicode.org/faq/utf_bom.html#35
 // Returns the leading (high) surrogate; the input is not validated.
 inline char16_t GetLeadingSurrogate(char32_t ch)
 {
    // 0xD800 minus the (0x10000 >> 10) bias, folded into a single constant (0xD7C0).
    constexpr char32_t leadOffset = 0xD800 - (0x10000 >> 10);
    return static_cast<char16_t>(leadOffset + (ch >> 10));
 }


 // Returns the trailing (low) surrogate for a code point: 0xDC00 plus its low 10 bits.
 // The input is not validated.
 inline char16_t GetTrailingSurrogate(char32_t ch)
 {
    char32_t const lowTenBits = ch & 0x3FF;
    return static_cast<char16_t>(0xDC00 | lowTenBits); // Bits are disjoint, so OR equals addition.
 }


 // Returns true for the ASCII hexadecimal digits 0-9, A-F, and a-f.
 inline bool IsHexDigit(char32_t ch) noexcept
 {
    if (ch >= '0' && ch <= '9')
        return true;

    // Clear the ASCII case bit so 'a'..'f' compare as 'A'..'F'.
    char32_t const upperCased = ch & ~char32_t(32);
    return upperCased >= 'A' && upperCased <= 'F';
 }


 // Reads code points one at a time from a range of UTF-16 code units.
 // The reader does not own the text; it only advances 'current_' toward 'end_'.
 struct Utf16CharacterReader
 {
    char16_t const* current_ = nullptr; // Next code unit to read.
    char16_t const* end_ = nullptr;     // One past the last code unit.

    Utf16CharacterReader() = default;

    Utf16CharacterReader(char16_t const* begin, char16_t const* end) : current_(begin), end_(end)
    {}

    // Accepts any other character type of the same size as char16_t.
    template <typename CharacterType>
    Utf16CharacterReader(CharacterType const* begin, CharacterType const* end)
    requires (sizeof(CharacterType) == sizeof(char16_t))
    :   Utf16CharacterReader(
            reinterpret_cast<char16_t const*>(begin),
            reinterpret_cast<char16_t const*>(end)
        )
    {}

    // Accepts any container usable with std::data/std::size whose elements are char16_t-sized.
    template <typename Container>
    Utf16CharacterReader(Container const& c)
    requires (sizeof(*std::data(c)) == sizeof(char16_t))
    :   Utf16CharacterReader(
            reinterpret_cast<char16_t const*>(std::data(c)),
            reinterpret_cast<char16_t const*>(std::data(c)) + std::size(c)
        )
    {}

    // Count of code units (not code points) still unread.
    size_t size() const noexcept
    {
        return static_cast<size_t>(end_ - current_);
    }

    bool IsAtEnd() const noexcept
    {
        return !(current_ < end_);
    }

    // Reads the next code point, returning 0 at the end of the text.
    // Any unpaired surrogate is consumed and reported as U+FFFD.
    char32_t ReadNext() noexcept
    {
        if (IsAtEnd())
            return 0;

        char32_t const first = *current_++;

        if (!IsSurrogate(first))
            return first; // Character fits in the basic multilingual plane.

        // A valid pair is a leading surrogate immediately followed by a trailing one.
        // The following code unit is only consumed when it completes the pair.
        bool const completesPair =
            IsLeadingSurrogate(first) && current_ < end_ && IsTrailingSurrogate(*current_);

        if (!completesPair)
            return UnicodeReplacementCharacter; // Illegal unpaired surrogate.

        char32_t const second = *current_++;
        return MakeUnicodeCodePoint(first, second);
    }

    // Reads the next code point, returning 0 at the end of the text.
    // Unlike ReadNext, an unpaired surrogate is passed through unchanged
    // (rather than remapped to the U+FFFD replacement character).
    char32_t ReadNextNoReplacement() noexcept
    {
        if (IsAtEnd())
            return 0;

        char32_t const first = *current_++;

        if (!IsLeadingSurrogate(first) || current_ >= end_)
            return first;

        char32_t const second = *current_;
        if (!IsTrailingSurrogate(second))
            return first;

        ++current_;
        return MakeUnicodeCodePoint(first, second);
    }
 };


 struct Utf16CharacterWriter
 {
    char16_t* begin_ = nullptr;
    char16_t* current_ = nullptr;
    char16_t* end_ = nullptr;

    Utf16CharacterWriter() = default;

    Utf16CharacterWriter(char16_t* begin, char16_t* end) : begin_(begin), current_(begin), end_(end)
    {}

    template <typename CharacterType>
    inline Utf16CharacterWriter(CharacterType* begin, CharacterType* end)
    requires (sizeof(CharacterType) == sizeof(char16_t))
    :   Utf16CharacterWriter(
            reinterpret_cast<char16_t*>(begin),
            reinterpret_cast<char16_t*>(begin)
        )
    {}

    template <typename Container>
    Utf16CharacterWriter(Container& c)
    requires (sizeof(*std::data(c)) == sizeof(char16_t))
    :   Utf16CharacterWriter(
            reinterpret_cast<char16_t*>(std::data(c)),
            reinterpret_cast<char16_t*>(std::data(c) + std::size(c))
        )
    {}

    size_t size() const noexcept
    {
        return current_ - begin_;
    }

    bool IsAtEnd() const noexcept
    {
        return current_ >= end_;
    }

    void WriteNext(char32_t ch) noexcept
    {
        if (current_ >= end_)
            return;

        if (IsCharacterBeyondBmp(ch) && end_ - current_ >= 2)
        {
            // Split into leading and trailing surrogatse.
            // From http://unicode.org/faq/utf_bom.html#35
            current_[0] = char16_t(GetLeadingSurrogate(ch));
            current_[1] = char16_t(GetTrailingSurrogate(ch));
            current_ += 2;
        }
        else
        {
            // A BMP character (or isolated surrogate)
            current_[0] = char16_t(ch);
            ++current_;
        }
    }
 };

 // Reads code points one at a time directly from a range of UTF-8 code units. This avoids:
 // - the deprecated std::wstring_convert<std::codecvt_utf8_utf16>.
 // - OS-specific MultibyteToWideChar and allocating intermediate buffers.
 // The reader does not own the text; it only advances 'current_' toward 'end_'.
 // TODO: Compare with https://github.com/simdutf/simdutf/blob/master/src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h.
 struct Utf8CharacterReader
 {
    char8_t const* current_ = nullptr; // Next code unit to read.
    char8_t const* end_ = nullptr;     // One past the last code unit.

    Utf8CharacterReader() = default;

    Utf8CharacterReader(char8_t const* begin, char8_t const* end) : current_(begin), end_(end)
    {}

    // Accepts any other character type of the same size as char8_t (e.g. char).
    template <typename CharacterType>
    Utf8CharacterReader(CharacterType const* begin, CharacterType const* end)
    requires (sizeof(CharacterType) == sizeof(char8_t))
    :   Utf8CharacterReader(
            reinterpret_cast<char8_t const*>(begin),
            reinterpret_cast<char8_t const*>(end)
        )
    {}

    // Accepts any container usable with std::data/std::size whose elements are char8_t-sized.
    template <typename Container>
    Utf8CharacterReader(Container const& c)
    requires (sizeof(*std::data(c)) == sizeof(char8_t))
    :   Utf8CharacterReader(
            reinterpret_cast<char8_t const*>(std::data(c)),
            reinterpret_cast<char8_t const*>(std::data(c) + std::size(c))
        )
    {}

    // Count of code units (not code points) still unread.
    size_t size() const noexcept
    {
        return end_ - current_;
    }

    bool IsAtEnd() const noexcept
    {
        return current_ >= end_;
    }

    // Reads the next code point, returning 0 at the end of the text.
    // Returns U+FFFD for a stray continuation byte, a lead byte announcing more than
    // three continuation bytes, a missing continuation byte, or a sequence cut short by
    // the end of the buffer. The offending byte that breaks a sequence is not consumed.
    // Overlong encodings and encoded surrogates are not rejected here.
    char32_t ReadNext() noexcept
    {
        // TODO: Test overlong sequences.

        if (current_ >= end_)
            return 0;

        char32_t codeUnit = *current_++;

        if (codeUnit <= 0b0111'1111) // 0xxxxxxx  0-127
            return codeUnit;

        // The following byte values should never occur: 0xC0, 0xC1, 0xF5-0xFF
        if (codeUnit < 0b1100'0000) // Any 10xxxxxx patterns are illegal.
            return UnicodeReplacementCharacter;

        // Count the leading 1 bits after the first (clearing each as it is counted)
        // to learn how many continuation bytes follow.
        uint32_t continuationMask = 0b0100'0000;
        uint32_t continuationByteCount = 0;
        while (codeUnit & continuationMask)
        {
            codeUnit ^= continuationMask;
            continuationMask >>= 1;
            ++continuationByteCount;
            if (continuationByteCount > 3)
                return UnicodeReplacementCharacter;
        }

        char32_t codePoint = codeUnit & 0b0011'1111;
        while (continuationByteCount--)
        {
            // Never read beyond the buffer when the text ends in the middle of a sequence.
            if (current_ >= end_)
                return UnicodeReplacementCharacter; // Truncated sequence.

            codeUnit = *current_;
            if ((codeUnit & 0b1100'0000) != 0b1000'0000) // Expect 10xxxxxx pattern.
                return UnicodeReplacementCharacter; // Expected continuation byte.
            ++current_;

            // Combine next code unit lowest 6 bits with existing bits.
            codePoint = (codePoint << 6) | (codeUnit & 0b0011'1111);
        }

        return codePoint;
    }

    // Skip the byte order mark, if present.
    void SkipBom() noexcept
    {
        if (size() >= sizeof(g_utf8bom) && memcmp(current_, g_utf8bom, sizeof(g_utf8bom)) == 0)
        {
            current_ += sizeof(g_utf8bom);
        }
    }
 };


 // Writes a single code point out to a memory region of char8's.
 // Does NOT throw if an invalid character is passed - just writes replacement.
 struct Utf8CharacterWriter
 {
    char8_t* begin_ = nullptr;
    char8_t* current_ = nullptr;
    char8_t* end_ = nullptr;

    Utf8CharacterWriter() = default;

    Utf8CharacterWriter(char8_t* begin, char8_t* end) : begin_(begin), current_(begin), end_(end)
    {}

    template <typename CharacterType>
    Utf8CharacterWriter(CharacterType* begin, CharacterType* end)
    requires (sizeof(CharacterType) == sizeof(char8_t))
    :   Utf8CharacterWriter(reinterpret_cast<char8_t*>(begin), reinterpret_cast<char8_t*>(end))
    {}

    template <typename Container>
    Utf8CharacterWriter(Container& c)
    requires (sizeof(*std::data(c)) == sizeof(char8_t))
    :   Utf8CharacterWriter(
            reinterpret_cast<char8_t*>(std::data(c)),
            reinterpret_cast<char8_t*>(std::data(c) + std::size(c))
        )
    {}

    size_t size() const noexcept
    {
        return current_ - begin_;
    }

    bool IsAtEnd() const noexcept
    {
        return current_ >= end_;
    }

    void WriteNext(char32_t codePoint) noexcept
    {
        // Consider Duff's device like approach: https://github.com/Alexhuszagh/UTFPP/blob/bd99a5e4f3fbfb3bc86c1d7af5cf5edf2f00e1a7/utf.hpp#L115

        if (current_ >= end_)
            return;

        if (codePoint < 0x80)
        {
            *current_++ = static_cast<char8_t>(codePoint);
        }
        else if (codePoint < 0x0800)
        {
            *current_++ = static_cast<char8_t>((codePoint >> 6) | 0xC0);
            if (current_ < end_) *current_++ = static_cast<char8_t>((codePoint & 0x3F) | 0x80);
        }
        else if (codePoint < 0x10000)
        {
            *current_++ = static_cast<char8_t>((codePoint >> 12) | 0xE0);
            if (current_ < end_) *current_++ = static_cast<char8_t>(((codePoint >> 6) & 0x3F) | 0x80);
            if (current_ < end_) *current_++ = static_cast<char8_t>((codePoint & 0x3F) | 0x80);
        }
        else
        {
            *current_++ = static_cast<char8_t>((codePoint >> 18) | 0xF0);
            if (current_ < end_) *current_++ = static_cast<char8_t>(((codePoint >> 12) & 0x3F) | 0x80);
            if (current_ < end_) *current_++ = static_cast<char8_t>(((codePoint >> 6) & 0x3F) | 0x80);
            if (current_ < end_) *current_++ = static_cast<char8_t>((codePoint & 0x3F) | 0x80);
        }
    }
 };


 // Reads code points one at a time from a range of UTF-32 code units.
 // Values are returned exactly as stored; nothing is validated.
 // The reader does not own the text; it only advances 'current_' toward 'end_'.
 struct Utf32CharacterReader
 {
    char32_t const* current_ = nullptr; // Next code unit to read.
    char32_t const* end_ = nullptr;     // One past the last code unit.

    Utf32CharacterReader() = default;

    Utf32CharacterReader(char32_t const* begin, char32_t const* end)
    :   current_(begin), end_(end)
    {}

    // Accepts any other character type of the same size as char32_t.
    template <typename CharacterType>
    Utf32CharacterReader(CharacterType const* begin, CharacterType const* end)
    requires (sizeof(CharacterType) == sizeof(char32_t))
    :   Utf32CharacterReader(
            reinterpret_cast<char32_t const*>(begin),
            reinterpret_cast<char32_t const*>(end)
        )
    {}

    // Accepts any container usable with std::data/std::size whose elements are char32_t-sized.
    template <typename Container>
    Utf32CharacterReader(Container const& c)
    requires (sizeof(*std::data(c)) == sizeof(char32_t))
    :   Utf32CharacterReader(
            reinterpret_cast<char32_t const*>(std::data(c)),
            reinterpret_cast<char32_t const*>(std::data(c)) + std::size(c)
        )
    {}

    // Count of code units still unread.
    size_t size() const noexcept
    {
        return static_cast<size_t>(end_ - current_);
    }

    bool IsAtEnd() const noexcept
    {
        return !(current_ < end_);
    }

    // Returns the next code unit and advances, or 0 at the end of the text.
    char32_t ReadNext() noexcept
    {
        return IsAtEnd() ? char32_t(0) : *current_++;
    }
 };


 // Writes code points as UTF-32 code units into a caller-provided fixed-size buffer.
 // Values are stored exactly as given; nothing is validated.
 // The writer does not own the buffer. Writes past 'end_' are silently dropped.
 struct Utf32CharacterWriter
 {
    char32_t* begin_ = nullptr;   // Start of the output buffer.
    char32_t* current_ = nullptr; // Next code unit to write.
    char32_t* end_ = nullptr;     // One past the last writable code unit.

    Utf32CharacterWriter() = default;

    Utf32CharacterWriter(char32_t* begin, char32_t* end)
    :   begin_(begin), current_(begin), end_(end)
    {}

    // Accepts any other character type of the same size as char32_t.
    template <typename CharacterType>
    Utf32CharacterWriter(CharacterType* begin, CharacterType* end)
    requires (sizeof(CharacterType) == sizeof(char32_t))
    :   Utf32CharacterWriter(
            reinterpret_cast<char32_t*>(begin),
            reinterpret_cast<char32_t*>(end)
        )
    {}

    // Accepts any container usable with std::data/std::size whose elements are char32_t-sized.
    template <typename Container>
    Utf32CharacterWriter(Container& c)
    requires (sizeof(*std::data(c)) == sizeof(char32_t))
    :   Utf32CharacterWriter(
            reinterpret_cast<char32_t*>(std::data(c)),
            reinterpret_cast<char32_t*>(std::data(c) + std::size(c))
        )
    {}

    // Count of code units written so far.
    size_t size() const noexcept
    {
        return current_ - begin_;
    }

    bool IsAtEnd() const noexcept
    {
        return current_ >= end_;
    }

    // Appends one code point. Does nothing if the buffer is already full.
    void WriteNext(char32_t codePoint) noexcept
    {
        if (current_ >= end_)
            return;

        // Store the value, then advance. Previously the pointer advanced without any
        // store, leaving the buffer contents untouched.
        *current_++ = codePoint;
    }
 };


 // Returns 's' without its leading UTF-8 byte order mark, or 's' unchanged if it has none.
 std::u8string_view StripUtf8Bom(std::u8string_view s)
 {
    bool const hasBom = s.starts_with(g_utf8bomView);
    return hasBom ? s.substr(3) : s;
 }


 // Splits 'text' into lines, returning one OutputView per line (without the line break).
 // - Recognized line breaks are CR, LF, and the CR LF pair (treated as a single break).
 //   Paragraph separator and line separator characters are not treated as breaks.
 // - Empty lines between breaks are returned as empty views.
 // - A final line that has no trailing line break is returned too.
 // - A line break at the very end of the text does not add an extra empty line.
 // - Empty input returns an empty container.
 // The views are built from iterators into 'text', so they refer to its storage.
 template<
    typename InputContainer,
    typename OutputView = std::u8string_view, // Could be a std::string or std::span too or any type that accepts two iterators.
    typename OutputContainer = std::vector<OutputView>
 >
 requires requires(InputContainer i, OutputContainer o, OutputView v) {
    i.begin(); // Must have iterators.
    i.end();
    OutputView(i.begin(), i.end()); // Must be constructible from iterator pair.
    o.push_back(OutputView{}); // Must be push_back'able.
 }
 auto SplitLines(InputContainer& text) -> OutputContainer
 {
    OutputContainer result;

    auto lineBegin = text.begin();
    auto const textEnd = text.end();

    while (lineBegin != textEnd)
    {
        // Find the end of the current line.
        auto lineEnd = lineBegin;
        while (lineEnd != textEnd && *lineEnd != '\r' && *lineEnd != '\n')
        {
            ++lineEnd;
        }

        // Emit the line whether it ended at a line break or at the end of the text.
        // (Previously a last line without a trailing break was dropped.)
        result.push_back(OutputView(lineBegin, lineEnd));

        if (lineEnd == textEnd)
            break;

        // Skip the line break, treating a CR LF pair as one break.
        auto next = lineEnd;
        auto const breakCharacter = *next++;
        if (breakCharacter == '\r' && next != textEnd && *next == '\n')
        {
            ++next; // Skip the line feed.
        }
        lineBegin = next; // Next line.
    }
    return result;
 }


 template <typename StringViewType = std::u8string_view>
 class SplitEnumerator
 {
    StringViewType view_ = 0;
    char32_t splitCodeUnit_ = 0; // A single code unit to split upon, like ",".
    bool hasMore_ = true;

 public:
    SplitEnumerator(StringViewType view, char32_t splitCodeUnit) noexcept
    :   splitCodeUnit_(splitCodeUnit),
        view_(view)
    {}

    bool HasMore() const noexcept { return hasMore_; }

    StringViewType Read() noexcept
    {
        using C = decltype(*StringViewType().data());
        auto nextSplit = std::find(view_.begin(), view_.end(), C(splitCodeUnit_));
        auto token = StringViewType(view_.begin(), nextSplit);
        if (nextSplit == view_.end())
        {
            hasMore_ = false;
        }
        else
        {
            ++nextSplit; // Skip the split code unit.
        }
        view_ = StringViewType(nextSplit, view_.end());
        return token;
    }
 };


 // Writes 'value' as upper case hexadecimal, filling the entire buffer to its fixed size.
 // - Unused leading positions receive '0'.
 // - If the buffer is too short, only the lowest digits are written.
 template <typename CharacterType>
 void WriteZeroPaddedHexNum(uint32_t value, /*out*/ std::span<CharacterType> text)
 {
    // Work from the last position toward the first, one nibble per character.
    for (size_t remaining = text.size(); remaining > 0; --remaining)
    {
        uint32_t const nibble = value & 0xF;
        uint32_t const digitCharacter = nibble + ((nibble >= 10) ? 'A' - 10 : '0');
        text[remaining - 1] = static_cast<CharacterType>(digitCharacter);
        value >>= 4;
    }
 }


 // Non-template overloads for each Unicode code unit type. Each forwards to the template above.
 void WriteZeroPaddedHexNum(uint32_t value, /*out*/ std::span<char8_t> text)
 {
    WriteZeroPaddedHexNum<char8_t>(value, /*out*/ text);
 }

 void WriteZeroPaddedHexNum(uint32_t value, /*out*/ std::span<char16_t> text)
 {
    WriteZeroPaddedHexNum<char16_t>(value, /*out*/ text);
 }

 void WriteZeroPaddedHexNum(uint32_t value, /*out*/ std::span<char32_t> text)
 {
    WriteZeroPaddedHexNum<char32_t>(value, /*out*/ text);
 }


 // Parses an unsigned number in the given base (2..36) from the front of 'text'.
 // 'text' is updated to the end of all characters read.
 // TODO: Consider using std::from_chars instead now that it exists.
 //
 // Sadly, both wcstoul and std::stoul are useless functions because:
 // (1) wcstoul doesn't respect any boundaries and tries to parse beyond the code sequence
 //     (e.g. \x12345 should be treated as {0x1234, '5'}, not as {0x12345})
 // (2) std::stoul throws an exception on parse error, which is overkill for the user
 //     interactively typing in a number.
 // (3) std::stoul requires a std::string as input, which gimps its utility.
 // Additionally, some uses such as escapement conversion don't want whitespace skipped.
 //
 // - Digits are 0-9 followed by A-Z (either case) for values 10..35.
 // - Any character outside the radix stops the read. So 123A4G would stop at 'A' for decimal,
 //   but it would continue until 'G' for hexadecimal.
 // - No whitespace or sign is skipped.
 // - An empty string returns 0.
 // - Values too large for uint32_t wrap around; overflow is not reported.
 // - The caller doesn't receive a flag, but it can easily detect missing strings or whether
 //   the entire number was read by checking the returned std::span.
 template <typename CharacterType>
 uint32_t ReadUnsignedNumericValue(/*inout*/ std::span<CharacterType const>& text, _In_range_(2, 36) uint32_t base)
 {
    uint32_t value = 0;
    size_t consumed = 0;

    for (; consumed < text.size(); ++consumed)
    {
        uint32_t const character = text[consumed];
        uint32_t digit;

        if (character >= '0' && character <= '9')
        {
            digit = character - '0'; // Handle 0..9.
        }
        else
        {
            // Fold case on the character itself. Folding after subtracting '0' (as before)
            // mangled 'P'..'Z', which broke bases above 25.
            uint32_t const upperCased = character & ~uint32_t(32);
            if (upperCased < 'A' || upperCased > 'Z')
                break;

            digit = upperCased - 'A' + 10; // Handle A..Z.
        }

        if (digit >= base)
            break;

        value = value * base + digit;
    }

    text = text.subspan(consumed);

    return value;
 }


 // Non-template overloads for each Unicode code unit type. Each forwards to the template above.
 uint32_t ReadUnsignedNumericValue(/*inout*/ std::span<char8_t const>& text, _In_range_(2, 36) uint32_t base)
 {
    uint32_t const parsedValue = ReadUnsignedNumericValue<char8_t>(/*inout*/ text, base);
    return parsedValue;
 }

 uint32_t ReadUnsignedNumericValue(/*inout*/ std::span<char16_t const>& text, _In_range_(2, 36) uint32_t base)
 {
    uint32_t const parsedValue = ReadUnsignedNumericValue<char16_t>(/*inout*/ text, base);
    return parsedValue;
 }

 uint32_t ReadUnsignedNumericValue(/*inout*/ std::span<char32_t const>& text, _In_range_(2, 36) uint32_t base)
 {
    uint32_t const parsedValue = ReadUnsignedNumericValue<char32_t>(/*inout*/ text, base);
    return parsedValue;
 }


 // Expands C++ style backslash escapes in UTF-16 text into the characters they name.
 // - escapedText: input text that may contain escapes.
 // - expandedText: cleared, then receives the expanded result.
 // Supported escapes: \a \b \f \n \r \t \v \\ \' \" \? and the hexadecimal forms
 // \xHHHH, \uHHHH (exactly four hex digits) and \UHHHHHHHH (exactly eight hex digits).
 // Anything else, including a malformed hexadecimal escape, leaves the backslash in the
 // output and the following characters are then copied as ordinary text.
 // A backslash that is the very last code unit is copied as is.
 void UnescapeCppUniversalCharacterNames(
    std::span<char16_t const> escapedText,
    /*out*/ std::u16string& expandedText
 )
 {
    minimal_span<char16_t const> currentEscapedText(escapedText);
    expandedText.clear();
    expandedText.reserve(currentEscapedText.size());

    while (!currentEscapedText.empty())
    {
        char16_t ch = currentEscapedText.consume_front();

        // Check escape codes.
        if (ch == '\\' && !currentEscapedText.empty())
        {
            // Default to the backslash itself, used when the escape is not recognized.
            char32_t replacement = L'\\';
            char16_t code = currentEscapedText.front();

            // Each recognized case consumes the code character; unrecognized ones leave it
            // in place so it is copied by the next loop iteration.
            switch (code)
            {
            case 'a':  replacement = 0x0007; currentEscapedText.pop_front(); break; // Alert (Beep, Bell)
            case 'b':  replacement = 0x0008; currentEscapedText.pop_front(); break; // Backspace
            case 'f':  replacement = 0x000C; currentEscapedText.pop_front(); break; // Formfeed
            case 'n':  replacement = 0x000A; currentEscapedText.pop_front(); break; // Newline (Line Feed)
            case 'r':  replacement = 0x000D; currentEscapedText.pop_front(); break; // Carriage Return
            case 't':  replacement = 0x0009; currentEscapedText.pop_front(); break; // Horizontal Tab
            case 'v':  replacement = 0x000B; currentEscapedText.pop_front(); break; // Vertical Tab
            case '\\': replacement = 0x005C; currentEscapedText.pop_front(); break; // Backslash
            case '\'': replacement = 0x0027; currentEscapedText.pop_front(); break; // Single quotation mark
            case '\"': replacement = 0x0022; currentEscapedText.pop_front(); break; // Double quotation mark
            case '?':  replacement = 0x003F; currentEscapedText.pop_front(); break; // Question mark
            case L'x':
            case L'u':
            case L'U':
            {
                // 'U' takes eight hex digits; 'x' and 'u' both take four.
                size_t expectedHexSequenceLength = (code == 'U') ? 8 : 4;
                char16_t const* escapeStart = currentEscapedText.data() + 1; // Skip the 'x' 'u' 'U'
                // NOTE(review): escapeStart + expectedHexSequenceLength is computed before
                // clamping, so it may point past the end of the buffer when fewer digits remain.
                char16_t const* escapeEnd = std::min(escapeStart + expectedHexSequenceLength, currentEscapedText.data_end());
                std::span<char16_t const> digitSpan = {escapeStart, escapeEnd};

                // Parse the number.
                if (digitSpan.size() >= expectedHexSequenceLength)
                {
                    char32_t hexValue = ReadUnsignedNumericValue(/*inout*/ digitSpan, 16);
                    if (digitSpan.empty()) // Completely read the sequence.
                    {
                        replacement = hexValue;
                        // Continue after the last digit consumed.
                        currentEscapedText.reset(digitSpan.data(), currentEscapedText.end());
                    }
                }
                // Else parse error. So keep '\' to preserve original text.
            }
            break;

            // Anything else yields a '\', preserving the original text.
            // Silly octal is not supported.
            }

            // Values beyond the BMP need a surrogate pair in UTF-16.
            if (IsCharacterBeyondBmp(replacement))
            {
                expandedText.push_back(GetLeadingSurrogate(replacement));
                expandedText.push_back(GetTrailingSurrogate(replacement));
            }
            else
            {
                expandedText.push_back(char16_t(replacement));
            }
        }
        else // Just append ordinary code unit.
        {
            expandedText.push_back(ch);
        }
    }
 }


 // Rewrites UTF-16 text so that every code point becomes a C++ universal character name.
 // - escapedText is cleared, then receives the result.
 // - Code points within the BMP become \uHHHH (four upper case hex digits).
 // - Code points beyond the BMP become \UHHHHHHHH (eight upper case hex digits).
 // Code points are read with Utf16CharacterReader::ReadNext.
 void EscapeCppUniversalCharacterNames(
    std::span<char16_t const> text,
    /*out*/ std::u16string& escapedText
 )
 {
    constexpr size_t prefixLength = 2; // \u or \U
    constexpr size_t bmpDigitCount = 4;
    constexpr size_t supplementaryDigitCount = 8;

    // Escape templates; only the digit positions are rewritten for each code point.
    char16_t bmpEscape[6] = {'\\','u','0','0','0','0'};
    char16_t supplementaryEscape[10] = {'\\','U','0','0','0','0','0','0','0','0'};
    std::span<char16_t> const bmpDigits(bmpEscape + prefixLength, bmpDigitCount);
    std::span<char16_t> const supplementaryDigits(supplementaryEscape + prefixLength, supplementaryDigitCount);

    escapedText.clear();
    escapedText.reserve(text.size() * std::size(bmpEscape));

    Utf16CharacterReader reader(text);
    while (!reader.IsAtEnd())
    {
        char32_t const codePoint = reader.ReadNext();

        if (IsCharacterBeyondBmp(codePoint))
        {
            // Needs the long eight digit form.
            WriteZeroPaddedHexNum(codePoint, /*out*/ supplementaryDigits);
            escapedText.append(supplementaryEscape, std::size(supplementaryEscape));
        }
        else
        {
            // Fits in the short four digit form.
            WriteZeroPaddedHexNum(codePoint, /*out*/ bmpDigits);
            escapedText.append(bmpEscape, std::size(bmpEscape));
        }
    }
 }


 // Rewrites UTF-16 text so that every code point becomes an HTML hexadecimal
 // numeric character reference.
 // - escapedText is cleared, then receives the result.
 // - Code points within the BMP become &#xHHHH; (four upper case hex digits).
 // - Code points beyond the BMP become &#xHHHHHHHH; (eight upper case hex digits).
 // Code points are read with Utf16CharacterReader::ReadNext.
 void EscapeHtmlNamedCharacterReferences(
    std::span<char16_t const> text,
    /*out*/ std::u16string& escapedText
 )
 {
    constexpr size_t prefixLength = 3; // '&#x'
    constexpr size_t bmpDigitCount = 4;
    constexpr size_t supplementaryDigitCount = 8;

    // Escape templates; only the digit positions are rewritten for each code point.
    char16_t bmpEscape[8] = {'&','#','x','0','0','0','0',';'};
    char16_t supplementaryEscape[12] = {'&','#','x','0','0','0','0','0','0','0','0',';'};
    std::span<char16_t> const bmpDigits(bmpEscape + prefixLength, bmpDigitCount);
    std::span<char16_t> const supplementaryDigits(supplementaryEscape + prefixLength, supplementaryDigitCount);

    escapedText.clear();
    escapedText.reserve(text.size() * std::size(bmpEscape));

    Utf16CharacterReader reader(text);
    while (!reader.IsAtEnd())
    {
        char32_t const codePoint = reader.ReadNext();

        if (IsCharacterBeyondBmp(codePoint))
        {
            // Needs the long eight digit form.
            WriteZeroPaddedHexNum(codePoint, /*out*/ supplementaryDigits);
            escapedText.append(supplementaryEscape, std::size(supplementaryEscape));
        }
        else
        {
            // Fits in the short four digit form.
            WriteZeroPaddedHexNum(codePoint, /*out*/ bmpDigits);
            escapedText.append(bmpEscape, std::size(bmpEscape));
        }
    }
 }


 // Expands HTML numeric character references in UTF-16 text.
 // - escapedText: input text that may contain references.
 // - expandedText: cleared, then receives the expanded result.
 // Only numeric references are supported: decimal &#1234; and hexadecimal &#x1A2B;
 // Named references such as &amp; are not expanded.
 // A reference must have at least one digit and a terminating semicolon. Otherwise
 // the '&' is kept and the characters after it are copied as ordinary text, so
 // malformed input passes through unchanged.
 void UnescapeHtmlNamedCharacterReferences(std::span<char16_t const> escapedText, /*out*/ std::u16string& expandedText)
 {
    minimal_span<char16_t const> currentEscapedText(escapedText);
    expandedText.clear();
    expandedText.reserve(currentEscapedText.size());

    while (!currentEscapedText.empty())
    {
        char16_t ch = currentEscapedText.consume_front();

        // Check escape codes.
        if (ch == '&' && !currentEscapedText.empty())
        {
            // Default to the ampersand itself, used when no valid reference follows.
            char32_t replacement = U'&';
            char16_t const* escapeStart = currentEscapedText.data();

            // Only numeric escapes are supported: &#1234;&#x1A2B;
            // Not named ones: &amp;
            if (*escapeStart == '#')
            {
                uint32_t radix = 10; // Assume decimal, unless 'x' follows.
                ++escapeStart;
                if (escapeStart < currentEscapedText.data_end() && *escapeStart == 'x')
                {
                    radix = 16; // Hexadecimal.
                    ++escapeStart;
                }

                // Parse the number. 'escapeStart' now points at the first expected digit.
                std::span<char16_t const> digitSpan = {escapeStart, currentEscapedText.end()};
                char32_t const parsedValue = ReadUnsignedNumericValue(/*inout*/ digitSpan, radix);

                // Successful if at least one digit was consumed and a semicolon follows.
                // The comparison is against the first digit position. Comparing against the
                // start of the reference (as before) was always true, so "&#;" became NUL.
                if (digitSpan.data() > escapeStart && !digitSpan.empty() && digitSpan.front() == ';')
                {
                    replacement = parsedValue;
                    currentEscapedText = {digitSpan.data() + 1, currentEscapedText.data_end()}; // After the semicolon.
                }
                // Else parse error. Keep the '&' (not a backslash) to preserve original text.
            }

            // Values beyond the BMP need a surrogate pair in UTF-16.
            if (IsCharacterBeyondBmp(replacement))
            {
                expandedText.push_back(GetLeadingSurrogate(replacement));
                expandedText.push_back(GetTrailingSurrogate(replacement));
            }
            else
            {
                expandedText.push_back(char16_t(replacement));
            }
        }
        else // Just append ordinary code unit.
        {
            expandedText.push_back(ch);
        }
    }
 }


 // Percent-encodes the characters * ? / | \ : < > " in UTF-8 text.
 // - escapedText is cleared, then receives the result.
 // - Each escaped character becomes the escape character followed by two upper case
 //   hex digits (e.g. hello/world -> hello%2Fworld).
 // - The escape character itself is also escaped. Otherwise a literal "%2F" in the input
 //   would be indistinguishable from an escaped '/' and would not survive a round trip
 //   through UnescapeFilenameCharactersPercentEncoding.
 // - All other code units are copied unchanged.
 void EscapeFilenameCharactersPercentEncoding(
    std::span<char8_t const> text,
    /*out*/ std::u8string& escapedText,
    char8_t escapeCharacter = '%' // $ might be another useful option, for Javascript variable names.
 )
 {
    escapedText.clear();
    escapedText.reserve(text.size());

    constexpr size_t escapePrefixLength = 1; // '%'
    constexpr size_t shortEscapeDigitLength = 2;
    // Escape template; only the two digit positions are rewritten for each character.
    char8_t shortEscapedSequence[3] = {escapeCharacter,'0','0'};
    std::span<char8_t> shortDigitRange(shortEscapedSequence + escapePrefixLength, shortEscapedSequence + escapePrefixLength + shortEscapeDigitLength);

    for (char8_t ch : text)
    {
        bool needsEscape = (ch == escapeCharacter);

        switch (ch)
        {
        case '*':
        case '?':
        case '/':
        case '|':
        case '\\':
        case ':':
        case '<':
        case '>':
        case '"':
            needsEscape = true;
            break;

        default:
            break;
        }

        if (needsEscape)
        {
            WriteZeroPaddedHexNum(ch, /*out*/ shortDigitRange);
            escapedText.append(std::begin(shortEscapedSequence), std::end(shortEscapedSequence));
        }
        else
        {
            escapedText.push_back(ch);
        }
    }
 }


 // Expands two digit hex escapes back into the code units they stand for
 // (e.g. hello%2Fworld -> hello/world), writing the result to expandedText.
 //
 // An escape is only expanded when the escape character is followed by exactly two
 // hex digits (upper or lower case). Anything else (truncated or malformed escapes)
 // is copied through verbatim, so no input text is ever lost.
 void UnescapeFilenameCharactersPercentEncoding(
    std::span<char8_t const> escapedText,
    /*out*/ std::u8string& expandedText,
    char8_t escapeCharacter = '%' // $ might be another useful option, for Javascript variable names.
 )
 {
    expandedText.clear();
    expandedText.reserve(escapedText.size());

    constexpr size_t shortEscapeDigitLength = 2;

    // Returns the numeric value of a hex digit, or -1 if the code unit is not one.
    auto hexDigitValue = [](char8_t digit) noexcept -> int
    {
        if (digit >= '0' && digit <= '9') return digit - '0';
        if (digit >= 'A' && digit <= 'F') return digit - 'A' + 10;
        if (digit >= 'a' && digit <= 'f') return digit - 'a' + 10;
        return -1;
    };

    size_t const textLength = escapedText.size();
    for (size_t i = 0; i < textLength; ++i)
    {
        char8_t ch = escapedText[i];

        // The digits are read relative to the escape character's own position,
        // and only consumed once both are known to be valid.
        if (ch == escapeCharacter && textLength - i > shortEscapeDigitLength)
        {
            int const highNibble = hexDigitValue(escapedText[i + 1]);
            int const lowNibble = hexDigitValue(escapedText[i + 2]);
            if (highNibble >= 0 && lowNibble >= 0)
            {
                ch = char8_t(highNibble * 16 + lowNibble);
                i += shortEscapeDigitLength; // Skip the two digits just decoded.
            }
        }
        expandedText.push_back(ch);
    }
 }


 // Converts UTF-16 text into the caller supplied UTF-32 buffer, stopping when either the
 // input is exhausted or the output buffer is full. Code points come from
 // Utf16CharacterReader::ReadNext (noted in the original as substituting the replacement
 // character for unpaired surrogates).
 //
 // Returns the number of UTF-32 units written. If sourceCount is non-null, it receives
 // reader.size(), reported as the count of UTF-16 code units read. There may be more
 // UTF-16 code units than UTF-32 units, but never the other way around.
 _Out_range_(0, utf32text.end_ - utf32text.begin_)
 size_t ConvertTextUtf16ToUtf32(
    std::span<char16_t const> utf16text,
    /*out*/ std::span<char32_t> utf32text,
    _Out_opt_ size_t* sourceCount
 ) noexcept
 {
    Utf16CharacterReader reader(utf16text);
    size_t const capacity = utf32text.size();
    size_t written = 0;

    while (!reader.IsAtEnd() && written < capacity)
    {
        char32_t const codePoint = reader.ReadNext();
        utf32text[written] = codePoint;
        ++written;
    }

    if (sourceCount != nullptr)
    {
        *sourceCount = reader.size();
    }

    return written;
 }


 // Same as ConvertTextUtf16ToUtf32 over spans, except each code point is produced by
 // Utf16CharacterReader::ReadNextNoReplacement instead of ReadNext.
 //
 // Returns the number of UTF-32 units written. If sourceCount is non-null, it receives
 // reader.size(). There can be more UTF-16 code units than UTF-32 units, but never the
 // other way around.
 _Out_range_(0, utf32text.end_ - utf32text.begin_)
 size_t ConvertTextUtf16ToUtf32NoReplacement(
    std::span<char16_t const> utf16text,
    /*out*/ std::span<char32_t> utf32text,
    _Out_opt_ size_t* sourceCount
 ) noexcept
 {
    Utf16CharacterReader reader(utf16text);
    size_t const capacity = utf32text.size();
    size_t written = 0;

    while (!reader.IsAtEnd() && written < capacity)
    {
        utf32text[written] = reader.ReadNextNoReplacement();
        ++written;
    }

    if (sourceCount != nullptr)
    {
        *sourceCount = reader.size();
    }

    return written;
 }


 // Writes UTF-32 code points into the caller supplied UTF-16 buffer via Utf16CharacterWriter,
 // stopping when the writer reports it is at the end or the input is exhausted.
 //
 // Returns writer.size() (the UTF-16 units written). If sourceCount is non-null, it
 // receives the number of UTF-32 code points consumed.
 _Out_range_(0, utf16text.end_ - utf16text.begin_)
 size_t ConvertTextUtf32ToUtf16(
    std::span<char32_t const> utf32text,
    /*out*/ std::span<char16_t> utf16text,
    _Out_opt_ size_t* sourceCount
 ) noexcept
 {
    Utf16CharacterWriter writer(utf16text);
    size_t const sourceLength = utf32text.size();
    size_t consumed = 0;

    while (!writer.IsAtEnd() && consumed < sourceLength)
    {
        writer.WriteNext(utf32text[consumed]);
        ++consumed;
    }

    if (sourceCount != nullptr)
    {
        *sourceCount = consumed;
    }

    return writer.size();
 }


 // Writes UTF-32 code points into the caller supplied UTF-8 buffer via Utf8CharacterWriter,
 // stopping when the writer reports it is at the end or the input is exhausted.
 //
 // Returns writer.size() (the UTF-8 units written). If sourceCount is non-null, it
 // receives the number of UTF-32 code points consumed.
 //
 // The range annotation refers to this function's own output parameter (utf8text).
 _Out_range_(0, utf8text.end_ - utf8text.begin_)
 size_t ConvertTextUtf32ToUtf8(
    std::span<char32_t const> utf32text,
    /*out*/ std::span<char8_t> utf8text,
    _Out_opt_ size_t* sourceCount
 ) noexcept
 {
    size_t const utf32count = utf32text.size();
    size_t utf32index = 0;

    Utf8CharacterWriter writer(utf8text);
    for (; !writer.IsAtEnd() && utf32index < utf32count; ++utf32index)
    {
        writer.WriteNext(utf32text[utf32index]);
    }

    if (sourceCount != nullptr)
        *sourceCount = utf32index;

    return writer.size();
 }


 // Converts UTF-8 text to a UTF-16 string, skipping a leading byte order mark via
 // Utf8CharacterReader::SkipBom.
 //
 // As noted by the original author, this can only throw on out-of-memory while resizing
 // utf16text; if utf16text is already reserve()'d, no exception will happen.
 void ConvertTextUtf8ToUtf16(
    std::span<char8_t const> utf8text,
    /*out*/ std::u16string& utf16text
 )
 {
    Utf8CharacterReader reader(utf8text);
    reader.SkipBom();

    // Worst case sizing: UTF-16 (1-2 code units) never needs more code units than
    // UTF-8 (1-4 code units) for the same text.
    utf16text.resize(reader.size());
    Utf16CharacterWriter writer(utf16text);

    for (; !reader.IsAtEnd(); )
    {
        assert(!writer.IsAtEnd());
        auto const codePoint = reader.ReadNext();
        writer.WriteNext(codePoint);
    }

    // Trim the worst case allocation down to what was actually written.
    utf16text.resize(writer.size());
 }


 // Converts UTF-8 text to a UTF-32 string, skipping a leading byte order mark via
 // Utf8CharacterReader::SkipBom.
 //
 // As noted by the original author, this can only throw on out-of-memory while resizing
 // the u32string; if it is already reserve()'d, no exception will happen.
 void ConvertTextUtf8ToUtf32(
    std::u8string_view utf8text,
    /*out*/ std::u32string& utf32text
 )
 {
    Utf8CharacterReader reader(utf8text);
    reader.SkipBom();

    // Worst case sizing: UTF-32 (1 code unit) never needs more code units than
    // UTF-8 (1-4 code units) for the same text.
    utf32text.resize(reader.size());
    Utf32CharacterWriter writer(utf32text);

    for (; !reader.IsAtEnd(); )
    {
        assert(!writer.IsAtEnd());
        auto const codePoint = reader.ReadNext();
        writer.WriteNext(codePoint);
    }

    // Trim the worst case allocation down to what was actually written.
    utf32text.resize(writer.size());
 }


 // Converts UTF-16 text to a UTF-8 string.
 void ConvertTextUtf16ToUtf8(
    std::span<char16_t const> utf16text,
    /*out*/ std::u8string& utf8text
 )
 {
    // C++ deprecated codecvt_utf8_utf16 but offered no successor in its place.
    //
    // std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> g_converterToUtf8;
    // std::string temporary = g_converterToUtf8.to_bytes(source.data(), source.data() + source.size());
    //
    // So implement it directly.

    // Allow triple the space for the output, as a single UTF-16 code unit can yield
    // up to three UTF-8 code units (any BMP code point at or above U+0800):
    //
    // Code point                 UTF-16  UTF-8   Factor
    //                            code unit count
    //     U+0 through      U+7F  1       1       1x
    //    U+80 through     U+7FF  1       2       2x
    //   U+800 through    U+FFFF  1       3       3x
    // U+10000 through U+10FFFF  2       4       2x

    utf8text.resize(utf16text.size() * 3);  // Preallocate up to 3 UTF-8 code units per UTF-16 code unit.
    Utf16CharacterReader reader(utf16text);
    Utf8CharacterWriter writer(utf8text);

    while (!reader.IsAtEnd())
    {
        assert(!writer.IsAtEnd());
        writer.WriteNext(reader.ReadNext());
    }
    utf8text.resize(writer.size());  // Shrink back to actual size.
 }


 // Converts UTF-16 text to a UTF-32 string, resizing utf32text to fit the result.
 void ConvertTextUtf16ToUtf32(
    std::span<char16_t const> utf16text,
    /*out*/ std::u32string& utf32text
 )
 {
    // Worst case sizing: UTF-32 (1 code unit) never needs more code units than
    // UTF-16 (1-2 code units) for the same text.
    utf32text.resize(utf16text.size());

    Utf16CharacterReader reader(utf16text);
    Utf32CharacterWriter writer(utf32text);

    for (; !reader.IsAtEnd(); )
    {
        assert(!writer.IsAtEnd());
        auto const codePoint = reader.ReadNext();
        writer.WriteNext(codePoint);
    }

    // Trim the worst case allocation down to what was actually written.
    utf32text.resize(writer.size());
 }


 // Converts UTF-32 text to a UTF-8 string, resizing utf8text to fit the result.
 void ConvertTextUtf32ToUtf8(
    std::span<char32_t const> utf32text,
    /*out*/ std::u8string& utf8text
 )
 {
    // Worst case sizing: each UTF-32 code unit could expand to 4 UTF-8 code units.
    utf8text.resize(utf32text.size() * 4);

    Utf32CharacterReader reader(utf32text);
    Utf8CharacterWriter writer(utf8text);

    for (; !reader.IsAtEnd(); )
    {
        assert(!writer.IsAtEnd());
        auto const codePoint = reader.ReadNext();
        writer.WriteNext(codePoint);
    }

    // Trim the worst case allocation down to what was actually written.
    utf8text.resize(writer.size());
 }


 // Converts UTF-32 text to a UTF-16 string, resizing utf16text to fit the result.
 void ConvertTextUtf32ToUtf16(
    std::span<char32_t const> utf32text,
    /*out*/ std::u16string& utf16text
 )
 {
    // Worst case sizing: two UTF-16 code units are reserved per UTF-32 code unit.
    utf16text.resize(utf32text.size() * 2);

    Utf32CharacterReader reader(utf32text);
    Utf16CharacterWriter writer(utf16text);

    for (; !reader.IsAtEnd(); )
    {
        assert(!writer.IsAtEnd());
        auto const codePoint = reader.ReadNext();
        writer.WriteNext(codePoint);
    }

    // Trim the worst case allocation down to what was actually written.
    utf16text.resize(writer.size());
 }


 // Convenience wrapper returning the ConvertTextUtf8ToUtf16 result by value.
 inline std::u16string ToUtf16String(std::span<char8_t const> source)
 {
    std::u16string converted;
    ConvertTextUtf8ToUtf16(source, /*out*/ converted);
    return converted;
 }


 // Convenience wrapper returning the ConvertTextUtf16ToUtf8 result by value.
 inline std::u8string ToUtf8String(std::span<char16_t const> source)
 {
    std::u8string converted;
    ConvertTextUtf16ToUtf8(source, /*out*/ converted);
    return converted;
 }


 #ifdef _WIN32
 // Windows only overload: reinterprets the wchar_t span as char16_t code units and
 // returns the ConvertTextUtf16ToUtf8 result by value.
 inline std::u8string ToUtf8String(std::span<wchar_t const> source)
 {
    std::u8string converted;
    ConvertTextUtf16ToUtf8(reinterpret_span<char16_t const>(source), /*out*/ converted);
    return converted;
 }
 #endif // _WIN32


 // One entry of a name <-> value lookup table, as consumed by TryMapStringToIndex,
 // TryMapIndexToString, and MapStringSuffixIcaseToIndex.
 struct StringAndIndex
 {
    char8_t const* text; // Null terminated.
    uint32_t index; // Value paired with the text.
 };


 // Linear search of list for the first entry whose text equals the given text exactly.
 // Returns that entry's index, or an empty optional when nothing matches.
 std::optional<uint32_t> TryMapStringToIndex(std::u8string_view text, std::span<const StringAndIndex> list) noexcept
 {
    for (auto entry = list.begin(), listEnd = list.end(); entry != listEnd; ++entry)
    {
        if (entry->text != text)
        {
            continue;
        }
        return entry->index;
    }

    return std::nullopt;
 }


 // Typed variant of TryMapStringToIndex (e.g. for enums): looks up mode in nameAndIndexList
 // and converts the found index to T. Returns an empty optional when there is no match.
 // T must be the same size as uint32_t.
 template<typename T>
 std::optional<T> TryMapStringToIndex(std::u8string_view mode, std::span<const StringAndIndex> nameAndIndexList) noexcept
 {
    static_assert(sizeof(T) == sizeof(uint32_t));
    std::optional<uint32_t> const result = TryMapStringToIndex(mode, nameAndIndexList);
    if (!result.has_value())
    {
        return std::nullopt;
    }

    // Convert the value itself rather than reinterpreting the optional's storage,
    // which would rely on unspecified layout and violate aliasing rules.
    return static_cast<T>(*result);
 }


 // Looks up mode in nameAndIndexList and returns the matching index converted to T,
 // or defaultValue when there is no match.
 template<typename T>
 T MapStringToIndex(std::u8string_view mode, std::span<const StringAndIndex> nameAndIndexList, T defaultValue) noexcept
 {
    if (auto const found = TryMapStringToIndex(mode, nameAndIndexList))
    {
        return T(*found);
    }
    return defaultValue;
 }


 // Lowercases the ASCII letters A-Z in place; every other code unit is left untouched.
 //
 // The conversion is deliberately done by hand instead of through ::tolower, which has
 // undefined behavior for values outside the unsigned char range (negative char values,
 // char16_t above 0xFF) and is locale dependent, so it could otherwise alter bytes that
 // belong to multi-byte UTF-8 sequences.
 template <typename CharType>
 void ToLowercase(/*inout*/ std::span<CharType> text) noexcept
 {
    for (CharType& c : text)
    {
        // TODO: Extend this to other languages besides English?
        // It's currently only used for English keywords, and salient case conversions are 1:1 (the German double S is irrelevant now)
        if (c >= CharType('A') && c <= CharType('Z'))
        {
            c = static_cast<CharType>(c + ('a' - 'A'));
        }
    }
 }

 // Non-template overloads so that strings and arrays convert implicitly to the span parameter.
 void ToLowercase(/*inout*/ std::span<char> text) { ToLowercase<char>(text); }
 void ToLowercase(/*inout*/ std::span<char8_t> text) { ToLowercase<char8_t>(text); }
 void ToLowercase(/*inout*/ std::span<char16_t> text) { ToLowercase<char16_t>(text); }


 // Uppercases the ASCII letters a-z in place; every other code unit is left untouched.
 //
 // The conversion is deliberately done by hand instead of through ::toupper, which has
 // undefined behavior for values outside the unsigned char range (negative char values,
 // char16_t above 0xFF) and is locale dependent, so it could otherwise alter bytes that
 // belong to multi-byte UTF-8 sequences.
 template <typename CharType>
 void ToUpperCase(/*inout*/ std::span<CharType> text)
 {
    for (CharType& c : text)
    {
        if (c >= CharType('a') && c <= CharType('z'))
        {
            c = static_cast<CharType>(c - ('a' - 'A'));
        }
    }
 }

 // Non-template overloads so that strings and arrays convert implicitly to the span parameter.
 void ToUpperCase(/*inout*/ std::span<char> text) { ToUpperCase<char>(text); }
 void ToUpperCase(/*inout*/ std::span<char8_t> text) { ToUpperCase<char8_t>(text); }
 void ToUpperCase(/*inout*/ std::span<char16_t> text) { ToUpperCase<char16_t>(text); }


 std::optional<std::u8string_view> TryMapIndexToString(uint32_t index, std::span<const StringAndIndex> nameAndIndexList) noexcept
 {
    for (auto& nameAndIndex : nameAndIndexList)
    {
        if (nameAndIndex.index == index)
        {
            return nameAndIndex.text;
        }
    }

    return {};
 }


 template<typename T>
 std::optional<T> TryMapIndexToString(T index, std::span<const StringAndIndex> nameAndIndexList) noexcept
 {
    static_assert(sizeof(T) == sizeof(uint32_t));
    return TryMapIndexToString(static_cast<uint32_t>(index), nameAndIndexList);
 }


 // Returns the text of the first nameAndIndexList entry whose index equals the given index
 // (converted to uint32_t), or defaultValue when there is no such entry.
 template<typename T>
 std::u8string_view MapIndexToString(T index, std::span<const StringAndIndex> nameAndIndexList, std::u8string_view defaultValue) noexcept
 {
    return TryMapIndexToString(uint32_t(index), nameAndIndexList).value_or(defaultValue);
 }


 // Returns the index of the first nameAndIndexList entry whose text is a suffix of the
 // lowercased input text, or defaultValue when no entry matches. Only the input is
 // lowercased, so entries must already be lowercase to match.
 export uint32_t MapStringSuffixIcaseToIndex(
    std::u8string_view text,
    std::span<const StringAndIndex> nameAndIndexList,
    uint32_t defaultValue
 )
 {
    std::u8string loweredText(text);
    ToLowercase(/*inout*/ loweredText);

    for (StringAndIndex const& candidate : nameAndIndexList)
    {
        if (!loweredText.ends_with(candidate.text))
        {
            continue;
        }
        return candidate.index;
    }

    return defaultValue;
 }


 // Removes from both ends of text any run of the characters listed in spaces.
 // Only the listed characters are trimmed (the overloads in this file pass space and tab),
 // so other whitespace such as U+200X or new line controls is kept unless listed.
 // A string consisting solely of trimmed characters becomes empty.
 template <typename StringType, typename StringCharType>
 void TrimSpaces(/*inout*/ StringType& text, /*nullterminated*/ StringCharType const* spaces)
 {
    // Trailing run: cut everything after the last character worth keeping.
    if (size_t const lastKept = text.find_last_not_of(spaces); lastKept != std::string::npos)
    {
        text.erase(lastKept + 1);
    }

    // Leading run: cut everything before the first character worth keeping,
    // or the whole string if there is no such character.
    size_t const firstKept = text.find_first_not_of(spaces);
    if (firstKept == std::string::npos)
    {
        text.erase(0, text.size());
    }
    else if (firstKept != 0)
    {
        text.erase(0, firstKept);
    }
 }

 void TrimSpaces(/*inout*/ std::string text) { return TrimSpaces<std::string>(text, " \t"); }
 void TrimSpaces(/*inout*/ std::u8string text) { return TrimSpaces<std::u8string>(text, u8" \t"); }
 void TrimSpaces(/*inout*/ std::u16string& text) { return TrimSpaces<std::u16string>(text, u" \t"); }

 // Removes one trailing and one leading double quote from text, if present.
 // Each end is handled independently, so an unbalanced quote is still removed.
 template <typename StringType>
 void UnquoteString(/*inout*/ StringType& text)
 requires requires (StringType& text) { text.empty(); text.back(); text.pop_back(); text.front(); text.erase(); }
 {
    // Trailing quote first; the string may become empty as a result.
    if (!text.empty() && text.back() == '\"')
    {
        text.pop_back();
    }

    // Then the leading quote, provided anything is left.
    if (!text.empty() && text.front() == '\"')
    {
        text.erase(0, 1);
    }
 }

 // Non-template overloads for the common string types.
 void UnquoteString(/*inout*/ std::string& text) { UnquoteString<std::string>(text); }
 void UnquoteString(/*inout*/ std::u8string& text) { UnquoteString<std::u8string>(text); }
 void UnquoteString(/*inout*/ std::u16string& text) { UnquoteString<std::u16string>(text); }


 // Joins the null terminated strings of stringList into a single string.
 // Useful for reconcatenating main's argc and argv[].
 // The delimiter is inserted before a string only when the text accumulated so far is
 // non-empty, so leading empty strings contribute no delimiter.
 template <typename CharType, typename StringType, typename ViewType>
 StringType ConcatenateStrings(std::span<CharType const* const> stringList, ViewType delimiter)
 {
    StringType joined;
    for (size_t i = 0, count = stringList.size(); i < count; ++i)
    {
        if (!joined.empty())
        {
            joined += delimiter;
        }
        joined += stringList[i];
    }

    return joined;
 }


 // char overload; strings are separated by a single space unless told otherwise.
 std::string ConcatenateStrings(std::span<char const* const> stringList, std::string_view delimiter = " ")
 {
    using Char = char;
    return ConcatenateStrings<Char, std::basic_string<Char>, std::basic_string_view<Char>>(stringList, delimiter);
 }


 // char8_t overload; strings are separated by a single space unless told otherwise.
 std::u8string ConcatenateStrings(std::span<char8_t const* const> stringList, std::u8string_view delimiter = u8" ")
 {
    using Char = char8_t;
    return ConcatenateStrings<Char, std::basic_string<Char>, std::basic_string_view<Char>>(stringList, delimiter);
 }


 // char16_t overload; strings are separated by a single space unless told otherwise.
 std::u16string ConcatenateStrings(std::span<char16_t const* const> stringList, std::u16string_view delimiter = u" ")
 {
    using Char = char16_t;
    return ConcatenateStrings<Char, std::basic_string<Char>, std::basic_string_view<Char>>(stringList, delimiter);
 }

 // Helper trait that resolves the byte size of a code unit type to the respective reader type.
 // Because we're only targeting Unicode (no Shift-JIS, Big 5, or other), we can simplify
 // the specialization by code unit byte size.
 // The primary template is declared but not defined; only sizes 1, 2, and 4 provide a Type.
 template <size_t CharacterTypeByteSize> struct CharacterReaderResolver;
 template <> struct CharacterReaderResolver<1> { using Type = Utf8CharacterReader; };
 template <> struct CharacterReaderResolver<2> { using Type = Utf16CharacterReader; };
 template <> struct CharacterReaderResolver<4> { using Type = Utf32CharacterReader; };

 // Compare two Unicode strings of possibly different encodings for equality.
 // Same-size code units are compared directly; otherwise both strings are read one
 // character at a time through the reader selected by CharacterReaderResolver.
 template <typename CharTypeA, typename CharTypeB>
 bool CompareStringSpans(std::span<CharTypeA const> a, std::span<CharTypeB const> b)
 {
    if constexpr (sizeof(CharTypeA) != sizeof(CharTypeB))
    {
        // Strings are heterogeneous Unicode encodings.
        using ReaderA = typename CharacterReaderResolver<sizeof(CharTypeA)>::Type;
        using ReaderB = typename CharacterReaderResolver<sizeof(CharTypeB)>::Type;
        ReaderA readerA(a);
        ReaderB readerB(b);

        for (;;)
        {
            bool const aIsDone = readerA.IsAtEnd();
            bool const bIsDone = readerB.IsAtEnd();
            if (aIsDone || bIsDone)
            {
                // Equal only if both ended together; false if one string ended before the other.
                return aIsDone == bIsDone;
            }
            if (readerA.ReadNext() != readerB.ReadNext())
            {
                return false;
            }
        }
    }
    else
    {
        // Code units are the same size, so they can be compared directly.
        return std::equal(a.begin(), a.end(), b.begin(), b.end());
    }
 }

 /*
 Usage:
    std::string a = "Hello"; // Encoding is actually UTF-8.
    std::u8string b = u8"Hello";
    bool result = StringHelpers::CompareStrings(a, b);

    std::wstring a = L"Hello";
    std::u8string b = u8"Hello";
    bool result = StringHelpers::CompareStrings(a, b);

    std::u32string a = U"Hello";
    std::string b = "Hello";
    bool result = StringHelpers::CompareStrings(a, b);

    std::u16string a = u"Hello";
    char32_t b[5] = {'H','e','l','l','o'};
    bool result = StringHelpers::CompareStrings(a, std::span(b));
 */
 template <typename StringTypeA, typename StringTypeB>
 inline bool CompareStrings(StringTypeA const& a, StringTypeB const& b)
 // TODO: Figure out why requires with std::data doesn't work as expected on raw C arrays.
 //requires requires (StringTypeA a, StringTypeB b) { std::data(a); std::size(a); std::data(b); std::size(b); }
 {
    // Sadly we can't just rely on template type deduction here because mutable spans then
    // no CompareStringSpans overload because CompareStringSpans expects span<T const>.
    using CharTypeA = std::remove_reference_t<decltype(*std::data(a))>;
    using CharTypeB = std::remove_reference_t<decltype(*std::data(b))>;
    return CompareStringSpans(std::span<CharTypeA const>(a), std::span<CharTypeB const>(b));
 }

 } // namespace StringHelpers
No results found