Skip to content

Instantly share code, notes, and snippets.

@siberex
Last active July 15, 2025 20:55
Show Gist options
  • Save siberex/94886ca5eec99e964cd5a45c703eaf53 to your computer and use it in GitHub Desktop.
Save siberex/94886ca5eec99e964cd5a45c703eaf53 to your computer and use it in GitHub Desktop.
[FAILED] Attempt to implement string toUpperCase with LLM
#include <cctype>
#include <cstdint>
#include <iostream>
#include <string>
#include <string_view>
static char32_t to_upper(char32_t cp) {
if (cp >= 'a' && cp <= 'z') {
return cp - ('a' - 'A');
}
if (cp >= 0x00E0 && cp <= 0x00FE && cp != 0x00F7) {
return cp - 0x20;
}
if (cp == 0x00FF) {
return 0x0178;
}
if (cp >= 0x0100 && cp <= 0x017F) {
if (cp & 1) {
return cp - 1;
}
}
switch (cp) {
case 0x0131:
return 'I';
case 0x017F:
return 'S';
default:
break;
}
return cp;
}
std::string_view toUpperCase(const std::string_view &str) {
thread_local std::string buffer;
buffer.clear();
buffer.reserve(str.size() * 4);
auto it = str.begin();
while (it != str.end()) {
unsigned char c = static_cast<unsigned char>(*it);
if (c < 0x80) {
buffer.push_back(static_cast<char>(std::toupper(c)));
++it;
} else {
int n = 0;
if ((c & 0xE0) == 0xC0) {
n = 2;
} else if ((c & 0xF0) == 0xE0) {
n = 3;
} else if ((c & 0xF8) == 0xF0) {
n = 4;
} else {
buffer.push_back(*it);
++it;
continue;
}
if (static_cast<std::size_t>(std::distance(it, str.end())) < static_cast<std::size_t>(n)) {
while (it != str.end()) {
buffer.push_back(*it);
++it;
}
break;
}
char32_t code_point = 0;
bool valid = true;
auto next_it = it;
if (n >= 2) {
++next_it;
if (next_it == str.end() || (static_cast<unsigned char>(*next_it) & 0xC0) != 0x80) {
valid = false;
}
}
if (n >= 3 && valid) {
++next_it;
if (next_it == str.end() || (static_cast<unsigned char>(*next_it) & 0xC0) != 0x80) {
valid = false;
}
}
if (n >= 4 && valid) {
++next_it;
if (next_it == str.end() || (static_cast<unsigned char>(*next_it) & 0xC0) != 0x80) {
valid = false;
}
}
if (!valid) {
buffer.push_back(*it);
++it;
continue;
}
next_it = it;
if (n == 2) {
code_point = (static_cast<unsigned char>(*next_it) & 0x1F) << 6;
++next_it;
code_point |= (static_cast<unsigned char>(*next_it) & 0x3F);
} else if (n == 3) {
code_point = (static_cast<unsigned char>(*next_it) & 0x0F) << 12;
++next_it;
code_point |= (static_cast<unsigned char>(*next_it) & 0x3F) << 6;
++next_it;
code_point |= (static_cast<unsigned char>(*next_it) & 0x3F);
} else if (n == 4) {
code_point = (static_cast<unsigned char>(*next_it) & 0x07) << 18;
++next_it;
code_point |= (static_cast<unsigned char>(*next_it) & 0x3F) << 12;
++next_it;
code_point |= (static_cast<unsigned char>(*next_it) & 0x3F) << 6;
++next_it;
code_point |= (static_cast<unsigned char>(*next_it) & 0x3F);
}
char32_t upper_cp = to_upper(code_point);
if (upper_cp <= 0x7F) {
buffer.push_back(static_cast<char>(upper_cp));
} else if (upper_cp <= 0x7FF) {
buffer.push_back(static_cast<char>(0xC0 | (upper_cp >> 6)));
buffer.push_back(static_cast<char>(0x80 | (upper_cp & 0x3F)));
} else if (upper_cp <= 0xFFFF) {
buffer.push_back(static_cast<char>(0xE0 | (upper_cp >> 12)));
buffer.push_back(static_cast<char>(0x80 | ((upper_cp >> 6) & 0x3F)));
buffer.push_back(static_cast<char>(0x80 | (upper_cp & 0x3F)));
} else if (upper_cp <= 0x10FFFF) {
buffer.push_back(static_cast<char>(0xF0 | (upper_cp >> 18)));
buffer.push_back(static_cast<char>(0x80 | ((upper_cp >> 12) & 0x3F)));
buffer.push_back(static_cast<char>(0x80 | ((upper_cp >> 6) & 0x3F)));
buffer.push_back(static_cast<char>(0x80 | (upper_cp & 0x3F)));
} else {
for (int i = 0; i < n; ++i) {
buffer.push_back(*(it + i));
}
}
it += n;
}
}
return buffer;
}
/*
PROMPT:
Write C++23 implementation for string toUpperCase() function for UTF-8 strings. Do not use Boost or ICU.
Function signature: std::string_view toUpperCase(const std::string_view &str);
Use standard library methods like locale::toupper, function should be portable (should compile with either GCC, LLVM Clang or MSVC) and should work in the upcoming C++26 standard (do not use anything that is going to be removed in C++26).
Full Unicode compatibility is not required, but it should correctly convert strings like "naïve" (expected output is "NAÏVE" and not "NAïVE").
It should accept std::string_view (not std::wstring_view) and output std::string_view.
*/
// g++ -std=c++23 toupper_deepseek.cpp -o /tmp/toupper_deepseek && /tmp/toupper_deepseek
int main() {
std::string strTestUpper = "naïve";
std::cout << std::format(
"Narrow string: {0} → {1}\n",
strTestUpper,
toUpperCase(strTestUpper)
);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment