Created
June 20, 2013 14:50
-
-
Save yamamushi/5823402 to your computer and use it in GitHub Desktop.
Int to UTF-8 String function (C++)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
//  IntToUTF8String.cpp
//  The ASCII Project
//
//  Created by Yamamushi on 6/20/13.
//  Copyright (c) 2013 TAP. All rights reserved.
//
#include <string> | |
/// Encodes a single Unicode code point as a UTF-8 byte sequence.
///
/// @param convertMe  The code point to encode.
/// @return The 1- to 4-byte UTF-8 encoding of `convertMe`. A string holding a
///         single space (" ") is returned when the value cannot be encoded:
///         zero, negative values, UTF-16 surrogates (U+D800..U+DFFF) and
///         anything above U+10FFFF.
std::string IntToUTF8String(int convertMe){
    // Zero and out-of-range values map to a single space; callers have always
    // received " " for these inputs, so that placeholder is kept.
    if (convertMe <= 0 || convertMe > 0x10FFFF) {
        return " ";
    }

    // Surrogate halves are reserved for UTF-16 and are not valid scalar
    // values; encoding them would produce ill-formed UTF-8.
    if (convertMe >= 0xD800 && convertMe <= 0xDFFF) {
        return " ";
    }

    // Work on an unsigned copy so the shifts and masks below are well defined.
    const unsigned int cp = static_cast<unsigned int>(convertMe);

    std::string out;
    if (cp <= 0x7F) {
        // 0xxxxxxx
        out += static_cast<char>(static_cast<unsigned char>(cp));
    } else if (cp <= 0x7FF) {
        // 110xxxxx 10xxxxxx
        out += static_cast<char>(static_cast<unsigned char>(0xC0u | (cp >> 6)));
        out += static_cast<char>(static_cast<unsigned char>(0x80u | (cp & 0x3Fu)));
    } else if (cp <= 0xFFFF) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        out += static_cast<char>(static_cast<unsigned char>(0xE0u | (cp >> 12)));
        out += static_cast<char>(static_cast<unsigned char>(0x80u | ((cp >> 6) & 0x3Fu)));
        out += static_cast<char>(static_cast<unsigned char>(0x80u | (cp & 0x3Fu)));
    } else {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (supplementary planes)
        out += static_cast<char>(static_cast<unsigned char>(0xF0u | (cp >> 18)));
        out += static_cast<char>(static_cast<unsigned char>(0x80u | ((cp >> 12) & 0x3Fu)));
        out += static_cast<char>(static_cast<unsigned char>(0x80u | ((cp >> 6) & 0x3Fu)));
        out += static_cast<char>(static_cast<unsigned char>(0x80u | (cp & 0x3Fu)));
    }
    return out;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment