Skip to content

Instantly share code, notes, and snippets.

@yamamushi
Created June 20, 2013 14:50
Show Gist options
  • Save yamamushi/5823402 to your computer and use it in GitHub Desktop.
Save yamamushi/5823402 to your computer and use it in GitHub Desktop.
Int to UTF-8 String function (C++)
//
// IntToUTF8String.cpp
// The ASCII Project
//
// Created by Yamamushi on 6/20/13.
// Copyright (c) 2013 TAP. All rights reserved.
//
#include <string>
/// @brief Encode a single Unicode code point as a UTF-8 byte string.
///
/// Produces the 1- to 4-byte UTF-8 sequence for the given code point:
///   - U+0001  .. U+007F    -> 1 byte   (0xxxxxxx)
///   - U+0080  .. U+07FF    -> 2 bytes  (110xxxxx 10xxxxxx)
///   - U+0800  .. U+FFFF    -> 3 bytes  (1110xxxx 10xxxxxx 10xxxxxx)
///   - U+10000 .. U+10FFFF  -> 4 bytes  (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
///
/// @param convertMe The code point to encode.
/// @return The UTF-8 encoding of @p convertMe, or a string containing a single
///         space (" ") when the value cannot be encoded: zero, negative
///         values, values above U+10FFFF, and the UTF-16 surrogate range
///         U+D800..U+DFFF (surrogates are not valid in well-formed UTF-8).
std::string IntToUTF8String(int convertMe){
    // Zero has always mapped to the placeholder; negatives and anything past
    // the last Unicode code point are not encodable at all.
    if (convertMe <= 0 || convertMe > 0x10FFFF) {
        return " ";
    }

    // Surrogates are reserved for UTF-16 and must never be emitted as UTF-8.
    if (convertMe >= 0xD800 && convertMe <= 0xDFFF) {
        return " ";
    }

    // Work on an unsigned copy so the shifts and masks below are well defined.
    const unsigned int cp = static_cast<unsigned int>(convertMe);

    std::string out;
    out.reserve(4);

    if (cp <= 0x7F) {
        // ASCII is encoded as itself.
        out.push_back(static_cast<char>(static_cast<unsigned char>(cp)));
    } else if (cp <= 0x07FF) {
        // Lead byte carries the top 5 bits, continuation byte the low 6.
        out.push_back(static_cast<char>(static_cast<unsigned char>(0xC0u | (cp >> 6))));
        out.push_back(static_cast<char>(static_cast<unsigned char>(0x80u | (cp & 0x3Fu))));
    } else if (cp <= 0xFFFF) {
        // Lead byte carries the top 4 bits, then two 6-bit continuation bytes.
        out.push_back(static_cast<char>(static_cast<unsigned char>(0xE0u | (cp >> 12))));
        out.push_back(static_cast<char>(static_cast<unsigned char>(0x80u | ((cp >> 6) & 0x3Fu))));
        out.push_back(static_cast<char>(static_cast<unsigned char>(0x80u | (cp & 0x3Fu))));
    } else {
        // Supplementary planes: 3 bits in the lead byte, then three
        // 6-bit continuation bytes.
        out.push_back(static_cast<char>(static_cast<unsigned char>(0xF0u | (cp >> 18))));
        out.push_back(static_cast<char>(static_cast<unsigned char>(0x80u | ((cp >> 12) & 0x3Fu))));
        out.push_back(static_cast<char>(static_cast<unsigned char>(0x80u | ((cp >> 6) & 0x3Fu))));
        out.push_back(static_cast<char>(static_cast<unsigned char>(0x80u | (cp & 0x3Fu))));
    }

    return out;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment