Created
April 1, 2012 12:50
-
-
Save programus/2275148 to your computer and use it in GitHub Desktop.
Functions to convert a string (UTF-8 in C++, System.String in C#) into \uXXXX Unicode escape sequences
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace {

// Adapter for iconv()'s second argument: some iconv headers declare it as
// `char**`, others as `const char**`. Converting implicitly to either lets
// the same call compile against both declarations.
struct IconvInPtr {
    const char** ptr;
    operator const char**() const { return ptr; }
    operator char**() const { return const_cast<char**>(ptr); }
};

} // namespace

//
// Convert a NUL-terminated UTF-8 string to ASCII-only text in which every
// non-ASCII UTF-16 code unit is written as a \uXXXX escape (upper-case hex;
// characters outside the BMP become two escapes, one per surrogate).
//
//   fromStr  UTF-8 input. NULL is treated as "nothing to convert".
//   toStr    destination buffer. May be NULL to only measure the length.
//   maxSize  capacity of toStr in bytes, terminator included.
//
// Returns the full length of the escaped text (terminator excluded), even if
// it did not fit. When maxSize > 0 the destination is always NUL-terminated,
// so a return value >= maxSize means the output was truncated.
// Returns 0 with an empty destination if the converter or the work buffers
// cannot be set up.
//
size_t escapeUnicode(const char* fromStr, char* const toStr, const size_t& maxSize)
{
    const size_t UNI_ESC_SIZE = 6;                    // strlen("\\uXXXX")
    static const char HEX[] = "0123456789ABCDEF";

    size_t n = 0;            // length of the escaped text
    char* utf16 = NULL;      // UTF-16BE image of the input
    char* escBuff = NULL;    // escaped text before it is copied to toStr

    iconv_t cd = (iconv_t)-1;
    if (fromStr != NULL) {
        cd = iconv_open("UTF-16BE", "UTF-8");
    }

    if (cd != (iconv_t)-1) {
#if defined(ICONV_SET_TRANSLITERATE) || defined(ICONV_SET_DISCARD_ILSEQ)
        // GNU libiconv extensions; absent from other iconv implementations.
        int arg = 1;
#endif
#ifdef ICONV_SET_TRANSLITERATE
        iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg);
#endif
#ifdef ICONV_SET_DISCARD_ILSEQ
        iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg);
#endif

        // Convert the terminator too, so the UTF-16 image ends with U+0000.
        const size_t fromLen = strlen(fromStr) + 1;

        // Every input byte yields at most one UTF-16 code unit (2 bytes), and
        // every code unit yields at most one 6-character escape.
        if (fromLen <= ((size_t)-1 - 1) / UNI_ESC_SIZE) {
            const size_t utf16Size = fromLen * 2;
            utf16 = (char*)calloc(utf16Size, sizeof(char));
            escBuff = (char*)calloc(fromLen * UNI_ESC_SIZE + 1, sizeof(char));

            if (utf16 != NULL && escBuff != NULL) {
                // iconv() advances the pointers and counters it is given, so
                // work on copies.
                const char* inPtr = fromStr;
                size_t inLeft = fromLen;
                char* outPtr = utf16;
                size_t outLeft = utf16Size;
                IconvInPtr inArg = { &inPtr };

                // Best effort: if the conversion stops early, whatever was
                // converted up to that point is still escaped below.
                (void)iconv(cd, inArg, &inLeft, &outPtr, &outLeft);

                const size_t converted = utf16Size - outLeft;
                for (size_t i = 0; i + 1 < converted; i += 2) {
                    const unsigned char hi = (unsigned char)utf16[i];
                    const unsigned char lo = (unsigned char)utf16[i + 1];
                    if (hi == 0 && lo == 0) {
                        break;                        // converted terminator
                    }
                    if (hi != 0 || lo >= 0x80) {
                        // Non-ASCII code unit (U+0080 and above): \uXXXX.
                        escBuff[n++] = '\\';
                        escBuff[n++] = 'u';
                        escBuff[n++] = HEX[hi >> 4];
                        escBuff[n++] = HEX[hi & 0x0F];
                        escBuff[n++] = HEX[lo >> 4];
                        escBuff[n++] = HEX[lo & 0x0F];
                    } else {
                        // 7-bit ASCII is copied through unchanged.
                        escBuff[n++] = (char)lo;
                    }
                }
                escBuff[n] = '\0';
            }
        }
        iconv_close(cd);
    }

    if (toStr != NULL && maxSize > 0) {
        // strncpy() does not terminate on truncation, so do it explicitly.
        strncpy(toStr, (escBuff != NULL) ? escBuff : "", maxSize);
        toStr[maxSize - 1] = '\0';
    }

    free(utf16);
    free(escBuff);
    return n;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Web; | |
using System.Text; | |
namespace CIS_StdWebServices.Utils
{
    /// <summary>
    /// Helpers for producing ASCII-only text suitable for embedding in JSON.
    /// </summary>
    public class JsonUtilities
    {
        // Big-endian UTF-16: for every code unit the high byte comes first.
        private static readonly Encoding unicode = Encoding.BigEndianUnicode;

        // Highest code unit that is copied through without escaping.
        private const int MaxAscii = 0x7F;

        /// <summary>
        /// Replaces every non-ASCII UTF-16 code unit of <paramref name="uniStr"/>
        /// with a <c>\uXXXX</c> escape (upper-case hex). ASCII characters are
        /// copied unchanged; characters outside the BMP become two escapes,
        /// one per surrogate.
        /// </summary>
        /// <param name="uniStr">The text to escape.</param>
        /// <returns>The escaped, ASCII-only text.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="uniStr"/> is <c>null</c>.
        /// </exception>
        public static string escapeUnicode(string uniStr)
        {
            if (uniStr == null)
            {
                throw new ArgumentNullException("uniStr");
            }

            byte[] unicodeBytes = unicode.GetBytes(uniStr);
            StringBuilder sb = new StringBuilder(uniStr.Length);

            for (int i = 0; i + 1 < unicodeBytes.Length; i += 2)
            {
                // Reassemble the code unit from its big-endian byte pair.
                int codeUnit = (unicodeBytes[i] << 8) | unicodeBytes[i + 1];

                if (codeUnit > MaxAscii)
                {
                    // Covers U+0080..U+00FF as well, whose high byte is zero.
                    sb.Append("\\u").Append(codeUnit.ToString("X4"));
                }
                else
                {
                    sb.Append((char)codeUnit);
                }
            }

            return sb.ToString();
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment