Last active
September 29, 2019 10:43
-
-
Save willard1218/620711ec97083504fd1e941c8701121b to your computer and use it in GitHub Desktop.
Using iconv to convert charset.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
//  main.cpp
//  UnicodeEncoding
//
//  Created by willard on 2019/9/25.
//  Copyright 2019 willard. All rights reserved.
//
#include <iconv.h>

#include <climits>
#include <clocale>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cwchar>
#include <iostream>
#include <string>
/**
 * Converts a NUL-terminated string from one character set to another with iconv.
 *
 * @param strCharsetOfFromEncoding  iconv name of the encoding strSrc is in.
 * @param strCharsetOfToEncoding    iconv name of the encoding to produce.
 * @param strSrc                    NUL-terminated input. It is not modified here;
 *                                  it is non-const only because iconv() takes char**.
 * @param outputLen                 Receives the number of bytes in the returned
 *                                  buffer. Left untouched on failure.
 * @return A malloc()-allocated buffer holding exactly *outputLen converted bytes,
 *         or NULL on failure (the reason is reported with perror where errno is
 *         meaningful). The buffer is NOT NUL-terminated and may legitimately
 *         contain zero bytes (e.g. UCS-2 output). The caller must free() it.
 */
char *convertCharset(const char *strCharsetOfFromEncoding,
                     const char *strCharsetOfToEncoding,
                     char *strSrc,
                     size_t *outputLen) {
    if (strSrc == NULL || outputLen == NULL) {
        return NULL;
    }

    // Note the argument order: iconv_open takes (to, from).
    iconv_t cd = iconv_open(strCharsetOfToEncoding, strCharsetOfFromEncoding);
    if (cd == (iconv_t)-1) {
        std::perror("iconv_open");
        return NULL;
    }

    size_t srcBytesLeft = std::strlen(strSrc);
    // Four output bytes per input byte, plus a little slack so that an empty
    // input never produces a zero-sized allocation.
    const size_t dstCapacity = srcBytesLeft * 4 + 4;
    char *strDst = static_cast<char *>(std::malloc(dstCapacity));
    if (strDst == NULL) {
        std::perror("malloc");
        iconv_close(cd);
        return NULL;
    }

    char *srcCursor = strSrc;
    char *dstCursor = strDst;
    size_t dstBytesLeft = dstCapacity;
    const size_t ret = iconv(cd, &srcCursor, &srcBytesLeft, &dstCursor, &dstBytesLeft);
    if (ret == static_cast<size_t>(-1)) {
        // Report first: the cleanup calls below may overwrite errno.
        std::perror("iconv error");
        std::free(strDst);
        iconv_close(cd);
        return NULL;
    }
    iconv_close(cd);

    // iconv advanced dstCursor past every byte it wrote.
    const size_t convertedLen = static_cast<size_t>(dstCursor - strDst);
    char *strDstNew = static_cast<char *>(std::malloc(convertedLen > 0 ? convertedLen : 1));
    if (strDstNew == NULL) {
        std::perror("malloc");
        std::free(strDst);
        return NULL;
    }
    // memcpy, not strncpy: the converted data is binary and a zero byte inside
    // it must not terminate the copy.
    std::memcpy(strDstNew, strDst, convertedLen);
    std::free(strDst);

    *outputLen = convertedLen;
    return strDstNew;
}
/**
 * Builds a wide string from a buffer of 2-byte units, treating the first byte
 * of each pair as the high byte and the second as the low byte.
 *
 * @param str  Buffer holding the byte pairs; may be NULL only when len is 0.
 * @param len  Number of bytes in str. A trailing unpaired byte is ignored.
 * @return A malloc()-family allocated, NUL-terminated array of len / 2 wide
 *         characters, or NULL if allocation fails. The caller must free() it.
 */
wchar_t *charToWChar(char *str, size_t len) {
    const size_t unitCount = (str != NULL) ? len / 2 : 0;

    // One extra zero-initialized slot acts as the terminator, so the result can
    // be handed to functions that expect a wide C string.
    wchar_t *result = static_cast<wchar_t *>(std::calloc(unitCount + 1, sizeof(wchar_t)));
    if (result == NULL) {
        return NULL;
    }

    for (size_t unit = 0; unit < unitCount; ++unit) {
        // Mask with 0xff so a negative char does not sign-extend into the value.
        const unsigned int highByte = (str[2 * unit] & 0xffu) << 8;
        const unsigned int lowByte = str[2 * unit + 1] & 0xffu;
        result[unit] = static_cast<wchar_t>(highByte | lowByte);
    }
    return result;
}
/**
 * Prints a buffer to stdout as rows of 16 two-digit hex bytes, followed by a
 * short size summary.
 *
 * Each row is prefixed with its row index (not the byte offset) in hex.
 *
 * @param arr   Bytes to dump.
 * @param size  Number of bytes in arr; values <= 0 print no data rows.
 */
void printHexdump(char *arr, int size) {
    const int bytesOfType = static_cast<int>(sizeof(char));
    const int numOfItemInRow = 0x10 / bytesOfType;

    std::printf("=================================\n");

    // Round up so a partial last row is printed, but do not emit an extra empty
    // row when size is an exact multiple of the row width.
    int rowCount = 0;
    if (arr != NULL && size > 0) {
        rowCount = size / numOfItemInRow + (size % numOfItemInRow != 0 ? 1 : 0);
    }

    int count = 0;
    for (int row = 0; row < rowCount; row++) {
        std::printf("%06x ", static_cast<unsigned int>(row));
        for (int col = 0; col < numOfItemInRow && count < size; col++) {
            // Mask to one byte so negative chars do not print as ffffffXX.
            std::printf("%02x ", static_cast<unsigned int>(arr[count++]) & 0xffu);
        }
        std::printf("\n");
    }

    std::printf("\n\n\n");
    std::printf("array size : %d\n", size);
    std::printf("total %d bytes\n", size * bytesOfType);
}
#include <unistd.h> | |
#include<iostream> | |
#include<fstream> | |
using namespace std; | |
void convertUTF8ToUTF16(const char *srcFilePath, const char *dstFilePath) { | |
ofstream out(dstFilePath); | |
fstream fin; | |
fin.open(srcFilePath,ios::in); | |
char line[1024] = {0}; | |
while(fin.getline(line,sizeof(line))){ | |
line[strlen(line)] = '\n'; | |
size_t outputLength = 0; | |
char *dst = convertCharset("utf8", "ucs-2", line, &outputLength); | |
out.write(dst, outputLength); | |
free(dst); | |
memset(line, 0, sizeof(line)); | |
} | |
out.close(); | |
fin.close(); | |
} | |
int main(int argc, const char * argv[]) { | |
int n = 10; | |
int arr[n]; | |
memset(arr,1,n*sizeof(int)); | |
int k = arr[1]; | |
int x =0 ; | |
typeof(x) _min1 = (x); | |
char cwd[PATH_MAX] = {0}; | |
if (getcwd(cwd, sizeof(cwd)) != NULL) { | |
printf("Current working dir: %s\n", cwd); | |
} | |
convertUTF8ToUTF16("test", "output2.txt"); | |
char str[] = "中文123|||"; | |
size_t length = 0; | |
char *dst = convertCharset("utf8", "ucs-2", str, &length); | |
printHexdump(dst, length); | |
wchar_t *wchar = charToWChar(dst, length); | |
// printf("%s\n", str); | |
wprintf(L"%ls\n", wchar); | |
std::cout << "Hello, World!\n"; | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment