Created
July 7, 2012 16:27
-
-
Save flying19880517/3067078 to your computer and use it in GitHub Desktop.
检查文件编码是否是utf-8
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Heuristically decides whether the data about to be read from `file` is
// UTF-8 encoded text.
//
// Up to 1024 bytes are inspected with peek(), so nothing is consumed from the
// device. The sample is validated against the well-formed UTF-8 byte
// sequences (Unicode Standard, Table 3-7 / RFC 3629):
//
//   00..7F                                   single byte (ASCII)
//   C2..DF  80..BF                           two bytes
//   E0      A0..BF  80..BF                   three bytes (no overlong forms)
//   E1..EC  80..BF  80..BF
//   ED      80..9F  80..BF                   (no UTF-16 surrogates)
//   EE..EF  80..BF  80..BF
//   F0      90..BF  80..BF  80..BF           four bytes (no overlong forms)
//   F1..F3  80..BF  80..BF  80..BF
//   F4      80..8F  80..BF  80..BF           (nothing above U+10FFFF)
//
// Returns true only when the sample is well-formed UTF-8 AND contains at least
// one multi-byte character. A sample made purely of ASCII bytes returns false
// on purpose: such input is treated as being in the default encoding. An empty
// sample or a failed peek() also returns false.
bool MainWindow::isUtf8File(QIODevice *file)
{
    const int testSize = 1024;
    char str[testSize];

    // peek() yields -1 on error and 0 when no data is available; the value
    // never exceeds testSize, so narrowing to int is safe.
    const int size = static_cast<int>(file->peek(str, testSize));
    if (size <= 0)
        return false;

    int pendingContinuations = 0;       // continuation bytes still expected
    unsigned char nextLow = 0x80;       // allowed range of the next
    unsigned char nextHigh = 0xBF;      // continuation byte
    bool sawMultiByteChar = false;

    for (int i = 0; i < size; ++i) {
        // Work on unsigned values: plain char may be signed, which makes
        // range comparisons and bit tests on bytes >= 0x80 error-prone.
        const unsigned char current = static_cast<unsigned char>(str[i]);

        if (pendingContinuations == 0) {
            // Lead byte of a new character.
            if (current < 0x80)
                continue;               // ASCII, 0x00-0x7F

            sawMultiByteChar = true;
            if (current >= 0xC2 && current <= 0xDF) {
                pendingContinuations = 1;
            } else if (current >= 0xE0 && current <= 0xEF) {
                pendingContinuations = 2;
                if (current == 0xE0)
                    nextLow = 0xA0;     // reject overlong 3-byte forms
                else if (current == 0xED)
                    nextHigh = 0x9F;    // reject surrogates U+D800-U+DFFF
            } else if (current >= 0xF0 && current <= 0xF4) {
                pendingContinuations = 3;
                if (current == 0xF0)
                    nextLow = 0x90;     // reject overlong 4-byte forms
                else if (current == 0xF4)
                    nextHigh = 0x8F;    // reject code points above U+10FFFF
            } else {
                // Stray continuation byte (80..BF), overlong lead (C0, C1)
                // or a lead byte that UTF-8 never uses (F5..FF).
                return false;
            }
        } else {
            // Continuation byte: 10xxxxxx, further restricted right after
            // the lead bytes E0, ED, F0 and F4.
            if (current < nextLow || current > nextHigh)
                return false;
            nextLow = 0x80;
            nextHigh = 0xBF;
            --pendingContinuations;
        }
    }

    // A sequence cut off by the end of a full buffer is expected, because the
    // sample boundary can fall inside a character. When fewer than testSize
    // bytes came back, the sample is presumably the whole remaining content,
    // so a dangling lead byte means the data is not UTF-8.
    // NOTE(review): this assumes a short peek() means end of data, which holds
    // for regular files; confirm if sequential devices are ever passed in.
    if (pendingContinuations != 0 && size < testSize)
        return false;

    // Although UTF-8 can encode ASCII, input consisting only of ASCII bytes is
    // regarded as being in the default encoding.
    return sawMultiByteChar;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment