Skip to content

Instantly share code, notes, and snippets.

@otakustay
Created September 18, 2017 06:55
Show Gist options
  • Select an option

  • Save otakustay/bc56fb0a144987b1a7829c26f707a8e0 to your computer and use it in GitHub Desktop.

Select an option

Save otakustay/bc56fb0a144987b1a7829c26f707a8e0 to your computer and use it in GitHub Desktop.
isUTF8.java
/**
 * Small demo that checks whether a sequence of byte values is well-formed UTF-8
 * according to the grammar the inline comments quote (UTF8-1 .. UTF8-4).
 */
public class App {
    /**
     * Tells whether {@code buf} is a well-formed UTF-8 byte sequence.
     *
     * <p>Each element is treated as one unsigned byte value. Overlong encodings
     * (lead bytes {@code 0xC0}/{@code 0xC1}, {@code E0 80-9F}, {@code F0 80-8F}),
     * the surrogate range ({@code ED A0-BF}) and anything above {@code F4 8F}
     * are rejected. A sequence cut short by the end of the buffer and any
     * element outside {@code 0x00-0xFF} (including negative values) also yield
     * {@code false}.
     *
     * @param buf byte values to validate, one per element; must not be {@code null}
     * @return {@code true} if the whole buffer is valid UTF-8 (an empty buffer is valid),
     *         {@code false} otherwise
     * @throws NullPointerException if {@code buf} is {@code null}
     */
    static boolean isUTF8(int[] buf) {
        int len = buf.length;
        int i = 0;
        while (i < len) {
            int lead = buf[i];

            // UTF8-1 = %x00-7F
            // The lower bound matters: a negative int is not a byte value.
            if (lead >= 0x00 && lead <= 0x7F) {
                i++;
                continue;
            }

            // Classify the lead byte: total sequence size plus the range allowed
            // for the SECOND byte, which is narrower than a plain tail for some leads.
            int size;
            int secondMin = 0x80;
            int secondMax = 0xBF;
            if (lead >= 0xC2 && lead <= 0xDF) {
                // UTF8-2 = %xC2-DF UTF8-tail
                size = 2;
            } else if (lead >= 0xE0 && lead <= 0xEF) {
                // UTF8-3 = %xE0 %xA0-BF UTF8-tail
                //        / %xE1-EC 2( UTF8-tail )
                //        / %xED %x80-9F UTF8-tail
                //        / %xEE-EF 2( UTF8-tail )
                size = 3;
                if (lead == 0xE0) {
                    secondMin = 0xA0; // excludes overlong encodings
                } else if (lead == 0xED) {
                    secondMax = 0x9F; // excludes the surrogate range
                }
            } else if (lead >= 0xF0 && lead <= 0xF4) {
                // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail )
                //        / %xF1-F3 3( UTF8-tail )
                //        / %xF4 %x80-8F 2( UTF8-tail )
                size = 4;
                if (lead == 0xF0) {
                    secondMin = 0x90; // excludes overlong encodings
                } else if (lead == 0xF4) {
                    secondMax = 0x8F; // nothing above F4 8F BF BF is allowed
                }
            } else {
                // Stray tail byte, 0xC0/0xC1, 0xF5 and above, or out-of-range value.
                return false;
            }

            // The buffer ends in the middle of this sequence: invalid, not an exception.
            if (len - i < size) {
                return false;
            }

            int second = buf[i + 1];
            if (second < secondMin || second > secondMax) {
                return false;
            }
            for (int k = i + 2; k < i + size; k++) {
                if (!isTail(buf[k])) {
                    return false;
                }
            }
            i += size;
        }
        return true;
    }

    /** Returns whether {@code value} is a UTF-8 continuation byte (%x80-BF). */
    private static boolean isTail(int value) {
        return value >= 0x80 && value <= 0xBF;
    }

    /**
     * Runs the validator on a fixed sample and prints the result.
     * The sample is not valid UTF-8 (0xCE is followed by 0xD2, which is not a
     * tail byte), so this prints {@code false}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        int[] bytes = {0x20, 0xce, 0xd2, 0xca, 0xc7, 0xcb, 0xad, 0x0a};
        boolean result = App.isUTF8(bytes);
        System.out.println(result);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment