Skip to content

Instantly share code, notes, and snippets.

@Chen-tao
Last active January 5, 2017 07:34
Show Gist options
  • Save Chen-tao/098ee6daefa41adb6481a183c6ee32d9 to your computer and use it in GitHub Desktop.
Save Chen-tao/098ee6daefa41adb6481a183c6ee32d9 to your computer and use it in GitHub Desktop.
/**
 * Reports whether a single UTF-16 code unit belongs to the set of characters this validator
 * accepts.
 *
 * <p>Despite the name, the accepted set is much wider than Chinese text. A code unit is accepted
 * when any of the following holds:
 * <ul>
 *   <li>its value is in the range 0..255;</li>
 *   <li>its Unicode block is one of the blocks listed in {@code isAcceptedBlock} (CJK ideographs
 *       and punctuation, full-width forms, plus many symbol, Greek, Latin-extended and Arabic
 *       blocks, private use and surrogates);</li>
 *   <li>its value is one of the individually listed code units in {@code isAcceptedCodeUnit}.</li>
 * </ul>
 *
 * @param c the UTF-16 code unit to classify
 * @return {@code true} if {@code c} is accepted, {@code false} otherwise
 */
private static boolean isChinese(char c) {
    // A char is unsigned, so only the upper bound of the 0..255 range needs checking.
    if (c <= 0xFF) {
        return true;
    }
    return isAcceptedBlock(Character.UnicodeBlock.of(c)) || isAcceptedCodeUnit(c);
}

/**
 * Tells whether a Unicode block is on the accept list.
 *
 * <p>Only Basic Multilingual Plane blocks are listed. Supplementary-plane blocks (for example
 * CJK Extension B or Musical Symbols) are intentionally absent because
 * {@code Character.UnicodeBlock.of(char)} can never return them; such characters reach the
 * caller as surrogate halves, which are matched through HIGH_SURROGATES / LOW_SURROGATES.
 * LATIN_1_SUPPLEMENT is absent because the caller already accepts every value up to 255.
 *
 * @param ub the block to test; may be {@code null} for a code unit outside every block
 * @return {@code true} if the block is accepted
 */
private static boolean isAcceptedBlock(Character.UnicodeBlock ub) {
    // Identity comparison is sufficient: UnicodeBlock constants are singletons, and a null
    // argument simply matches nothing.
    return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
            || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
            || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
            || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
            // ------ Special character sets: mathematics, science, engineering, optics, etc.
            || ub == Character.UnicodeBlock.GREEK
            || ub == Character.UnicodeBlock.LATIN_EXTENDED_A
            || ub == Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
            || ub == Character.UnicodeBlock.LATIN_EXTENDED_B
            || ub == Character.UnicodeBlock.SMALL_FORM_VARIANTS
            || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS
            || ub == Character.UnicodeBlock.LETTERLIKE_SYMBOLS
            || ub == Character.UnicodeBlock.ARABIC
            || ub == Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_A
            || ub == Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_B
            || ub == Character.UnicodeBlock.ENCLOSED_ALPHANUMERICS
            || ub == Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS
            || ub == Character.UnicodeBlock.CJK_COMPATIBILITY
            || ub == Character.UnicodeBlock.CONTROL_PICTURES
            || ub == Character.UnicodeBlock.MISCELLANEOUS_TECHNICAL
            || ub == Character.UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION
            || ub == Character.UnicodeBlock.COMBINING_HALF_MARKS
            || ub == Character.UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS
            || ub == Character.UnicodeBlock.COMBINING_DIACRITICAL_MARKS
            || ub == Character.UnicodeBlock.NUMBER_FORMS
            || ub == Character.UnicodeBlock.SINHALA
            || ub == Character.UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS
            || ub == Character.UnicodeBlock.ARROWS
            || ub == Character.UnicodeBlock.SUPPLEMENTAL_ARROWS_A
            || ub == Character.UnicodeBlock.SUPPLEMENTAL_ARROWS_B
            || ub == Character.UnicodeBlock.MATHEMATICAL_OPERATORS
            || ub == Character.UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS
            || ub == Character.UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
            || ub == Character.UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
            || ub == Character.UnicodeBlock.GEOMETRIC_SHAPES
            || ub == Character.UnicodeBlock.BOX_DRAWING
            || ub == Character.UnicodeBlock.BLOCK_ELEMENTS
            || ub == Character.UnicodeBlock.BRAILLE_PATTERNS
            || ub == Character.UnicodeBlock.CURRENCY_SYMBOLS
            || ub == Character.UnicodeBlock.DINGBATS
            || ub == Character.UnicodeBlock.MISCELLANEOUS_SYMBOLS
            || ub == Character.UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS
            || ub == Character.UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS
            || ub == Character.UnicodeBlock.SPECIALS
            || ub == Character.UnicodeBlock.VARIATION_SELECTORS
            || ub == Character.UnicodeBlock.PRIVATE_USE_AREA
            || ub == Character.UnicodeBlock.HIGH_SURROGATES
            || ub == Character.UnicodeBlock.LOW_SURROGATES
            || ub == Character.UnicodeBlock.SPACING_MODIFIER_LETTERS;
}

/**
 * Tells whether a code unit is one of the individually accepted values that fall outside the
 * accepted blocks' list.
 *
 * <p>Values are decimal UTF-16 code units in ascending order, each listed once.
 *
 * @param c the code unit to test
 * @return {@code true} if {@code c} is on the list
 */
private static boolean isAcceptedCodeUnit(char c) {
    switch (c) {
        case 652: case 661: case 664: case 666:
        case 1054: case 1076: case 1079: case 1086:
        case 1160: case 1161: case 1285: case 1342: case 1374: case 1426:
        case 2370: case 2403:
        case 3056: case 3232: case 3237: case 3567: case 3589: case 3591:
        case 3633: case 3636: case 3637: case 3642: case 3655: case 3662:
        case 3665: case 3667: case 3677: case 3766:
        case 3863: case 3874: case 3875: case 3899: case 3902:
        case 3954: case 3964: case 3970: case 3977:
        case 4041: case 4314:
        case 5051: case 5147: case 5596: case 5649:
        case 6094:
        case 7500: case 7506: case 7618: case 7620:
        case 8034:
        case 12105:
        case 12443: case 12444: case 12471: case 12472: case 12494:
        case 12502: case 12504: case 12505: case 12506: case 12511:
        case 12539: case 12540: case 12541: case 12542:
        case 12615:
        case 41370: case 41460:
        case 42154: case 42155: case 42163:
        case 50724:
            return true;
        default:
            return false;
    }
}
/**
 * Checks a whole string, one UTF-16 code unit at a time, against {@code isChinese(char)}
 * (which accepts Chinese characters and symbols as well as English/Latin text).
 *
 * <p>The method never returns {@code false}: it either returns {@code true} after every code
 * unit has been accepted, or throws on the first rejected one. An empty string yields
 * {@code true}.
 *
 * <p>NOTE(review): the {@code (int, String)} constructor used below is not one of
 * {@code java.lang.Exception}'s, and the method has no {@code throws} clause, so
 * {@code Exception} presumably resolves to a project-defined unchecked type — confirm.
 *
 * @param strName the text to validate
 * @return {@code true} when every code unit of {@code strName} is accepted
 */
public static boolean isChinese(String strName) {
    for (char unit : strName.toCharArray()) {
        if (isChinese(unit)) {
            continue;
        }
        // First rejected code unit: report the full text, the unit, its numeric value and block.
        String detail = "text:" + strName + "noChinese Char:" + unit
                + " int:" + (int) unit
                + " Unicode:" + Character.UnicodeBlock.of(unit);
        throw new Exception(10000190, detail);
    }
    return true;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment