Last active
April 5, 2018 09:09
-
-
Save VictorZhang2014/ff1d5d406b672aa1fe0cad55ee9de7eb to your computer and use it in GitHub Desktop.
PinyinUtil for finding the position of a diacritic, removing diacritics from a pinyin string, or transforming Mandarin to pinyin on the iOS platform, written in Objective-C
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Demo driver: logs the result of each helper for a few sample inputs.
 * The expected values quoted in the comments below are the ones noted by the
 * original author.
 */
+ (void)test {
    // Tone detection. Expected tone numbers, in order: 1, 2, 3, 4.
    NSArray<NSString *> *toneSamples = @[ @"zhuā", @"tóng", @"biě", @"yì" ];
    for (NSString *syllable in toneSamples) {
        int tone = [self detectDiacriticsPosition:syllable];
        NSLog(@"%@%d", syllable, tone);
    }

    // Chinese characters -> pinyin with tone marks. Expected, in order:
    //   中国=zhōng guó, 天安门=tiān ān mén, 紫禁城=zǐ jìn chéng
    NSArray<NSString *> *phrases = @[ @"中国", @"天安门", @"紫禁城" ];
    NSMutableArray<NSString *> *pinyinResults =
        [NSMutableArray arrayWithCapacity:phrases.count];
    for (NSString *phrase in phrases) {
        NSString *pinyin = [self transferMandarinToPinyin:phrase];
        NSLog(@"%@=%@", phrase, pinyin);
        // Kept so the diacritic-stripping pass below runs after all three
        // conversions have been logged.
        [pinyinResults addObject:pinyin];
    }

    // Pinyin with tone marks -> plain ASCII pinyin. Expected, in order:
    //   zhong guo, tian an men, zi jin cheng
    for (NSString *pinyin in pinyinResults) {
        NSLog(@"%@", [self stripPinyinDiacritics:pinyin]);
    }
}
/**
 * Converts Chinese characters to pinyin, including the tone marks (diacritics).
 * When a character has several readings, the reading chosen is whatever
 * kCFStringTransformMandarinLatin produces (the original author notes that the
 * ordering of readings is unknown).
 * Examples: 中国   -> zhōng guó
 *           天安门 -> tiān ān mén
 *           紫禁城 -> zǐ jìn chéng
 *
 * @param originalStr The text to transliterate.
 * @return A new transliterated string, or nil if originalStr is nil or the
 *         mutable copy could not be created.
 **/
+ (NSString *)transferMandarinToPinyin:(NSString *)originalStr {
    if (originalStr == nil) {
        return nil;
    }
    // Plain __bridge: Core Foundation only borrows the input here. A
    // __bridge_retained cast would add a +1 retain that nothing ever releases.
    CFMutableStringRef string =
        CFStringCreateMutableCopy(kCFAllocatorDefault, 0, (__bridge CFStringRef)originalStr);
    if (string == NULL) {
        return nil;
    }
    CFStringTransform(string, NULL, kCFStringTransformMandarinLatin, false);
    // The copy follows the Create rule (+1); hand that ownership to ARC.
    return (__bridge_transfer NSString *)string;
}
/**
 * Removes the tone marks (diacritics) from a pinyin string.
 * Examples: zhōng guó    -> zhong guo
 *           tiān ān mén  -> tian an men
 *           zǐ jìn chéng -> zi jin cheng
 *
 * @param originalStr The pinyin text whose diacritics should be stripped.
 * @return A new string without diacritics, or nil if originalStr is nil or the
 *         mutable copy could not be created.
 **/
+ (NSString *)stripPinyinDiacritics:(NSString *)originalStr {
    if (originalStr == nil) {
        return nil;
    }
    // Plain __bridge: Core Foundation only borrows the input here. A
    // __bridge_retained cast would add a +1 retain that nothing ever releases.
    CFMutableStringRef string =
        CFStringCreateMutableCopy(kCFAllocatorDefault, 0, (__bridge CFStringRef)originalStr);
    if (string == NULL) {
        return nil;
    }
    CFStringTransform(string, NULL, kCFStringTransformStripDiacritics, false);
    // The copy follows the Create rule (+1); hand that ownership to ARC.
    return (__bridge_transfer NSString *)string;
}
/**
 * Detects the tone number (1-4) of the first tone-marked vowel in a pinyin string.
 * Examples: sūn  -> 1 (ū)
 *           céng -> 2 (é)
 *           biě  -> 3 (ě)
 *           zhàn -> 4 (à)
 *           lǜ   -> 4 (ǜ)
 *
 * @param originalStr The pinyin text to scan; the first tone-marked vowel wins.
 * @return The tone number 1-4, or 0 when no tone-marked vowel is present
 *         (including nil or empty input).
 **/
+ (int)detectDiacriticsPosition:(NSString *)originalStr {
    // Full pinyin list: https://www.wenku1.com/news/40F4D52AD33EF075.html
    // Only vowels are looked up. Consonants (b, c, d, f, h, j, k, l, m, n, p, q,
    // r, s, t, v, w, x, y, z) are not in the table, so any tone-marked consonant
    // is not recognized.
    // Each row lists one vowel in tone order, so (index in row) + 1 is the tone.
    NSArray<NSArray<NSString *> *> *pinyinDiacritics = @[
        @[ @"ā", @"á", @"ǎ", @"à" ],
        @[ @"ē", @"é", @"ě", @"è" ],
        @[ @"ī", @"í", @"ǐ", @"ì" ],
        @[ @"ō", @"ó", @"ǒ", @"ò" ],
        @[ @"ū", @"ú", @"ǔ", @"ù" ],
        @[ @"ǖ", @"ǘ", @"ǚ", @"ǜ" ]
    ];

    // Normalize to NFC so a base vowel followed by a combining tone mark is
    // folded into the single precomposed character stored in the table.
    NSString *composed = originalStr.precomposedStringWithCanonicalMapping;
    NSUInteger length = composed.length;

    for (NSUInteger i = 0; i < length; i++) {
        NSString *letter = [composed substringWithRange:NSMakeRange(i, 1)];
        for (NSArray<NSString *> *tones in pinyinDiacritics) {
            NSUInteger toneIndex = [tones indexOfObject:letter];
            if (toneIndex != NSNotFound) {
                return (int)toneIndex + 1;
            }
        }
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment