Skip to content

Instantly share code, notes, and snippets.

@VictorZhang2014
Last active April 5, 2018 09:09
Show Gist options
  • Save VictorZhang2014/ff1d5d406b672aa1fe0cad55ee9de7eb to your computer and use it in GitHub Desktop.
Save VictorZhang2014/ff1d5d406b672aa1fe0cad55ee9de7eb to your computer and use it in GitHub Desktop.
PinyinUtil: finds the tone mark (diacritic) in a pinyin string, strips diacritics from a pinyin string, and converts Mandarin text to pinyin on iOS. Written in Objective-C.
/// Manual smoke test: logs the result of each utility method for a few sample inputs.
+ (void)test {
    // Tone detection. One sample syllable per tone; the original author's notes
    // say these should log tones 1, 2, 3 and 4 respectively.
    NSArray<NSString *> *syllables = @[ @"zhuā", @"tóng", @"biě", @"yì" ];
    for (NSString *syllable in syllables) {
        int tone = [self detectDiacriticsPosition:syllable];
        NSLog(@"%@%d", syllable, tone);
    }

    // Hanzi -> pinyin with tone marks. Expected log lines per the original notes:
    //   中国=zhōng guó
    //   天安门=tiān ān mén
    //   紫禁城=zǐ jìn chéng
    NSArray<NSString *> *phrases = @[ @"中国", @"天安门", @"紫禁城" ];
    NSMutableArray<NSString *> *romanized = [NSMutableArray arrayWithCapacity:phrases.count];
    for (NSString *phrase in phrases) {
        NSString *pinyin = [self transferMandarinToPinyin:phrase];
        NSLog(@"%@=%@", phrase, pinyin);
        [romanized addObject:pinyin];
    }

    // Tone marks removed. Expected log lines per the original notes:
    //   zhong guo
    //   tian an men
    //   zi jin cheng
    for (NSString *pinyin in romanized) {
        NSLog(@"%@", [self stripPinyinDiacritics:pinyin]);
    }
}
/**
 * Converts Chinese characters to pinyin with tone marks (diacritics).
 *
 * For characters with more than one reading, the original author notes that the
 * first reading is returned by default (the ordering is not known).
 *
 * Examples:
 *   中国   = zhōng guó
 *   天安门 = tiān ān mén
 *   紫禁城 = zǐ jìn chéng
 *
 * @param originalStr The text to convert; may be nil.
 * @return The transformed string, or nil when originalStr is nil.
 **/
+ (NSString *)transferMandarinToPinyin:(NSString *)originalStr {
    // CFStringTransform must not be handed a NULL string.
    if (originalStr == nil) {
        return nil;
    }

    // Work on an ARC-managed mutable copy. A plain __bridge cast hands it to
    // CoreFoundation without changing ownership, so nothing needs CFRelease and
    // the input string is not over-retained.
    NSMutableString *working = [originalStr mutableCopy];
    CFStringTransform((__bridge CFMutableStringRef)working, NULL, kCFStringTransformMandarinLatin, false);
    return [working copy];
}
/**
 * Removes the tone marks (diacritics) from a pinyin string.
 *
 * Examples:
 *   zhōng guó    = zhong guo
 *   tiān ān mén  = tian an men
 *   zǐ jìn chéng = zi jin cheng
 *
 * @param originalStr The string to strip; may be nil.
 * @return The string with diacritics removed, or nil when originalStr is nil.
 **/
+ (NSString *)stripPinyinDiacritics:(NSString *)originalStr {
    // CFStringTransform must not be handed a NULL string.
    if (originalStr == nil) {
        return nil;
    }

    // Work on an ARC-managed mutable copy. A plain __bridge cast hands it to
    // CoreFoundation without changing ownership, so nothing needs CFRelease and
    // the input string is not over-retained.
    NSMutableString *working = [originalStr mutableCopy];
    CFStringTransform((__bridge CFMutableStringRef)working, NULL, kCFStringTransformStripDiacritics, false);
    return [working copy];
}
/**
 * Detects the tone of the first tone-marked vowel in a pinyin string.
 *
 * The return value is the tone number, not a character index:
 *   sūn  -> 1 (for ū)
 *   céng -> 2 (for é)
 *   biě  -> 3 (for ě)
 *   zhàn -> 4 (for à)
 *
 * @param originalStr A pinyin string; may be nil or empty.
 * @return 1-4 for the first tone-marked vowel found scanning left to right,
 *         or 0 when the string contains none.
 **/
+ (int)detectDiacriticsPosition:(NSString *)originalStr {
    // Full pinyin list (original author's reference): https://www.wenku1.com/news/40F4D52AD33EF075.html
    // Per the original author's note, these letters never carry a tone mark:
    // b, c, d, f, h, j, k, l, m, n, p, q, r, s, t, v, w, x, y, z
    //
    // Rows are tones 1-4; columns are the vowels a, e, i, o, u, ü in precomposed form.
    enum { kToneCount = 4, kVowelCount = 6 };
    static const unichar kToneVowels[kToneCount][kVowelCount] = {
        { 0x0101, 0x0113, 0x012B, 0x014D, 0x016B, 0x01D6 }, // ā ē ī ō ū ǖ
        { 0x00E1, 0x00E9, 0x00ED, 0x00F3, 0x00FA, 0x01D8 }, // á é í ó ú ǘ
        { 0x01CE, 0x011B, 0x01D0, 0x01D2, 0x01D4, 0x01DA }, // ǎ ě ǐ ǒ ǔ ǚ
        { 0x00E0, 0x00E8, 0x00EC, 0x00F2, 0x00F9, 0x01DC }, // à è ì ò ù ǜ
    };

    // Normalize to NFC so a base vowel followed by a combining tone mark is
    // compared in the same precomposed form as the table entries.
    NSString *composed = originalStr.precomposedStringWithCanonicalMapping;
    NSUInteger length = composed.length; // 0 when originalStr is nil

    for (NSUInteger index = 0; index < length; index++) {
        unichar current = [composed characterAtIndex:index];
        for (int tone = 0; tone < kToneCount; tone++) {
            for (int vowel = 0; vowel < kVowelCount; vowel++) {
                if (kToneVowels[tone][vowel] == current) {
                    // The first tone-marked vowel decides the result.
                    return tone + 1;
                }
            }
        }
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment