Last active
August 3, 2021 11:36
-
-
Save FWEugene/a5624840d6f77dbad8f05ef21de9b3a5 to your computer and use it in GitHub Desktop.
Detecting Language
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// The result is not guaranteed to be accurate. Typically, the function needs 200-400 characters to reliably guess the language of a string.
// Reference: [CFStringTokenizerCopyBestStringLanguage(_:_:)](https://developer.apple.com/reference/corefoundation/1542136-cfstringtokenizercopybeststringl)
//
import Foundation | |
extension String {
    /// Guesses the dominant language of this string.
    ///
    /// Asks `CFStringTokenizerCopyBestStringLanguage` for a language code
    /// covering the whole string, then converts that code into a display
    /// name using a `Locale` built from the same code (so the name is
    /// rendered in the detected language's own locale).
    ///
    /// - Returns: The localized language name, or `"Unknown"` when no
    ///   language could be detected or the code has no display name.
    func guessLanguage() -> String {
        // The range length is taken from the UTF-16 view so that it covers
        // the entire bridged CFString.
        let fullRange = CFRange(location: 0, length: utf16.count)

        // Bail out explicitly when detection fails instead of building a
        // Locale from an empty identifier.
        guard let languageCode = CFStringTokenizerCopyBestStringLanguage(self as CFString, fullRange) as String?,
              !languageCode.isEmpty else {
            return "Unknown"
        }

        let locale = Locale(identifier: languageCode)
        return locale.localizedString(forLanguageCode: languageCode) ?? "Unknown"
    }
}
// Demo: print each sample name followed by the language guessed for it.
// The samples are only a few characters long, so the guesses may be unreliable.
let names = ["明日香", "碇真嗣", "綾波零", "拓跋玉兒", "陳靖仇", "于小雪"]
for name in names {
    print("\(name) - \(name.guessLanguage())")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment