Last active
May 4, 2024 11:22
-
-
Save mattt/36357b7ffe9370d6fc573088aed0d1f5 to your computer and use it in GitHub Desktop.
NaturalLanguage Framework - NLTagger Language Support Matrix
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import NaturalLanguage

// Prints a CSV support matrix to standard output: one row per language,
// one column per tag scheme, each cell "true" or "false" depending on
// whether NLTagger reports that scheme as available for the language.

/// Languages to probe, ordered by raw value so the rows are emitted in a
/// stable, alphabetical order of their identifiers.
let languages: [NLLanguage] = [
    .amharic, .arabic, .armenian, .bengali, .bulgarian,
    .burmese, .catalan, .cherokee, .croatian, .czech,
    .danish, .dutch, .english, .finnish, .french,
    .georgian, .german, .greek, .gujarati, .hebrew,
    .hindi, .hungarian, .icelandic, .indonesian, .italian,
    .japanese, .kannada, .khmer, .korean, .lao,
    .malay, .malayalam, .marathi, .mongolian, .norwegian,
    .oriya, .persian, .polish, .portuguese, .punjabi,
    .romanian, .russian, .simplifiedChinese, .sinhalese, .slovak,
    .spanish, .swedish, .tamil, .telugu, .thai,
    .tibetan, .traditionalChinese, .turkish, .ukrainian, .urdu,
    .vietnamese
].sorted(by: { lhs, rhs in lhs.rawValue < rhs.rawValue })

/// Tag schemes to report on; their order fixes the column order of the output.
let tagSchemes: [NLTagScheme] = [
    .language, .script, .tokenType, .lexicalClass, .nameType,
    .nameTypeOrLexicalClass, .lemma, .sentimentScore
]

// Header row: the language identifier column (labelled "bcp47") followed by
// the raw value of each tag scheme.
let columns = ["bcp47"] + tagSchemes.map { scheme in scheme.rawValue }
print(columns.joined(separator: ","))

for language in languages {
    let units: Set<NLTokenUnit> = [.document, .paragraph, .sentence, .word]

    // A scheme counts as supported when it is available for at least one
    // token unit, so gather the union across all units.
    var supportedSchemes = Set<NLTagScheme>()
    for unit in units {
        supportedSchemes.formUnion(NLTagger.availableTagSchemes(for: unit, language: language))
    }

    // Build the row: language identifier first, then one flag per scheme in
    // the same order as the header.
    var cells = [language.rawValue]
    for scheme in tagSchemes {
        cells.append(supportedSchemes.contains(scheme) ? "true" : "false")
    }
    print(cells.joined(separator: ","))
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| bcp47 | Language | Script | TokenType | LexicalClass | NameType | NameTypeOrLexicalClass | Lemma | Sentiment | |
|---|---|---|---|---|---|---|---|---|---|
| am | true | true | true | false | false | false | false | false | |
| ar | true | true | true | false | false | false | false | false | |
| bg | true | true | true | false | false | false | false | false | |
| bn | true | true | true | false | false | false | false | false | |
| bo | true | true | true | false | false | false | false | false | |
| ca | true | true | true | false | false | false | false | false | |
| chr | true | true | true | false | false | false | false | false | |
| cs | true | true | true | false | false | false | false | false | |
| da | true | true | true | false | false | false | false | false | |
| de | true | true | true | true | true | true | true | true | |
| el | true | true | true | false | false | false | false | false | |
| en | true | true | true | true | true | true | true | true | |
| es | true | true | true | true | true | true | true | true | |
| fa | true | true | true | false | false | false | false | false | |
| fi | true | true | true | false | false | false | false | false | |
| fr | true | true | true | true | true | true | true | true | |
| gu | true | true | true | false | false | false | false | false | |
| he | true | true | true | false | false | false | false | false | |
| hi | true | true | true | false | false | false | false | false | |
| hr | true | true | true | false | false | false | false | false | |
| hu | true | true | true | false | false | false | false | false | |
| hy | true | true | true | false | false | false | false | false | |
| id | true | true | true | false | false | false | false | false | |
| is | true | true | true | false | false | false | false | false | |
| it | true | true | true | true | true | true | true | true | |
| ja | true | true | true | false | false | false | false | false | |
| ka | true | true | true | false | false | false | false | false | |
| km | true | true | true | false | false | false | false | false | |
| kn | true | true | true | false | false | false | false | false | |
| ko | true | true | true | false | false | false | false | false | |
| lo | true | true | true | false | false | false | false | false | |
| ml | true | true | true | false | false | false | false | false | |
| mn-Mong | true | true | true | false | false | false | false | false | |
| mr | true | true | true | false | false | false | false | false | |
| ms | true | true | true | false | false | false | false | false | |
| my | true | true | true | false | false | false | false | false | |
| nb | true | true | true | false | false | false | false | false | |
| nl | true | true | true | false | false | false | false | false | |
| or | true | true | true | false | false | false | false | false | |
| pa-Guru | true | true | true | false | false | false | false | false | |
| pl | true | true | true | false | false | false | false | false | |
| pt | true | true | true | true | true | true | true | true | |
| ro | true | true | true | false | false | false | false | false | |
| ru | true | true | true | true | true | true | true | false | |
| si | true | true | true | false | false | false | false | false | |
| sk | true | true | true | false | false | false | false | false | |
| sv | true | true | true | false | false | false | false | false | |
| ta | true | true | true | false | false | false | false | false | |
| te | true | true | true | false | false | false | false | false | |
| th | true | true | true | false | false | false | false | false | |
| tr | true | true | true | true | true | true | true | false | |
| uk | true | true | true | false | false | false | false | false | |
| ur | true | true | true | false | false | false | false | false | |
| vi | true | true | true | false | false | false | false | false | |
| zh-Hans | true | true | true | false | false | false | false | false | |
| zh-Hant | true | true | true | false | false | false | false | false |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
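| bcp47 | Language | Script | TokenType | LexicalClass | NameType | NameTypeOrLexicalClass | Lemma | Sentiment | |
|---|---|---|---|---|---|---|---|---|---|
| am | true | true | true | false | false | false | false | false | |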
| ar | true | true | true | false | false | false | false | false | |
| bg | true | true | true | false | false | false | false | false | |
| bn | true | true | true | false | false | false | false | false | |
| bo | true | true | true | false | false | false | false | false | |
| ca | true | true | true | false | false | false | false | false | |
| chr | true | true | true | false | false | false | false | false | |
| cs | true | true | true | false | false | false | false | false | |
| da | true | true | true | false | false | false | false | false | |
| de | true | true | true | true | true | true | true | true | |
| el | true | true | true | false | false | false | false | false | |
| en | true | true | true | true | true | true | true | true | |
| es | true | true | true | true | true | true | true | true | |
| fa | true | true | true | false | false | false | false | false | |
| fi | true | true | true | false | false | false | false | false | |
| fr | true | true | true | true | true | true | true | true | |
| gu | true | true | true | false | false | false | false | false | |
| he | true | true | true | false | false | false | false | false | |
| hi | true | true | true | false | false | false | false | false | |
| hr | true | true | true | false | false | false | false | false | |
| hu | true | true | true | false | false | false | false | false | |
| hy | true | true | true | false | false | false | false | false | |
| id | true | true | true | false | false | false | false | false | |
| is | true | true | true | false | false | false | false | false | |
| it | true | true | true | true | true | true | true | true | |
| ja | true | true | true | false | false | false | false | false | |
| ka | true | true | true | false | false | false | false | false | |
| km | true | true | true | false | false | false | false | false | |
| kn | true | true | true | false | false | false | false | false | |
| ko | true | true | true | false | false | false | false | false | |
| lo | true | true | true | false | false | false | false | false | |
| ml | true | true | true | false | false | false | false | false | |
| mn-Mong | true | true | true | false | false | false | false | false | |
| mr | true | true | true | false | false | false | false | false | |
| ms | true | true | true | false | false | false | false | false | |
| my | true | true | true | false | false | false | false | false | |
| nb | true | true | true | false | false | false | false | false | |
| nl | true | true | true | false | false | false | false | false | |
| or | true | true | true | false | false | false | false | false | |
| pa-Guru | true | true | true | false | false | false | false | false | |
| pl | true | true | true | false | false | false | false | false | |
| pt | true | true | true | true | true | true | true | true | |
| ro | true | true | true | false | false | false | false | false | |
| ru | true | true | true | true | true | true | true | false | |
| si | true | true | true | false | false | false | false | false | |
| sk | true | true | true | false | false | false | false | false | |
| sv | true | true | true | false | false | false | false | false | |
| ta | true | true | true | false | false | false | false | false | |
| te | true | true | true | false | false | false | false | false | |
| th | true | true | true | false | false | false | false | false | |
| tr | true | true | true | true | true | true | true | false | |
| uk | true | true | true | false | false | false | false | false | |
| ur | true | true | true | false | false | false | false | false | |
| vi | true | true | true | false | false | false | false | false | |
| zh-Hans | true | true | true | false | false | false | false | false | |
| zh-Hant | true | true | true | false | false | false | false | false |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| bcp47 | Language | Script | TokenType | LexicalClass | NameType | NameTypeOrLexicalClass | Lemma | Sentiment | |
|---|---|---|---|---|---|---|---|---|---|
| am | true | true | true | false | false | false | false | false | |
| ar | true | true | true | false | false | false | false | false | |
| bg | true | true | true | false | false | false | false | false | |
| bn | true | true | true | false | false | false | false | false | |
| bo | true | true | true | false | false | false | false | false | |
| ca | true | true | true | false | false | false | false | false | |
| chr | true | true | true | false | false | false | false | false | |
| cs | true | true | true | false | false | false | false | false | |
| da | true | true | true | false | false | false | false | false | |
| de | true | true | true | false | false | false | false | false | |
| el | true | true | true | false | false | false | false | false | |
| en | true | true | true | true | true | true | false | false | |
| es | true | true | true | false | false | false | false | false | |
| fa | true | true | true | false | false | false | false | false | |
| fi | true | true | true | false | false | false | false | false | |
| fr | true | true | true | false | false | false | false | false | |
| gu | true | true | true | false | false | false | false | false | |
| he | true | true | true | false | false | false | false | false | |
| hi | true | true | true | false | false | false | false | false | |
| hr | true | true | true | false | false | false | false | false | |
| hu | true | true | true | false | false | false | false | false | |
| hy | true | true | true | false | false | false | false | false | |
| id | true | true | true | false | false | false | false | false | |
| is | true | true | true | false | false | false | false | false | |
| it | true | true | true | false | false | false | false | false | |
| ja | true | true | true | false | false | false | false | false | |
| ka | true | true | true | false | false | false | false | false | |
| km | true | true | true | false | false | false | false | false | |
| kn | true | true | true | false | false | false | false | false | |
| ko | true | true | true | false | false | false | false | false | |
| lo | true | true | true | false | false | false | false | false | |
| ml | true | true | true | false | false | false | false | false | |
| mn-Mong | true | true | true | false | false | false | false | false | |
| mr | true | true | true | false | false | false | false | false | |
| ms | true | true | true | false | false | false | false | false | |
| my | true | true | true | false | false | false | false | false | |
| nb | true | true | true | false | false | false | false | false | |
| nl | true | true | true | false | false | false | false | false | |
| or | true | true | true | false | false | false | false | false | |
| pa-Guru | true | true | true | false | false | false | false | false | |
| pl | true | true | true | false | false | false | false | false | |
| pt | true | true | true | false | false | false | false | false | |
| ro | true | true | true | false | false | false | false | false | |
| ru | true | true | true | false | false | false | false | false | |
| si | true | true | true | false | false | false | false | false | |
| sk | true | true | true | false | false | false | false | false | |
| sv | true | true | true | false | false | false | false | false | |
| ta | true | true | true | false | false | false | false | false | |
| te | true | true | true | false | false | false | false | false | |
| th | true | true | true | false | false | false | false | false | |
| tr | true | true | true | false | false | false | false | false | |
| uk | true | true | true | false | false | false | false | false | |
| ur | true | true | true | false | false | false | false | false | |
| vi | true | true | true | false | false | false | false | false | |
| zh-Hans | true | true | true | false | false | false | false | false | |
| zh-Hant | true | true | true | false | false | false | false | false |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment