Created
June 8, 2017 13:09
-
-
Save gavi/0b5e130f203fb64cd81207394d0b36ee to your computer and use it in GitHub Desktop.
NSLinguisticTagger updates in Foundation for Swift 4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
// Sample input mixing English, Japanese and Cyrillic text so that the
// per-word language scheme has several scripts to report on.
let str = """
This is some text that needs to be processed. I do not know how fast this runs?
日本,
Лорем ипсум долор сит амет, перпетуа урбанитас ин про, проприае цонсететур ид сит
"""

// One tagger configured for every scheme queried further down in the script.
let tagger = NSLinguisticTagger(tagSchemes: [.lemma, .language, .lexicalClass], options: 0)
tagger.string = str

// NSRange counts UTF-16 code units, so measure the string through its utf16 view.
let range = NSRange(location: 0, length: str.utf16.count)

// `dominantLanguage` is optional; fall back to the BCP-47 "undetermined" tag
// rather than force-unwrapping and crashing the script.
print(tagger.dominantLanguage ?? "und")
/// Prints every word of the shared `str` (lowercased) followed by the tag the
/// shared `tagger` assigns to it under `scheme`.
///
/// Punctuation and whitespace tokens are skipped. When the tagger yields no tag
/// for a word, only the `word:` line is printed.
///
/// - Parameter scheme: The tag scheme to query for each word.
func enumerate(scheme: NSLinguisticTagScheme) {
    // Token ranges are NSRange values, so slice through the NSString bridge.
    let source = str as NSString
    let skipped: NSLinguisticTagger.Options = [.omitPunctuation, .omitWhitespace]

    tagger.enumerateTags(in: range, unit: .word, scheme: scheme, options: skipped) { label, span, _ in
        let word = source.substring(with: span)
        print("word:\(word.lowercased())")

        guard let label = label else { return }
        print("\(scheme.rawValue):\(label.rawValue.lowercased())")
    }
}
// Dump the tags for each scheme in turn: lexical class, then lemma, then language.
for scheme in [.lexicalClass, .lemma, .language] as [NSLinguisticTagScheme] {
    enumerate(scheme: scheme)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment