uhooi · March 17, 2021 12:56
diff --git a/FoundationSample.swift b/FoundationSample.swift
 // ref: https://developer.apple.com/documentation/foundation/nslinguistictagger
 // ref: https://developer.apple.com/documentation/foundation/nslinguistictagger/1410036-enumeratetags
 // ref: https://dev.classmethod.jp/articles/ios10-morphological-analysis-from-speechrecognizer/

 import Foundation

 /// Tags `text` with the given scheme and prints every tagged token.
 ///
 /// Each token that receives a tag is written to standard output on its own line as
 /// `<token>: <tag raw value>`. Tokens for which the tagger reports no tag are skipped,
 /// and whitespace tokens are omitted from the enumeration.
 ///
 /// - Parameters:
 ///   - text: The string to analyze.
 ///   - scheme: The linguistic tag scheme to enumerate (for example `.tokenType`).
 private func analyzeText(_ text: String, scheme: NSLinguisticTagScheme) {
    let tagger = NSLinguisticTagger(tagSchemes: NSLinguisticTagger.availableTagSchemes(forLanguage: "ja"), options: 0)
    tagger.string = text

    // NSRange is measured in UTF-16 code units, while `String.count` counts Characters.
    // Building the range from the string's indices keeps the whole text covered even when it
    // contains emoji or other characters that occupy more than one UTF-16 unit.
    let fullRange = NSRange(text.startIndex..<text.endIndex, in: text)

    // Bridge once so the closure can slice with the NSRange values the tagger hands back.
    let nsText = text as NSString

    tagger.enumerateTags(in: fullRange, scheme: scheme, options: [.omitWhitespace]) { tag, tokenRange, _, _ in
        guard let tag = tag else { return }
        print("\(nsText.substring(with: tokenRange)): \(tag.rawValue)")
    }
 }

 // Run the sample sentence through both schemes: token types first (morphological analysis),
 // then lexical classes (part-of-speech tagging).
 // NOTE: per the original author's remark, `.lexicalClass` is not supported for Japanese.
 for scheme in [NSLinguisticTagScheme.tokenType, .lexicalClass] {
    analyzeText("私はウホーイです。あなたは誰ですか？", scheme: scheme)
 }
diff --git a/FoundationSampleResult.txt b/FoundationSampleResult.txt
 私: Word
 は: Word
 ウ: Word
 ホーイ: Word
 です: Word
 。: Punctuation
 あなた: Word
 は: Word
 誰: Word
 です: Word
 か: Word
 ？: Punctuation
diff --git a/MorphologicalAnalysisSample.swift b/MorphologicalAnalysisSample.swift
 import NaturalLanguage

 /// Splits `text` into word-level tokens and returns them in enumeration order.
 ///
 /// Punctuation and whitespace tokens are omitted. Every enumerated token is collected,
 /// whether or not the tagger assigned it a tag.
 ///
 /// - Parameters:
 ///   - text: The string to tokenize.
 ///   - scheme: The tag scheme handed to the tagger's enumeration.
 /// - Returns: The text of each enumerated token.
 private func analyzeText(_ text: String, scheme: NLTagScheme) -> [String] {
    let japaneseWordSchemes = NLTagger.availableTagSchemes(for: .word, language: .japanese)
    let tagger = NLTagger(tagSchemes: japaneseWordSchemes)
    tagger.string = text

    var tokens: [String] = []
    tagger.enumerateTags(
        in: text.startIndex..<text.endIndex,
        unit: .word,
        scheme: scheme,
        options: [.omitPunctuation, .omitWhitespace]
    ) { _, range in
        tokens.append(String(text[range]))
        // Returning true asks the tagger to continue with the next token.
        return true
    }
    return tokens
 }

 // Print the token list for the sample sentence (rendered as an array description).
 print(String(describing: analyzeText("私はウホーイです。あなたは誰ですか？", scheme: .tokenType)))
diff --git a/MorphologicalAnalysisSampleResult.txt b/MorphologicalAnalysisSampleResult.txt
 ["私", "は", "ウ", "ホーイ", "です", "あなた", "は", "誰", "です", "か"]
diff --git a/NaturalLanguageSample.swift b/NaturalLanguageSample.swift
 // ref: https://developer.apple.com/documentation/naturallanguage
 // ref: https://developer.apple.com/documentation/naturallanguage/identifying_parts_of_speech
 // ref: https://developer.apple.com/forums/thread/669890
 // ref: https://dev.classmethod.jp/articles/ios12-natural-language-tokenizing/
 // ref: https://dev.classmethod.jp/articles/ios12-natural-language-identifying-parts-of-speech/

 import NaturalLanguage

 /// Tags `text` word by word and prints every token that received a tag.
 ///
 /// Each tagged token is written to standard output on its own line as
 /// `<token>: <tag raw value>`. Whitespace is omitted; punctuation is kept.
 ///
 /// - Parameters:
 ///   - text: The string to analyze.
 ///   - scheme: The tag scheme to enumerate (for example `.tokenType`).
 private func analyzeText(_ text: String, scheme: NLTagScheme) {
    let japaneseWordSchemes = NLTagger.availableTagSchemes(for: .word, language: .japanese)
    let tagger = NLTagger(tagSchemes: japaneseWordSchemes)
    tagger.string = text

    tagger.enumerateTags(
        in: text.startIndex..<text.endIndex,
        unit: .word,
        scheme: scheme,
        options: [/* .omitPunctuation, */.omitWhitespace]
    ) { tag, range in
        // Untagged tokens are skipped, but the enumeration always continues.
        guard let tag = tag else { return true }
        print("\(text[range]): \(tag.rawValue)")
        return true
    }
 }

 // Run the sample sentence through both schemes: token types first (morphological analysis),
 // then lexical classes (part-of-speech tagging).
 // NOTE: per the original author's remark, `.lexicalClass` is not supported for Japanese.
 for scheme in [NLTagScheme.tokenType, .lexicalClass] {
    analyzeText("私はウホーイです。あなたは誰ですか？", scheme: scheme)
 }
diff --git a/NaturalLanguageSampleResult.txt b/NaturalLanguageSampleResult.txt
 私: Word
 は: Word
 ウ: Word
 ホーイ: Word
 です: Word
 。: Punctuation
 あなた: Word
 は: Word
 誰: Word
 です: Word
 か: Word
 ？: Punctuation
	// ref: https://developer.apple.com/documentation/foundation/nslinguistictagger
	// ref: https://developer.apple.com/documentation/foundation/nslinguistictagger/1410036-enumeratetags
	// ref: https://dev.classmethod.jp/articles/ios10-morphological-analysis-from-speechrecognizer/

	import Foundation

	/// Tags `text` with the given scheme and prints every tagged token.
	///
	/// Each token that receives a tag is written to standard output on its own line as
	/// `<token>: <tag raw value>`. Tokens for which the tagger reports no tag are skipped,
	/// and whitespace tokens are omitted from the enumeration.
	///
	/// - Parameters:
	///   - text: The string to analyze.
	///   - scheme: The linguistic tag scheme to enumerate (for example `.tokenType`).
	private func analyzeText(_ text: String, scheme: NSLinguisticTagScheme) {
	    let tagger = NSLinguisticTagger(tagSchemes: NSLinguisticTagger.availableTagSchemes(forLanguage: "ja"), options: 0)
	    tagger.string = text

	    // NSRange is measured in UTF-16 code units, while `String.count` counts Characters.
	    // Building the range from the string's indices keeps the whole text covered even when it
	    // contains emoji or other characters that occupy more than one UTF-16 unit.
	    let fullRange = NSRange(text.startIndex..<text.endIndex, in: text)

	    // Bridge once so the closure can slice with the NSRange values the tagger hands back.
	    let nsText = text as NSString

	    tagger.enumerateTags(in: fullRange, scheme: scheme, options: [.omitWhitespace]) { tag, tokenRange, _, _ in
	        guard let tag = tag else { return }
	        print("\(nsText.substring(with: tokenRange)): \(tag.rawValue)")
	    }
	}

	// Run the sample sentence through both schemes: token types first (morphological analysis),
	// then lexical classes (part-of-speech tagging).
	// NOTE: per the original author's remark, `.lexicalClass` is not supported for Japanese.
	for scheme in [NSLinguisticTagScheme.tokenType, .lexicalClass] {
	    analyzeText("私はウホーイです。あなたは誰ですか？", scheme: scheme)
	}
	私: Word
	は: Word
	ウ: Word
	ホーイ: Word
	です: Word
	。: Punctuation
	あなた: Word
	は: Word
	誰: Word
	です: Word
	か: Word
	？: Punctuation
	import NaturalLanguage

	/// Splits `text` into word-level tokens and returns them in enumeration order.
	///
	/// Punctuation and whitespace tokens are omitted. Every enumerated token is collected,
	/// whether or not the tagger assigned it a tag.
	///
	/// - Parameters:
	///   - text: The string to tokenize.
	///   - scheme: The tag scheme handed to the tagger's enumeration.
	/// - Returns: The text of each enumerated token.
	private func analyzeText(_ text: String, scheme: NLTagScheme) -> [String] {
	    let japaneseWordSchemes = NLTagger.availableTagSchemes(for: .word, language: .japanese)
	    let tagger = NLTagger(tagSchemes: japaneseWordSchemes)
	    tagger.string = text

	    var tokens: [String] = []
	    tagger.enumerateTags(
	        in: text.startIndex..<text.endIndex,
	        unit: .word,
	        scheme: scheme,
	        options: [.omitPunctuation, .omitWhitespace]
	    ) { _, range in
	        tokens.append(String(text[range]))
	        // Returning true asks the tagger to continue with the next token.
	        return true
	    }
	    return tokens
	}

	// Print the token list for the sample sentence (rendered as an array description).
	print(String(describing: analyzeText("私はウホーイです。あなたは誰ですか？", scheme: .tokenType)))
	// ref: https://developer.apple.com/documentation/naturallanguage
	// ref: https://developer.apple.com/documentation/naturallanguage/identifying_parts_of_speech
	// ref: https://developer.apple.com/forums/thread/669890
	// ref: https://dev.classmethod.jp/articles/ios12-natural-language-tokenizing/
	// ref: https://dev.classmethod.jp/articles/ios12-natural-language-identifying-parts-of-speech/

	import NaturalLanguage

	/// Tags `text` word by word and prints every token that received a tag.
	///
	/// Each tagged token is written to standard output on its own line as
	/// `<token>: <tag raw value>`. Whitespace is omitted; punctuation is kept.
	///
	/// - Parameters:
	///   - text: The string to analyze.
	///   - scheme: The tag scheme to enumerate (for example `.tokenType`).
	private func analyzeText(_ text: String, scheme: NLTagScheme) {
	    let japaneseWordSchemes = NLTagger.availableTagSchemes(for: .word, language: .japanese)
	    let tagger = NLTagger(tagSchemes: japaneseWordSchemes)
	    tagger.string = text

	    tagger.enumerateTags(
	        in: text.startIndex..<text.endIndex,
	        unit: .word,
	        scheme: scheme,
	        options: [/* .omitPunctuation, */.omitWhitespace]
	    ) { tag, range in
	        // Untagged tokens are skipped, but the enumeration always continues.
	        guard let tag = tag else { return true }
	        print("\(text[range]): \(tag.rawValue)")
	        return true
	    }
	}

	// Run the sample sentence through both schemes: token types first (morphological analysis),
	// then lexical classes (part-of-speech tagging).
	// NOTE: per the original author's remark, `.lexicalClass` is not supported for Japanese.
	for scheme in [NLTagScheme.tokenType, .lexicalClass] {
	    analyzeText("私はウホーイです。あなたは誰ですか？", scheme: scheme)
	}