Create your own Siri in Swift | Lil ‘Bits | https://www.youtube.com/watch?v=Sigl3dihEB8
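Note: to run this sample you need the NSSpeechRecognitionUsageDescription and NSMicrophoneUsageDescription privacy keys in the app's Info.plist, and the label and recordButton outlets (plus the button's action) wired up in a storyboard. As a minimal sketch, not part of the original gist, the authorization callback in viewWillAppear could also react to the non-authorized cases, for example:

// Hypothetical variant of the authorization handling in viewWillAppear below;
// the gist itself only acts on the .authorized case.
SFSpeechRecognizer.requestAuthorization { status in
    OperationQueue.main.addOperation {
        switch status {
        case .authorized:
            self.recordButtonTapped()            // permission granted: start listening
        default:
            self.recordButton.isEnabled = false  // denied, restricted or not determined
        }
    }
}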
import UIKit
import Speech
import AVFoundation

class SiriViewController: UIViewController {

    private static let locale = Locale(identifier: "es-ES")

    private let speechRecognizer = SFSpeechRecognizer(locale: SiriViewController.locale)!
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    private let audioEngine = AVAudioEngine()
    // Stored property so the synthesizer is not deallocated mid-utterance (see read(_:)).
    private let speechSynthesizer = AVSpeechSynthesizer()

    @IBOutlet var label: UILabel!
    @IBOutlet var recordButton: UIButton!
    override public func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        // Ask for speech-recognition permission and start listening as soon as the view appears.
        SFSpeechRecognizer.requestAuthorization { status in
            if status == .authorized {
                OperationQueue.main.addOperation {
                    self.recordButtonTapped()
                }
            }
        }
    }
    private func startRecording() throws {
        // Cancel any task still running from a previous recording.
        if let recognitionTask = recognitionTask {
            recognitionTask.cancel()
            self.recognitionTask = nil
        }
        try setAudioSessionCategory(AVAudioSessionCategoryRecord)
        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        guard let recognitionRequest = recognitionRequest else {
            return
        }
        recognitionRequest.shouldReportPartialResults = true
        recognitionRequest.taskHint = .dictation
        let inputNode = audioEngine.inputNode
        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
            var isFinal = false
            var transcription = ""
            if let result = result {
                isFinal = result.isFinal
                transcription = result.bestTranscription.formattedString
                // Show the (partial) transcription, colored and sized by the linguistic tagger.
                self.label.attributedText = self.tag(transcription)
            }
            if error != nil || isFinal {
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                self.recognitionRequest = nil
                self.recognitionTask = nil
                // Speak the final transcription back to the user.
                try? self.read(transcription)
                self.recordButton.setTitle("¡Oye Siri!", for: []) // "Hey Siri!"
            }
        }
        // Feed the microphone buffers into the recognition request.
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            self.recognitionRequest?.append(buffer)
        }
        audioEngine.prepare()
        try audioEngine.start()
        label.text = "Cuéntame..." // "Tell me..."
    }
    func setAudioSessionCategory(_ category: String) throws {
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(category)
        try audioSession.setMode(AVAudioSessionModeMeasurement)
        try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
    }
    @IBAction func recordButtonTapped() {
        if audioEngine.isRunning {
            // Stop listening and let the recognizer finish with the audio received so far.
            audioEngine.stop()
            recognitionRequest?.endAudio()
        } else {
            try? startRecording()
            recordButton.setTitle("¿Qué he dicho?", for: []) // "What did I say?"
        }
    }
    func tag(_ text: String) -> NSAttributedString {
        let tagger = NSLinguisticTagger(tagSchemes: [.lemma, .nameTypeOrLexicalClass], options: 0)
        tagger.string = text
        let range = NSRange(location: 0, length: text.utf16.count)
        var words = [String]()
        var bagOfWords = [String: CGFloat]()
        var lemmas = [String: String]()
        // First pass: collect every token, its lemma and how many times it appears.
        tagger.enumerateTags(in: range, unit: .word, scheme: .lemma, options: []) { tag, tokenRange, _ in
            let word = (text as NSString).substring(with: tokenRange)
            words.append(word)
            lemmas[word] = tag?.rawValue
            if bagOfWords[word] != nil {
                bagOfWords[word]! += 1
            } else {
                bagOfWords[word] = 1
            }
        }
        // Second pass: pick a color per token from its lexical class or name type.
        var colors = [String: UIColor]()
        tagger.enumerateTags(in: range, unit: .word, scheme: .nameTypeOrLexicalClass, options: []) { tag, tokenRange, _ in
            let word = (text as NSString).substring(with: tokenRange)
            colors[word] = tag?.colorValue
        }
        return compose(words, bagOfWords: bagOfWords, lemmas: lemmas, colors: colors)
    }
    private func compose(_ words: [String], bagOfWords: [String: CGFloat], lemmas: [String: String], colors: [String: UIColor]) -> NSAttributedString {
        let result = NSMutableAttributedString()
        words.forEach { word in
            // More frequent words are rendered slightly larger.
            let fontSize = 15 + (bagOfWords[word] ?? 0)
            let attributedWord = NSAttributedString(string: lemmas[word] ?? word,
                                                    attributes: [.foregroundColor: colors[word] ?? .black,
                                                                 .font: UIFont.systemFont(ofSize: fontSize)])
            result.append(attributedWord)
        }
        return result
    }
    func read(_ text: String) throws {
        try setAudioSessionCategory(AVAudioSessionCategoryPlayback)
        let speechUtterance = AVSpeechUtterance(string: text)
        speechUtterance.voice = AVSpeechSynthesisVoice(language: SiriViewController.locale.identifier)
        // Use the stored synthesizer so speech is not cut off by deallocation.
        speechSynthesizer.speak(speechUtterance)
    }
}

extension NSLinguisticTag {
    public var colorValue: UIColor {
        switch self {
        case .noun: return .red
        case .verb: return .green
        case .adjective: return .blue
        case .adverb: return .cyan
        case .pronoun: return .yellow
        case .determiner: return .magenta
        case .particle: return .gray
        case .preposition: return .darkGray
        case .number: return .lightGray
        case .personalName: return .orange
        case .placeName: return .purple
        case .organizationName: return .brown
        default: return .black
        }
    }
}
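For a quick look at what the tagging produces without going through the microphone, tag(_:) can be called directly; it does not touch the outlets, so a bare instance is enough. A small sketch (the sample sentence is only illustrative):

// Sketch: preview the lemma/color tagging independently of speech recognition.
let controller = SiriViewController()
let attributed = controller.tag("María come manzanas rojas en Madrid")
print(attributed.string) // lemmatized text; exact output depends on the system's Spanish linguistic data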