Create a StoryTeller in Swift with Firebase ML Kit Text Recognition OCR | Lil ‘Bits | https://www.youtube.com/watch?v=kpzyFZYI1PQ
import UIKit
import AVFoundation
import Firebase

class StoryTellerViewController: UIViewController {

    /// Live camera preview shown full screen behind everything else.
    private lazy var previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)

    /// Back wide-angle camera feeding video frames to the sample buffer delegate.
    private lazy var captureSession: AVCaptureSession = {
        let captureSession = AVCaptureSession()
        guard let captureDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back),
            let input = try? AVCaptureDeviceInput(device: captureDevice) else {
                return captureSession
        }
        captureSession.addInput(input)
        let output = AVCaptureVideoDataOutput()
        output.setSampleBufferDelegate(self, queue: DispatchQueue(label: "SampleBuffer"))
        captureSession.addOutput(output)
        return captureSession
    }()

    let speechSynthesizer = AVSpeechSynthesizer()
    lazy var vision = Vision.vision()
    /// Non-nil while a text detection request is in flight; reset to nil when it finishes.
    var textDetector: VisionTextDetector?
    private var textsToRead = [String]()
    override func viewDidLoad() {
        super.viewDidLoad()
        speechSynthesizer.delegate = self
        view.layer.addSublayer(previewLayer)
        try? setPlaybackAudioSession()
    }

    override func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        previewLayer.frame = view.frame
        checkCameraAccess()
    }

    /// Starts the capture session once camera access is granted,
    /// asking for permission only while the status is still undetermined.
    func checkCameraAccess() {
        switch AVCaptureDevice.authorizationStatus(for: .video) {
        case .authorized: captureSession.startRunning()
        case .notDetermined: requestCameraAccess()
        default: break
        }
    }

    func requestCameraAccess() {
        AVCaptureDevice.requestAccess(for: .video) { _ in
            self.checkCameraAccess()
        }
    }

    /// Configures the shared audio session so the synthesized speech is played back
    /// and other audio is notified when the session deactivates.
    func setPlaybackAudioSession() throws {
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(AVAudioSessionCategoryPlayback)
        try audioSession.setMode(AVAudioSessionModeMeasurement)
        try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
    }
    /// Queues the recognized texts and starts reading the first one.
    func read(_ texts: [String]) {
        textsToRead.append(contentsOf: texts)
        read(textsToRead.removeFirst())
    }

    /// Speaks a single text using a voice that matches its dominant language.
    func read(_ text: String) {
        let speechUtterance = AVSpeechUtterance(string: text)
        let dominantLanguage = NSLinguisticTagger.dominantLanguage(for: text)
        speechUtterance.voice = AVSpeechSynthesisVoice(language: dominantLanguage)
        speechSynthesizer.speak(speechUtterance)
    }
}
extension StoryTellerViewController: AVSpeechSynthesizerDelegate {

    /// When an utterance finishes, read the next queued text,
    /// or resume the camera once the queue is empty.
    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        if textsToRead.isEmpty {
            textDetector = nil
            captureSession.startRunning()
        } else {
            read(textsToRead.removeFirst())
        }
    }
}

extension StoryTellerViewController: AVCaptureVideoDataOutputSampleBufferDelegate {

    /// Runs ML Kit text recognition on a frame whenever no detection is in flight.
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        if textDetector == nil {
            let metadata = VisionImageMetadata()
            metadata.orientation = .rightTop // Buffer orientation for the back camera in portrait.
            let visionImage = VisionImage(buffer: sampleBuffer)
            visionImage.metadata = metadata
            textDetector = vision.textDetector()
            textDetector?.detect(in: visionImage, completion: handleDetect)
        }
    }

    /// On success, pauses the camera and reads the recognized text blocks;
    /// otherwise clears the detector so the next frame can retry.
    func handleDetect(features: [VisionText]?, error: Error?) {
        guard error == nil, let features = features, !features.isEmpty else {
            textDetector = nil
            return
        }
        captureSession.stopRunning()
        read(features.compactMap { $0.text })
    }
}
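The view controller above assumes Firebase is already configured at app launch and that the app declares camera usage in Info.plist (NSCameraUsageDescription); ML Kit's Vision text recognition also typically requires the Firebase ML Vision pods in the Podfile. Below is a minimal setup sketch under those assumptions; the programmatic root view controller (no storyboard) is an illustrative choice, not part of the original gist.

import UIKit
import Firebase

@UIApplicationMain
class AppDelegate: UIResponder, UIApplicationDelegate {

    var window: UIWindow?

    func application(_ application: UIApplication,
                     didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]? = nil) -> Bool {
        // Reads GoogleService-Info.plist and initializes Firebase before ML Kit is used.
        FirebaseApp.configure()

        // Present the OCR screen as the root view controller (assumed setup without a storyboard).
        window = UIWindow(frame: UIScreen.main.bounds)
        window?.rootViewController = StoryTellerViewController()
        window?.makeKeyAndVisible()
        return true
    }
}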