Last active
October 28, 2021 06:12
-
-
Save imjasonh/5100f11db40671733a376b230418c484 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2016 Google Inc. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

// Command caption reads audio from standard input and logs the transcript for it.
package main | |
import ( | |
"io" | |
"log" | |
"os" | |
"golang.org/x/net/context" | |
"google.golang.org/api/option" | |
"google.golang.org/api/transport" | |
speech "google.golang.org/genproto/googleapis/cloud/speech/v1beta1" | |
) | |
func main() { | |
c, err := NewCaption() | |
if err != nil { | |
log.Fatalf("NewCaption: %v", err) | |
} | |
// Pipe stdin to the captioner. | |
go io.Copy(c, os.Stdin) | |
// Read results from the captioner. | |
for t, ok := range c.Chan() { | |
if !ok { | |
log.Println("all done") | |
break | |
} | |
log.Println(t) | |
} | |
} | |
// Captioner consumes audio data through its Write method and exposes the
// resulting transcribed text on a receive-only channel. Close ends the audio
// input.
type Captioner interface {
	io.Writer
	io.Closer

	// Chan returns the channel on which transcribed text is delivered.
	Chan() <-chan string
}
type captioner struct { | |
pw io.PipeWriter | |
ch chan string | |
} | |
func (c *captioner) Chan() <-chan string { | |
return c.ch | |
} | |
// NewCaptioner returns a new Captioner ready to receive audio data. | |
func NewCaptioner() (Captioner, error) { | |
ctx := context.Background() | |
conn, err := transport.DialGRPC(ctx, | |
option.WithEndpoint("speech.googleapis.com:443"), | |
option.WithScopes("https://www.googleapis.com/auth/cloud-platform"), | |
) | |
if err != nil { | |
return nil, err | |
} | |
defer conn.Close() | |
stream, err := speech.NewSpeechClient(conn).StreamingRecognize(ctx) | |
if err != nil { | |
return nil, err | |
} | |
// send the initial configuration message. | |
if err := stream.Send(&speech.StreamingRecognizeRequest{ | |
StreamingRequest: &speech.StreamingRecognizeRequest_StreamingConfig{ | |
StreamingConfig: &speech.StreamingRecognitionConfig{ | |
Config: &speech.RecognitionConfig{ | |
Encoding: speech.RecognitionConfig_LINEAR16, | |
SampleRate: 16000, | |
}, | |
}, | |
}, | |
}); err != nil { | |
return nil, err | |
} | |
pw, pr := io.Pipe() | |
c := &captioner{pw, make(chan string)} | |
go func() { | |
for { | |
n, err := c.pr.Read() | |
if err == io.EOF { | |
return // nothing else to pipe, kill this goroutine | |
} | |
if err != nil { | |
log.Printf("reading buf error: %v", err) | |
continue | |
} | |
if err = stream.Send(&speech.StreamingRecognizeRequest{ | |
StreamingRequest: &speech.StreamingRecognizeRequest_AudioContent{ | |
AudioContent: buf.Bytes()[:n], | |
}, | |
}); err != nil { | |
log.Printf("sending audio error: %v", err) | |
} | |
} | |
}() | |
go func() { | |
for { | |
resp, err := stream.Recv() | |
if err == io.EOF { | |
close(c.ch) // close the chan | |
break // no more results, kill this goroutine | |
} | |
if err != nil { | |
// TODO: handle error | |
continue | |
} | |
if resp.Error != nil { | |
// TODO: handle error | |
continue | |
} | |
for _, result := range resp.Results { | |
c.ch <- result.Alternatives[0].Transcript | |
} | |
} | |
}() | |
return c, nil | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment