// Copyright 2016 Google Inc. All rights reserved. // Use of this source code is governed by the Apache 2.0 // license that can be found in the LICENSE file. // Command caption sends audio data to the Google Speech API // and prints its transcript. package main import ( "context" "flag" "fmt" "io" "io/ioutil" "log" "os" "strings" speech "cloud.google.com/go/speech/apiv1" speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1" ) const usage = `Usage: caption Audio file must be a 16-bit signed little-endian encoded with a sample rate of 16000. The path to the audio file may be a GCS URI (gs://...). ` func main() { flag.Parse() if len(os.Args) < 2 { fmt.Fprintln(os.Stderr, usage) os.Exit(2) } var runFunc func(io.Writer, string) error path := os.Args[1] if strings.Contains(path, "://") { runFunc = recognizeGCS } else { runFunc = recognize } // Perform the request. if err := runFunc(os.Stdout, os.Args[1]); err != nil { log.Fatal(err) } } // [START speech_transcribe_sync_gcs] func recognizeGCS(w io.Writer, gcsURI string) error { ctx := context.Background() client, err := speech.NewClient(ctx) if err != nil { return err } // Send the request with the URI (gs://...) // and sample rate information to be transcripted. resp, err := client.Recognize(ctx, &speechpb.RecognizeRequest{ Config: &speechpb.RecognitionConfig{ Encoding: speechpb.RecognitionConfig_LINEAR16, SampleRateHertz: 16000, LanguageCode: "en-US", }, Audio: &speechpb.RecognitionAudio{ AudioSource: &speechpb.RecognitionAudio_Uri{Uri: gcsURI}, }, }) // Print the results. for _, result := range resp.Results { for _, alt := range result.Alternatives { fmt.Fprintf(w, "\"%v\" (confidence=%3f)\n", alt.Transcript, alt.Confidence) } } return nil } // [END speech_transcribe_sync_gcs] // [START speech_transcribe_sync] func recognize(w io.Writer, file string) error { ctx := context.Background() client, err := speech.NewClient(ctx) if err != nil { return err } data, err := ioutil.ReadFile(file) if err != nil { return err } // Send the contents of the audio file with the encoding and // and sample rate information to be transcripted. resp, err := client.Recognize(ctx, &speechpb.RecognizeRequest{ Config: &speechpb.RecognitionConfig{ Encoding: speechpb.RecognitionConfig_LINEAR16, SampleRateHertz: 16000, LanguageCode: "en-US", }, Audio: &speechpb.RecognitionAudio{ AudioSource: &speechpb.RecognitionAudio_Content{Content: data}, }, }) // Print the results. for _, result := range resp.Results { for _, alt := range result.Alternatives { fmt.Fprintf(w, "\"%v\" (confidence=%3f)\n", alt.Transcript, alt.Confidence) } } return nil } // [END speech_transcribe_sync]