forked from GoogleCloudPlatform/golang-samples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcaption.go
124 lines (101 loc) · 2.75 KB
/
caption.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// Copyright 2016 Google Inc. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
// Command caption sends audio data to the Google Speech API
// and prints its transcript.
package main
import (
"context"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"strings"
speech "cloud.google.com/go/speech/apiv1"
speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
)
// usage is the help text printed to stderr when the audio file argument
// is missing.
const usage = `Usage: caption <audiofile>
Audio file must be a 16-bit signed little-endian encoded
with a sample rate of 16000.
The path to the audio file may be a GCS URI (gs://...).
`
// main parses the command line, selects the recognizer that matches the
// audio location (URI or local file) and prints the transcript to stdout.
// It exits with status 2 and the usage text when no audio path is given.
func main() {
	flag.Parse()

	// Read the positional argument through the flag package rather than
	// os.Args, so that anything flag.Parse consumed (for example a leading
	// "--") is not mistaken for the audio path.
	if flag.NArg() < 1 {
		fmt.Fprintln(os.Stderr, usage)
		os.Exit(2)
	}
	path := flag.Arg(0)

	// A path containing "://" is treated as a URI and passed to the API by
	// reference; anything else is read from the local file system.
	var runFunc func(io.Writer, string) error
	if strings.Contains(path, "://") {
		runFunc = recognizeGCS
	} else {
		runFunc = recognize
	}

	// Perform the request.
	if err := runFunc(os.Stdout, path); err != nil {
		log.Fatal(err)
	}
}
// [START speech_transcribe_sync_gcs]
// recognizeGCS asks the Speech API to transcribe the audio located at gcsURI
// and writes each alternative transcript, with its confidence, to w.
//
// The request describes the audio as LINEAR16 at 16000 Hz in en-US.
// It returns the error from creating the client or from the Recognize call,
// and nil otherwise.
func recognizeGCS(w io.Writer, gcsURI string) error {
	ctx := context.Background()

	client, err := speech.NewClient(ctx)
	if err != nil {
		return err
	}
	// Close the client once the request has completed.
	defer client.Close()

	// Send the request with the URI (gs://...)
	// and sample rate information to be transcribed.
	resp, err := client.Recognize(ctx, &speechpb.RecognizeRequest{
		Config: &speechpb.RecognitionConfig{
			Encoding:        speechpb.RecognitionConfig_LINEAR16,
			SampleRateHertz: 16000,
			LanguageCode:    "en-US",
		},
		Audio: &speechpb.RecognitionAudio{
			AudioSource: &speechpb.RecognitionAudio_Uri{Uri: gcsURI},
		},
	})
	if err != nil {
		// Return before touching resp: it must not be dereferenced when
		// the call failed.
		return err
	}

	// Print the results.
	for _, result := range resp.Results {
		for _, alt := range result.Alternatives {
			fmt.Fprintf(w, "\"%v\" (confidence=%3f)\n", alt.Transcript, alt.Confidence)
		}
	}
	return nil
}
// [END speech_transcribe_sync_gcs]
// [START speech_transcribe_sync]
// recognize reads the audio file at the given local path, sends its contents
// to the Speech API and writes each alternative transcript, with its
// confidence, to w.
//
// The request describes the audio as LINEAR16 at 16000 Hz in en-US.
// It returns the error from creating the client, reading the file or the
// Recognize call, and nil otherwise.
func recognize(w io.Writer, file string) error {
	ctx := context.Background()

	client, err := speech.NewClient(ctx)
	if err != nil {
		return err
	}
	// Close the client once the request has completed.
	defer client.Close()

	data, err := ioutil.ReadFile(file)
	if err != nil {
		return err
	}

	// Send the contents of the audio file with the encoding
	// and sample rate information to be transcribed.
	resp, err := client.Recognize(ctx, &speechpb.RecognizeRequest{
		Config: &speechpb.RecognitionConfig{
			Encoding:        speechpb.RecognitionConfig_LINEAR16,
			SampleRateHertz: 16000,
			LanguageCode:    "en-US",
		},
		Audio: &speechpb.RecognitionAudio{
			AudioSource: &speechpb.RecognitionAudio_Content{Content: data},
		},
	})
	if err != nil {
		// Return before touching resp: it must not be dereferenced when
		// the call failed.
		return err
	}

	// Print the results.
	for _, result := range resp.Results {
		for _, alt := range result.Alternatives {
			fmt.Fprintf(w, "\"%v\" (confidence=%3f)\n", alt.Transcript, alt.Confidence)
		}
	}
	return nil
}
// [END speech_transcribe_sync]