ds1379551 2017-10-08 08:40

Go app server: speech response lasts only 3-4 seconds

I want to build a speech-to-text application with the Google Speech engine and a Go HTTP server. Everything works as expected except for one issue, and I can't find where I'm going wrong. The problem: when I start speaking, the Google Speech engine responds for only 3-4 seconds; after that I have to start again, or wait until the one-minute limit expires and then start again. I'm a beginner with Go and I've already spent two days just debugging this. Please help me out.

Thanks in advance.
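
To make the flow easier to follow, here is a stripped-down sketch of the streaming setup (not my real code, which is posted in full below): the first request on the gRPC stream carries only the recognition config, every later request carries a chunk of raw audio from the browser, and a goroutine reads the results. The encoding, sample rate, and language here are just example values mirroring what my code passes in:

package main

import (
    "context"
    "log"

    speech "cloud.google.com/go/speech/apiv1"
    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
)

func main() {
    ctx := context.Background()
    client, err := speech.NewClient(ctx)
    if err != nil {
        log.Fatal(err)
    }
    stream, err := client.StreamingRecognize(ctx)
    if err != nil {
        log.Fatal(err)
    }
    // First request: configuration only, no audio (same fields as InitAudio below).
    if err := stream.Send(&speechpb.StreamingRecognizeRequest{
        StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
            StreamingConfig: &speechpb.StreamingRecognitionConfig{
                Config: &speechpb.RecognitionConfig{
                    Encoding:        speechpb.RecognitionConfig_LINEAR16,
                    SampleRateHertz: 48000,
                    LanguageCode:    "en-US", // example language code
                },
                InterimResults:  true,
                SingleUtterance: true,
            },
        },
    }); err != nil {
        log.Fatal(err)
    }
    // Later requests: chunks of 16-bit PCM; in the real code these come from the WebSocket.
    chunk := make([]byte, 4096) // placeholder buffer for illustration only
    if err := stream.Send(&speechpb.StreamingRecognizeRequest{
        StreamingRequest: &speechpb.StreamingRecognizeRequest_AudioContent{
            AudioContent: chunk,
        },
    }); err != nil {
        log.Printf("could not send audio: %v", err)
    }
    // A separate goroutine then calls stream.Recv() in a loop and forwards
    // each transcript back over the WebSocket until io.EOF.
}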

Go source: main.go and the vrecognize package

package main

import (
    "fmt"
    "log"
    "net/http"
    "strings"
    "vrecognize"

    gmux "github.com/gorilla/mux"
    "github.com/gorilla/websocket"
    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
)

var upgrader = websocket.Upgrader{} // use default options
var voiceRecognize = new(vrecognize.VoiceRecognize)
var voiceStream vrecognize.VoiceStream

func main() {
    mux := gmux.NewRouter().StrictSlash(true)

    mux.HandleFunc("/echo", echo)
    mux.PathPrefix("/").Handler(http.FileServer(http.Dir("./public")))
    // Serve the WebSocket endpoint and static files on :8080
    log.Fatal(http.ListenAndServe(":8080", mux))
}

// echo upgrades the HTTP connection to a WebSocket and dispatches
// text commands and binary audio messages.
func echo(w http.ResponseWriter, r *http.Request) {
    conn, err := upgrader.Upgrade(w, r, nil)
    if err != nil {
        log.Print("upgrade:", err)
        return
    }
    defer conn.Close()

    voiceRecognize := voiceRecognize.NewRecongnize(1, 48000, "en_US")
    var stream speechpb.Speech_StreamingRecognizeClient

    var msgStr string
    for {
        mt, message, err := conn.ReadMessage()
        if err != nil {
            log.Println("read:", err)
            return
        }
        // Message received as text
        if mt == websocket.TextMessage {
            // Convert bytes to string
            msgStr = string(message)
            // Check whether the received message is an echo command
            isEcho := strings.HasPrefix(msgStr, "echo")
            // If so, strip the "echo" prefix
            if isEcho {
                msgStr = msgStr[4:]
            }
            // "start" initializes the audio stream
            if msgStr == "start" {
                stream = voiceStream.InitAudio(voiceRecognize.Encoding, voiceRecognize.SampleRateHertz, voiceRecognize.LanguageCode)
                if stream == nil {
                    fmt.Println("initAudio failed!!!")
                    conn.WriteMessage(websocket.TextMessage, []byte("speechInitFail"))
                    break
                }
                // This goroutine listens for the results
                go voiceStream.GetResults(&stream, conn, voiceRecognize)
            } else if msgStr == "stop" { // "stop" closes the stream connection
                if err := stream.CloseSend(); err != nil {
                    log.Printf("Could not close stream: %v", err)
                    break
                }
            } else if isEcho { // echo the message back
                log.Printf("recv: %s", msgStr)
                err = conn.WriteMessage(mt, []byte(msgStr))
                if err != nil {
                    log.Println("write: ", err)
                    break
                }
            } else {
                fmt.Println("no handling for: ", string(message))
            }
        } else if mt == websocket.BinaryMessage {
            // Forward the audio bytes to the Speech API
            voiceStream.SendData(&stream, &message)
        }
    }
}
---------------------------
package vrecognize

import (
    "io"
    "log"

    "cloud.google.com/go/speech/apiv1"
    "github.com/gorilla/websocket"
    "golang.org/x/net/context"
    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
)

// VoiceStream : voice-related configuration
type VoiceStream struct {
}

// GetResults : reads stream results whenever they become available
func (vs VoiceStream) GetResults(stream *speechpb.Speech_StreamingRecognizeClient, conn *websocket.Conn, vr *VoiceRecognize) {
    defer func() {
        if r := recover(); r != nil {
            log.Println("In getResults got panic: ", r)
            *stream = vs.InitAudio(vr.Encoding, vr.SampleRateHertz, vr.LanguageCode)
        }
    }()
    for {
        resp, err := (*stream).Recv()
        if err == io.EOF {
            log.Printf("Receiver EOF: %v", err)
            conn.WriteMessage(websocket.TextMessage, []byte("MinuteDone"))
        }
        if err != nil {
            log.Printf("Cannot stream results: %v", err)
            conn.WriteMessage(websocket.TextMessage, []byte("VoiceInterrupted"))
        }
        if err := resp.Error; err != nil {
            log.Printf("Could not recognize: %v", err)
            conn.WriteMessage(websocket.TextMessage, []byte("NotRecognize"))
        }

        for _, result := range resp.Results {
            for _, altr := range result.GetAlternatives() {
                msg := altr.GetTranscript()
                log.Printf("Result: %+v\n", msg)
                conn.WriteMessage(websocket.TextMessage, []byte(msg))
            }
        }
        //log.Printf("Loop last...")
    }
}

// InitAudio initializes the audio stream and starts listening for incoming audio
func (vs VoiceStream) InitAudio(audioEnco speechpb.RecognitionConfig_AudioEncoding, sampleRate int32, lang string) speechpb.Speech_StreamingRecognizeClient {
    //log.Printf("Speech Init start...")
    ctx := context.Background()
    client, err := speech.NewClient(ctx)
    if err != nil {
        log.Fatal(err)
        return nil
    }
    stream, err2 := client.StreamingRecognize(ctx)
    if err2 != nil {
        log.Fatal(err2)
        return nil
    }
    // The first request on the stream carries the configuration
    err = stream.Send(&speechpb.StreamingRecognizeRequest{
        StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
            StreamingConfig: &speechpb.StreamingRecognitionConfig{
                Config: &speechpb.RecognitionConfig{
                    Encoding:        audioEnco,
                    SampleRateHertz: sampleRate,
                    LanguageCode:    lang,
                },
                InterimResults:  true,
                SingleUtterance: true,
            },
        },
    })
    if err != nil {
        log.Fatal(err)
        return nil
    }
    //log.Printf("Speech Init finished...")
    return stream
}

// SendData sends the audio data to the Speech API
func (vs VoiceStream) SendData(stream *speechpb.Speech_StreamingRecognizeClient, message *[]byte) {
    //log.Printf("SendData Called ...")
    if err := (*stream).Send(&speechpb.StreamingRecognizeRequest{
        StreamingRequest: &speechpb.StreamingRecognizeRequest_AudioContent{
            AudioContent: *message,
        },
    }); err != nil {
        log.Printf("Could not send audio: %v", err)
    }
}

// VoiceRecognize holds the recognition configuration
type VoiceRecognize struct {
    Encoding        speechpb.RecognitionConfig_AudioEncoding
    SampleRateHertz int32
    LanguageCode    string
}

// NewRecongnize : builds a VoiceRecognize from the given parameters
func (vr VoiceRecognize) NewRecongnize(audioencoding, HertzRate int32, language string) *VoiceRecognize {
    var Encoding speechpb.RecognitionConfig_AudioEncoding
    switch audioencoding {
    case 0:
        // Not specified. Will return an error [google.rpc.Code.INVALID_ARGUMENT].
        Encoding = speechpb.RecognitionConfig_ENCODING_UNSPECIFIED
    case 1:
        // Uncompressed 16-bit signed little-endian samples (Linear PCM).
        Encoding = speechpb.RecognitionConfig_LINEAR16
    case 2:
        // [`FLAC`](https://xiph.org/flac/documentation.html) (Free Lossless Audio
        // Codec) is the recommended encoding because it is
        // lossless--therefore recognition is not compromised--and
        // requires only about half the bandwidth of `LINEAR16`.
        Encoding = speechpb.RecognitionConfig_FLAC
    case 3:
        // 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
        Encoding = speechpb.RecognitionConfig_MULAW
    case 4:
        // Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be 8000.
        Encoding = speechpb.RecognitionConfig_AMR
    case 5:
        // Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 16000.
        Encoding = speechpb.RecognitionConfig_AMR_WB
    case 6:
        // Opus encoded audio frames in Ogg container
        // ([OggOpus](https://wiki.xiph.org/OggOpus)).
        // `sample_rate_hertz` must be 16000.
        Encoding = speechpb.RecognitionConfig_OGG_OPUS
    default:
        // Uncompressed 16-bit signed little-endian samples (Linear PCM).
        Encoding = speechpb.RecognitionConfig_LINEAR16
    }
    return &VoiceRecognize{Encoding, HertzRate, language}
}

And index.html code:

 <!DOCTYPE html>
  <html>
   <head>
    <meta charset="UTF-8">
    <title>Golang WebSocket</title>
  </head>
  <body>
  <form>
   <input id="message" type="text" value="What are you doing">
   <input onclick="wsConnect();" id="connectBtn" value="Connect" 
     type="button"/>
   <input onclick="wsSendMessage();" id="echoBtn" value="Echo" type="button"/>
<br/>
<input onclick="startAudio();" id="startBtn" value="Start Audio" type="button"/>
 </form>
 <br/>
 <h2>Log</h2>
 <pre id="log"></pre>
 <h2>Server Response</h2>
 <pre id="serResp"></pre>
 <script type="text/javascript">
   var webSocket = null;
   var audioStream = null;
   var context = null;
   function __log(e, data) {
    if(e.type != "error"){
        log.innerHTML += "\n" + e + " " + (data || '');
    }
  }
  function serverResponse(e, data){
    serResp.innerHTML = "\n" + e + " " + (data || '');
  }
  function wsConnect() {
    webSocket = new WebSocket("ws://"+window.location.host+"/echo");

    var message = document.getElementById("message");
    webSocket.onopen = function (message) {
        wsOpen(message);
    };
    webSocket.onmessage = function (message) {
        wsGetMessage(message);
    };
    webSocket.onclose = function (message) {
        wsClose(message);
    };
    webSocket.onerror = function (message) {
        wsError(message);
    };
 }

 function wsError(message) {
    console.log(message);
    __log(message);
 }

 function wsOpen(message) {
    __log("Connected ...");
    if(message.type === "open"){
        document.getElementById("connectBtn").disabled = true;
        document.getElementById("startBtn").disabled = false;
        document.getElementById("echoBtn").disabled = false;
        if(document.getElementById("stopBtn") != undefined){
            document.getElementById("stopBtn").disabled = false;   
        }
    }
 }

 function wsSendMessage() {
    __log("From Browser: "+ message.value);
    webSocket.send("echo"+message.value);
    document.getElementById("message").value = ""; 
 }

 function wsCloseConnection() {
    webSocket.close();
 }

 function wsGetMessage(message) {
    console.log("Server: "+ message.data);
    msg = message.data;
    if(msg == "VoiceInterrupted" || msg == "MinuteDone" || msg == "NotRecognize" || msg == "speechInitFail"){
        stopAudio();
        startAudio();
    }else{
        serverResponse("Server: " + message.data);
    }   
 }

function wsClose(message) {
    __log("Disconnect ... ");
    if(message.type === "close"){
    document.getElementById("connectBtn").disabled = false;
    document.getElementById("startBtn").disabled = true;
    document.getElementById("echoBtn").disabled = true;
    }
 }

 function wserror(message) {
    __log("Error ..."+ message);
 }

 function sendStart() {
    webSocket.send("start")
 }

 function startAudio() {
    var session = {
        audio: true,
        video: false
    };
    sendStart();
    var isMediaReady = true;
    navigator.mediaDevices.getUserMedia(session).then(function (stream) {
        initializeRecorder(stream);
    }).catch(function (err) {
        onError(err);
        document.getElementById("startBtn").disabled = false;
    });
    document.getElementById("startBtn").disabled = true;
    setTimeout(stopAudio,55*1000);
 }

 function onError(err) {
    __log("Error while calling getUserMedia:" + err);
 }

 function initializeRecorder(stream) {
    __log("initializeRecorder called...");
    audioStream = stream;
    var audioContext = window.AudioContext;
    context = new audioContext();
    var audioInput = context.createMediaStreamSource(stream);
    var bufferSize = 1*4*1024;
    // create a javascript node
    var recorder = context.createScriptProcessor(bufferSize, 1, 1);
    // specify the processing function
    recorder.onaudioprocess = recorderProcess;
    // connect stream to our recorder
    audioInput.connect(recorder);
    // connect our recorder to the previous destination
    recorder.connect(context.destination);
 }

 function convertFloat32ToInt16(buffer) {
    l = buffer.length;
    buf = new Int16Array(l);
    while (l--) {
        buf[l] = Math.min(1, buffer[l]) * 0x7FFF;
    }
    return buf.buffer;
 }

 function recorderProcess(e) {
    var left = e.inputBuffer.getChannelData(0);
    webSocket.send(convertFloat32ToInt16(left));
 }

 function stopAudio() {
    __log("stop audio called...");
    document.getElementById("startBtn").disabled = false;
    context.close();
    context = null;
    webSocket.send("stop")
    var audioTrack = audioStream.getAudioTracks();
    var i = 0;

    for (i = 0; i < audioTrack.length; i++) {
        var track = audioTrack[i];
        track.stop();
        audioStream.removeTrack(track);
    }
    audioStream = null;
 }
 </script>
 </body>
 </html>