Java整合科大讯飞SDK实现语音转文字

文字转语音：先用文字生成语音文件

/**
 * Speaks the given text through the Windows SAPI voice and also renders it to
 * {@code E:\test.wav}.
 *
 * <p>The announcement is repeated 3 times, with a 5 second pause after every
 * round. Any failure is printed and ends the remaining rounds. If the thread is
 * interrupted during the pause, its interrupt flag is restored before returning.
 *
 * @param strVoiceText the text to speak
 */
public static void readVoice(String strVoiceText) {
    try {
        for (int i = 0; i < 3; i++) { // repeat the announcement 3 times
            announceOnce(strVoiceText);
            Thread.sleep(5000); // wait 5 seconds before the next repetition
        }
    } catch (InterruptedException e) {
        // Keep the interruption visible to the caller instead of swallowing it.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Runs one round: speaks the text on the voice's current output, then redirects
 * the voice to a file stream and speaks it again so the audio lands on disk.
 * Every COM object created here is released even when a call fails.
 */
private static void announceOnce(String strVoiceText) {
    ActiveXComponent spVoice = null;
    ActiveXComponent spFileStream = null;
    ActiveXComponent spAudioFormat = null;
    try {
        // ActiveXComponent is itself a Dispatch, so it is passed to Dispatch.* directly.
        spVoice = new ActiveXComponent("Sapi.SpVoice");
        spVoice.setProperty("Volume", new Variant(80)); // volume, 0-100
        spVoice.setProperty("Rate", new Variant(-1));   // speaking rate, -10 to +10
        Dispatch.call(spVoice, "Speak", new Variant(strVoiceText)); // speak aloud

        // Build the file stream used to save the spoken audio to disk.
        spFileStream = new ActiveXComponent("Sapi.SpFileStream");
        spAudioFormat = new ActiveXComponent("Sapi.SpAudioFormat");
        Dispatch.put(spAudioFormat, "Type", new Variant(20));   // audio stream format
        Dispatch.putRef(spFileStream, "Format", spAudioFormat); // format of the output file

        // Per the original note, this creates the .wav file for output.
        Dispatch.call(spFileStream, "Open",
                new Variant("E:\\test.wav"),
                new Variant(3), new Variant(true));
        try {
            // Route the voice's audio output into the file stream and speak again.
            Dispatch.putRef(spVoice, "AudioOutputStream", spFileStream);
            Dispatch.put(spVoice, "Volume", new Variant(80)); // volume, 0-100
            Dispatch.put(spVoice, "Rate", new Variant(-1));   // speaking rate
            Dispatch.call(spVoice, "Speak", new Variant(strVoiceText));
        } finally {
            // Close the output file even if speaking into it failed.
            Dispatch.call(spFileStream, "Close");
        }
        Dispatch.putRef(spVoice, "AudioOutputStream", null);
    } finally {
        releaseQuietly(spAudioFormat);
        releaseQuietly(spFileStream);
        releaseQuietly(spVoice);
    }
}

/** Releases a COM object if it was created; does nothing for {@code null}. */
private static void releaseQuietly(Dispatch comObject) {
    if (comObject != null) {
        comObject.safeRelease();
    }
}

/**
 * HTTP GET handler mapped to {@code readVoice}: takes the {@code strVoiceText}
 * request parameter and passes it to {@code readVoice(String)}.
 *
 * NOTE(review): this handler has the same name and parameter type as the method
 * it calls. An unqualified call resolves to this handler itself, so the static
 * helper presumably lives in another class and the call needs to be qualified
 * with that class name -- confirm against the full source.
 *
 * @param strVoiceText the text to be spoken
 */
@GetMapping("readVoice")
public void readVoice(@RequestParam("strVoiceText") String strVoiceText){

    readVoice(strVoiceText);// e.g. strVoiceText = 请A001号到2号窗口 ("No. A001, please go to window 2") -- any text works


}

//把客户的A001加入到ArrayBlockingQueue，并创建window窗口略

通过上面代码得到了语音文件test.wav

下面是语音转文字。代码中需要用到讯飞的 APPID，需要先去讯飞注册获取。

下载 SDK

把 lib 目录复制到项目下（与 src 同级目录），并复制以下 4 个文件

代码

package com.fan.li.springbootvoice.voice;

import com.iflytek.cloud.speech.*;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.entity.ContentType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.web.multipart.MultipartFile;
import java.io.*;
/**
 * @author fan
 * @date 2022年05月25日 17:35
 */
@Slf4j
public class VoiceTool {

    /**
     * 音频文件
     */
    public static String audioPath = "E:\\test.wav";///home/asr/my.pcm";
    private StringBuilder sb;
    private SpeechRecognizer speechRecognizer;
    private Object lock = new Object();

    public VoiceTool(String appId){
        SpeechUtility.createUtility(SpeechConstant.APPID + "=" + appId);
    }

    public String RecognizePcmfileByte(MultipartFile audioFile) {
        sb = new StringBuilder();
        try {
            if (speechRecognizer == null) {//参数可以在配置文件设置
                speechRecognizer = SpeechRecognizer.createRecognizer();
                speechRecognizer.setParameter(SpeechConstant.AUDIO_SOURCE, "-1");
                speechRecognizer.setParameter( SpeechConstant.RESULT_TYPE, "plain" );
                speechRecognizer.setParameter(SpeechConstant.VAD_BOS,"5000");//前端点超时，
                speechRecognizer.setParameter(SpeechConstant.VAD_EOS,"10000");//后端点超时要与运行SDK时配置的一样
            }
            speechRecognizer.startListening(recListener);
            byte[] buffer = audioFile.getBytes();
            if (buffer == null || buffer.length == 0) {
                log.error("no audio avaible!");
                speechRecognizer.cancel();
            } else {
                int lenRead = buffer.length;
                log.info("文件长度"+buffer.length);
                speechRecognizer.writeAudio( buffer, 0, lenRead );
                speechRecognizer.stopListening();
                synchronized (lock) {
                    lock.wait();//主线程等待
                }
                log.info("输出语音内容：" + sb.toString());
                return sb.toString();
            }

        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }
    private RecognizerListener recListener = new RecognizerListener() {
        @Override
        public void onBeginOfSpeech() {
            
        }
        @Override
        public void onEndOfSpeech() {
            
        }
        /**
         * 获取听写结果
         */
        @Override
        public void onResult(RecognizerResult results, boolean islast) {
                //用json解析器解析为json格式
            String text = results.getResultString();
            sb.append(text);
            //log.info("解析结果："+curRet.toString());
            if( islast ) {
                synchronized (lock) {
                    lock.notify();//子线程唤醒
                }
            }
        }
        @Override
        public void onVolumeChanged(int volume) {
           
        }

        @Override
        public void onError(SpeechError error) {
            
        }
        @Override
        public void onEvent(int eventType, int arg1, int agr2, String msg) {
            
        }
    };

    
    public static void main(String args[]) throws IOException {
        VoiceTool iatTool = new VoiceTool("you appID 讯飞上注册即可");
        File file = new File(audioPath);
        FileInputStream fileInputStream = new FileInputStream(file);
        MultipartFile multipartFile = new MockMultipartFile(file.getName(), file.getName(),
                ContentType.APPLICATION_OCTET_STREAM.toString(), fileInputStream);
        iatTool.RecognizePcmfileByte(multipartFile);

    }

}

controller

/**
 * REST controller under {@code vc} that runs speech recognition on a fixed
 * local audio file.
 *
 * @author fan
 * @date 2022年05月25日 17:48
 */
@RestController
@RequestMapping(value = "vc")
public class VoeceController {

    /**
     * Path of the audio file that {@link #voiceToText()} recognizes.
     */
    public static String audioPath = "E:\\test.wav";

    /**
     * Handles GET {@code vc/voiceToText}: wraps the file at {@link #audioPath}
     * as a multipart file and passes it to {@code VoiceTool.RecognizePcmfileByte}.
     *
     * @return whatever {@code RecognizePcmfileByte} returns for the file
     * @throws IOException if the audio file cannot be opened or read
     */
    @GetMapping(value = "voiceToText")
    public String voiceToText() throws IOException {
        VoiceTool voiceTool = new VoiceTool("you appID 讯飞上注册即可");
        File file = new File(audioPath);
        // Close the stream here instead of leaving it to whoever consumes it.
        try (FileInputStream fileInputStream = new FileInputStream(file)) {
            MultipartFile multipartFile = new MockMultipartFile(file.getName(), file.getName(),
                    ContentType.APPLICATION_OCTET_STREAM.toString(), fileInputStream);
            return voiceTool.RecognizePcmfileByte(multipartFile);
        }
    }
}

浏览器输入地址

原文链接：https://blog.csdn.net/Zxdwr520/article/details/124992924

你可能也喜欢