346 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			346 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
// Module-level accumulator for the most recent recognition transcript.
// Written by HttpASRRecognizer.handleASRResponse; presumably read by code
// outside this file — TODO(review): confirm the consumer before renaming.
let ASRTEXT = ''
 | |
| class HttpASRRecognizer {
 | |
|     constructor() {
 | |
|         this.mediaRecorder = null;
 | |
|         this.audioContext = null;
 | |
|         this.isRecording = false;
 | |
|         this.audioChunks = [];
 | |
|         
 | |
|         // VAD相关属性
 | |
|         this.isSpeaking = false;
 | |
|         this.silenceThreshold = 0.01;
 | |
|         this.silenceTimeout = 1000;
 | |
|         this.minSpeechDuration = 300;
 | |
|         this.silenceTimer = null;
 | |
|         this.speechStartTime = null;
 | |
|         this.audioBuffer = [];
 | |
|         
 | |
|         // API配置
 | |
|         this.apiConfig = {
 | |
|             url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
 | |
|             headers: {
 | |
|                 'X-Api-App-Key': '1988591469',
 | |
|                 'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
 | |
|                 'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
 | |
|                 'X-Api-Request-Id': this.generateUUID(),
 | |
|                 'X-Api-Sequence': '-1',
 | |
|                 'Content-Type': 'application/json'
 | |
|             }
 | |
|         };
 | |
|         
 | |
|         this.recordBtn = document.getElementById('startVoiceButton');
 | |
|         this.statusDiv = document.getElementById('status');
 | |
|         this.resultsDiv = document.getElementById('results');
 | |
|         
 | |
|         this.initEventListeners();
 | |
|     }
 | |
|     
 | |
|     initEventListeners() {
 | |
|         this.recordBtn.addEventListener('click', () => {
 | |
|             if (this.isRecording) {
 | |
|                 this.stopRecording();
 | |
|             } else {
 | |
|                 this.startRecording();
 | |
|             }
 | |
|         });
 | |
|     }
 | |
|     
 | |
|     // 生成UUID
 | |
|     generateUUID() {
 | |
|         return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
 | |
|             const r = Math.random() * 16 | 0;
 | |
|             const v = c == 'x' ? r : (r & 0x3 | 0x8);
 | |
|             return v.toString(16);
 | |
|         });
 | |
|     }
 | |
|     
 | |
|     // 计算音频能量(音量)
 | |
|     calculateAudioLevel(audioData) {
 | |
|         let sum = 0;
 | |
|         for (let i = 0; i < audioData.length; i++) {
 | |
|             sum += audioData[i] * audioData[i];
 | |
|         }
 | |
|         return Math.sqrt(sum / audioData.length);
 | |
|     }
 | |
|     
 | |
|     // 语音活动检测
 | |
|     detectVoiceActivity(audioData) {
 | |
|         const audioLevel = this.calculateAudioLevel(audioData);
 | |
|         const currentTime = Date.now();
 | |
|         
 | |
|         if (audioLevel > this.silenceThreshold) {
 | |
|             if (!this.isSpeaking) {
 | |
|                 this.isSpeaking = true;
 | |
|                 this.speechStartTime = currentTime;
 | |
|                 this.audioBuffer = [];
 | |
|                 this.updateStatus('检测到语音,开始录音...', 'speaking');
 | |
|                 console.log('开始说话');
 | |
|             }
 | |
|             
 | |
|             if (this.silenceTimer) {
 | |
|                 clearTimeout(this.silenceTimer);
 | |
|                 this.silenceTimer = null;
 | |
|             }
 | |
|             
 | |
|             return true;
 | |
|         } else {
 | |
|             if (this.isSpeaking && !this.silenceTimer) {
 | |
|                 this.silenceTimer = setTimeout(() => {
 | |
|                     this.onSpeechEnd();
 | |
|                 }, this.silenceTimeout);
 | |
|             }
 | |
|             
 | |
|             return this.isSpeaking;
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     // 语音结束处理
 | |
|     async onSpeechEnd() {
 | |
|         if (this.isSpeaking) {
 | |
|             const speechDuration = Date.now() - this.speechStartTime;
 | |
|             
 | |
|             if (speechDuration >= this.minSpeechDuration) {
 | |
|                 console.log(`语音结束,时长: ${speechDuration}ms`);
 | |
|                 await this.processAudioBuffer();
 | |
|                 // this.updateStatus('语音识别中...', 'processing');
 | |
|                 console.log('语音识别中')
 | |
|             } else {
 | |
|                 console.log('说话时长太短,忽略');
 | |
|                 // this.updateStatus('等待语音输入...', 'ready');
 | |
|                 console.log('等待语音输入...')
 | |
| 
 | |
|             }
 | |
|             
 | |
|             this.isSpeaking = false;
 | |
|             this.speechStartTime = null;
 | |
|             this.audioBuffer = [];
 | |
|         }
 | |
|         
 | |
|         if (this.silenceTimer) {
 | |
|             clearTimeout(this.silenceTimer);
 | |
|             this.silenceTimer = null;
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     // 处理音频缓冲区并发送到API
 | |
|     async processAudioBuffer() {
 | |
|         if (this.audioBuffer.length === 0) {
 | |
|             return;
 | |
|         }
 | |
|         
 | |
|         try {
 | |
|             // 合并所有音频数据
 | |
|             const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
 | |
|             const combinedBuffer = new Float32Array(totalLength);
 | |
|             let offset = 0;
 | |
|             
 | |
|             for (const buffer of this.audioBuffer) {
 | |
|                 combinedBuffer.set(buffer, offset);
 | |
|                 offset += buffer.length;
 | |
|             }
 | |
|             
 | |
|             // 转换为WAV格式并编码为base64
 | |
|             const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
 | |
|             const base64Audio = this.arrayBufferToBase64(wavBuffer);
 | |
|             
 | |
|             // 调用ASR API
 | |
|             await this.callASRAPI(base64Audio);
 | |
|             
 | |
|         } catch (error) {
 | |
|             console.error('处理音频数据失败:', error);
 | |
|             this.updateStatus('识别失败', 'error');
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     // 调用ASR API
 | |
|     async callASRAPI(base64AudioData) {
 | |
|         try {
 | |
|             const requestBody = {
 | |
|                 user: {
 | |
|                     uid: "1988591469"
 | |
|                 },
 | |
|                 audio: {
 | |
|                     data: base64AudioData
 | |
|                 },
 | |
|                 request: {
 | |
|                     model_name: "bigmodel"
 | |
|                 }
 | |
|             };
 | |
|             
 | |
|             const response = await fetch(this.apiConfig.url, {
 | |
|                 method: 'POST',
 | |
|                 headers: this.apiConfig.headers,
 | |
|                 body: JSON.stringify(requestBody)
 | |
|             });
 | |
|             
 | |
|             if (!response.ok) {
 | |
|                 throw new Error(`HTTP error! status: ${response.status}`);
 | |
|             }
 | |
|             
 | |
|             const result = await response.json();
 | |
|             this.handleASRResponse(result);
 | |
|             
 | |
|         } catch (error) {
 | |
|             console.error('ASR API调用失败:', error);
 | |
|             this.updateStatus('API调用失败', 'error');
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     // 处理ASR响应
 | |
|     handleASRResponse(response) {
 | |
|         console.log('ASR响应:', response);
 | |
|         
 | |
|         if (response && response.data && response.data.result) {
 | |
|             ASRTEXT = response.data.result;
 | |
|             // this.displayResult(text);
 | |
|             // this.updateStatus('识别完成', 'completed');
 | |
|             console.log('识别完成')
 | |
|         } else {
 | |
|             console.log('未识别到文字');
 | |
|             // this.updateStatus('未识别到文字', 'ready');
 | |
| 
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     // 显示识别结果
 | |
|     displayResult(text) {
 | |
|         const resultElement = document.createElement('div');
 | |
|         resultElement.className = 'result-item';
 | |
|         resultElement.innerHTML = `
 | |
|             <span class="timestamp">${new Date().toLocaleTimeString()}</span>
 | |
|             <span class="text">${text}</span>
 | |
|         `;
 | |
|         this.resultsDiv.appendChild(resultElement);
 | |
|         this.resultsDiv.scrollTop = this.resultsDiv.scrollHeight;
 | |
|     }
 | |
|     
 | |
|     // 更新状态显示
 | |
|     updateStatus(message, status) {
 | |
|         this.statusDiv.textContent = message;
 | |
|         this.statusDiv.className = `status ${status}`;
 | |
|     }
 | |
|     
 | |
|     // 编码WAV格式
 | |
|     encodeWAV(samples, sampleRate) {
 | |
|         const length = samples.length;
 | |
|         const buffer = new ArrayBuffer(44 + length * 2);
 | |
|         const view = new DataView(buffer);
 | |
|         
 | |
|         // WAV文件头
 | |
|         const writeString = (offset, string) => {
 | |
|             for (let i = 0; i < string.length; i++) {
 | |
|                 view.setUint8(offset + i, string.charCodeAt(i));
 | |
|             }
 | |
|         };
 | |
|         
 | |
|         writeString(0, 'RIFF');
 | |
|         view.setUint32(4, 36 + length * 2, true);
 | |
|         writeString(8, 'WAVE');
 | |
|         writeString(12, 'fmt ');
 | |
|         view.setUint32(16, 16, true);
 | |
|         view.setUint16(20, 1, true);
 | |
|         view.setUint16(22, 1, true);
 | |
|         view.setUint32(24, sampleRate, true);
 | |
|         view.setUint32(28, sampleRate * 2, true);
 | |
|         view.setUint16(32, 2, true);
 | |
|         view.setUint16(34, 16, true);
 | |
|         writeString(36, 'data');
 | |
|         view.setUint32(40, length * 2, true);
 | |
|         
 | |
|         // 写入音频数据
 | |
|         let offset = 44;
 | |
|         for (let i = 0; i < length; i++) {
 | |
|             const sample = Math.max(-1, Math.min(1, samples[i]));
 | |
|             view.setInt16(offset, sample * 0x7FFF, true);
 | |
|             offset += 2;
 | |
|         }
 | |
|         
 | |
|         return buffer;
 | |
|     }
 | |
|     
 | |
|     // ArrayBuffer转Base64
 | |
|     arrayBufferToBase64(buffer) {
 | |
|         let binary = '';
 | |
|         const bytes = new Uint8Array(buffer);
 | |
|         for (let i = 0; i < bytes.byteLength; i++) {
 | |
|             binary += String.fromCharCode(bytes[i]);
 | |
|         }
 | |
|         return btoa(binary);
 | |
|     }
 | |
|     
 | |
|     async startRecording() {
 | |
|         try {
 | |
|             const stream = await navigator.mediaDevices.getUserMedia({
 | |
|                 audio: {
 | |
|                     sampleRate: 16000,
 | |
|                     channelCount: 1,
 | |
|                     echoCancellation: true,
 | |
|                     noiseSuppression: true
 | |
|                 }
 | |
|             });
 | |
|             
 | |
|             this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
 | |
|                 sampleRate: 16000
 | |
|             });
 | |
|             
 | |
|             const source = this.audioContext.createMediaStreamSource(stream);
 | |
|             const processor = this.audioContext.createScriptProcessor(4096, 1, 1);
 | |
|             
 | |
|             processor.onaudioprocess = (event) => {
 | |
|                 const inputBuffer = event.inputBuffer;
 | |
|                 const inputData = inputBuffer.getChannelData(0);
 | |
|                 
 | |
|                 // 语音活动检测
 | |
|                 if (this.detectVoiceActivity(inputData)) {
 | |
|                     // 如果检测到语音活动,缓存音频数据
 | |
|                     this.audioBuffer.push(new Float32Array(inputData));
 | |
|                 }
 | |
|             };
 | |
|             
 | |
|             source.connect(processor);
 | |
|             processor.connect(this.audioContext.destination);
 | |
|             
 | |
|             this.isRecording = true;
 | |
|             this.recordBtn.textContent = '停止录音';
 | |
|             this.recordBtn.className = 'btn recording';
 | |
|             // this.updateStatus('等待语音输入...', 'ready');
 | |
|             
 | |
|         } catch (error) {
 | |
|             console.error('启动录音失败:', error);
 | |
|             // this.updateStatus('录音启动失败', 'error');
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     stopRecording() {
 | |
|         if (this.audioContext) {
 | |
|             this.audioContext.close();
 | |
|             this.audioContext = null;
 | |
|         }
 | |
|         
 | |
|         if (this.silenceTimer) {
 | |
|             clearTimeout(this.silenceTimer);
 | |
|             this.silenceTimer = null;
 | |
|         }
 | |
|         
 | |
|         // 如果正在说话,处理最后的音频
 | |
|         if (this.isSpeaking) {
 | |
|             this.onSpeechEnd();
 | |
|         }
 | |
|         
 | |
|         this.isRecording = false;
 | |
|         this.isSpeaking = false;
 | |
|         this.audioBuffer = [];
 | |
|         
 | |
|         this.recordBtn.textContent = '开始录音';
 | |
|         this.recordBtn.className = 'btn';
 | |
|         console.log('录音已停止');
 | |
|         // this.updateStatus('录音已停止', 'stopped');
 | |
|     }
 | |
| }
 | |
| 
 | |
// Bootstrap: construct the recognizer once the DOM is fully parsed. No
// reference is kept — the instance stays alive through its own DOM listeners.
document.addEventListener('DOMContentLoaded', () => {
    new HttpASRRecognizer();
    console.log('HTTP ASR识别器已初始化');
});