let ASRTEXT = '';

class HttpASRRecognizer {
    constructor() {
        this.mediaRecorder = null;   // currently unused
        this.audioContext = null;
        this.mediaStream = null;     // microphone stream, kept so it can be released on stop
        this.isRecording = false;
        this.audioChunks = [];       // currently unused

        // VAD-related state
        this.isSpeaking = false;
        this.silenceThreshold = 0.01;   // RMS level below which a frame counts as silence
        this.silenceTimeout = 1000;     // ms of silence that ends an utterance
        this.minSpeechDuration = 300;   // ms; shorter utterances are discarded
        this.silenceTimer = null;
        this.speechStartTime = null;
        this.audioBuffer = [];

        // API configuration
        this.apiConfig = {
            url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
            headers: {
                'X-Api-App-Key': '1988591469',
                'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
                'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
                'X-Api-Request-Id': this.generateUUID(),
                'X-Api-Sequence': '-1',
                'Content-Type': 'application/json'
            }
        };

        // DOM elements
        this.recordBtn = document.getElementById('startVoiceButton');
        this.statusDiv = document.getElementById('status');
        this.resultsDiv = document.getElementById('results');

        this.initEventListeners();
    }

    initEventListeners() {
        // The record button toggles between starting and stopping recording
        this.recordBtn.addEventListener('click', () => {
            if (this.isRecording) {
                this.stopRecording();
            } else {
                this.startRecording();
            }
        });
    }

    // Generate a version-4 UUID (used as the X-Api-Request-Id header)
    generateUUID() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function (c) {
            const r = Math.random() * 16 | 0;
            const v = c === 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }

    // Compute the RMS energy (volume) of an audio frame
    calculateAudioLevel(audioData) {
        let sum = 0;
        for (let i = 0; i < audioData.length; i++) {
            sum += audioData[i] * audioData[i];
        }
        return Math.sqrt(sum / audioData.length);
    }

    // Energy-based voice activity detection; returns true while audio should be buffered
    detectVoiceActivity(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        const currentTime = Date.now();

        if (audioLevel > this.silenceThreshold) {
            if (!this.isSpeaking) {
                // Transition from silence to speech
                this.isSpeaking = true;
                this.speechStartTime = currentTime;
                this.audioBuffer = [];
                this.updateStatus('Speech detected, recording...', 'speaking');
                console.log('Speech started');
            }
            // Any speech cancels a pending end-of-speech timer
            if (this.silenceTimer) {
                clearTimeout(this.silenceTimer);
                this.silenceTimer = null;
            }
            return true;
        } else {
            // Silence: if we were speaking, start the end-of-speech countdown
            if (this.isSpeaking && !this.silenceTimer) {
                this.silenceTimer = setTimeout(() => {
                    this.onSpeechEnd();
                }, this.silenceTimeout);
            }
            return this.isSpeaking;
        }
    }

    // Handle the end of an utterance
    async onSpeechEnd() {
        if (this.isSpeaking) {
            const speechDuration = Date.now() - this.speechStartTime;
            if (speechDuration >= this.minSpeechDuration) {
                console.log(`Speech ended, duration: ${speechDuration} ms`);
                await this.processAudioBuffer();
                // this.updateStatus('Recognizing speech...', 'processing');
                console.log('Recognizing speech...');
            } else {
                console.log('Speech segment too short, ignoring');
                // this.updateStatus('Waiting for speech input...', 'ready');
                console.log('Waiting for speech input...');
            }
            this.isSpeaking = false;
            this.speechStartTime = null;
            this.audioBuffer = [];
        }
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
    }

    // Merge the buffered audio, encode it, and send it to the API
    async processAudioBuffer() {
        if (this.audioBuffer.length === 0) {
            return;
        }
        try {
            // Concatenate all buffered audio frames into one Float32Array
            const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
            const combinedBuffer = new Float32Array(totalLength);
            let offset = 0;
            for (const buffer of this.audioBuffer) {
                combinedBuffer.set(buffer, offset);
                offset += buffer.length;
            }

            // Convert to WAV and encode as base64
            const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
            const base64Audio = this.arrayBufferToBase64(wavBuffer);

            // Call the ASR API
            await this.callASRAPI(base64Audio);
        } catch (error) {
            console.error('Failed to process audio data:', error);
            this.updateStatus('Recognition failed', 'error');
        }
    }

    // Call the ASR API with the base64-encoded WAV data
    async callASRAPI(base64AudioData) {
        try {
            const requestBody = {
                user: {
                    uid: "1988591469"
                },
                audio: {
                    data: base64AudioData
                },
                request: {
                    model_name: "bigmodel"
                }
            };

            const response = await fetch(this.apiConfig.url, {
                method: 'POST',
                headers: this.apiConfig.headers,
                body: JSON.stringify(requestBody)
            });

            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }

            const result = await response.json();
            this.handleASRResponse(result);
        } catch (error) {
            console.error('ASR API call failed:', error);
            this.updateStatus('API call failed', 'error');
        }
    }

    // Handle the ASR response and publish the recognized text
    handleASRResponse(response) {
        console.log('ASR response:', response);
        if (response && response.data && response.data.result) {
            ASRTEXT = response.data.result;
            // this.displayResult(ASRTEXT);
            // this.updateStatus('Recognition complete', 'completed');
            console.log('Recognition complete');
        } else {
            console.log('No text recognized');
            // this.updateStatus('No text recognized', 'ready');
        }
    }

    // Display a recognition result in the results panel
    displayResult(text) {
        const resultElement = document.createElement('div');
        resultElement.className = 'result-item';
        // Use textContent so recognized text cannot be interpreted as HTML
        resultElement.textContent = `${new Date().toLocaleTimeString()} ${text}`;
        this.resultsDiv.appendChild(resultElement);
        this.resultsDiv.scrollTop = this.resultsDiv.scrollHeight;
    }

    // Update the status display
    updateStatus(message, status) {
        this.statusDiv.textContent = message;
        this.statusDiv.className = `status ${status}`;
    }

    // Encode Float32 samples as a mono 16-bit PCM WAV file
    encodeWAV(samples, sampleRate) {
        const length = samples.length;
        const buffer = new ArrayBuffer(44 + length * 2);
        const view = new DataView(buffer);

        // WAV header
        const writeString = (offset, string) => {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        };

        writeString(0, 'RIFF');
        view.setUint32(4, 36 + length * 2, true);   // RIFF chunk size
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true);               // fmt chunk size
        view.setUint16(20, 1, true);                // audio format: PCM
        view.setUint16(22, 1, true);                // channels: mono
        view.setUint32(24, sampleRate, true);       // sample rate
        view.setUint32(28, sampleRate * 2, true);   // byte rate
        view.setUint16(32, 2, true);                // block align
        view.setUint16(34, 16, true);               // bits per sample
        writeString(36, 'data');
        view.setUint32(40, length * 2, true);       // data chunk size

        // Write the audio samples as little-endian 16-bit integers
        let offset = 44;
        for (let i = 0; i < length; i++) {
            const sample = Math.max(-1, Math.min(1, samples[i]));
            view.setInt16(offset, sample * 0x7FFF, true);
            offset += 2;
        }

        return buffer;
    }

    // Convert an ArrayBuffer to a base64 string
    arrayBufferToBase64(buffer) {
        let binary = '';
        const bytes = new Uint8Array(buffer);
        for (let i = 0; i < bytes.byteLength; i++) {
            binary += String.fromCharCode(bytes[i]);
        }
        return btoa(binary);
    }

    async startRecording() {
        try {
            const stream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    sampleRate: 16000,
                    channelCount: 1,
                    echoCancellation: true,
                    noiseSuppression: true
                }
            });
            this.mediaStream = stream;   // keep a reference so the microphone can be released later

            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });

            const source = this.audioContext.createMediaStreamSource(stream);
            const processor = this.audioContext.createScriptProcessor(4096, 1, 1);

            processor.onaudioprocess = (event) => {
                const inputBuffer = event.inputBuffer;
                const inputData = inputBuffer.getChannelData(0);

                // Voice activity detection: buffer the frame while speech is active
                if (this.detectVoiceActivity(inputData)) {
                    this.audioBuffer.push(new Float32Array(inputData));
                }
            };

            source.connect(processor);
            processor.connect(this.audioContext.destination);

            this.isRecording = true;
            this.recordBtn.textContent = 'Stop Recording';
            this.recordBtn.className = 'btn recording';
            // this.updateStatus('Waiting for speech input...', 'ready');
        } catch (error) {
            console.error('Failed to start recording:', error);
            // this.updateStatus('Failed to start recording', 'error');
        }
    }

    stopRecording() {
        if (this.audioContext) {
            this.audioContext.close();
            this.audioContext = null;
        }

        // Release the microphone; without this the input stream stays active after stopping
        if (this.mediaStream) {
            this.mediaStream.getTracks().forEach((track) => track.stop());
            this.mediaStream = null;
        }

        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }

        // If speech is still in progress, flush the last buffered audio
        if (this.isSpeaking) {
            this.onSpeechEnd();
        }

        this.isRecording = false;
        this.isSpeaking = false;
        this.audioBuffer = [];
        this.recordBtn.textContent = 'Start Recording';
        this.recordBtn.className = 'btn';

        console.log('Recording stopped');
        // this.updateStatus('Recording stopped', 'stopped');
    }
}

// Initialize the application
document.addEventListener('DOMContentLoaded', () => {
    const asrRecognizer = new HttpASRRecognizer();
    console.log('HTTP ASR recognizer initialized');
});
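// Usage sketch (an assumption, not part of the original code): the recognizer publishes
// its latest transcript through the module-level ASRTEXT variable rather than a callback,
// so another script on the page would have to poll it. The 200 ms interval and the
// handleTranscript() function below are hypothetical placeholders illustrating one way
// a consumer could pick up new results; they are kept commented out so this file's
// runtime behavior is unchanged.
//
//     let lastSeenTranscript = '';
//     setInterval(() => {
//         if (ASRTEXT && ASRTEXT !== lastSeenTranscript) {
//             lastSeenTranscript = ASRTEXT;
//             handleTranscript(lastSeenTranscript); // hypothetical downstream handler
//         }
//     }, 200);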