// Audio processing module - advanced audio handling extracted from new_app.js
class AudioProcessor {
    constructor(options = {}) {
        this.audioContext = null;
        this.stream = null;
        this.isRecording = false;
        this.audioChunks = [];

        // VAD (voice activity detection) state
        this.isSpeaking = false;
        this.silenceThreshold = options.silenceThreshold || 0.03;
        this.silenceTimeout = options.silenceTimeout || 1000;
        this.minSpeechDuration = options.minSpeechDuration || 300;
        this.silenceTimer = null;
        this.speechStartTime = null;
        this.audioBuffer = [];
        this.backgroundNoiseLevel = 0;

        // Consecutive-frame detection parameters
        this.consecutiveFramesRequired = 3;
        this.consecutiveFramesCount = 0;   // current run of above-threshold frames
        this.frameBuffer = [];             // pre-speech frame buffer

        // Adaptive threshold calibration state
        this.adaptiveThreshold = options.adaptiveThreshold !== false;
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;

        // API configuration
        // NOTE: hardcoding credentials in client code is unsafe; they should
        // come from a server-side proxy or configuration in production.
        this.apiConfig = {
            url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
            headers: {
                'X-Api-App-Key': '1988591469',
                'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
                'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
                'X-Api-Request-Id': this.generateUUID(),
                'X-Api-Sequence': '-1',
                'Content-Type': 'application/json'
            }
        };

        // Callbacks
        this.onSpeechStart = options.onSpeechStart || (() => {});
        this.onSpeechEnd = options.onSpeechEnd || (() => {});
        this.onRecognitionResult = options.onRecognitionResult || (() => {});
        this.onError = options.onError || (() => {});
        this.onStatusUpdate = options.onStatusUpdate || (() => {});
    }

    // Calibrate the background noise level from the first 100 frames and
    // derive an adaptive silence threshold from it.
    calibrateBackgroundNoise(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        this.noiseCalibrationSamples.push(audioLevel);

        if (this.noiseCalibrationSamples.length >= 100) {
            this.backgroundNoiseLevel =
                this.noiseCalibrationSamples.reduce((a, b) => a + b) /
                this.noiseCalibrationSamples.length;
            const oldThreshold = this.silenceThreshold;
            // 2.5x the noise floor, with a hard minimum threshold
            this.silenceThreshold = Math.max(this.backgroundNoiseLevel * 2.5, 0.005);

            console.log('Background noise calibration complete:');
            console.log(`- mean noise level: ${this.backgroundNoiseLevel.toFixed(4)}`);
            console.log(`- old threshold: ${oldThreshold.toFixed(4)}`);
            console.log(`- new threshold: ${this.silenceThreshold.toFixed(4)}`);

            this.noiseCalibrationSamples = [];
            this.isCalibrated = true;  // was never set in the original; startRecording relies on it
            this.onStatusUpdate('Background noise calibrated, waiting for speech...', 'ready');
        }
    }

    // Audio energy estimate: a blend of RMS and peak amplitude
    calculateAudioLevel(audioData) {
        let sum = 0;
        let peak = 0;
        for (let i = 0; i < audioData.length; i++) {
            const sample = Math.abs(audioData[i]);
            sum += sample * sample;
            peak = Math.max(peak, sample);
        }
        const rms = Math.sqrt(sum / audioData.length);
        // Mixing in the peak makes short transients easier to catch than RMS alone
        return rms * 0.7 + peak * 0.3;
    }

    // Restart background noise calibration
    recalibrateBackground() {
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;
        this.onStatusUpdate('Recalibrating background noise...', 'calibrating');
        console.log('Background noise recalibration started');
    }

    // Generate an RFC 4122 version 4 UUID
    generateUUID() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {
            const r = Math.random() * 16 | 0;
            const v = c === 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }
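
    // VAD overview: a frame only counts as speech once
    // `consecutiveFramesRequired` successive frames exceed the threshold; the
    // frames seen while waiting are kept in `frameBuffer` so the start of the
    // utterance is not clipped. Speech ends after `silenceTimeout` ms of
    // continuous sub-threshold audio.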
    // Voice activity detection with consecutive-frame hysteresis
    detectVoiceActivity(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        const currentTime = Date.now();

        if (audioLevel > this.silenceThreshold) {
            this.consecutiveFramesCount++;

            // Already speaking: any loud frame keeps the recording alive
            // (the original re-required consecutive frames after a brief dip,
            // dropping audio mid-utterance)
            if (this.isSpeaking) {
                if (this.silenceTimer) {
                    clearTimeout(this.silenceTimer);
                    this.silenceTimer = null;
                }
                return true;
            }

            // Require several consecutive loud frames before starting
            if (this.consecutiveFramesCount >= this.consecutiveFramesRequired) {
                this.isSpeaking = true;
                this.speechStartTime = currentTime;
                this.audioBuffer = [...this.frameBuffer]; // include buffered lead-in frames
                this.onSpeechStart();
                this.onStatusUpdate('Speech detected, recording...', 'speaking');
                console.log(`Speech started - level: ${audioLevel.toFixed(4)}, consecutive frames: ${this.consecutiveFramesCount}`);
                return true;
            }

            // Not enough consecutive frames yet: buffer the audio
            this.frameBuffer.push(new Float32Array(audioData));
            if (this.frameBuffer.length > this.consecutiveFramesRequired) {
                this.frameBuffer.shift(); // keep the buffer bounded
            }
            return false;
        }

        // Below threshold: reset the consecutive-frame counter
        this.consecutiveFramesCount = 0;
        this.frameBuffer = [];
        if (this.isSpeaking && !this.silenceTimer) {
            this.silenceTimer = setTimeout(() => {
                this.handleSpeechEnd();
            }, this.silenceTimeout);
        }
        return this.isSpeaking;
    }

    // End-of-speech handling
    async handleSpeechEnd() {
        if (this.isSpeaking) {
            const speechDuration = Date.now() - this.speechStartTime;

            if (speechDuration >= this.minSpeechDuration) {
                console.log(`Speech ended, duration: ${speechDuration}ms`);
                console.log('currentVideoTag:', window.webrtcApp?.currentVideoTag);
                // Only run recognition while the app's current video tag is
                // "default"; the optional chaining guards against the host app
                // object being absent.
                if (window.webrtcApp?.currentVideoTag === 'default') {
                    this.onStatusUpdate('Recognizing speech...', 'processing');
                    await this.processAudioBuffer();
                }
            } else {
                console.log('Speech too short, ignoring');
                this.onStatusUpdate('Waiting for speech...', 'ready');
            }

            this.isSpeaking = false;
            this.speechStartTime = null;
            this.audioBuffer = [];
            this.onSpeechEnd();
        }

        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
    }

    // Merge the buffered audio, encode it, and send it to the ASR API
    async processAudioBuffer() {
        if (this.audioBuffer.length === 0) {
            return;
        }

        try {
            // Concatenate all buffered Float32 chunks
            const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
            const combinedBuffer = new Float32Array(totalLength);
            let offset = 0;
            for (const buffer of this.audioBuffer) {
                combinedBuffer.set(buffer, offset);
                offset += buffer.length;
            }

            // Encode as 16 kHz WAV and base64 it for the JSON request body
            const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
            const base64Audio = this.arrayBufferToBase64(wavBuffer);

            await this.callASRAPI(base64Audio);
        } catch (error) {
            console.error('Failed to process audio data:', error);
            this.onError('Failed to process audio data: ' + error.message);
        }
    }

    // Call the speech recognition API
    async callASRAPI(base64AudioData) {
        try {
            const requestBody = {
                user: { uid: '1988591469' },
                audio: { data: base64AudioData },
                request: { model_name: 'bigmodel' }
            };

            const response = await fetch(this.apiConfig.url, {
                method: 'POST',
                headers: this.apiConfig.headers,
                body: JSON.stringify(requestBody)
            });

            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }

            const result = await response.json();
            this.handleASRResponse(result);
        } catch (error) {
            console.error('ASR API call failed:', error);
            this.onError('ASR API call failed: ' + error.message);
        }
    }
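
    // The flash recognition endpoint is expected to answer with
    // { result: { text: "..." } }; any other shape is treated as
    // "nothing recognized".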
    handleASRResponse(response) {
        console.log('ASR response:', response);

        if (response && response.result) {
            const recognizedText = response.result.text;
            this.onRecognitionResult(recognizedText);
            this.onStatusUpdate('Recognition complete', 'completed');
        } else {
            console.log('No text recognized');
            this.onStatusUpdate('No text recognized', 'ready');
        }
    }

    // Encode Float32 samples as a 16-bit mono PCM WAV file
    encodeWAV(samples, sampleRate) {
        const length = samples.length;
        const buffer = new ArrayBuffer(44 + length * 2);
        const view = new DataView(buffer);

        // 44-byte RIFF/WAVE header
        const writeString = (offset, string) => {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        };

        writeString(0, 'RIFF');
        view.setUint32(4, 36 + length * 2, true);   // RIFF chunk size
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true);               // fmt chunk size
        view.setUint16(20, 1, true);                // audio format: PCM
        view.setUint16(22, 1, true);                // channels: mono
        view.setUint32(24, sampleRate, true);       // sample rate
        view.setUint32(28, sampleRate * 2, true);   // byte rate
        view.setUint16(32, 2, true);                // block align
        view.setUint16(34, 16, true);               // bits per sample
        writeString(36, 'data');
        view.setUint32(40, length * 2, true);       // data chunk size

        // Clamp samples to [-1, 1] and write as little-endian int16
        let offset = 44;
        for (let i = 0; i < length; i++) {
            const sample = Math.max(-1, Math.min(1, samples[i]));
            view.setInt16(offset, sample * 0x7FFF, true);
            offset += 2;
        }

        return buffer;
    }

    // ArrayBuffer -> Base64
    arrayBufferToBase64(buffer) {
        let binary = '';
        const bytes = new Uint8Array(buffer);
        for (let i = 0; i < bytes.byteLength; i++) {
            binary += String.fromCharCode(bytes[i]);
        }
        return btoa(binary);
    }

    // Start recording
    async startRecording(existingStream = null) {
        try {
            // Reuse an externally provided stream when available
            if (existingStream) {
                this.stream = existingStream;
                console.log('Using externally provided audio stream');
            } else {
                this.stream = await navigator.mediaDevices.getUserMedia({
                    audio: {
                        sampleRate: 16000,
                        channelCount: 1,
                        echoCancellation: true,
                        noiseSuppression: true
                    }
                });
                console.log('Acquired a new audio stream');
            }

            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });

            const source = this.audioContext.createMediaStreamSource(this.stream);
            // ScriptProcessorNode is deprecated in favor of AudioWorklet, but it is
            // kept here for its simplicity and broad browser support.
            const processor = this.audioContext.createScriptProcessor(4096, 1, 1);

            processor.onaudioprocess = (event) => {
                const inputData = event.inputBuffer.getChannelData(0);

                // Feed the first frames into noise calibration before running VAD
                // (the original declared calibrateBackgroundNoise but never called it)
                if (this.adaptiveThreshold && !this.isCalibrated) {
                    this.calibrateBackgroundNoise(inputData);
                    return;
                }

                // Buffer the frame while voice activity is detected
                if (this.detectVoiceActivity(inputData)) {
                    this.audioBuffer.push(new Float32Array(inputData));
                }
            };

            source.connect(processor);
            processor.connect(this.audioContext.destination);

            // Keep references for cleanup
            this.processor = processor;
            this.source = source;
            this.isRecording = true;

            if (this.adaptiveThreshold && !this.isCalibrated) {
                this.onStatusUpdate('Calibrating background noise, please stay quiet...', 'calibrating');
            } else {
                this.onStatusUpdate('Waiting for speech...', 'ready');
            }

            return true;
        } catch (error) {
            console.error('Failed to start recording:', error);
            this.onError('Failed to start recording: ' + error.message);
            return false;
        }
    }

    // Stop recording
    stopRecording() {
        console.log('Stopping recording...');

        // Disconnect audio nodes
        if (this.source) {
            this.source.disconnect();
            this.source = null;
        }
        if (this.processor) {
            this.processor.disconnect();
            this.processor = null;
        }

        // Stop all audio tracks
        if (this.stream) {
            this.stream.getTracks().forEach(track => {
                track.stop();
                console.log(`Stopped audio track: ${track.label}`);
            });
            this.stream = null;
        }

        if (this.audioContext) {
            this.audioContext.close().then(() => {
                console.log('AudioContext closed');
            }).catch(err => {
                console.error('Error closing AudioContext:', err);
            });
            this.audioContext = null;
        }
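
        // Cancel any pending end-of-speech timer so it cannot fire after teardown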
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }

        // Flush any in-progress speech before resetting state
        if (this.isSpeaking) {
            this.handleSpeechEnd();
        }

        // Reset all state
        this.isRecording = false;
        this.isSpeaking = false;
        this.audioBuffer = [];
        this.audioChunks = [];
        this.consecutiveFramesCount = 0;
        this.frameBuffer = [];

        // Reset calibration so the next start recalibrates from scratch
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;

        this.onStatusUpdate('Recording fully stopped', 'stopped');
        console.log('Recording fully stopped, all resources released');
    }

    // Current recording status
    getRecordingStatus() {
        return {
            isRecording: this.isRecording,
            isSpeaking: this.isSpeaking,
            hasAudioContext: !!this.audioContext
        };
    }
}

// Module export
export { AudioProcessor };
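
/*
 * Example usage — an illustrative sketch, not part of the module. The
 * callback bodies, the #status element, and the start/stop buttons are
 * hypothetical; window.webrtcApp must be provided by the host app (see
 * handleSpeechEnd) for recognition to run.
 *
 *   import { AudioProcessor } from './audio_processor.js';
 *
 *   const processor = new AudioProcessor({
 *       silenceTimeout: 800,  // end speech after 800 ms of silence
 *       onRecognitionResult: (text) => console.log('Recognized:', text),
 *       onStatusUpdate: (message, state) => {
 *           document.querySelector('#status').textContent = message;
 *       },
 *       onError: (message) => console.error(message)
 *   });
 *
 *   // Start on a user gesture so getUserMedia/AudioContext are permitted
 *   startButton.addEventListener('click', () => processor.startRecording());
 *   stopButton.addEventListener('click', () => processor.stopRecording());
 */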