diff --git a/src/audio_processor.js b/src/audio_processor.js index f27155c..e8b4cb4 100644 --- a/src/audio_processor.js +++ b/src/audio_processor.js @@ -8,12 +8,20 @@ class AudioProcessor { // VAD相关属性 this.isSpeaking = false; - this.silenceThreshold = options.silenceThreshold || 0.01; + this.silenceThreshold = options.silenceThreshold || 0.03; this.silenceTimeout = options.silenceTimeout || 1000; this.minSpeechDuration = options.minSpeechDuration || 300; this.silenceTimer = null; this.speechStartTime = null; this.audioBuffer = []; + this.backgroundNoiseLevel = 0; + // 添加连续性检测参数 + this.consecutiveFramesRequired = 3; + this.consecutiveFramesCount = 0; // 当前连续帧计数 + this.frameBuffer = []; // 帧缓冲区 + this.adaptiveThreshold = options.adaptiveThreshold !== false; + this.noiseCalibrationSamples = []; + this.isCalibrated = false; // 添加校准状态标志 // API配置 this.apiConfig = { @@ -35,6 +43,49 @@ class AudioProcessor { this.onError = options.onError || (() => {}); this.onStatusUpdate = options.onStatusUpdate || (() => {}); } + + // 添加背景噪音校准方法 + // 改进背景噪音校准方法,添加更多日志 + calibrateBackgroundNoise(audioData) { + const audioLevel = this.calculateAudioLevel(audioData); + this.noiseCalibrationSamples.push(audioLevel); + + if (this.noiseCalibrationSamples.length >= 100) { + this.backgroundNoiseLevel = this.noiseCalibrationSamples.reduce((a, b) => a + b) / this.noiseCalibrationSamples.length; + const oldThreshold = this.silenceThreshold; + this.silenceThreshold = Math.max(this.backgroundNoiseLevel * 2.5, 0.005); // 设置最小阈值 + + console.log(`背景噪音校准完成:`); + console.log(`- 平均背景噪音: ${this.backgroundNoiseLevel.toFixed(4)}`); + console.log(`- 旧阈值: ${oldThreshold.toFixed(4)}`); + console.log(`- 新阈值: ${this.silenceThreshold.toFixed(4)}`); + + this.noiseCalibrationSamples = []; + this.onStatusUpdate('背景噪音校准完成,等待语音输入...', 'ready'); + } + } + + // 改进音频能量计算 + calculateAudioLevel(audioData) { + let sum = 0; + let peak = 0; + for (let i = 0; i < audioData.length; i++) { + const sample = Math.abs(audioData[i]); + sum += sample * sample; + peak = Math.max(peak, sample); + } + const rms = Math.sqrt(sum / audioData.length); + // 结合RMS和峰值进行更准确的检测 + return rms * 0.7 + peak * 0.3; + } + + // 重新校准背景噪音 + recalibrateBackground() { + this.noiseCalibrationSamples = []; + this.isCalibrated = false; + this.onStatusUpdate('开始重新校准背景噪音...', 'calibrating'); + console.log('开始重新校准背景噪音'); + } // 生成UUID generateUUID() { @@ -46,36 +97,54 @@ class AudioProcessor { } // 计算音频能量(音量) - calculateAudioLevel(audioData) { - let sum = 0; - for (let i = 0; i < audioData.length; i++) { - sum += audioData[i] * audioData[i]; - } - return Math.sqrt(sum / audioData.length); - } + // calculateAudioLevel(audioData) { + // let sum = 0; + // for (let i = 0; i < audioData.length; i++) { + // sum += audioData[i] * audioData[i]; + // } + // return Math.sqrt(sum / audioData.length); + // } - // 语音活动检测 + // 修改语音活动检测方法 + // 改进语音活动检测 detectVoiceActivity(audioData) { const audioLevel = this.calculateAudioLevel(audioData); const currentTime = Date.now(); + // 连续性检测 if (audioLevel > this.silenceThreshold) { - if (!this.isSpeaking) { - this.isSpeaking = true; - this.speechStartTime = currentTime; - this.audioBuffer = []; - this.onSpeechStart(); - this.onStatusUpdate('检测到语音,开始录音...', 'speaking'); - console.log('开始说话'); - } + this.consecutiveFramesCount++; - if (this.silenceTimer) { - clearTimeout(this.silenceTimer); - this.silenceTimer = null; + // 需要连续几帧都超过阈值才开始录音 + if (this.consecutiveFramesCount >= this.consecutiveFramesRequired) { + if (!this.isSpeaking) { + this.isSpeaking = true; + this.speechStartTime = currentTime; + this.audioBuffer = [...this.frameBuffer]; // 包含之前的帧 + this.onSpeechStart(); + this.onStatusUpdate('检测到语音,开始录音...', 'speaking'); + console.log(`开始说话 - 音量: ${audioLevel.toFixed(4)}, 连续帧: ${this.consecutiveFramesCount}`); + } + + if (this.silenceTimer) { + clearTimeout(this.silenceTimer); + this.silenceTimer = null; + } + + return true; + } else { + // 还未达到连续帧要求,缓存音频数据 + this.frameBuffer.push(new Float32Array(audioData)); + if (this.frameBuffer.length > this.consecutiveFramesRequired) { + this.frameBuffer.shift(); // 保持缓冲区大小 + } + return false; } - - return true; } else { + // 重置连续帧计数 + this.consecutiveFramesCount = 0; + this.frameBuffer = []; + if (this.isSpeaking && !this.silenceTimer) { this.silenceTimer = setTimeout(() => { this.handleSpeechEnd(); @@ -276,6 +345,11 @@ class AudioProcessor { this.isRecording = true; this.onStatusUpdate('等待语音输入...', 'ready'); + + // 在startRecording方法的最后添加 + if (this.adaptiveThreshold && this.noiseCalibrationSamples.length === 0) { + this.onStatusUpdate('正在校准背景噪音,请保持安静...', 'calibrating'); + } return true; @@ -322,4 +396,4 @@ class AudioProcessor { } // 导出模块 -export { AudioProcessor }; \ No newline at end of file +export { AudioProcessor };