// Audio processing module - advanced audio handling extracted from new_app.js
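
/**
 * AudioProcessor wraps microphone capture, simple energy-based voice activity
 * detection (VAD), and submission of detected speech segments to the ASR API.
 *
 * Options (all optional):
 *   silenceThreshold   - initial energy threshold for speech detection (default 0.03)
 *   silenceTimeout     - ms of silence before a speech segment is ended (default 1000)
 *   minSpeechDuration  - minimum segment length in ms to be recognized (default 300)
 *   adaptiveThreshold  - adapt the threshold to measured background noise (default true)
 *   onSpeechStart, onSpeechEnd, onRecognitionResult, onError, onStatusUpdate - callbacks
 */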
class AudioProcessor {

    constructor(options = {}) {
        this.audioContext = null;
        this.stream = null;
        this.isRecording = false;
        this.audioChunks = [];

        // VAD (voice activity detection) state
        this.isSpeaking = false;
        this.silenceThreshold = options.silenceThreshold || 0.03;
        this.silenceTimeout = options.silenceTimeout || 1000;
        this.minSpeechDuration = options.minSpeechDuration || 300;
        this.silenceTimer = null;
        this.speechStartTime = null;
        this.audioBuffer = [];
        this.backgroundNoiseLevel = 0;

        // Continuity detection parameters
        this.consecutiveFramesRequired = 3;
        this.consecutiveFramesCount = 0;   // current count of consecutive frames above threshold
        this.frameBuffer = [];             // buffer of recent frames
        this.adaptiveThreshold = options.adaptiveThreshold !== false;
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;         // background-noise calibration state

        // ASR API configuration
        this.apiConfig = {
            url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
            headers: {
                'X-Api-App-Key': '1988591469',
                'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
                'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
                'X-Api-Request-Id': this.generateUUID(),
                'X-Api-Sequence': '-1',
                'Content-Type': 'application/json'
            }
        };

        // Callbacks
        this.onSpeechStart = options.onSpeechStart || (() => {});
        this.onSpeechEnd = options.onSpeechEnd || (() => {});
        this.onRecognitionResult = options.onRecognitionResult || (() => {});
        this.onError = options.onError || (() => {});
        this.onStatusUpdate = options.onStatusUpdate || (() => {});
    }

    // Calibrate the background noise level and derive an adaptive silence threshold
    calibrateBackgroundNoise(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        this.noiseCalibrationSamples.push(audioLevel);

        if (this.noiseCalibrationSamples.length >= 100) {
            this.backgroundNoiseLevel = this.noiseCalibrationSamples.reduce((a, b) => a + b) / this.noiseCalibrationSamples.length;
            const oldThreshold = this.silenceThreshold;
            this.silenceThreshold = Math.max(this.backgroundNoiseLevel * 2.5, 0.005); // enforce a minimum threshold

            console.log('Background noise calibration complete:');
            console.log(`- average background noise: ${this.backgroundNoiseLevel.toFixed(4)}`);
            console.log(`- old threshold: ${oldThreshold.toFixed(4)}`);
            console.log(`- new threshold: ${this.silenceThreshold.toFixed(4)}`);

            this.noiseCalibrationSamples = [];
            this.isCalibrated = true; // mark calibration as complete
            this.onStatusUpdate('Background noise calibration complete, waiting for speech...', 'ready');
        }
    }

    // Compute the audio level, combining RMS energy with the peak amplitude
    calculateAudioLevel(audioData) {
        let sum = 0;
        let peak = 0;
        for (let i = 0; i < audioData.length; i++) {
            const sample = Math.abs(audioData[i]);
            sum += sample * sample;
            peak = Math.max(peak, sample);
        }
        const rms = Math.sqrt(sum / audioData.length);
        // Blend RMS and peak for a more robust level estimate
        return rms * 0.7 + peak * 0.3;
    }

    // Re-run the background noise calibration
    recalibrateBackground() {
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;
        this.onStatusUpdate('Recalibrating background noise...', 'calibrating');
        console.log('Restarting background noise calibration');
    }

    // Generate an RFC 4122 style (version 4) UUID for the request ID
    generateUUID() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
            const r = Math.random() * 16 | 0;
            const v = c === 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }

    // Voice activity detection with consecutive-frame confirmation
    detectVoiceActivity(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        const currentTime = Date.now();

        // Consecutive-frame check
        if (audioLevel > this.silenceThreshold) {
            this.consecutiveFramesCount++;

            // Only treat this as speech once several consecutive frames exceed the threshold
            if (this.consecutiveFramesCount >= this.consecutiveFramesRequired) {
                if (!this.isSpeaking) {
                    this.isSpeaking = true;
                    this.speechStartTime = currentTime;
                    this.audioBuffer = [...this.frameBuffer]; // include the buffered lead-in frames
                    this.onSpeechStart();
                    this.onStatusUpdate('Speech detected, recording...', 'speaking');
                    console.log(`Speech started - level: ${audioLevel.toFixed(4)}, consecutive frames: ${this.consecutiveFramesCount}`);
                }

                if (this.silenceTimer) {
                    clearTimeout(this.silenceTimer);
                    this.silenceTimer = null;
                }

                return true;
            } else {
                // Not enough consecutive frames yet; buffer the audio data
                this.frameBuffer.push(new Float32Array(audioData));
                if (this.frameBuffer.length > this.consecutiveFramesRequired) {
                    this.frameBuffer.shift(); // keep the buffer bounded
                }
                return false;
            }
        } else {
            // Reset the consecutive-frame counter
            this.consecutiveFramesCount = 0;
            this.frameBuffer = [];

            if (this.isSpeaking && !this.silenceTimer) {
                this.silenceTimer = setTimeout(() => {
                    this.handleSpeechEnd();
                }, this.silenceTimeout);
            }

            return this.isSpeaking;
        }
    }

    // Handle the end of a speech segment
    async handleSpeechEnd() {
        if (this.isSpeaking) {
            const speechDuration = Date.now() - this.speechStartTime;

            if (speechDuration >= this.minSpeechDuration) {
                console.log(`Speech ended, duration: ${speechDuration}ms`);
                console.log(window.webrtcApp.currentVideoTag);
                if (window.webrtcApp.currentVideoTag === "default") {
                    await this.processAudioBuffer();
                }
                this.onStatusUpdate('Recognizing speech...', 'processing');
            } else {
                console.log('Speech segment too short, ignoring');
                this.onStatusUpdate('Waiting for speech input...', 'ready');
            }

            this.isSpeaking = false;
            this.speechStartTime = null;
            this.audioBuffer = [];
            this.onSpeechEnd();
        }

        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
    }

    // Merge the buffered audio and send it to the ASR API
    async processAudioBuffer() {
        if (this.audioBuffer.length === 0) {
            return;
        }

        try {
            // Concatenate all buffered audio frames
            const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
            const combinedBuffer = new Float32Array(totalLength);
            let offset = 0;

            for (const buffer of this.audioBuffer) {
                combinedBuffer.set(buffer, offset);
                offset += buffer.length;
            }

            // Encode as WAV and convert to base64
            const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
            const base64Audio = this.arrayBufferToBase64(wavBuffer);

            // Call the ASR API
            await this.callASRAPI(base64Audio);

        } catch (error) {
            console.error('Failed to process audio data:', error);
            this.onError('Failed to process audio data: ' + error.message);
        }
    }

    // Call the ASR API
    async callASRAPI(base64AudioData) {
        try {
            const requestBody = {
                user: {
                    uid: "1988591469"
                },
                audio: {
                    data: base64AudioData
                },
                request: {
                    model_name: "bigmodel"
                }
            };

            const response = await fetch(this.apiConfig.url, {
                method: 'POST',
                headers: this.apiConfig.headers,
                body: JSON.stringify(requestBody)
            });

            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }

            const result = await response.json();
            this.handleASRResponse(result);

        } catch (error) {
            console.error('ASR API call failed:', error);
            this.onError('ASR API call failed: ' + error.message);
        }
    }

    // Handle the ASR response
    handleASRResponse(response) {
        console.log('ASR response:', response);

        if (response && response.result) {
            const recognizedText = response.result.text;
            this.onRecognitionResult(recognizedText);
            this.onStatusUpdate('Recognition complete', 'completed');
        } else {
            console.log('No text recognized');
            this.onStatusUpdate('No text recognized', 'ready');
        }
    }

    // Encode mono float samples as a 16-bit PCM WAV file
    encodeWAV(samples, sampleRate) {
        const length = samples.length;
        const buffer = new ArrayBuffer(44 + length * 2);
        const view = new DataView(buffer);

        // Helper for writing ASCII strings into the header
        const writeString = (offset, string) => {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        };

        // RIFF/WAV header
        writeString(0, 'RIFF');
        view.setUint32(4, 36 + length * 2, true);   // file size minus 8 bytes
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true);               // fmt chunk size
        view.setUint16(20, 1, true);                // audio format: PCM
        view.setUint16(22, 1, true);                // channels: mono
        view.setUint32(24, sampleRate, true);       // sample rate
        view.setUint32(28, sampleRate * 2, true);   // byte rate (sampleRate * channels * bytesPerSample)
        view.setUint16(32, 2, true);                // block align
        view.setUint16(34, 16, true);               // bits per sample
        writeString(36, 'data');
        view.setUint32(40, length * 2, true);       // data chunk size

        // Write the audio samples as little-endian 16-bit PCM
        let offset = 44;
        for (let i = 0; i < length; i++) {
            const sample = Math.max(-1, Math.min(1, samples[i]));
            view.setInt16(offset, sample * 0x7FFF, true);
            offset += 2;
        }

        return buffer;
    }

    // Convert an ArrayBuffer to a base64 string
    arrayBufferToBase64(buffer) {
        let binary = '';
        const bytes = new Uint8Array(buffer);
        for (let i = 0; i < bytes.byteLength; i++) {
            binary += String.fromCharCode(bytes[i]);
        }
        return btoa(binary);
    }

    // Start recording
    async startRecording(existingStream = null) {
        try {
            // Use an externally provided audio stream if given, otherwise request a new one
            if (existingStream) {
                this.stream = existingStream;
                console.log('Using externally provided audio stream');
            } else {
                this.stream = await navigator.mediaDevices.getUserMedia({
                    audio: {
                        sampleRate: 16000,
                        channelCount: 1,
                        echoCancellation: true,
                        noiseSuppression: true
                    }
                });
                console.log('Acquired new audio stream');
            }

            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });

            const source = this.audioContext.createMediaStreamSource(this.stream);
            const processor = this.audioContext.createScriptProcessor(4096, 1, 1);

            processor.onaudioprocess = (event) => {
                const inputBuffer = event.inputBuffer;
                const inputData = inputBuffer.getChannelData(0);

                // Voice activity detection
                if (this.detectVoiceActivity(inputData)) {
                    // Speech is active: buffer the audio data
                    this.audioBuffer.push(new Float32Array(inputData));
                }
            };

            source.connect(processor);
            processor.connect(this.audioContext.destination);

            // Keep references to the nodes for later cleanup
            this.processor = processor;
            this.source = source;

            this.isRecording = true;
            this.onStatusUpdate('Waiting for speech input...', 'ready');

            // Prompt for background noise calibration if adaptive thresholding is enabled
            if (this.adaptiveThreshold && this.noiseCalibrationSamples.length === 0) {
                this.onStatusUpdate('Calibrating background noise, please stay quiet...', 'calibrating');
            }

            return true;

        } catch (error) {
            console.error('Failed to start recording:', error);
            this.onError('Failed to start recording: ' + error.message);
            return false;
        }
    }

    // Stop recording and release all resources
    stopRecording() {
        console.log('Stopping recording...');

        // Disconnect the audio nodes
        if (this.source) {
            this.source.disconnect();
            this.source = null;
        }

        if (this.processor) {
            this.processor.disconnect();
            this.processor = null;
        }

        // Stop all audio tracks
        if (this.stream) {
            this.stream.getTracks().forEach(track => {
                track.stop();
                console.log(`Stopped audio track: ${track.label}`);
            });
            this.stream = null;
        }

        if (this.audioContext) {
            this.audioContext.close().then(() => {
                console.log('AudioContext closed');
            }).catch(err => {
                console.error('Error closing AudioContext:', err);
            });
            this.audioContext = null;
        }

        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }

        // If speech is still in progress, process the remaining audio
        if (this.isSpeaking) {
            this.handleSpeechEnd();
        }

        // Reset all state
        this.isRecording = false;
        this.isSpeaking = false;
        this.audioBuffer = [];
        this.audioChunks = [];
        this.consecutiveFramesCount = 0;
        this.frameBuffer = [];

        // Reset the calibration state so the next start recalibrates
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;

        this.onStatusUpdate('Recording fully stopped', 'stopped');
        console.log('Recording fully stopped, all resources released');
    }

    // Get the current recording status
    getRecordingStatus() {
        return {
            isRecording: this.isRecording,
            isSpeaking: this.isSpeaking,
            hasAudioContext: !!this.audioContext
        };
    }
}

// Export the module
export { AudioProcessor };
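
/*
 * Usage sketch (illustrative only): one way this module might be wired into a
 * page. The DOM element IDs and the handleUserSpeech function below are
 * assumptions for the example, not part of this module or of new_app.js.
 *
 * import { AudioProcessor } from './audio_processor.js';
 *
 * const processor = new AudioProcessor({
 *     silenceTimeout: 800,
 *     onStatusUpdate: (message, state) => {
 *         document.getElementById('asr-status').textContent = message; // hypothetical element
 *     },
 *     onRecognitionResult: (text) => {
 *         handleUserSpeech(text); // hypothetical application callback
 *     },
 *     onError: (message) => console.error(message)
 * });
 *
 * document.getElementById('start-btn').addEventListener('click', () => processor.startRecording());
 * document.getElementById('stop-btn').addEventListener('click', () => processor.stopRecording());
 */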