346 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			346 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
// Module-level accumulator for the most recent recognition transcript.
// Written by HttpASRRecognizer.handleASRResponse; presumably read by code
// outside this file — TODO(review): confirm the consumer before renaming.
let ASRTEXT = ''
 | |
| class HttpASRRecognizer {
 | |
|     constructor() {
 | |
|         this.mediaRecorder = null;
 | |
|         this.audioContext = null;
 | |
|         this.isRecording = false;
 | |
|         this.audioChunks = [];
 | |
|         
 | |
|         // VAD相关属性
 | |
|         this.isSpeaking = false;
 | |
|         this.silenceThreshold = 0.01;
 | |
|         this.silenceTimeout = 1000;
 | |
|         this.minSpeechDuration = 300;
 | |
|         this.silenceTimer = null;
 | |
|         this.speechStartTime = null;
 | |
|         this.audioBuffer = [];
 | |
|         
 | |
|         // API配置
 | |
|         this.apiConfig = {
 | |
|             url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
 | |
|             headers: {
 | |
|                 'X-Api-App-Key': '1988591469',
 | |
|                 'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
 | |
|                 'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
 | |
|                 'X-Api-Request-Id': this.generateUUID(),
 | |
|                 'X-Api-Sequence': '-1',
 | |
|                 'Content-Type': 'application/json'
 | |
|             }
 | |
|         };
 | |
|         
 | |
|         this.recordBtn = document.getElementById('startVoiceButton');
 | |
|         this.statusDiv = document.getElementById('status');
 | |
|         this.resultsDiv = document.getElementById('results');
 | |
|         
 | |
|         this.initEventListeners();
 | |
|     }
 | |
|     
 | |
|     initEventListeners() {
 | |
|         this.recordBtn.addEventListener('click', () => {
 | |
|             if (this.isRecording) {
 | |
|                 this.stopRecording();
 | |
|             } else {
 | |
|                 this.startRecording();
 | |
|             }
 | |
|         });
 | |
|     }
 | |
|     
 | |
|     // 生成UUID
 | |
|     generateUUID() {
 | |
|         return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
 | |
|             const r = Math.random() * 16 | 0;
 | |
|             const v = c == 'x' ? r : (r & 0x3 | 0x8);
 | |
|             return v.toString(16);
 | |
|         });
 | |
|     }
 | |
|     
 | |
|     // 计算音频能量(音量)
 | |
|     calculateAudioLevel(audioData) {
 | |
|         let sum = 0;
 | |
|         for (let i = 0; i < audioData.length; i++) {
 | |
|             sum += audioData[i] * audioData[i];
 | |
|         }
 | |
|         return Math.sqrt(sum / audioData.length);
 | |
|     }
 | |
|     
 | |
|     // 语音活动检测
 | |
|     detectVoiceActivity(audioData) {
 | |
|         const audioLevel = this.calculateAudioLevel(audioData);
 | |
|         const currentTime = Date.now();
 | |
|         
 | |
|         if (audioLevel > this.silenceThreshold) {
 | |
|             if (!this.isSpeaking) {
 | |
|                 this.isSpeaking = true;
 | |
|                 this.speechStartTime = currentTime;
 | |
|                 this.audioBuffer = [];
 | |
|                 this.updateStatus('检测到语音,开始录音...', 'speaking');
 | |
|                 console.log('开始说话');
 | |
|             }
 | |
|             
 | |
|             if (this.silenceTimer) {
 | |
|                 clearTimeout(this.silenceTimer);
 | |
|                 this.silenceTimer = null;
 | |
|             }
 | |
|             
 | |
|             return true;
 | |
|         } else {
 | |
|             if (this.isSpeaking && !this.silenceTimer) {
 | |
|                 this.silenceTimer = setTimeout(() => {
 | |
|                     this.onSpeechEnd();
 | |
|                 }, this.silenceTimeout);
 | |
|             }
 | |
|             
 | |
|             return this.isSpeaking;
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     // 语音结束处理
 | |
|     async onSpeechEnd() {
 | |
|         if (this.isSpeaking) {
 | |
|             const speechDuration = Date.now() - this.speechStartTime;
 | |
|             
 | |
|             if (speechDuration >= this.minSpeechDuration) {
 | |
|                 console.log(`语音结束,时长: ${speechDuration}ms`);
 | |
|                 await this.processAudioBuffer();
 | |
|                 // this.updateStatus('语音识别中...', 'processing');
 | |
|                 console.log('语音识别中')
 | |
|             } else {
 | |
|                 console.log('说话时长太短,忽略');
 | |
|                 // this.updateStatus('等待语音输入...', 'ready');
 | |
|                 console.log('等待语音输入...')
 | |
| 
 | |
|             }
 | |
|             
 | |
|             this.isSpeaking = false;
 | |
|             this.speechStartTime = null;
 | |
|             this.audioBuffer = [];
 | |
|         }
 | |
|         
 | |
|         if (this.silenceTimer) {
 | |
|             clearTimeout(this.silenceTimer);
 | |
|             this.silenceTimer = null;
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     // 处理音频缓冲区并发送到API
 | |
|     async processAudioBuffer() {
 | |
|         if (this.audioBuffer.length === 0) {
 | |
|             return;
 | |
|         }
 | |
|         
 | |
|         try {
 | |
|             // 合并所有音频数据
 | |
|             const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
 | |
|             const combinedBuffer = new Float32Array(totalLength);
 | |
|             let offset = 0;
 | |
|             
 | |
|             for (const buffer of this.audioBuffer) {
 | |
|                 combinedBuffer.set(buffer, offset);
 | |
|                 offset += buffer.length;
 | |
|             }
 | |
|             
 | |
|             // 转换为WAV格式并编码为base64
 | |
|             const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
 | |
|             const base64Audio = this.arrayBufferToBase64(wavBuffer);
 | |
|             
 | |
|             // 调用ASR API
 | |
|             await this.callASRAPI(base64Audio);
 | |
|             
 | |
|         } catch (error) {
 | |
|             console.error('处理音频数据失败:', error);
 | |
|             this.updateStatus('识别失败', 'error');
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     // 调用ASR API
 | |
|     async callASRAPI(base64AudioData) {
 | |
|         try {
 | |
|             const requestBody = {
 | |
|                 user: {
 | |
|                     uid: "1988591469"
 | |
|                 },
 | |
|                 audio: {
 | |
|                     data: base64AudioData
 | |
|                 },
 | |
|                 request: {
 | |
|                     model_name: "bigmodel"
 | |
|                 }
 | |
|             };
 | |
|             
 | |
|             const response = await fetch(this.apiConfig.url, {
 | |
|                 method: 'POST',
 | |
|                 headers: this.apiConfig.headers,
 | |
|                 body: JSON.stringify(requestBody)
 | |
|             });
 | |
|             
 | |
|             if (!response.ok) {
 | |
|                 throw new Error(`HTTP error! status: ${response.status}`);
 | |
|             }
 | |
|             
 | |
|             const result = await response.json();
 | |
|             this.handleASRResponse(result);
 | |
|             
 | |
|         } catch (error) {
 | |
|             console.error('ASR API调用失败:', error);
 | |
|             this.updateStatus('API调用失败', 'error');
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     // 处理ASR响应
 | |
|     handleASRResponse(response) {
 | |
|         console.log('ASR响应:', response);
 | |
|         
 | |
|         if (response && response.data && response.data.result) {
 | |
|             ASRTEXT = response.data.result;
 | |
|             // this.displayResult(text);
 | |
|             // this.updateStatus('识别完成', 'completed');
 | |
|             console.log('识别完成')
 | |
|         } else {
 | |
|             console.log('未识别到文字');
 | |
|             // this.updateStatus('未识别到文字', 'ready');
 | |
| 
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     // 显示识别结果
 | |
|     displayResult(text) {
 | |
|         const resultElement = document.createElement('div');
 | |
|         resultElement.className = 'result-item';
 | |
|         resultElement.innerHTML = `
 | |
|             <span class="timestamp">${new Date().toLocaleTimeString()}</span>
 | |
|             <span class="text">${text}</span>
 | |
|         `;
 | |
|         this.resultsDiv.appendChild(resultElement);
 | |
|         this.resultsDiv.scrollTop = this.resultsDiv.scrollHeight;
 | |
|     }
 | |
|     
 | |
|     // 更新状态显示
 | |
|     updateStatus(message, status) {
 | |
|         this.statusDiv.textContent = message;
 | |
|         this.statusDiv.className = `status ${status}`;
 | |
|     }
 | |
|     
 | |
|     // 编码WAV格式
 | |
|     encodeWAV(samples, sampleRate) {
 | |
|         const length = samples.length;
 | |
|         const buffer = new ArrayBuffer(44 + length * 2);
 | |
|         const view = new DataView(buffer);
 | |
|         
 | |
|         // WAV文件头
 | |
|         const writeString = (offset, string) => {
 | |
|             for (let i = 0; i < string.length; i++) {
 | |
|                 view.setUint8(offset + i, string.charCodeAt(i));
 | |
|             }
 | |
|         };
 | |
|         
 | |
|         writeString(0, 'RIFF');
 | |
|         view.setUint32(4, 36 + length * 2, true);
 | |
|         writeString(8, 'WAVE');
 | |
|         writeString(12, 'fmt ');
 | |
|         view.setUint32(16, 16, true);
 | |
|         view.setUint16(20, 1, true);
 | |
|         view.setUint16(22, 1, true);
 | |
|         view.setUint32(24, sampleRate, true);
 | |
|         view.setUint32(28, sampleRate * 2, true);
 | |
|         view.setUint16(32, 2, true);
 | |
|         view.setUint16(34, 16, true);
 | |
|         writeString(36, 'data');
 | |
|         view.setUint32(40, length * 2, true);
 | |
|         
 | |
|         // 写入音频数据
 | |
|         let offset = 44;
 | |
|         for (let i = 0; i < length; i++) {
 | |
|             const sample = Math.max(-1, Math.min(1, samples[i]));
 | |
|             view.setInt16(offset, sample * 0x7FFF, true);
 | |
|             offset += 2;
 | |
|         }
 | |
|         
 | |
|         return buffer;
 | |
|     }
 | |
|     
 | |
|     // ArrayBuffer转Base64
 | |
|     arrayBufferToBase64(buffer) {
 | |
|         let binary = '';
 | |
|         const bytes = new Uint8Array(buffer);
 | |
|         for (let i = 0; i < bytes.byteLength; i++) {
 | |
|             binary += String.fromCharCode(bytes[i]);
 | |
|         }
 | |
|         return btoa(binary);
 | |
|     }
 | |
|     
 | |
|     async startRecording() {
 | |
|         try {
 | |
|             const stream = await navigator.mediaDevices.getUserMedia({
 | |
|                 audio: {
 | |
|                     sampleRate: 16000,
 | |
|                     channelCount: 1,
 | |
|                     echoCancellation: true,
 | |
|                     noiseSuppression: true
 | |
|                 }
 | |
|             });
 | |
|             
 | |
|             this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
 | |
|                 sampleRate: 16000
 | |
|             });
 | |
|             
 | |
|             const source = this.audioContext.createMediaStreamSource(stream);
 | |
|             const processor = this.audioContext.createScriptProcessor(4096, 1, 1);
 | |
|             
 | |
|             processor.onaudioprocess = (event) => {
 | |
|                 const inputBuffer = event.inputBuffer;
 | |
|                 const inputData = inputBuffer.getChannelData(0);
 | |
|                 
 | |
|                 // 语音活动检测
 | |
|                 if (this.detectVoiceActivity(inputData)) {
 | |
|                     // 如果检测到语音活动,缓存音频数据
 | |
|                     this.audioBuffer.push(new Float32Array(inputData));
 | |
|                 }
 | |
|             };
 | |
|             
 | |
|             source.connect(processor);
 | |
|             processor.connect(this.audioContext.destination);
 | |
|             
 | |
|             this.isRecording = true;
 | |
|             this.recordBtn.textContent = '停止录音';
 | |
|             this.recordBtn.className = 'btn recording';
 | |
|             // this.updateStatus('等待语音输入...', 'ready');
 | |
|             
 | |
|         } catch (error) {
 | |
|             console.error('启动录音失败:', error);
 | |
|             // this.updateStatus('录音启动失败', 'error');
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     stopRecording() {
 | |
|         if (this.audioContext) {
 | |
|             this.audioContext.close();
 | |
|             this.audioContext = null;
 | |
|         }
 | |
|         
 | |
|         if (this.silenceTimer) {
 | |
|             clearTimeout(this.silenceTimer);
 | |
|             this.silenceTimer = null;
 | |
|         }
 | |
|         
 | |
|         // 如果正在说话,处理最后的音频
 | |
|         if (this.isSpeaking) {
 | |
|             this.onSpeechEnd();
 | |
|         }
 | |
|         
 | |
|         this.isRecording = false;
 | |
|         this.isSpeaking = false;
 | |
|         this.audioBuffer = [];
 | |
|         
 | |
|         this.recordBtn.textContent = '开始录音';
 | |
|         this.recordBtn.className = 'btn';
 | |
|         console.log('录音已停止');
 | |
|         // this.updateStatus('录音已停止', 'stopped');
 | |
|     }
 | |
| }
 | |
| 
 | |
// Bootstrap: construct the recognizer once the DOM is fully parsed. No
// reference is kept — the instance stays alive through its own DOM listeners.
document.addEventListener('DOMContentLoaded', () => {
    new HttpASRRecognizer();
    console.log('HTTP ASR识别器已初始化');
});