// Audio processing module - advanced audio processing extracted from new_app.js

class AudioProcessor {
    constructor(options = {}) {
        this.audioContext = null;
        this.stream = null;
        this.isRecording = false;
        this.audioChunks = [];

        // VAD (voice activity detection) state
        this.isSpeaking = false;
        this.silenceThreshold = options.silenceThreshold || 0.03;
        this.silenceTimeout = options.silenceTimeout || 1000;      // ms of silence that ends an utterance
        this.minSpeechDuration = options.minSpeechDuration || 300; // ms; shorter utterances are discarded
        this.silenceTimer = null;
        this.speechStartTime = null;
        this.audioBuffer = [];
        this.backgroundNoiseLevel = 0;
        // Continuity detection: require several consecutive loud frames before speech starts
        this.consecutiveFramesRequired = 3;
        this.consecutiveFramesCount = 0; // current consecutive-frame count
        this.frameBuffer = [];           // pre-speech frame buffer
        this.adaptiveThreshold = options.adaptiveThreshold !== false;
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;       // background-noise calibration flag

        // ASR API configuration.
        // NOTE: shipping credentials to the browser like this exposes them to every
        // visitor; in production they belong behind a server-side proxy.
        this.apiConfig = {
            url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
            headers: {
                'X-Api-App-Key': '1988591469',
                'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
                'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
                'X-Api-Request-Id': this.generateUUID(), // fixed per instance, not per request
                'X-Api-Sequence': '-1',
                'Content-Type': 'application/json'
            }
        };

        // Callbacks
        this.onSpeechStart = options.onSpeechStart || (() => {});
        this.onSpeechEnd = options.onSpeechEnd || (() => {});
        this.onRecognitionResult = options.onRecognitionResult || (() => {});
        this.onError = options.onError || (() => {});
        this.onStatusUpdate = options.onStatusUpdate || (() => {});
    }

    // Background-noise calibration: average the first 100 frames, then derive the threshold
    calibrateBackgroundNoise(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        this.noiseCalibrationSamples.push(audioLevel);

        if (this.noiseCalibrationSamples.length >= 100) {
            this.backgroundNoiseLevel = this.noiseCalibrationSamples.reduce((a, b) => a + b) / this.noiseCalibrationSamples.length;
            const oldThreshold = this.silenceThreshold;
            this.silenceThreshold = Math.max(this.backgroundNoiseLevel * 2.5, 0.005); // enforce a minimum threshold

            console.log('Background noise calibration complete:');
            console.log(`- average background noise: ${this.backgroundNoiseLevel.toFixed(4)}`);
            console.log(`- old threshold: ${oldThreshold.toFixed(4)}`);
            console.log(`- new threshold: ${this.silenceThreshold.toFixed(4)}`);

            this.noiseCalibrationSamples = [];
            this.isCalibrated = true; // bug fix: this flag was never set, so calibration could never finish
            this.onStatusUpdate('Background noise calibration complete, waiting for speech...', 'ready');
        }
    }
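
    // Worked example (illustrative): with an average background level of 0.01,
    // calibration sets silenceThreshold = max(0.01 * 2.5, 0.005) = 0.025, so input
    // must be roughly 2.5x louder than the room before VAD can trigger.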

    // Audio energy estimate: blend RMS with the frame peak for more robust detection
    calculateAudioLevel(audioData) {
        let sum = 0;
        let peak = 0;
        for (let i = 0; i < audioData.length; i++) {
            const sample = Math.abs(audioData[i]);
            sum += sample * sample;
            peak = Math.max(peak, sample);
        }
        const rms = Math.sqrt(sum / audioData.length);
        // Weighting in the peak catches short transients that pure RMS smooths away
        return rms * 0.7 + peak * 0.3;
    }

    // Restart background-noise calibration
    recalibrateBackground() {
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;
        this.onStatusUpdate('Recalibrating background noise...', 'calibrating');
        console.log('Background noise recalibration started');
    }

    // Generate an RFC 4122 version-4 UUID
    generateUUID() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
            const r = Math.random() * 16 | 0;
            const v = c == 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }

    // Previous RMS-only energy calculation, kept for reference:
    // calculateAudioLevel(audioData) {
    //     let sum = 0;
    //     for (let i = 0; i < audioData.length; i++) {
    //         sum += audioData[i] * audioData[i];
    //     }
    //     return Math.sqrt(sum / audioData.length);
    // }

    // Voice activity detection with consecutive-frame hysteresis
    detectVoiceActivity(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        const currentTime = Date.now();

        if (audioLevel > this.silenceThreshold) {
            this.consecutiveFramesCount++;

            // Only treat input as speech once several consecutive frames exceed the threshold
            if (this.consecutiveFramesCount >= this.consecutiveFramesRequired) {
                if (!this.isSpeaking) {
                    this.isSpeaking = true;
                    this.speechStartTime = currentTime;
                    this.audioBuffer = [...this.frameBuffer]; // include the buffered lead-in frames
                    this.onSpeechStart();
                    this.onStatusUpdate('Speech detected, recording...', 'speaking');
                    console.log(`Speech started - level: ${audioLevel.toFixed(4)}, consecutive frames: ${this.consecutiveFramesCount}`);
                }

                if (this.silenceTimer) {
                    clearTimeout(this.silenceTimer);
                    this.silenceTimer = null;
                }

                return true;
            } else {
                // Not enough consecutive frames yet; buffer the audio so speech onsets are not clipped
                this.frameBuffer.push(new Float32Array(audioData));
                if (this.frameBuffer.length > this.consecutiveFramesRequired) {
                    this.frameBuffer.shift(); // keep the buffer bounded
                }
                return false;
            }
        } else {
            // Quiet frame: reset the consecutive-frame counter
            this.consecutiveFramesCount = 0;
            this.frameBuffer = [];

            if (this.isSpeaking && !this.silenceTimer) {
                this.silenceTimer = setTimeout(() => {
                    this.handleSpeechEnd();
                }, this.silenceTimeout);
            }

            return this.isSpeaking;
        }
    }

    // Handle the end of an utterance
    async handleSpeechEnd() {
        if (this.isSpeaking) {
            const speechDuration = Date.now() - this.speechStartTime;

            if (speechDuration >= this.minSpeechDuration) {
                console.log(`Speech ended, duration: ${speechDuration}ms`);
                console.log(window.webrtcApp?.currentVideoTag);
                // Only forward audio to ASR while the default video is showing;
                // optional chaining guards against webrtcApp not being initialized yet
                if (window.webrtcApp?.currentVideoTag === "default") {
                    await this.processAudioBuffer();
                }
                this.onStatusUpdate('Recognizing speech...', 'processing');
            } else {
                console.log('Speech too short, ignoring');
                this.onStatusUpdate('Waiting for speech...', 'ready');
            }

            this.isSpeaking = false;
            this.speechStartTime = null;
            this.audioBuffer = [];
            this.onSpeechEnd();
        }

        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
    }

    // Merge the buffered audio frames and send them to the ASR API
    async processAudioBuffer() {
        if (this.audioBuffer.length === 0) {
            return;
        }

        try {
            // Concatenate all buffered Float32 frames into one array
            const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
            const combinedBuffer = new Float32Array(totalLength);
            let offset = 0;

            for (const buffer of this.audioBuffer) {
                combinedBuffer.set(buffer, offset);
                offset += buffer.length;
            }

            // Encode as 16 kHz WAV, then as base64
            const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
            const base64Audio = this.arrayBufferToBase64(wavBuffer);

            // Call the ASR API
            await this.callASRAPI(base64Audio);

        } catch (error) {
            console.error('Failed to process audio data:', error);
            this.onError('Failed to process audio data: ' + error.message);
        }
    }

    // Call the ASR API
    async callASRAPI(base64AudioData) {
        try {
            const requestBody = {
                user: {
                    uid: "1988591469"
                },
                audio: {
                    data: base64AudioData
                },
                request: {
                    model_name: "bigmodel"
                }
            };

            const response = await fetch(this.apiConfig.url, {
                method: 'POST',
                headers: this.apiConfig.headers,
                body: JSON.stringify(requestBody)
            });

            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }

            const result = await response.json();
            this.handleASRResponse(result);

        } catch (error) {
            console.error('ASR API call failed:', error);
            this.onError('ASR API call failed: ' + error.message);
        }
    }

    // Handle the ASR response
    handleASRResponse(response) {
        console.log('ASR response:', response);

        if (response && response.result) {
            const recognizedText = response.result.text;
            this.onRecognitionResult(recognizedText);
            this.onStatusUpdate('Recognition complete', 'completed');
        } else {
            console.log('No text recognized');
            this.onStatusUpdate('No text recognized', 'ready');
        }
    }

    // Encode Float32 samples as a 16-bit mono PCM WAV file
    encodeWAV(samples, sampleRate) {
        const length = samples.length;
        const buffer = new ArrayBuffer(44 + length * 2); // 44-byte header + 2 bytes per sample
        const view = new DataView(buffer);

        // WAV/RIFF header
        const writeString = (offset, string) => {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        };

        writeString(0, 'RIFF');
        view.setUint32(4, 36 + length * 2, true);  // RIFF chunk size
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true);              // fmt chunk size
        view.setUint16(20, 1, true);               // audio format: PCM
        view.setUint16(22, 1, true);               // channels: mono
        view.setUint32(24, sampleRate, true);      // sample rate
        view.setUint32(28, sampleRate * 2, true);  // byte rate (sampleRate * channels * 2)
        view.setUint16(32, 2, true);               // block align
        view.setUint16(34, 16, true);              // bits per sample
        writeString(36, 'data');
        view.setUint32(40, length * 2, true);      // data chunk size

        // Write audio samples, clamped to [-1, 1] and scaled to int16
        let offset = 44;
        for (let i = 0; i < length; i++) {
            const sample = Math.max(-1, Math.min(1, samples[i]));
            view.setInt16(offset, sample * 0x7FFF, true);
            offset += 2;
        }

        return buffer;
    }
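
    // Size check (illustrative): a 2 s utterance at 16 kHz mono is 32000 samples,
    // so the WAV buffer is 44 + 32000 * 2 = 64044 bytes (~62.5 KiB), growing by
    // about 4/3 once base64-encoded for the JSON request body.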

    // Convert an ArrayBuffer to a base64 string
    arrayBufferToBase64(buffer) {
        let binary = '';
        const bytes = new Uint8Array(buffer);
        for (let i = 0; i < bytes.byteLength; i++) {
            binary += String.fromCharCode(bytes[i]);
        }
        return btoa(binary);
    }

    // Start recording
    async startRecording(existingStream = null) {
        try {
            // Reuse an externally supplied audio stream if given; otherwise request a new one
            if (existingStream) {
                this.stream = existingStream;
                console.log('Using externally supplied audio stream');
            } else {
                this.stream = await navigator.mediaDevices.getUserMedia({
                    audio: {
                        sampleRate: 16000,
                        channelCount: 1,
                        echoCancellation: true,
                        noiseSuppression: true
                    }
                });
                console.log('Acquired new audio stream');
            }

            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });

            const source = this.audioContext.createMediaStreamSource(this.stream);
            // Note: createScriptProcessor is deprecated in favor of AudioWorklet,
            // but the rest of this module is built around it
            const processor = this.audioContext.createScriptProcessor(4096, 1, 1);

            processor.onaudioprocess = (event) => {
                const inputBuffer = event.inputBuffer;
                const inputData = inputBuffer.getChannelData(0);

                // Bug fix: feed frames into background-noise calibration until it
                // completes; previously calibrateBackgroundNoise was never called,
                // so the adaptive threshold never took effect
                if (this.adaptiveThreshold && !this.isCalibrated) {
                    this.calibrateBackgroundNoise(inputData);
                    return;
                }

                // Voice activity detection
                if (this.detectVoiceActivity(inputData)) {
                    // Speech is active: buffer the audio frame
                    this.audioBuffer.push(new Float32Array(inputData));
                }
            };

            source.connect(processor);
            processor.connect(this.audioContext.destination);

            // Keep references for later cleanup
            this.processor = processor;
            this.source = source;

            this.isRecording = true;
            this.onStatusUpdate('Waiting for speech...', 'ready');

            if (this.adaptiveThreshold && !this.isCalibrated) {
                this.onStatusUpdate('Calibrating background noise, please stay quiet...', 'calibrating');
            }

            return true;

        } catch (error) {
            console.error('Failed to start recording:', error);
            this.onError('Failed to start recording: ' + error.message);
            return false;
        }
    }

    // Stop recording and release all resources
    stopRecording() {
        console.log('Stopping recording...');

        // Disconnect audio nodes
        if (this.source) {
            this.source.disconnect();
            this.source = null;
        }

        if (this.processor) {
            this.processor.disconnect();
            this.processor = null;
        }

        // Stop all audio tracks
        if (this.stream) {
            this.stream.getTracks().forEach(track => {
                track.stop();
                console.log(`Stopped audio track: ${track.label}`);
            });
            this.stream = null;
        }

        if (this.audioContext) {
            this.audioContext.close().then(() => {
                console.log('AudioContext closed');
            }).catch(err => {
                console.error('Error closing AudioContext:', err);
            });
            this.audioContext = null;
        }

        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }

        // If speech is still in progress, flush the final audio
        if (this.isSpeaking) {
            this.handleSpeechEnd();
        }

        // Reset all state
        this.isRecording = false;
        this.isSpeaking = false;
        this.audioBuffer = [];
        this.audioChunks = [];
        this.consecutiveFramesCount = 0;
        this.frameBuffer = [];

        // Reset calibration so the next start recalibrates
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;

        this.onStatusUpdate('Recording fully stopped', 'stopped');
        console.log('Recording fully stopped, all resources released');
    }

    // Report recorder status
    getRecordingStatus() {
        return {
            isRecording: this.isRecording,
            isSpeaking: this.isSpeaking,
            hasAudioContext: !!this.audioContext
        };
    }
}

// Export the module
export { AudioProcessor };
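
// Usage sketch (illustrative, not part of the original module): wiring AudioProcessor
// into a page. Option names and callbacks match the constructor above; the import
// path and the element IDs (#status, #start, #stop) are assumptions.
//
// import { AudioProcessor } from './audio_processor.js';
//
// const statusEl = document.querySelector('#status');
// const processor = new AudioProcessor({
//     silenceTimeout: 1000,    // ms of silence that ends an utterance
//     minSpeechDuration: 300,  // ms; shorter utterances are ignored
//     onRecognitionResult: (text) => console.log('Recognized:', text),
//     onStatusUpdate: (msg, state) => { statusEl.textContent = `[${state}] ${msg}`; },
//     onError: (msg) => console.error(msg)
// });
//
// document.querySelector('#start').addEventListener('click', () => processor.startRecording());
// document.querySelector('#stop').addEventListener('click', () => processor.stopRecording());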