// WebRtc_QingGan/src/audio_processor.js
// Audio processing module - advanced audio handling extracted from new_app.js
class AudioProcessor {
    constructor(options = {}) {
        this.audioContext = null;
        this.stream = null; // MediaStream currently feeding the processor
        this.isRecording = false;
        this.audioChunks = [];
        // VAD (voice activity detection) state
        this.isSpeaking = false;
        this.silenceThreshold = options.silenceThreshold || 0.03;
        this.silenceTimeout = options.silenceTimeout || 1000;
        this.minSpeechDuration = options.minSpeechDuration || 300;
        this.silenceTimer = null;
        this.speechStartTime = null;
        this.audioBuffer = [];
        this.backgroundNoiseLevel = 0;
        // Continuity detection: require several consecutive loud frames before
        // treating input as speech, to filter out one-off spikes
        this.consecutiveFramesRequired = 3;
        this.consecutiveFramesCount = 0; // current run of consecutive loud frames
        this.frameBuffer = []; // pre-roll buffer of recent frames
        this.adaptiveThreshold = options.adaptiveThreshold !== false;
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false; // whether background-noise calibration has finished
        // ASR API configuration
        this.apiConfig = {
            url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
            headers: {
                'X-Api-App-Key': '1988591469',
                'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
                'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
                'X-Api-Request-Id': this.generateUUID(),
                'X-Api-Sequence': '-1',
                'Content-Type': 'application/json'
            }
        };
        // Callbacks
        this.onSpeechStart = options.onSpeechStart || (() => {});
        this.onSpeechEnd = options.onSpeechEnd || (() => {});
        this.onRecognitionResult = options.onRecognitionResult || (() => {});
        this.onError = options.onError || (() => {});
        this.onStatusUpdate = options.onStatusUpdate || (() => {});
    }
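    // Pipeline overview: onaudioprocess frames are first used to calibrate the
    // background noise level (when adaptiveThreshold is on), then fed through
    // detectVoiceActivity(). Frames judged to be speech are buffered, and once
    // silence persists past silenceTimeout the buffer is encoded as 16 kHz mono
    // WAV, base64-encoded, and POSTed to the ASR endpoint configured above.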
    // Background-noise calibration: average the level of the first 100 frames
    // and derive the silence threshold from it
    calibrateBackgroundNoise(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        this.noiseCalibrationSamples.push(audioLevel);
        if (this.noiseCalibrationSamples.length >= 100) {
            this.backgroundNoiseLevel = this.noiseCalibrationSamples.reduce((a, b) => a + b) / this.noiseCalibrationSamples.length;
            const oldThreshold = this.silenceThreshold;
            this.silenceThreshold = Math.max(this.backgroundNoiseLevel * 2.5, 0.005); // enforce a minimum threshold
            console.log('Background noise calibration complete:');
            console.log(`- mean background level: ${this.backgroundNoiseLevel.toFixed(4)}`);
            console.log(`- old threshold: ${oldThreshold.toFixed(4)}`);
            console.log(`- new threshold: ${this.silenceThreshold.toFixed(4)}`);
            this.noiseCalibrationSamples = [];
            this.isCalibrated = true; // mark calibration done so VAD can take over
            this.onStatusUpdate('Background noise calibration complete, waiting for speech...', 'ready');
        }
    }
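    // Worked example: if the 100 calibration frames average 0.01, the threshold
    // becomes max(0.01 * 2.5, 0.005) = 0.025; in a near-silent room averaging
    // 0.001, the 0.005 floor wins, so faint hiss never counts as speech.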
    // Audio level estimate: blend of RMS energy and peak amplitude
    calculateAudioLevel(audioData) {
        let sum = 0;
        let peak = 0;
        for (let i = 0; i < audioData.length; i++) {
            const sample = Math.abs(audioData[i]);
            sum += sample * sample;
            peak = Math.max(peak, sample);
        }
        const rms = Math.sqrt(sum / audioData.length);
        // Combine RMS and peak for a more robust reading than RMS alone
        return rms * 0.7 + peak * 0.3;
    }
    // Restart background-noise calibration from scratch
    recalibrateBackground() {
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;
        this.onStatusUpdate('Recalibrating background noise...', 'calibrating');
        console.log('Restarting background noise calibration');
    }
    // Generate an RFC 4122 version-4 UUID
    generateUUID() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
            const r = Math.random() * 16 | 0;
            const v = c == 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }
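    // Note: in secure contexts, modern browsers also expose crypto.randomUUID(),
    // which could replace the Math.random()-based template above.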
    // Voice activity detection with consecutive-frame confirmation and pre-roll
    detectVoiceActivity(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        const currentTime = Date.now();
        // Continuity check
        if (audioLevel > this.silenceThreshold) {
            this.consecutiveFramesCount++;
            // Only start recording after several consecutive frames exceed the threshold
            if (this.consecutiveFramesCount >= this.consecutiveFramesRequired) {
                if (!this.isSpeaking) {
                    this.isSpeaking = true;
                    this.speechStartTime = currentTime;
                    this.audioBuffer = [...this.frameBuffer]; // include the buffered pre-roll frames
                    this.onSpeechStart();
                    this.onStatusUpdate('Speech detected, recording...', 'speaking');
                    console.log(`Speech started - level: ${audioLevel.toFixed(4)}, consecutive frames: ${this.consecutiveFramesCount}`);
                }
                if (this.silenceTimer) {
                    clearTimeout(this.silenceTimer);
                    this.silenceTimer = null;
                }
                return true;
            } else {
                // Not enough consecutive frames yet; buffer the audio as pre-roll
                this.frameBuffer.push(new Float32Array(audioData));
                if (this.frameBuffer.length > this.consecutiveFramesRequired) {
                    this.frameBuffer.shift(); // cap the buffer size
                }
                return false;
            }
        } else {
            // Quiet frame: reset the consecutive-frame counter
            this.consecutiveFramesCount = 0;
            this.frameBuffer = [];
            if (this.isSpeaking && !this.silenceTimer) {
                this.silenceTimer = setTimeout(() => {
                    this.handleSpeechEnd();
                }, this.silenceTimeout);
            }
            return this.isSpeaking;
        }
    }
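    // Timing note: with the 4096-sample ScriptProcessor frames used in
    // startRecording() at a 16 kHz sample rate, each frame covers 256 ms, so
    // the default consecutiveFramesRequired = 3 means roughly 768 ms of
    // sustained sound before onSpeechStart fires; the frameBuffer pre-roll
    // keeps those first frames so the start of the utterance is not lost.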
    // Handle the end of a speech segment
    async handleSpeechEnd() {
        if (this.isSpeaking) {
            const speechDuration = Date.now() - this.speechStartTime;
            if (speechDuration >= this.minSpeechDuration) {
                console.log(`Speech ended, duration: ${speechDuration}ms`);
                console.log(window.webrtcApp?.currentVideoTag);
                if (window.webrtcApp?.currentVideoTag === "default") {
                    this.onStatusUpdate('Recognizing speech...', 'processing');
                    await this.processAudioBuffer();
                }
            } else {
                console.log('Speech too short, ignoring');
                this.onStatusUpdate('Waiting for speech...', 'ready');
            }
            this.isSpeaking = false;
            this.speechStartTime = null;
            this.audioBuffer = [];
            this.onSpeechEnd();
        }
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
    }
    // Merge the buffered audio and send it to the ASR API
    async processAudioBuffer() {
        if (this.audioBuffer.length === 0) {
            return;
        }
        try {
            // Concatenate all buffered frames
            const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
            const combinedBuffer = new Float32Array(totalLength);
            let offset = 0;
            for (const buffer of this.audioBuffer) {
                combinedBuffer.set(buffer, offset);
                offset += buffer.length;
            }
            // Encode as WAV, then as base64
            const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
            const base64Audio = this.arrayBufferToBase64(wavBuffer);
            // Call the ASR API
            await this.callASRAPI(base64Audio);
        } catch (error) {
            console.error('Failed to process audio data:', error);
            this.onError('Failed to process audio data: ' + error.message);
        }
    }
    // Call the ASR API
    async callASRAPI(base64AudioData) {
        try {
            const requestBody = {
                user: {
                    uid: "1988591469"
                },
                audio: {
                    data: base64AudioData
                },
                request: {
                    model_name: "bigmodel"
                }
            };
            const response = await fetch(this.apiConfig.url, {
                method: 'POST',
                headers: this.apiConfig.headers,
                body: JSON.stringify(requestBody)
            });
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            const result = await response.json();
            this.handleASRResponse(result);
        } catch (error) {
            console.error('ASR API call failed:', error);
            this.onError('ASR API call failed: ' + error.message);
        }
    }
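    // The handler below assumes the flash-recognition endpoint returns a JSON
    // body with the transcript at result.text; adjust if the API contract differs.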
    // Handle the ASR response
    handleASRResponse(response) {
        console.log('ASR response:', response);
        if (response && response.result) {
            const recognizedText = response.result.text;
            this.onRecognitionResult(recognizedText);
            this.onStatusUpdate('Recognition complete', 'completed');
        } else {
            console.log('No text recognized');
            this.onStatusUpdate('No text recognized', 'ready');
        }
    }
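    // RIFF/WAV header written by encodeWAV (44 bytes, little-endian):
    //   0-3  'RIFF'           4-7  file size - 8        8-11 'WAVE'
    //  12-15 'fmt '          16-19 fmt chunk size (16) 20-21 format (1 = PCM)
    //  22-23 channels (1)    24-27 sample rate         28-31 byte rate (rate * 2)
    //  32-33 block align (2) 34-35 bits/sample (16)
    //  36-39 'data'          40-43 data size (samples * 2)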
    // Encode Float32 samples as 16-bit PCM WAV
    encodeWAV(samples, sampleRate) {
        const length = samples.length;
        const buffer = new ArrayBuffer(44 + length * 2);
        const view = new DataView(buffer);
        // WAV header
        const writeString = (offset, string) => {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        };
        writeString(0, 'RIFF');
        view.setUint32(4, 36 + length * 2, true);
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true);
        view.setUint16(20, 1, true);
        view.setUint16(22, 1, true);
        view.setUint32(24, sampleRate, true);
        view.setUint32(28, sampleRate * 2, true);
        view.setUint16(32, 2, true);
        view.setUint16(34, 16, true);
        writeString(36, 'data');
        view.setUint32(40, length * 2, true);
        // Write the samples, clamped to [-1, 1] and scaled to int16
        let offset = 44;
        for (let i = 0; i < length; i++) {
            const sample = Math.max(-1, Math.min(1, samples[i]));
            view.setInt16(offset, sample * 0x7FFF, true);
            offset += 2;
        }
        return buffer;
    }
    // Convert an ArrayBuffer to a base64 string
    arrayBufferToBase64(buffer) {
        let binary = '';
        const bytes = new Uint8Array(buffer);
        for (let i = 0; i < bytes.byteLength; i++) {
            binary += String.fromCharCode(bytes[i]);
        }
        return btoa(binary);
    }
    // Start recording
    async startRecording(existingStream = null) {
        try {
            // Reuse an externally provided audio stream if given; otherwise request one
            if (existingStream) {
                this.stream = existingStream;
                console.log('Using externally provided audio stream');
            } else {
                this.stream = await navigator.mediaDevices.getUserMedia({
                    audio: {
                        sampleRate: 16000,
                        channelCount: 1,
                        echoCancellation: true,
                        noiseSuppression: true
                    }
                });
                console.log('Acquired new audio stream');
            }
            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });
            const source = this.audioContext.createMediaStreamSource(this.stream);
            const processor = this.audioContext.createScriptProcessor(4096, 1, 1);
            processor.onaudioprocess = (event) => {
                const inputBuffer = event.inputBuffer;
                const inputData = inputBuffer.getChannelData(0);
                // Feed frames into calibration until the noise floor is known
                if (this.adaptiveThreshold && !this.isCalibrated) {
                    this.calibrateBackgroundNoise(inputData);
                    return;
                }
                // Voice activity detection
                if (this.detectVoiceActivity(inputData)) {
                    // Buffer the audio while speech is active
                    this.audioBuffer.push(new Float32Array(inputData));
                }
            };
            source.connect(processor);
            processor.connect(this.audioContext.destination);
            // Keep references for later cleanup
            this.processor = processor;
            this.source = source;
            this.isRecording = true;
            this.onStatusUpdate('Waiting for speech...', 'ready');
            if (this.adaptiveThreshold && !this.isCalibrated) {
                this.onStatusUpdate('Calibrating background noise, please stay quiet...', 'calibrating');
            }
            return true;
        } catch (error) {
            console.error('Failed to start recording:', error);
            this.onError('Failed to start recording: ' + error.message);
            return false;
        }
    }
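    // Note: ScriptProcessorNode is deprecated in the Web Audio API in favour of
    // AudioWorklet, though it still works in current browsers. Browsers may also
    // ignore the getUserMedia sampleRate constraint; the AudioContext sampleRate
    // option is what asks the audio graph to run at 16 kHz to match encodeWAV.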
    // Stop recording and release all resources
    stopRecording() {
        console.log('Stopping recording...');
        // Disconnect the audio graph
        if (this.source) {
            this.source.disconnect();
            this.source = null;
        }
        if (this.processor) {
            this.processor.disconnect();
            this.processor = null;
        }
        // Stop all audio tracks
        if (this.stream) {
            this.stream.getTracks().forEach(track => {
                track.stop();
                console.log(`Stopped audio track: ${track.label}`);
            });
            this.stream = null;
        }
        if (this.audioContext) {
            this.audioContext.close().then(() => {
                console.log('AudioContext closed');
            }).catch(err => {
                console.error('Error closing AudioContext:', err);
            });
            this.audioContext = null;
        }
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
        // If speech was in progress, process the final audio
        if (this.isSpeaking) {
            this.handleSpeechEnd();
        }
        // Reset all state
        this.isRecording = false;
        this.isSpeaking = false;
        this.audioBuffer = [];
        this.audioChunks = [];
        this.consecutiveFramesCount = 0;
        this.frameBuffer = [];
        // Reset calibration so the next start recalibrates
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;
        this.onStatusUpdate('Recording fully stopped', 'stopped');
        console.log('Recording fully stopped, all resources released');
    }
    // Get the current recording status
    getRecordingStatus() {
        return {
            isRecording: this.isRecording,
            isSpeaking: this.isSpeaking,
            hasAudioContext: !!this.audioContext
        };
    }
}
// Module export
export { AudioProcessor };
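// Usage sketch (illustrative; the callback wiring and host page are assumptions,
// not part of this module):
//
//   import { AudioProcessor } from './audio_processor.js';
//
//   const processor = new AudioProcessor({
//       silenceThreshold: 0.03,
//       silenceTimeout: 1000,
//       onRecognitionResult: (text) => console.log('ASR:', text),
//       onStatusUpdate: (msg, state) => console.log(`[${state}] ${msg}`),
//       onError: (err) => console.error(err)
//   });
//
//   // Start with a fresh microphone stream, or pass an existing MediaStream:
//   await processor.startRecording();
//   // ...later:
//   processor.stopRecording();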