// Audio processing module - advanced audio handling extracted from new_app.js
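
/**
 * AudioProcessor wraps microphone capture, simple energy-based voice activity
 * detection (VAD), and submission of detected speech segments to the ASR API.
 *
 * Options (all optional):
 *   silenceThreshold   - initial energy threshold for speech detection (default 0.03)
 *   silenceTimeout     - ms of silence before a speech segment is ended (default 1000)
 *   minSpeechDuration  - minimum segment length in ms to be recognized (default 300)
 *   adaptiveThreshold  - adapt the threshold to measured background noise (default true)
 *   onSpeechStart, onSpeechEnd, onRecognitionResult, onError, onStatusUpdate - callbacks
 */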
class AudioProcessor {

    constructor(options = {}) {
        this.audioContext = null;
        this.stream = null;
        this.isRecording = false;
        this.audioChunks = [];

        // VAD (voice activity detection) state
        this.isSpeaking = false;
        this.silenceThreshold = options.silenceThreshold || 0.03;
        this.silenceTimeout = options.silenceTimeout || 1000;
        this.minSpeechDuration = options.minSpeechDuration || 300;
        this.silenceTimer = null;
        this.speechStartTime = null;
        this.audioBuffer = [];
        this.backgroundNoiseLevel = 0;

        // Continuity detection parameters
        this.consecutiveFramesRequired = 3;
        this.consecutiveFramesCount = 0;   // current count of consecutive frames above threshold
        this.frameBuffer = [];             // buffer of recent frames
        this.adaptiveThreshold = options.adaptiveThreshold !== false;
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;         // background-noise calibration state

        // ASR API configuration
        this.apiConfig = {
            url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
            headers: {
                'X-Api-App-Key': '1988591469',
                'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
                'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
                'X-Api-Request-Id': this.generateUUID(),
                'X-Api-Sequence': '-1',
                'Content-Type': 'application/json'
            }
        };

        // Callbacks
        this.onSpeechStart = options.onSpeechStart || (() => {});
        this.onSpeechEnd = options.onSpeechEnd || (() => {});
        this.onRecognitionResult = options.onRecognitionResult || (() => {});
        this.onError = options.onError || (() => {});
        this.onStatusUpdate = options.onStatusUpdate || (() => {});
    }

    // Calibrate the background noise level and derive an adaptive silence threshold
    calibrateBackgroundNoise(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        this.noiseCalibrationSamples.push(audioLevel);

        if (this.noiseCalibrationSamples.length >= 100) {
            this.backgroundNoiseLevel = this.noiseCalibrationSamples.reduce((a, b) => a + b) / this.noiseCalibrationSamples.length;
            const oldThreshold = this.silenceThreshold;
            this.silenceThreshold = Math.max(this.backgroundNoiseLevel * 2.5, 0.005); // enforce a minimum threshold

            console.log('Background noise calibration complete:');
            console.log(`- average background noise: ${this.backgroundNoiseLevel.toFixed(4)}`);
            console.log(`- old threshold: ${oldThreshold.toFixed(4)}`);
            console.log(`- new threshold: ${this.silenceThreshold.toFixed(4)}`);

            this.noiseCalibrationSamples = [];
            this.isCalibrated = true; // mark calibration as complete
            this.onStatusUpdate('Background noise calibration complete, waiting for speech...', 'ready');
        }
    }

    // Compute the audio level, combining RMS energy with the peak amplitude
    calculateAudioLevel(audioData) {
        let sum = 0;
        let peak = 0;
        for (let i = 0; i < audioData.length; i++) {
            const sample = Math.abs(audioData[i]);
            sum += sample * sample;
            peak = Math.max(peak, sample);
        }
        const rms = Math.sqrt(sum / audioData.length);
        // Blend RMS and peak for a more robust level estimate
        return rms * 0.7 + peak * 0.3;
    }

    // Re-run the background noise calibration
    recalibrateBackground() {
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;
        this.onStatusUpdate('Recalibrating background noise...', 'calibrating');
        console.log('Restarting background noise calibration');
    }

    // Generate an RFC 4122 style (version 4) UUID for the request ID
    generateUUID() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
            const r = Math.random() * 16 | 0;
            const v = c === 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }

    // Voice activity detection with consecutive-frame confirmation
    detectVoiceActivity(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        const currentTime = Date.now();

        // Consecutive-frame check
        if (audioLevel > this.silenceThreshold) {
            this.consecutiveFramesCount++;

            // Only treat this as speech once several consecutive frames exceed the threshold
            if (this.consecutiveFramesCount >= this.consecutiveFramesRequired) {
                if (!this.isSpeaking) {
                    this.isSpeaking = true;
                    this.speechStartTime = currentTime;
                    this.audioBuffer = [...this.frameBuffer]; // include the buffered lead-in frames
                    this.onSpeechStart();
                    this.onStatusUpdate('Speech detected, recording...', 'speaking');
                    console.log(`Speech started - level: ${audioLevel.toFixed(4)}, consecutive frames: ${this.consecutiveFramesCount}`);
                }

                if (this.silenceTimer) {
                    clearTimeout(this.silenceTimer);
                    this.silenceTimer = null;
                }

                return true;
            } else {
                // Not enough consecutive frames yet; buffer the audio data
                this.frameBuffer.push(new Float32Array(audioData));
                if (this.frameBuffer.length > this.consecutiveFramesRequired) {
                    this.frameBuffer.shift(); // keep the buffer bounded
                }
                return false;
            }
        } else {
            // Reset the consecutive-frame counter
            this.consecutiveFramesCount = 0;
            this.frameBuffer = [];

            if (this.isSpeaking && !this.silenceTimer) {
                this.silenceTimer = setTimeout(() => {
                    this.handleSpeechEnd();
                }, this.silenceTimeout);
            }

            return this.isSpeaking;
        }
    }

    // Handle the end of a speech segment
    async handleSpeechEnd() {
        if (this.isSpeaking) {
            const speechDuration = Date.now() - this.speechStartTime;

            if (speechDuration >= this.minSpeechDuration) {
                console.log(`Speech ended, duration: ${speechDuration}ms`);
                console.log(window.webrtcApp.currentVideoTag);
                if (window.webrtcApp.currentVideoTag === "default") {
                    await this.processAudioBuffer();
                }
                this.onStatusUpdate('Recognizing speech...', 'processing');
            } else {
                console.log('Speech segment too short, ignoring');
                this.onStatusUpdate('Waiting for speech input...', 'ready');
            }

            this.isSpeaking = false;
            this.speechStartTime = null;
            this.audioBuffer = [];
            this.onSpeechEnd();
        }

        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
    }

    // Merge the buffered audio and send it to the ASR API
    async processAudioBuffer() {
        if (this.audioBuffer.length === 0) {
            return;
        }

        try {
            // Concatenate all buffered audio frames
            const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
            const combinedBuffer = new Float32Array(totalLength);
            let offset = 0;

            for (const buffer of this.audioBuffer) {
                combinedBuffer.set(buffer, offset);
                offset += buffer.length;
            }

            // Encode as WAV and convert to base64
            const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
            const base64Audio = this.arrayBufferToBase64(wavBuffer);

            // Call the ASR API
            await this.callASRAPI(base64Audio);

        } catch (error) {
            console.error('Failed to process audio data:', error);
            this.onError('Failed to process audio data: ' + error.message);
        }
    }

    // Call the ASR API
    async callASRAPI(base64AudioData) {
        try {
            const requestBody = {
                user: {
                    uid: "1988591469"
                },
                audio: {
                    data: base64AudioData
                },
                request: {
                    model_name: "bigmodel"
                }
            };

            const response = await fetch(this.apiConfig.url, {
                method: 'POST',
                headers: this.apiConfig.headers,
                body: JSON.stringify(requestBody)
            });

            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }

            const result = await response.json();
            this.handleASRResponse(result);

        } catch (error) {
            console.error('ASR API call failed:', error);
            this.onError('ASR API call failed: ' + error.message);
        }
    }

    // Handle the ASR response
    handleASRResponse(response) {
        console.log('ASR response:', response);

        if (response && response.result) {
            const recognizedText = response.result.text;
            this.onRecognitionResult(recognizedText);
            this.onStatusUpdate('Recognition complete', 'completed');
        } else {
            console.log('No text recognized');
            this.onStatusUpdate('No text recognized', 'ready');
        }
    }

    // Encode mono float samples as a 16-bit PCM WAV file
    encodeWAV(samples, sampleRate) {
        const length = samples.length;
        const buffer = new ArrayBuffer(44 + length * 2);
        const view = new DataView(buffer);

        // Helper for writing ASCII strings into the header
        const writeString = (offset, string) => {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        };

        // RIFF/WAV header
        writeString(0, 'RIFF');
        view.setUint32(4, 36 + length * 2, true);   // file size minus 8 bytes
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true);               // fmt chunk size
        view.setUint16(20, 1, true);                // audio format: PCM
        view.setUint16(22, 1, true);                // channels: mono
        view.setUint32(24, sampleRate, true);       // sample rate
        view.setUint32(28, sampleRate * 2, true);   // byte rate (sampleRate * channels * bytesPerSample)
        view.setUint16(32, 2, true);                // block align
        view.setUint16(34, 16, true);               // bits per sample
        writeString(36, 'data');
        view.setUint32(40, length * 2, true);       // data chunk size

        // Write the audio samples as little-endian 16-bit PCM
        let offset = 44;
        for (let i = 0; i < length; i++) {
            const sample = Math.max(-1, Math.min(1, samples[i]));
            view.setInt16(offset, sample * 0x7FFF, true);
            offset += 2;
        }

        return buffer;
    }

    // Convert an ArrayBuffer to a base64 string
    arrayBufferToBase64(buffer) {
        let binary = '';
        const bytes = new Uint8Array(buffer);
        for (let i = 0; i < bytes.byteLength; i++) {
            binary += String.fromCharCode(bytes[i]);
        }
        return btoa(binary);
    }

    // Start recording
    async startRecording(existingStream = null) {
        try {
            // Use an externally provided audio stream if given, otherwise request a new one
            if (existingStream) {
                this.stream = existingStream;
                console.log('Using externally provided audio stream');
            } else {
                this.stream = await navigator.mediaDevices.getUserMedia({
                    audio: {
                        sampleRate: 16000,
                        channelCount: 1,
                        echoCancellation: true,
                        noiseSuppression: true
                    }
                });
                console.log('Acquired new audio stream');
            }

            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });

            const source = this.audioContext.createMediaStreamSource(this.stream);
            const processor = this.audioContext.createScriptProcessor(4096, 1, 1);

            processor.onaudioprocess = (event) => {
                const inputBuffer = event.inputBuffer;
                const inputData = inputBuffer.getChannelData(0);

                // Voice activity detection
                if (this.detectVoiceActivity(inputData)) {
                    // Speech is active: buffer the audio data
                    this.audioBuffer.push(new Float32Array(inputData));
                }
            };

            source.connect(processor);
            processor.connect(this.audioContext.destination);

            // Keep references to the nodes for later cleanup
            this.processor = processor;
            this.source = source;

            this.isRecording = true;
            this.onStatusUpdate('Waiting for speech input...', 'ready');

            // Prompt for background noise calibration if adaptive thresholding is enabled
            if (this.adaptiveThreshold && this.noiseCalibrationSamples.length === 0) {
                this.onStatusUpdate('Calibrating background noise, please stay quiet...', 'calibrating');
            }

            return true;

        } catch (error) {
            console.error('Failed to start recording:', error);
            this.onError('Failed to start recording: ' + error.message);
            return false;
        }
    }

    // Stop recording and release all resources
    stopRecording() {
        console.log('Stopping recording...');

        // Disconnect the audio nodes
        if (this.source) {
            this.source.disconnect();
            this.source = null;
        }

        if (this.processor) {
            this.processor.disconnect();
            this.processor = null;
        }

        // Stop all audio tracks
        if (this.stream) {
            this.stream.getTracks().forEach(track => {
                track.stop();
                console.log(`Stopped audio track: ${track.label}`);
            });
            this.stream = null;
        }

        if (this.audioContext) {
            this.audioContext.close().then(() => {
                console.log('AudioContext closed');
            }).catch(err => {
                console.error('Error closing AudioContext:', err);
            });
            this.audioContext = null;
        }

        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }

        // If speech is still in progress, process the remaining audio
        if (this.isSpeaking) {
            this.handleSpeechEnd();
        }

        // Reset all state
        this.isRecording = false;
        this.isSpeaking = false;
        this.audioBuffer = [];
        this.audioChunks = [];
        this.consecutiveFramesCount = 0;
        this.frameBuffer = [];

        // Reset the calibration state so the next start recalibrates
        this.noiseCalibrationSamples = [];
        this.isCalibrated = false;

        this.onStatusUpdate('Recording fully stopped', 'stopped');
        console.log('Recording fully stopped, all resources released');
    }

    // Get the current recording status
    getRecordingStatus() {
        return {
            isRecording: this.isRecording,
            isSpeaking: this.isSpeaking,
            hasAudioContext: !!this.audioContext
        };
    }
}

// Export the module
export { AudioProcessor };
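
/*
 * Usage sketch (illustrative only): one way this module might be wired into a
 * page. The DOM element IDs and the handleUserSpeech function below are
 * assumptions for the example, not part of this module or of new_app.js.
 *
 * import { AudioProcessor } from './audio_processor.js';
 *
 * const processor = new AudioProcessor({
 *     silenceTimeout: 800,
 *     onStatusUpdate: (message, state) => {
 *         document.getElementById('asr-status').textContent = message; // hypothetical element
 *     },
 *     onRecognitionResult: (text) => {
 *         handleUserSpeech(text); // hypothetical application callback
 *     },
 *     onError: (message) => console.error(message)
 * });
 *
 * document.getElementById('start-btn').addEventListener('click', () => processor.startRecording());
 * document.getElementById('stop-btn').addEventListener('click', () => processor.stopRecording());
 */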