WebRtc_QingGan/src/new_app.js

let ASRTEXT = '';
class HttpASRRecognizer {
    constructor() {
        this.mediaRecorder = null;
        this.audioContext = null;
        this.mediaStream = null; // keep the getUserMedia stream so it can be released later
        this.isRecording = false;
        this.audioChunks = [];
        // VAD-related state
        this.isSpeaking = false;
        this.silenceThreshold = 0.01;
        this.silenceTimeout = 1000;
        this.minSpeechDuration = 300;
        this.silenceTimer = null;
        this.speechStartTime = null;
        this.audioBuffer = [];
        // API configuration
        this.apiConfig = {
            url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
            headers: {
                'X-Api-App-Key': '1988591469',
                'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
                'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
                'X-Api-Request-Id': this.generateUUID(),
                'X-Api-Sequence': '-1',
                'Content-Type': 'application/json'
            }
        };
        this.recordBtn = document.getElementById('startVoiceButton');
        this.statusDiv = document.getElementById('status');
        this.resultsDiv = document.getElementById('results');
        this.initEventListeners();
    }
    initEventListeners() {
        this.recordBtn.addEventListener('click', () => {
            if (this.isRecording) {
                this.stopRecording();
            } else {
                this.startRecording();
            }
        });
    }
    // Generate a v4-style UUID for the request ID header
    generateUUID() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
            const r = Math.random() * 16 | 0;
            const v = c === 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }
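    // Note: in secure contexts, crypto.randomUUID() would do the same job; the manual
    // replace-based helper above also works on older browsers and plain HTTP pages.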
    // Compute the audio energy (RMS volume) of a Float32 sample frame
    calculateAudioLevel(audioData) {
        let sum = 0;
        for (let i = 0; i < audioData.length; i++) {
            sum += audioData[i] * audioData[i];
        }
        return Math.sqrt(sum / audioData.length);
    }
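    // detectVoiceActivity compares this RMS value against silenceThreshold (0.01);
    // on samples in [-1, 1], an RMS of 0.01 corresponds to roughly -40 dBFS.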
    // Voice activity detection
    detectVoiceActivity(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        const currentTime = Date.now();
        if (audioLevel > this.silenceThreshold) {
            if (!this.isSpeaking) {
                this.isSpeaking = true;
                this.speechStartTime = currentTime;
                this.audioBuffer = [];
                this.updateStatus('Speech detected, recording...', 'speaking');
                console.log('Speech started');
            }
            if (this.silenceTimer) {
                clearTimeout(this.silenceTimer);
                this.silenceTimer = null;
            }
            return true;
        } else {
            if (this.isSpeaking && !this.silenceTimer) {
                this.silenceTimer = setTimeout(() => {
                    this.onSpeechEnd();
                }, this.silenceTimeout);
            }
            return this.isSpeaking;
        }
    }
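    // Net effect: speech starts when the RMS rises above silenceThreshold, onSpeechEnd
    // fires after silenceTimeout (1000 ms) of continuous silence, and utterances shorter
    // than minSpeechDuration (300 ms) are discarded in onSpeechEnd.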
    // Handle the end of a speech segment
    async onSpeechEnd() {
        if (this.isSpeaking) {
            const speechDuration = Date.now() - this.speechStartTime;
            if (speechDuration >= this.minSpeechDuration) {
                console.log(`Speech ended, duration: ${speechDuration}ms`);
                await this.processAudioBuffer();
                // this.updateStatus('Recognizing speech...', 'processing');
                console.log('Recognizing speech...');
            } else {
                console.log('Speech too short, ignoring');
                // this.updateStatus('Waiting for speech input...', 'ready');
                console.log('Waiting for speech input...');
            }
            this.isSpeaking = false;
            this.speechStartTime = null;
            this.audioBuffer = [];
        }
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
    }
    // Merge the buffered audio and send it to the API
    async processAudioBuffer() {
        if (this.audioBuffer.length === 0) {
            return;
        }
        try {
            // Concatenate all buffered audio frames
            const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
            const combinedBuffer = new Float32Array(totalLength);
            let offset = 0;
            for (const buffer of this.audioBuffer) {
                combinedBuffer.set(buffer, offset);
                offset += buffer.length;
            }
            // Encode as WAV, then as base64
            const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
            const base64Audio = this.arrayBufferToBase64(wavBuffer);
            // Call the ASR API
            await this.callASRAPI(base64Audio);
        } catch (error) {
            console.error('Failed to process audio data:', error);
            this.updateStatus('Recognition failed', 'error');
        }
    }
    // Call the ASR API
    async callASRAPI(base64AudioData) {
        try {
            const requestBody = {
                user: {
                    uid: "1988591469"
                },
                audio: {
                    data: base64AudioData
                },
                request: {
                    model_name: "bigmodel"
                }
            };
            const response = await fetch(this.apiConfig.url, {
                method: 'POST',
                headers: this.apiConfig.headers,
                body: JSON.stringify(requestBody)
            });
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            const result = await response.json();
            this.handleASRResponse(result);
        } catch (error) {
            console.error('ASR API call failed:', error);
            this.updateStatus('API call failed', 'error');
        }
    }
    // Handle the ASR response
    handleASRResponse(response) {
        console.log('ASR response:', response);
        if (response && response.data && response.data.result) {
            ASRTEXT = response.data.result;
            // this.displayResult(text);
            // this.updateStatus('Recognition complete', 'completed');
            console.log('Recognition complete');
        } else {
            console.log('No text recognized');
            // this.updateStatus('No text recognized', 'ready');
        }
    }
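    // Illustrative only: a response shaped like { data: { result: 'hello' } } would set
    // ASRTEXT to 'hello'; the exact payload returned by the Volcengine flash endpoint
    // should be confirmed against its documentation.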
    // Display a recognition result
    displayResult(text) {
        const resultElement = document.createElement('div');
        resultElement.className = 'result-item';
        resultElement.innerHTML = `
            <span class="timestamp">${new Date().toLocaleTimeString()}</span>
            <span class="text">${text}</span>
        `;
        this.resultsDiv.appendChild(resultElement);
        this.resultsDiv.scrollTop = this.resultsDiv.scrollHeight;
    }
    // Update the status display
    updateStatus(message, status) {
        this.statusDiv.textContent = message;
        this.statusDiv.className = `status ${status}`;
    }
    // Encode Float32 samples as a 16-bit PCM mono WAV file
    encodeWAV(samples, sampleRate) {
        const length = samples.length;
        const buffer = new ArrayBuffer(44 + length * 2);
        const view = new DataView(buffer);
        // WAV/RIFF header
        const writeString = (offset, string) => {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        };
        writeString(0, 'RIFF');
        view.setUint32(4, 36 + length * 2, true);   // RIFF chunk size
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true);               // fmt chunk size
        view.setUint16(20, 1, true);                // audio format: PCM
        view.setUint16(22, 1, true);                // channels: mono
        view.setUint32(24, sampleRate, true);       // sample rate
        view.setUint32(28, sampleRate * 2, true);   // byte rate
        view.setUint16(32, 2, true);                // block align
        view.setUint16(34, 16, true);               // bits per sample
        writeString(36, 'data');
        view.setUint32(40, length * 2, true);       // data chunk size
        // Write the samples, clamped and scaled to signed 16-bit
        let offset = 44;
        for (let i = 0; i < length; i++) {
            const sample = Math.max(-1, Math.min(1, samples[i]));
            view.setInt16(offset, sample * 0x7FFF, true);
            offset += 2;
        }
        return buffer;
    }
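    // Sanity check (illustrative, not called anywhere): one second of 16 kHz mono audio
    // is 16000 samples, so encodeWAV yields 44 + 16000 * 2 = 32044 bytes. For an
    // instance named recognizer:
    //   recognizer.encodeWAV(new Float32Array(16000), 16000).byteLength === 32044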
    // Convert an ArrayBuffer to a base64 string
    arrayBufferToBase64(buffer) {
        let binary = '';
        const bytes = new Uint8Array(buffer);
        for (let i = 0; i < bytes.byteLength; i++) {
            binary += String.fromCharCode(bytes[i]);
        }
        return btoa(binary);
    }
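    // The byte-by-byte loop avoids the argument-count limits of
    // String.fromCharCode(...bytes) on long clips, at the cost of some speed.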
    async startRecording() {
        try {
            const stream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    sampleRate: 16000,
                    channelCount: 1,
                    echoCancellation: true,
                    noiseSuppression: true
                }
            });
            // Keep a handle to the stream so stopRecording() can release the microphone
            this.mediaStream = stream;
            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });
            const source = this.audioContext.createMediaStreamSource(stream);
            const processor = this.audioContext.createScriptProcessor(4096, 1, 1);
            processor.onaudioprocess = (event) => {
                const inputBuffer = event.inputBuffer;
                const inputData = inputBuffer.getChannelData(0);
                // Voice activity detection
                if (this.detectVoiceActivity(inputData)) {
                    // Buffer the audio frame while speech is active
                    this.audioBuffer.push(new Float32Array(inputData));
                }
            };
            source.connect(processor);
            processor.connect(this.audioContext.destination);
            this.isRecording = true;
            this.recordBtn.textContent = 'Stop Recording';
            this.recordBtn.className = 'btn recording';
            // this.updateStatus('Waiting for speech input...', 'ready');
        } catch (error) {
            console.error('Failed to start recording:', error);
            // this.updateStatus('Failed to start recording', 'error');
        }
    }
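    // Note: browsers treat the getUserMedia sampleRate constraint as best-effort, while
    // the AudioContext({ sampleRate: 16000 }) option (where supported) makes the processing
    // graph run at 16 kHz; ScriptProcessorNode is deprecated in favour of AudioWorklet but
    // still works in current browsers.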
    stopRecording() {
        if (this.audioContext) {
            this.audioContext.close();
            this.audioContext = null;
        }
        // Release the microphone so the browser's recording indicator turns off
        if (this.mediaStream) {
            this.mediaStream.getTracks().forEach((track) => track.stop());
            this.mediaStream = null;
        }
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
        // If speech is still in progress, process the final audio
        if (this.isSpeaking) {
            this.onSpeechEnd();
        }
        this.isRecording = false;
        this.isSpeaking = false;
        this.audioBuffer = [];
        this.recordBtn.textContent = 'Start Recording';
        this.recordBtn.className = 'btn';
        console.log('Recording stopped');
        // this.updateStatus('Recording stopped', 'stopped');
    }
}
// Initialize the app
document.addEventListener('DOMContentLoaded', () => {
    const asrRecognizer = new HttpASRRecognizer();
    console.log('HTTP ASR recognizer initialized');
});
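// ASRTEXT is only ever written in this file; presumably another script on the page reads
// it. A hypothetical consumer (not part of this file) might poll it like:
//   setInterval(() => { if (ASRTEXT) { handleTranscript(ASRTEXT); ASRTEXT = ''; } }, 500);
// where handleTranscript is whatever downstream handler the page defines.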