realtime 语音录取

2025-07-27 12:11:13 +08:00 · 2025-07-27 12:11:13 +08:00 · d808bbfe26
commit d808bbfe26
parent c95e6a2552
8 changed files with 1251 additions and 103 deletions
--- a/src/audio_processor.js
+++ b/src/audio_processor.js
@ -0,0 +1,322 @@
 // Audio processing module - advanced audio handling extracted from new_app.js.
 /**
  * Captures microphone audio, runs a simple energy-based voice activity
  * detector (VAD) over it, and sends each detected utterance to the ASR HTTP
  * endpoint as a base64-encoded 16-bit mono WAV file.
  */
 class AudioProcessor {
    /**
     * @param {Object} [options]
     * @param {number} [options.silenceThreshold=0.01] RMS level above which a frame counts as speech.
     * @param {number} [options.silenceTimeout=1000] Milliseconds of silence that end an utterance.
     * @param {number} [options.minSpeechDuration=300] Utterances shorter than this (ms) are discarded.
     * @param {Object} [options.apiConfig] Optional `{ url, headers }` overrides for the ASR endpoint.
     * @param {Function} [options.onSpeechStart] Called when speech is first detected.
     * @param {Function} [options.onSpeechEnd] Called when an utterance ends (before recognition starts).
     * @param {Function} [options.onRecognitionResult] Called with the recognized text.
     * @param {Function} [options.onError] Called with an error message string.
     * @param {Function} [options.onStatusUpdate] Called with `(message, status)`.
     */
    constructor(options = {}) {
        this.audioContext = null;
        // Handles kept so stopRecording() can release the microphone and audio graph.
        this.mediaStream = null;
        this.sourceNode = null;
        this.processorNode = null;
        this.isRecording = false;
        this.audioChunks = [];
        // Sample rate written into the WAV header; replaced by the real
        // AudioContext rate once recording starts.
        this.sampleRate = 16000;
        // VAD state
        this.isSpeaking = false;
        this.silenceThreshold = options.silenceThreshold || 0.01;
        this.silenceTimeout = options.silenceTimeout || 1000;
        this.minSpeechDuration = options.minSpeechDuration || 300;
        this.silenceTimer = null;
        this.speechStartTime = null;
        this.audioBuffer = [];
        // API configuration.
        // SECURITY NOTE(review): these credentials ship inside client-side code and
        // are therefore visible to every user. Prefer supplying them through
        // options.apiConfig or proxying the request through a backend.
        const apiOverrides = options.apiConfig || {};
        this.apiConfig = {
            url: apiOverrides.url || 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
            headers: Object.assign({
                'X-Api-App-Key': '1988591469',
                'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
                'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
                'X-Api-Request-Id': this.generateUUID(),
                'X-Api-Sequence': '-1',
                'Content-Type': 'application/json'
            }, apiOverrides.headers)
        };
        // Callbacks (default to no-ops)
        this.onSpeechStart = options.onSpeechStart || (() => {});
        this.onSpeechEnd = options.onSpeechEnd || (() => {});
        this.onRecognitionResult = options.onRecognitionResult || (() => {});
        this.onError = options.onError || (() => {});
        this.onStatusUpdate = options.onStatusUpdate || (() => {});
    }
    /**
     * Generates a random version-4 style UUID string.
     * Uses Math.random, so it is suitable for request ids only, not for secrets.
     * @returns {string}
     */
    generateUUID() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {
            const r = Math.random() * 16 | 0;
            const v = c === 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }
    /**
     * Computes the RMS level of a frame of samples.
     * @param {Float32Array|number[]} audioData
     * @returns {number} RMS value; 0 for an empty frame (instead of NaN).
     */
    calculateAudioLevel(audioData) {
        if (!audioData || audioData.length === 0) {
            return 0;
        }
        let sum = 0;
        for (let i = 0; i < audioData.length; i++) {
            sum += audioData[i] * audioData[i];
        }
        return Math.sqrt(sum / audioData.length);
    }
    /**
     * Voice activity detection for one frame.
     * Starts an utterance when the level exceeds the threshold and arms a
     * silence timer when the level drops while an utterance is in progress.
     * @param {Float32Array} audioData
     * @returns {boolean} true when the frame belongs to an utterance and should be buffered.
     */
    detectVoiceActivity(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        const currentTime = Date.now();
        if (audioLevel > this.silenceThreshold) {
            if (!this.isSpeaking) {
                this.isSpeaking = true;
                this.speechStartTime = currentTime;
                this.audioBuffer = [];
                this.onSpeechStart();
                this.onStatusUpdate('检测到语音，开始录音...', 'speaking');
                console.log('开始说话');
            }
            // Speech resumed: cancel any pending end-of-speech timer.
            if (this.silenceTimer) {
                clearTimeout(this.silenceTimer);
                this.silenceTimer = null;
            }
            return true;
        } else {
            if (this.isSpeaking && !this.silenceTimer) {
                this.silenceTimer = setTimeout(() => {
                    this.handleSpeechEnd();
                }, this.silenceTimeout);
            }
            // Trailing silence is still buffered while the utterance is open.
            return this.isSpeaking;
        }
    }
    /**
     * Finishes the current utterance and, if it is long enough, sends it for recognition.
     * The captured audio is snapshotted and the VAD state reset synchronously,
     * before any await, so that audio arriving while the request is in flight
     * starts a new utterance instead of being appended and then discarded.
     * @returns {Promise<void>}
     */
    async handleSpeechEnd() {
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
        if (!this.isSpeaking) {
            return;
        }
        const speechDuration = Date.now() - this.speechStartTime;
        const capturedBuffers = this.audioBuffer;
        this.isSpeaking = false;
        this.speechStartTime = null;
        this.audioBuffer = [];
        this.onSpeechEnd();
        if (speechDuration >= this.minSpeechDuration) {
            console.log(`语音结束，时长: ${speechDuration}ms`);
            // Announce "processing" before the request so it cannot overwrite
            // the final "completed" status.
            this.onStatusUpdate('语音识别中...', 'processing');
            await this.processAudioBuffer(capturedBuffers);
        } else {
            console.log('说话时长太短，忽略');
            this.onStatusUpdate('等待语音输入...', 'ready');
        }
    }
    /**
     * Merges buffered frames, encodes them as WAV/base64 and calls the ASR API.
     * @param {Float32Array[]} [buffers=this.audioBuffer] Frames to send.
     * @returns {Promise<void>}
     */
    async processAudioBuffer(buffers = this.audioBuffer) {
        if (!buffers || buffers.length === 0) {
            return;
        }
        try {
            // Concatenate all frames into one contiguous sample array.
            const totalLength = buffers.reduce((sum, buffer) => sum + buffer.length, 0);
            const combinedBuffer = new Float32Array(totalLength);
            let offset = 0;
            for (const buffer of buffers) {
                combinedBuffer.set(buffer, offset);
                offset += buffer.length;
            }
            // Encode as WAV using the rate the audio was actually captured at.
            const wavBuffer = this.encodeWAV(combinedBuffer, this.sampleRate);
            const base64Audio = this.arrayBufferToBase64(wavBuffer);
            await this.callASRAPI(base64Audio);
        } catch (error) {
            console.error('处理音频数据失败:', error);
            this.onError('处理音频数据失败: ' + error.message);
        }
    }
    /**
     * Sends base64 WAV audio to the ASR endpoint and dispatches the response.
     * Errors are reported through onError rather than thrown.
     * @param {string} base64AudioData
     * @returns {Promise<void>}
     */
    async callASRAPI(base64AudioData) {
        try {
            const requestBody = {
                user: {
                    uid: "1988591469"
                },
                audio: {
                    data: base64AudioData
                },
                request: {
                    model_name: "bigmodel"
                }
            };
            // A request id must identify a single request, so generate a fresh
            // one per call instead of reusing the constructor-time value.
            const headers = Object.assign({}, this.apiConfig.headers, {
                'X-Api-Request-Id': this.generateUUID()
            });
            const response = await fetch(this.apiConfig.url, {
                method: 'POST',
                headers: headers,
                body: JSON.stringify(requestBody)
            });
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            const result = await response.json();
            this.handleASRResponse(result);
        } catch (error) {
            console.error('ASR API调用失败:', error);
            this.onError('ASR API调用失败: ' + error.message);
        }
    }
    /**
     * Handles a parsed ASR response.
     * Only a non-empty text is forwarded to onRecognitionResult; anything else
     * is reported as "nothing recognized".
     * @param {Object} response
     */
    handleASRResponse(response) {
        console.log('ASR响应:', response);
        const recognizedText = response && response.result ? response.result.text : undefined;
        if (typeof recognizedText === 'string' && recognizedText.trim() !== '') {
            this.onRecognitionResult(recognizedText);
            this.onStatusUpdate('识别完成', 'completed');
        } else {
            console.log('未识别到文字');
            this.onStatusUpdate('未识别到文字', 'ready');
        }
    }
    /**
     * Encodes float samples in [-1, 1] as a 16-bit PCM mono WAV file.
     * @param {Float32Array} samples
     * @param {number} sampleRate Sample rate in Hz written into the header.
     * @returns {ArrayBuffer} 44-byte header followed by little-endian samples.
     */
    encodeWAV(samples, sampleRate) {
        const length = samples.length;
        const buffer = new ArrayBuffer(44 + length * 2);
        const view = new DataView(buffer);
        const writeString = (offset, string) => {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        };
        writeString(0, 'RIFF');
        view.setUint32(4, 36 + length * 2, true);   // RIFF chunk size
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true);               // fmt chunk size
        view.setUint16(20, 1, true);                // format: PCM
        view.setUint16(22, 1, true);                // channels: mono
        view.setUint32(24, sampleRate, true);
        view.setUint32(28, sampleRate * 2, true);   // byte rate (mono, 2 bytes/sample)
        view.setUint16(32, 2, true);                // block align
        view.setUint16(34, 16, true);               // bits per sample
        writeString(36, 'data');
        view.setUint32(40, length * 2, true);
        // Sample data, clamped to [-1, 1] before scaling.
        let offset = 44;
        for (let i = 0; i < length; i++) {
            const sample = Math.max(-1, Math.min(1, samples[i]));
            view.setInt16(offset, sample * 0x7FFF, true);
            offset += 2;
        }
        return buffer;
    }
    /**
     * Converts an ArrayBuffer to a base64 string.
     * @param {ArrayBuffer} buffer
     * @returns {string}
     */
    arrayBufferToBase64(buffer) {
        let binary = '';
        const bytes = new Uint8Array(buffer);
        for (let i = 0; i < bytes.byteLength; i++) {
            binary += String.fromCharCode(bytes[i]);
        }
        return btoa(binary);
    }
    /**
     * Opens the microphone and starts feeding frames to the VAD.
     * Calling it while already recording is a no-op that returns true.
     * @returns {Promise<boolean>} true on success, false if setup failed (onError is called).
     */
    async startRecording() {
        if (this.isRecording) {
            return true;
        }
        try {
            this.mediaStream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    sampleRate: 16000,
                    channelCount: 1,
                    echoCancellation: true,
                    noiseSuppression: true
                }
            });
            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });
            // Use the rate the context really runs at for the WAV header.
            this.sampleRate = this.audioContext.sampleRate;
            this.sourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);
            this.processorNode = this.audioContext.createScriptProcessor(4096, 1, 1);
            this.processorNode.onaudioprocess = (event) => {
                const inputData = event.inputBuffer.getChannelData(0);
                if (this.detectVoiceActivity(inputData)) {
                    // Copy the frame: the underlying buffer is reused by the audio engine.
                    this.audioBuffer.push(new Float32Array(inputData));
                }
            };
            this.sourceNode.connect(this.processorNode);
            this.processorNode.connect(this.audioContext.destination);
            this.isRecording = true;
            this.onStatusUpdate('等待语音输入...', 'ready');
            return true;
        } catch (error) {
            // Do not leave the microphone open after a partially completed setup.
            this.releaseAudioResources();
            console.error('启动录音失败:', error);
            this.onError('启动录音失败: ' + error.message);
            return false;
        }
    }
    /**
     * Disconnects the audio graph, stops the microphone tracks and closes the
     * AudioContext. Safe to call when nothing is open.
     */
    releaseAudioResources() {
        if (this.processorNode) {
            this.processorNode.onaudioprocess = null;
            this.processorNode.disconnect();
            this.processorNode = null;
        }
        if (this.sourceNode) {
            this.sourceNode.disconnect();
            this.sourceNode = null;
        }
        if (this.mediaStream) {
            this.mediaStream.getTracks().forEach((track) => track.stop());
            this.mediaStream = null;
        }
        if (this.audioContext) {
            const closing = this.audioContext.close();
            if (closing && typeof closing.catch === 'function') {
                closing.catch((error) => console.error('关闭音频上下文失败:', error));
            }
            this.audioContext = null;
        }
    }
    /**
     * Stops recording, releases the microphone and flushes any utterance in progress.
     */
    stopRecording() {
        this.releaseAudioResources();
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
        // Flush the utterance in progress. handleSpeechEnd snapshots the buffer
        // synchronously, so the reset below cannot discard it.
        if (this.isSpeaking) {
            this.handleSpeechEnd().catch((error) => {
                console.error('处理音频数据失败:', error);
                this.onError('处理音频数据失败: ' + error.message);
            });
        }
        this.isRecording = false;
        this.isSpeaking = false;
        this.audioBuffer = [];
        this.onStatusUpdate('录音已停止', 'stopped');
        console.log('录音已停止');
    }
    /**
     * @returns {{isRecording: boolean, isSpeaking: boolean, hasAudioContext: boolean}}
     */
    getRecordingStatus() {
        return {
            isRecording: this.isRecording,
            isSpeaking: this.isSpeaking,
            hasAudioContext: !!this.audioContext
        };
    }
 }
 // 导出模块
 export { AudioProcessor };
--- a/src/chat_with_audio.js
+++ b/src/chat_with_audio.js
@ -6,6 +6,9 @@ import { getLLMConfig, getMinimaxiConfig, getAudioConfig, validateConfig } from
 // 防止重复播放的标志
 let isPlaying = false;
 // 音频播放队列
 let audioQueue = [];
 let isProcessingQueue = false;
 async function chatWithAudioStream(userInput) {
  // 验证配置
@ -20,31 +23,22 @@ async function chatWithAudioStream(userInput) {
  const minimaxiConfig = getMinimaxiConfig();
  const audioConfig = getAudioConfig();
-  // 1. 请求大模型回答
+  // 清空音频队列
-  console.log('\n=== 请求大模型回答 ===');
+  audioQueue = [];
  const llmResponse = await requestLLMStream({
    apiKey: llmConfig.apiKey,
    model: llmConfig.model,
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: userInput },
    ],
  });
-  // 提取大模型回答内容（现在直接返回内容）
+  // 定义段落处理函数
-  const llmContent = llmResponse;
+  const handleSegment = async (segment) => {
    console.log('\n=== 处理文本段落 ===');
    console.log('段落内容:', segment);
-  console.log('\n=== 大模型回答 ===');
+    try {
-  console.log("llmResponse: ", llmContent);
+      // 为每个段落生成音频
  // 2. 合成音频
  console.log('\n=== 开始合成音频 ===');
      const audioResult = await requestMinimaxi({
        apiKey: minimaxiConfig.apiKey,
        groupId: minimaxiConfig.groupId,
        body: {
          model: audioConfig.model,
-      text: llmContent,
+          text: segment,
          stream: audioConfig.stream,
          language_boost: audioConfig.language_boost,
          output_format: audioConfig.output_format,
@ -54,30 +48,70 @@ async function chatWithAudioStream(userInput) {
        stream: true,
      });
-  // 3. 流式播放音频
+      // 将音频添加到播放队列
-  console.log('\n=== 开始流式播放音频 ===');
+      if (audioResult && audioResult.data && audioResult.data.audio) {
-  // console.log('音频数据长度:', audioResult.data.audio.length);
+        audioQueue.push({
-  await playAudioStream(audioResult.data.audio);
+          text: segment,
          audioHex: audioResult.data.audio
        });
        console.log('音频已添加到队列，队列长度:', audioQueue.length);
        // 开始处理队列
        processAudioQueue();
      }
    } catch (error) {
      console.error('生成音频失败:', error);
    }
  };
  // 1. 请求大模型回答，并实时处理段落
  console.log('\n=== 请求大模型回答 ===');
  const llmResponse = await requestLLMStream({
    apiKey: llmConfig.apiKey,
    model: llmConfig.model,
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: userInput },
    ],
    onSegment: handleSegment // 传入段落处理回调
  });
  console.log('\n=== 大模型完整回答 ===');
  console.log("llmResponse: ", llmResponse);
  return {
    userInput,
-    llmResponse: llmContent,
+    llmResponse,
-    audioResult,
+    audioQueue: audioQueue.map(item => ({ text: item.text, hasAudio: !!item.audioHex }))
  };
 }
 // Process the audio playback queue.
 // NOTE(review): the playback loop below is commented out, so this function
 // currently only toggles isProcessingQueue and plays nothing. Presumably
 // playback now happens inside requestMinimaxi (which plays chunks as they
 // arrive) - confirm before re-enabling, otherwise audio would play twice.
 async function processAudioQueue() {
  // Re-entrancy guard: do nothing if a previous call is still running.
  if (isProcessingQueue) return;
  isProcessingQueue = true;
  // while (audioQueue.length > 0) {
  //   const audioItem = audioQueue.shift();
  //   console.log('\n=== 播放队列中的音频 ===');
  //   console.log('文本:', audioItem.text);
  //   try {
  //     await playAudioStream(audioItem.audioHex);
  //   } catch (error) {
  //     console.error('播放音频失败:', error);
  //   }
  // }
  isProcessingQueue = false;
 }
 // 流式播放音频
 async function playAudioStream(audioHex) {
  if (isPlaying) {
    console.log('音频正在播放中，跳过重复播放');
    return;
  }
  console.log('=== 开始播放音频 ===');
  console.log('音频数据长度:', audioHex.length);
  isPlaying = true;
  // 将hex转换为ArrayBuffer
  const audioBuffer = hexToArrayBuffer(audioHex);
@ -102,13 +136,11 @@ async function playAudioStream(audioHex) {
    return new Promise((resolve) => {
      source.onended = () => {
        console.log('音频播放完成');
        isPlaying = false;
        resolve();
      };
    });
  } catch (error) {
    console.error('音频播放失败:', error);
    isPlaying = false;
    throw error;
  }
 }
@ -175,4 +207,6 @@ async function playAudioStreamNode(audioHex) {
  }
 }
 export { chatWithAudioStream, playAudioStream, playAudioStreamNode};
--- a/src/config.js
+++ b/src/config.js
@ -16,11 +16,11 @@ export const config = {
  audio: {
    model: 'speech-02-hd',
    voiceSetting: {
-      voice_id: 'yantu-qinggang',
+      voice_id: 'yantu-qinggang-2',
      speed: 1,
      vol: 1,
      pitch: 0,
-      emotion: 'happy',
+      // emotion: 'happy',
    },
    audioSetting: {
      sample_rate: 32000,
--- a/副本.html
+++ b/副本.html
@ -0,0 +1,139 @@
 <!DOCTYPE html>
 <html lang="zh-CN">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>实时语音识别</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            background: white;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        .controls {
            text-align: center;
            margin-bottom: 30px;
        }
        .record-btn {
            background: #4CAF50;
            color: white;
            border: none;
            padding: 15px 30px;
            font-size: 18px;
            border-radius: 50px;
            cursor: pointer;
            transition: all 0.3s;
        }
        .record-btn:hover {
            background: #45a049;
        }
        .record-btn.recording {
            background: #f44336;
            animation: pulse 1s infinite;
        }
        @keyframes pulse {
            0% { transform: scale(1); }
            50% { transform: scale(1.05); }
            100% { transform: scale(1); }
        }
        .status {
            margin: 20px 0;
            padding: 10px;
            border-radius: 5px;
            text-align: center;
            font-weight: bold;
        }
        .status.connected {
            background: #d4edda;
            color: #155724;
            border: 1px solid #c3e6cb;
        }
        .status.speaking {
            background: #fff3cd;
            color: #856404;
            border: 1px solid #ffeaa7;
            animation: speaking-pulse 0.5s infinite alternate;
        }
        .status.processing {
            background: #cce7ff;
            color: #004085;
            border: 1px solid #99d6ff;
        }
        .status.disconnected {
            background: #f8d7da;
            color: #721c24;
            border: 1px solid #f5c6cb;
        }
        @keyframes speaking-pulse {
            0% { opacity: 0.7; }
            100% { opacity: 1; }
        }
        .results {
            max-height: 400px;
            overflow-y: auto;
            border: 1px solid #ddd;
            border-radius: 5px;
            padding: 15px;
            background: #fafafa;
        }
        .result-item {
            margin-bottom: 15px;
            padding: 10px;
            background: white;
            border-radius: 5px;
            border-left: 4px solid #4CAF50;
        }
        .timestamp {
            font-size: 12px;
            color: #666;
            margin-bottom: 5px;
        }
        .text {
            font-size: 16px;
            line-height: 1.4;
        }
        .help {
            margin-top: 20px;
            padding: 15px;
            background: #e3f2fd;
            border-radius: 5px;
            font-size: 14px;
            color: #1565c0;
        }
    </style>
 </head>
 <body>
    <div class="container">
        <h1>实时语音识别</h1>
        <div class="controls">
            <button id="recordBtn" class="record-btn">开始录音</button>
        </div>
        <div id="status" class="status disconnected">未连接</div>
        <div class="help">
            <strong>使用说明：</strong><br>
            1. 点击"开始录音"按钮开启麦克风<br>
            2. 系统会自动检测您的语音，只有在检测到说话时才开始录音<br>
            3. 说话结束后会自动发送音频进行识别<br>
            4. 识别结果会显示在下方区域
        </div>
        <h3>识别结果：</h3>
        <div id="results" class="results">
            <!-- 识别结果将显示在这里 -->
        </div>
    </div>
    <script src="new_app.js"></script>
 </body>
 </html>
--- a/src/index.js
+++ b/src/index.js
@ -1,5 +1,6 @@
 // WebRTC 音视频通话应用
 import { chatWithAudioStream } from './chat_with_audio.js';
 import { AudioProcessor } from './audio_processor.js';
 class WebRTCChat {
    constructor() {
@ -15,6 +16,30 @@ class WebRTCChat {
        this.videoStreams = new Map(); // 存储不同视频的MediaStream
        this.currentVideoStream = null;
        // 初始化音频处理器
        this.audioProcessor = new AudioProcessor({
            onSpeechStart: () => {
                this.voiceStatus.textContent = '检测到语音，开始录音...';
                this.logMessage('检测到语音，开始录音...', 'info');
            },
            onSpeechEnd: () => {
                // 语音结束回调
            },
            onRecognitionResult: (text) => {
                // ASRTEXT = text;
                this.voiceStatus.textContent = '识别完成';
                this.logMessage(`语音识别结果: ${text}`, 'success');
                this.handleVoiceInput(text);
            },
            onError: (error) => {
                this.voiceStatus.textContent = '识别失败';
                this.logMessage(error, 'error');
            },
            onStatusUpdate: (message, status) => {
                this.voiceStatus.textContent = message;
            }
        });
        this.initializeElements();
        this.initializeSocket();
        this.loadVideoMapping();
@ -627,65 +652,34 @@ class WebRTCChat {
        });
    }
    // 修改：使用音频处理器的语音录制功能
    async startVoiceRecording() {
-        try {
+        const success = await this.audioProcessor.startRecording();
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            this.mediaRecorder = new MediaRecorder(stream);
            this.audioChunks = [];
            this.mediaRecorder.ondataavailable = (event) => {
                this.audioChunks.push(event.data);
            };
            this.mediaRecorder.onstop = () => {
                const audioBlob = new Blob(this.audioChunks, { type: 'audio/wav' });
                this.processVoiceInput(audioBlob);
            };
            this.mediaRecorder.start();
            this.isRecording = true;
        if (success) {
            this.startVoiceButton.disabled = true;
            this.stopVoiceButton.disabled = false;
            this.voiceStatus.textContent = '正在录音...';
            this.startVoiceButton.classList.add('recording');
-            
+            this.voiceStatus.textContent = '等待语音输入...';
-            this.logMessage('开始语音录制', 'info');
+            this.logMessage('高级语音录制已启动', 'success');
-        } catch (error) {
+        } else {
-            this.logMessage('无法访问麦克风: ' + error.message, 'error');
+            this.voiceStatus.textContent = '录音启动失败';
        }
    }
    // 修改：停止语音录制
    stopVoiceRecording() {
-        if (this.mediaRecorder && this.isRecording) {
+        this.audioProcessor.stopRecording();
            this.mediaRecorder.stop();
            this.isRecording = false;
        this.startVoiceButton.disabled = false;
        this.stopVoiceButton.disabled = true;
            this.voiceStatus.textContent = '点击开始语音输入';
        this.startVoiceButton.classList.remove('recording');
        this.voiceStatus.textContent = '点击开始语音输入';
-            this.logMessage('停止语音录制', 'info');
+        this.logMessage('语音录制已停止', 'info');
        }
    }
    async processVoiceInput(audioBlob) {
        // 这里可以集成语音识别API，如Web Speech API或第三方服务
        // 为了演示，我们使用一个简单的模拟识别
        const mockText = this.simulateSpeechRecognition();
        this.socket.emit('voice-input', {
            audioData: audioBlob,
            text: mockText
        });
        this.logMessage(`语音识别结果: ${mockText}`, 'info');
        // 根据语音识别结果切换视频流
        await this.handleVoiceInput(mockText);
    }
    // 处理语音输入结果
    async handleVoiceInput(text) {
        // 根据文本查找对应视频
        let videoFile = this.videoMapping['默认'] || this.defaultVideo;
@ -705,7 +699,20 @@ class WebRTCChat {
            type: 'voice', 
            text 
        });
        // 调用大模型处理
        try {
            this.logMessage('正在处理语音输入，请稍候...', 'info');
            const result = await chatWithAudioStream(text);
            this.logMessage(`大模型回答: ${result.llmResponse}`, 'success');
        } catch (error) {
            this.logMessage(`处理语音输入失败: ${error.message}`, 'error');
            console.error('chatWithAudioStream error:', error);
        }
    }
    // 删除原有的简单音频处理方法
    // processVoiceInput() 和 simulateSpeechRecognition() 方法已被移除
    simulateSpeechRecognition() {
        // 模拟语音识别，随机返回预设的文本
--- a/src/llm_stream.js
+++ b/src/llm_stream.js
@ -1,6 +1,6 @@
 // 以流式方式请求LLM大模型接口，并打印流式返回内容
-async function requestLLMStream({ apiKey, model, messages }) {
+async function requestLLMStream({ apiKey, model, messages, onSegment }) {
  const response = await fetch('https://ark.cn-beijing.volces.com/api/v3/bots/chat/completions', {
    method: 'POST',
    headers: {
@ -26,6 +26,10 @@ async function requestLLMStream({ apiKey, model, messages }) {
  let done = false;
  let buffer = '';
  let content = '';
  let pendingText = ''; // 待处理的文本片段
  // 分段分隔符
  const segmentDelimiters = /[，。：；！？,.:;!?]/;
  while (!done) {
    const { value, done: doneReading } = await reader.read();
@ -47,6 +51,10 @@ async function requestLLMStream({ apiKey, model, messages }) {
          if (jsonStr === '[DONE]') {
            console.log('LLM SSE流结束');
            // 处理最后的待处理文本
            if (pendingText.trim() && onSegment) {
              await onSegment(pendingText.trim());
            }
            continue;
          }
@ -55,7 +63,29 @@ async function requestLLMStream({ apiKey, model, messages }) {
            if (obj.choices && obj.choices[0] && obj.choices[0].delta && obj.choices[0].delta.content) {
              const deltaContent = obj.choices[0].delta.content;
              content += deltaContent;
              pendingText += deltaContent;
              console.log('LLM内容片段:', deltaContent);
              // 检查是否包含分段分隔符
              if (segmentDelimiters.test(pendingText)) {
                // 按分隔符分割文本
                const segments = pendingText.split(segmentDelimiters);
                // 处理完整的段落（除了最后一个，因为可能不完整）
                for (let i = 0; i < segments.length - 1; i++) {
                  const segment = segments[i].trim();
                  if (segment && onSegment) {
                    // 找到对应的分隔符
                    const delimiterMatch = pendingText.match(segmentDelimiters);
                    const segmentWithDelimiter = segment + (delimiterMatch ? delimiterMatch[0] : '');
                    console.log('检测到完整段落:', segmentWithDelimiter);
                    await onSegment(segmentWithDelimiter);
                  }
                }
                // 保留最后一个不完整的段落
                pendingText = segments[segments.length - 1] || '';
              }
            }
          } catch (e) {
            console.error('解析LLM SSE数据失败:', e, '原始数据:', jsonStr);
--- a/src/minimaxi_stream.js
+++ b/src/minimaxi_stream.js
@ -1,5 +1,135 @@
 // 以流式或非流式方式请求 minimaxi 大模型接口，并打印/返回内容
 // Module-level state shared by the audio playback helpers below.
 let audioContext = null; // lazily created by initAudioContext()
 let audioQueue = []; // decoded audio clips waiting to be played
 let isPlaying = false; // true while playAudioData() has a clip in flight
 let isProcessingQueue = false; // true while processAudioQueue() is running
 let nextStartTime = 0; // reset at the start of each streaming request; NOTE(review): no reader is visible in this view
 /**
  * Returns the shared AudioContext, creating it on first use.
  * @returns {AudioContext}
  */
 function initAudioContext() {
  if (audioContext) {
    return audioContext;
  }
  const ContextClass = window.AudioContext || window.webkitAudioContext;
  audioContext = new ContextClass();
  return audioContext;
 }
 /**
  * Converts a hex string (two characters per byte) into an ArrayBuffer.
  * Character pairs that are not valid hex are stored as 0.
  * @param {string} hex
  * @returns {ArrayBuffer}
  */
 function hexToArrayBuffer(hex) {
  const out = new Uint8Array(hex.length / 2);
  let byteIndex = 0;
  let pos = 0;
  while (pos < hex.length) {
    out[byteIndex] = parseInt(hex.slice(pos, pos + 2), 16);
    byteIndex += 1;
    pos += 2;
  }
  return out.buffer;
 }
 /**
  * Decodes a hex-encoded audio chunk and appends it to the playback queue.
  * Resolves once the chunk is decoded and queued; it does not wait for playback.
  * Decoding failures are logged and swallowed.
  * @param {string} audioHex
  * @returns {Promise<void>}
  */
 async function addAudioToQueue(audioHex) {
  const nothingToQueue = !audioHex || audioHex.length === 0;
  if (nothingToQueue) {
    return;
  }
  try {
    const decoded = await initAudioContext().decodeAudioData(hexToArrayBuffer(audioHex));
    // Queue the decoded clip together with its arrival time.
    const entry = { audioData: decoded, timestamp: Date.now() };
    audioQueue.push(entry);
    console.log(`音频已添加到队列，队列长度: ${audioQueue.length}`);
    // Kick off the queue processor unless one is already running.
    if (isProcessingQueue) {
      return;
    }
    processAudioQueue();
  } catch (error) {
    console.error('音频解码失败:', error);
  }
 }
 /**
  * Queue processor: plays queued clips one after another until the queue is
  * empty and nothing is playing. Only one instance runs at a time.
  * @returns {Promise<void>}
  */
 async function processAudioQueue() {
  if (isProcessingQueue) {
    return;
  }
  isProcessingQueue = true;
  console.log('开始处理音频队列');
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  while (isPlaying || audioQueue.length > 0) {
    const canStartNext = audioQueue.length > 0 && !isPlaying;
    if (!canStartNext) {
      // A clip is still playing: poll again shortly.
      await pause(50);
      continue;
    }
    const { audioData } = audioQueue.shift();
    await playAudioData(audioData);
  }
  isProcessingQueue = false;
  console.log('音频队列处理完成');
 }
 /**
  * Plays one decoded audio clip and resolves when it finishes.
  * The promise never rejects: playback errors are logged and resolve immediately,
  * and a watchdog resolves it if `onended` has not fired 0.5 s after the clip's
  * nominal duration.
  * @param {AudioBuffer} audioData
  * @returns {Promise<void>}
  */
 function playAudioData(audioData) {
  return new Promise((resolve) => {
    // Completion is tracked per call. The shared isPlaying flag cannot tell
    // this clip from a later one, so a stale watchdog used to report a false
    // timeout and clear isPlaying while the next clip was playing.
    let settled = false;
    let watchdog = null;
    const finish = () => {
      settled = true;
      if (watchdog !== null) {
        clearTimeout(watchdog);
        watchdog = null;
      }
      isPlaying = false;
      resolve();
    };
    try {
      const ctx = initAudioContext();
      const source = ctx.createBufferSource();
      source.buffer = audioData;
      source.connect(ctx.destination);
      isPlaying = true;
      source.onended = () => {
        if (settled) return;
        console.log('音频片段播放完成');
        finish();
      };
      // Watchdog in case onended never fires.
      watchdog = setTimeout(() => {
        if (settled) return;
        console.log('音频播放超时，强制结束');
        finish();
      }, (audioData.duration + 0.5) * 1000);
      source.start(0);
      console.log(`开始播放音频片段，时长: ${audioData.duration}秒`);
    } catch (error) {
      console.error('播放音频失败:', error);
      finish();
    }
  });
 }
 // 修改原来的playAudioChunk函数，改为addAudioToQueue
 const playAudioChunk = addAudioToQueue;
 /**
  * Empties the playback queue in place (a clip that is already playing is not stopped).
  */
 function clearAudioQueue() {
  audioQueue.splice(0, audioQueue.length);
  console.log('音频队列已清空');
 }
 /**
  * Returns a snapshot of the playback queue state.
  * @returns {{queueLength: number, isPlaying: boolean, isProcessingQueue: boolean}}
  */
 function getQueueStatus() {
  const snapshot = {
    queueLength: audioQueue.length,
    isPlaying: isPlaying,
    isProcessingQueue: isProcessingQueue
  };
  return snapshot;
 }
 // 移除waitForCurrentAudioToFinish函数，不再需要
 async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
  const url = `https://api.minimaxi.com/v1/t2a_v2`;
  const reqBody = { ...body, stream };
@ -24,7 +154,7 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
    console.log(JSON.stringify(result, null, 2));
    return result;
  } else {
-    // 流式，解析每个chunk，合并audio
+    // 流式，解析每个chunk，实时播放音频
    const reader = response.body.getReader();
    const decoder = new TextDecoder('utf-8');
    let done = false;
@ -32,25 +162,28 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
    let audioHex = '';
    let lastFullResult = null;
    // 重置播放状态
    nextStartTime = 0;
    if (audioContext) {
      nextStartTime = audioContext.currentTime;
    }
    while (!done) {
      const { value, done: doneReading } = await reader.read();
      done = doneReading;
      if (value) {
        const chunk = decoder.decode(value, { stream: true });
        buffer += chunk;
        // console.log('收到原始chunk:', chunk);
        // 处理SSE格式的数据（以\n分割）
        let lines = buffer.split('\n');
        buffer = lines.pop(); // 最后一行可能是不完整的，留到下次
        for (const line of lines) {
          if (!line.trim()) continue;
          // console.log('处理行:', line);
          // 检查是否是SSE格式的数据行
          if (line.startsWith('data:')) {
            const jsonStr = line.substring(6); // 移除 'data: ' 前缀
            // console.log('提取的JSON字符串:', jsonStr);
            if (jsonStr.trim() === '[DONE]') {
              console.log('SSE流结束');
@ -59,17 +192,19 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
            try {
              const obj = JSON.parse(jsonStr);
-              // 流式，解析每个chunk，合并audio
+              // 流式，解析每个chunk，实时播放音频
-              if (obj.data && obj.data.audio) {
+              if (obj.data && obj.data.audio && obj.data.status === 1) {
                console.log('收到音频数据片段!', obj.data.audio.length);
                audioHex += obj.data.audio;
                // 立即播放这个音频片段
                await playAudioChunk(obj.data.audio);
              }
              // status=2为最后一个chunk，记录完整结构
              if (obj.data && obj.data.status === 2) {
                lastFullResult = obj;
                console.log('收到最终状态');
              }
              // 实时打印每个chunk
              console.log('解析成功:', JSON.stringify(obj));
            } catch (e) {
              console.error('解析SSE数据失败:', e, '原始数据:', jsonStr);
            }
@ -83,7 +218,11 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
            try {
              const obj = JSON.parse(line);
              if (obj.data && obj.data.audio) {
                console.log('收到无data:音频数据!', obj.data.audio.length);
                audioHex += obj.data.audio;
                // 立即播放这个音频片段
                await playAudioChunk(obj.data.audio);
              }
              if (obj.data && obj.data.status === 2) {
                lastFullResult = obj;
@ -109,4 +248,135 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
  }
 }
-export { requestMinimaxi };
+// 火山引擎TTS方法
 /**
  * Sends a text-to-speech request to the Volcano Engine (openspeech.bytedance.com)
  * unidirectional TTS endpoint.
  *
  * @param {Object} params
  * @param {string} params.appId - Sent as the `X-Api-App-Id` header.
  * @param {string} params.accessKey - Sent as the `X-Api-Access-Key` header.
  * @param {string} [params.resourceId='volc.service_type.10029'] - Sent as `X-Api-Resource-Id`.
  * @param {string} [params.appKey='aGjiRDfUWi'] - Sent as `X-Api-App-Key`.
  * @param {Object} params.body - Request payload; serialised with JSON.stringify.
  * @param {boolean} [params.stream=true] - When true the response is read as a
  *   line-delimited / SSE stream; when false the JSON body is returned as-is.
  * @returns {Promise<Object>} Non-stream: the parsed JSON response. Stream: the last
  *   parsed object that carried a truthy `data` field, with `data` replaced by the
  *   concatenation of every chunk's `data`; or `{ code: 0, message: '', data }` when
  *   no chunk carried `data`.
  * @throws {Error} When the HTTP status is not OK.
  */
 async function requestVolcanTTS({ 
  appId, 
  accessKey, 
  resourceId = 'volc.service_type.10029', 
  appKey = 'aGjiRDfUWi',
  body, 
  stream = true 
 }) {
  const url = 'https://openspeech.bytedance.com/api/v3/tts/unidirectional';
  // A fresh request ID for every call.
  const requestId = generateUUID();
  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'X-Api-App-Id': appId,
      'X-Api-Access-Key': accessKey,
      'X-Api-Resource-Id': resourceId,
      'X-Api-App-Key': appKey,
      'X-Api-Request-Id': requestId,
      'Content-Type': 'application/json',
      'Accept': stream ? 'text/event-stream' : 'application/json',
      'Cache-Control': 'no-cache',
    },
    body: JSON.stringify(body),
  });
  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }
  if (!stream) {
    // Non-streaming: the whole result is a single JSON document.
    const result = await response.json();
    console.log('火山引擎TTS非流式结果:', JSON.stringify(result, null, 2));
    return result;
  }

  // Streaming: accumulate the `data` field of every chunk.
  // NOTE(review): chunks are joined as plain strings. If the server pads each
  // base64 chunk individually the joined string is not valid base64 — confirm
  // against the API before decoding the result in one go.
  let audioBase64 = '';
  let lastFullResult = null;

  // Parses one JSON payload and folds its `data` into the accumulator.
  const absorbPayload = (jsonText, successLabel, reportFailure) => {
    try {
      const obj = JSON.parse(jsonText);
      if (obj.data) {
        audioBase64 += obj.data;
        lastFullResult = obj;
      }
      // Log every chunk as it arrives.
      console.log(successLabel, JSON.stringify(obj));
    } catch (e) {
      reportFailure(e);
    }
  };

  // Handles a single complete line of the response stream.
  const handleLine = (line) => {
    if (!line.trim()) return;
    if (line.startsWith('data:')) {
      // The space after "data:" is optional in SSE, so strip the 5-char field
      // name and trim instead of assuming a fixed 6-char prefix.
      const jsonStr = line.slice(5).trim();
      if (!jsonStr) return;
      if (jsonStr === '[DONE]') {
        console.log('火山引擎TTS流结束');
        return;
      }
      absorbPayload(jsonStr, '火山引擎TTS解析成功:', (e) => {
        console.error('解析火山引擎TTS数据失败:', e, '原始数据:', jsonStr);
      });
      return;
    }
    if (/^(event|id|retry):/.test(line) || line.startsWith(':')) {
      // Other SSE fields and comment lines carry no audio.
      console.log('忽略SSE字段:', line);
      return;
    }
    // Fallback for servers that emit bare JSON lines instead of SSE.
    absorbPayload(line, '火山引擎TTS直接解析成功:', (e) => {
      console.error('解析火山引擎TTS chunk失败:', e, line);
    });
  };

  const reader = response.body.getReader();
  const decoder = new TextDecoder('utf-8');
  let buffer = '';
  for (;;) {
    const { value, done } = await reader.read();
    if (value) {
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // The last piece may be an incomplete line; keep it for the next read.
      buffer = lines.pop();
      for (const line of lines) {
        handleLine(line);
      }
    }
    if (done) break;
  }
  // Flush the decoder and parse whatever is left: a stream that does not end
  // with a newline would otherwise lose its final chunk.
  buffer += decoder.decode();
  handleLine(buffer);

  // Build the final result.
  console.log('火山引擎TTS音频数据总长度:', audioBase64.length);
  if (lastFullResult) {
    // Replace the last chunk's data with the full accumulated audio.
    lastFullResult.data = audioBase64;
    console.log('火山引擎TTS最终合成结果:', JSON.stringify(lastFullResult, null, 2));
    return lastFullResult;
  }
  // No chunk carried data; return a minimal envelope.
  return { 
    code: 0, 
    message: '', 
    data: audioBase64 
  };
 }
 /**
  * Helper that builds a random version-4 style UUID string.
  * Each `x` in the template becomes a random hex digit; the `y` position is
  * restricted to 8, 9, a or b. Uses Math.random, so the value is not
  * cryptographically strong.
  *
  * @returns {string} A 36-character identifier such as
  *   `3f2a9c1e-7b4d-4e8a-9c3f-1a2b3c4d5e6f`.
  */
 function generateUUID() {
  const template = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx';
  let uuid = '';
  for (const ch of template) {
    if (ch !== 'x' && ch !== 'y') {
      // Dashes and the fixed version digit are copied through unchanged.
      uuid += ch;
      continue;
    }
    const nibble = Math.floor(Math.random() * 16);
    const digit = ch === 'x' ? nibble : ((nibble & 0x3) | 0x8);
    uuid += digit.toString(16);
  }
  return uuid;
 }
 export { requestMinimaxi, requestVolcanTTS };
--- a/src/new_app.js
+++ b/src/new_app.js
@ -0,0 +1,346 @@
 // Most recent recognition text; written by HttpASRRecognizer.handleASRResponse.
 let ASRTEXT = '';
 /**
  * Browser-side speech recogniser.
  *
  * Captures microphone audio through the Web Audio API, applies an energy-based
  * voice activity detector (VAD) to find utterances, encodes each utterance as a
  * 16-bit mono WAV, and POSTs it (base64) to the configured HTTP ASR endpoint.
  * The constructor looks up the `startVoiceButton`, `status` and `results`
  * elements and wires the button to toggle recording.
  */
 class HttpASRRecognizer {
    constructor() {
        this.mediaRecorder = null;
        this.mediaStream = null;      // microphone stream; kept so it can be stopped
        this.processorNode = null;    // ScriptProcessor node feeding the VAD
        this.audioContext = null;
        this.sampleRate = 16000;      // actual capture rate; refreshed in startRecording
        this.isRecording = false;
        this.audioChunks = [];
        // VAD state and tuning
        this.isSpeaking = false;
        this.silenceThreshold = 0.01;   // RMS level above which a frame counts as speech
        this.silenceTimeout = 1000;     // ms of silence that ends an utterance
        this.minSpeechDuration = 300;   // utterances shorter than this (ms) are dropped
        this.silenceTimer = null;
        this.speechStartTime = null;
        this.audioBuffer = [];
        // API configuration
        // SECURITY NOTE(review): these credentials are hard-coded and shipped to
        // every client. They should be injected from configuration or proxied
        // through a backend — left unchanged here to preserve behavior.
        this.apiConfig = {
            url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
            headers: {
                'X-Api-App-Key': '1988591469',
                'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
                'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
                'X-Api-Request-Id': this.generateUUID(),
                'X-Api-Sequence': '-1',
                'Content-Type': 'application/json'
            }
        };
        this.recordBtn = document.getElementById('startVoiceButton');
        this.statusDiv = document.getElementById('status');
        this.resultsDiv = document.getElementById('results');
        this.initEventListeners();
    }
    /** Wires the record button so that each click toggles recording. */
    initEventListeners() {
        this.recordBtn.addEventListener('click', () => {
            if (this.isRecording) {
                this.stopRecording();
            } else {
                this.startRecording();
            }
        });
    }
    /**
     * Builds a random version-4 style UUID string using Math.random.
     * @returns {string} 36-character identifier.
     */
    generateUUID() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
            const r = Math.random() * 16 | 0;
            const v = c === 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }
    /**
     * Computes the root-mean-square level of a block of samples.
     * @param {ArrayLike<number>} audioData - Sample values.
     * @returns {number} RMS level; 0 for an empty block (avoids a NaN from 0/0).
     */
    calculateAudioLevel(audioData) {
        if (!audioData || audioData.length === 0) {
            return 0;
        }
        let sum = 0;
        for (let i = 0; i < audioData.length; i++) {
            sum += audioData[i] * audioData[i];
        }
        return Math.sqrt(sum / audioData.length);
    }
    /**
     * Voice activity detection for one audio frame.
     * A frame above the silence threshold starts (or continues) an utterance and
     * cancels any pending silence timer; a quiet frame during an utterance arms a
     * timer that ends the utterance after `silenceTimeout` ms.
     * @param {ArrayLike<number>} audioData - Samples of the current frame.
     * @returns {boolean} True when the frame belongs to an utterance and should be buffered.
     */
    detectVoiceActivity(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        const currentTime = Date.now();
        if (audioLevel > this.silenceThreshold) {
            if (!this.isSpeaking) {
                this.isSpeaking = true;
                this.speechStartTime = currentTime;
                this.audioBuffer = [];
                this.updateStatus('检测到语音，开始录音...', 'speaking');
                console.log('开始说话');
            }
            if (this.silenceTimer) {
                clearTimeout(this.silenceTimer);
                this.silenceTimer = null;
            }
            return true;
        } else {
            if (this.isSpeaking && !this.silenceTimer) {
                this.silenceTimer = setTimeout(() => {
                    // The promise is not awaited here, so surface failures explicitly.
                    this.onSpeechEnd().catch((error) => {
                        console.error('处理音频数据失败:', error);
                    });
                }, this.silenceTimeout);
            }
            return this.isSpeaking;
        }
    }
    /**
     * Finalises the current utterance.
     * The VAD state is reset BEFORE the network request is awaited so that audio
     * arriving while the request is in flight starts a fresh utterance instead of
     * being appended to, and then discarded with, the finished one.
     * @returns {Promise<void>}
     */
    async onSpeechEnd() {
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
        if (!this.isSpeaking) {
            return;
        }
        const speechDuration = Date.now() - this.speechStartTime;
        const capturedChunks = this.audioBuffer;
        this.isSpeaking = false;
        this.speechStartTime = null;
        this.audioBuffer = [];
        if (speechDuration >= this.minSpeechDuration) {
            console.log(`语音结束，时长: ${speechDuration}ms`);
            console.log('语音识别中');
            await this.processAudioBuffer(capturedChunks);
        } else {
            console.log('说话时长太短，忽略');
            console.log('等待语音输入...');
        }
    }
    /**
     * Merges buffered frames, encodes them as WAV/base64 and sends them to the ASR API.
     * @param {Float32Array[]} [chunks=this.audioBuffer] - Frames to send; defaults to
     *   the live buffer so existing zero-argument callers keep working.
     * @returns {Promise<void>}
     */
    async processAudioBuffer(chunks = this.audioBuffer) {
        if (!chunks || chunks.length === 0) {
            return;
        }
        try {
            // Concatenate all frames into one contiguous sample array.
            const totalLength = chunks.reduce((sum, buffer) => sum + buffer.length, 0);
            const combinedBuffer = new Float32Array(totalLength);
            let offset = 0;
            for (const buffer of chunks) {
                combinedBuffer.set(buffer, offset);
                offset += buffer.length;
            }
            // The WAV header must carry the rate the audio was actually captured at.
            const wavBuffer = this.encodeWAV(combinedBuffer, this.sampleRate);
            const base64Audio = this.arrayBufferToBase64(wavBuffer);
            await this.callASRAPI(base64Audio);
        } catch (error) {
            console.error('处理音频数据失败:', error);
            this.updateStatus('识别失败', 'error');
        }
    }
    /**
     * POSTs one base64-encoded audio clip to the ASR endpoint and forwards the
     * parsed JSON to handleASRResponse. Failures are logged and reflected in the
     * status element; they are not rethrown.
     * @param {string} base64AudioData - Base64 of the WAV file.
     * @returns {Promise<void>}
     */
    async callASRAPI(base64AudioData) {
        try {
            const requestBody = {
                user: {
                    uid: "1988591469"
                },
                audio: {
                    data: base64AudioData
                },
                request: {
                    model_name: "bigmodel"
                }
            };
            // A request ID identifies one request, so mint a new one per call
            // instead of reusing the value created in the constructor.
            const headers = Object.assign({}, this.apiConfig.headers, {
                'X-Api-Request-Id': this.generateUUID()
            });
            const response = await fetch(this.apiConfig.url, {
                method: 'POST',
                headers: headers,
                body: JSON.stringify(requestBody)
            });
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            const result = await response.json();
            this.handleASRResponse(result);
        } catch (error) {
            console.error('ASR API调用失败:', error);
            this.updateStatus('API调用失败', 'error');
        }
    }
    /**
     * Stores the recognised text from an ASR response in the module-level ASRTEXT.
     * @param {Object} response - Parsed JSON; the text is read from `response.data.result`.
     */
    handleASRResponse(response) {
        console.log('ASR响应:', response);
        if (response && response.data && response.data.result) {
            ASRTEXT = response.data.result;
            console.log('识别完成')
        } else {
            console.log('未识别到文字');
        }
    }
    /**
     * Appends a timestamped result row to the results element and scrolls to it.
     * The text is assigned through textContent so that markup in the recognised
     * text is shown literally rather than interpreted as HTML.
     * @param {string} text - Recognised text to display.
     */
    displayResult(text) {
        const resultElement = document.createElement('div');
        resultElement.className = 'result-item';
        const timestampElement = document.createElement('span');
        timestampElement.className = 'timestamp';
        timestampElement.textContent = new Date().toLocaleTimeString();
        const textElement = document.createElement('span');
        textElement.className = 'text';
        textElement.textContent = text;
        resultElement.appendChild(timestampElement);
        // Keeps the inter-span whitespace the previous template markup produced.
        resultElement.appendChild(document.createTextNode(' '));
        resultElement.appendChild(textElement);
        this.resultsDiv.appendChild(resultElement);
        this.resultsDiv.scrollTop = this.resultsDiv.scrollHeight;
    }
    /**
     * Updates the status element's text and CSS class.
     * @param {string} message - Text to show.
     * @param {string} status - Suffix for the `status <suffix>` class name.
     */
    updateStatus(message, status) {
        if (!this.statusDiv) {
            // A missing status element must not abort audio processing.
            return;
        }
        this.statusDiv.textContent = message;
        this.statusDiv.className = `status ${status}`;
    }
    /**
     * Encodes float samples as a 16-bit PCM mono WAV file.
     * @param {Float32Array} samples - Samples, nominally in [-1, 1]; values outside are clamped.
     * @param {number} sampleRate - Sample rate written into the header.
     * @returns {ArrayBuffer} 44-byte header followed by little-endian PCM data.
     */
    encodeWAV(samples, sampleRate) {
        const length = samples.length;
        const buffer = new ArrayBuffer(44 + length * 2);
        const view = new DataView(buffer);
        const writeString = (offset, string) => {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        };
        // RIFF/WAVE header
        writeString(0, 'RIFF');
        view.setUint32(4, 36 + length * 2, true);   // file size minus 8
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true);               // fmt chunk size
        view.setUint16(20, 1, true);                // format 1 = PCM
        view.setUint16(22, 1, true);                // channels
        view.setUint32(24, sampleRate, true);
        view.setUint32(28, sampleRate * 2, true);   // byte rate = rate * block align
        view.setUint16(32, 2, true);                // block align (1 ch * 2 bytes)
        view.setUint16(34, 16, true);               // bits per sample
        writeString(36, 'data');
        view.setUint32(40, length * 2, true);
        // Sample data
        let offset = 44;
        for (let i = 0; i < length; i++) {
            const sample = Math.max(-1, Math.min(1, samples[i]));
            view.setInt16(offset, sample * 0x7FFF, true);
            offset += 2;
        }
        return buffer;
    }
    /**
     * Converts an ArrayBuffer to a base64 string.
     * @param {ArrayBuffer} buffer - Bytes to encode.
     * @returns {string} Base64 text.
     */
    arrayBufferToBase64(buffer) {
        const bytes = new Uint8Array(buffer);
        // Convert in slices: one fromCharCode call per byte is slow for long
        // clips, while a single call over the whole array can exceed the
        // engine's argument limit.
        const SLICE = 0x8000;
        let binary = '';
        for (let i = 0; i < bytes.length; i += SLICE) {
            binary += String.fromCharCode.apply(null, bytes.subarray(i, i + SLICE));
        }
        return btoa(binary);
    }
    /**
     * Requests microphone access and starts feeding audio frames to the VAD.
     * On failure the error is logged and any partially acquired resources are released.
     * @returns {Promise<void>}
     */
    async startRecording() {
        let stream = null;
        try {
            stream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    sampleRate: 16000,
                    channelCount: 1,
                    echoCancellation: true,
                    noiseSuppression: true
                }
            });
            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });
            // Record the rate the context really runs at for the WAV header.
            this.sampleRate = this.audioContext.sampleRate || 16000;
            this.mediaStream = stream;
            const source = this.audioContext.createMediaStreamSource(stream);
            const processor = this.audioContext.createScriptProcessor(4096, 1, 1);
            processor.onaudioprocess = (event) => {
                const inputBuffer = event.inputBuffer;
                const inputData = inputBuffer.getChannelData(0);
                if (this.detectVoiceActivity(inputData)) {
                    // Copy the frame: the underlying buffer is reused by the audio engine.
                    this.audioBuffer.push(new Float32Array(inputData));
                }
            };
            source.connect(processor);
            processor.connect(this.audioContext.destination);
            this.processorNode = processor;
            this.isRecording = true;
            this.recordBtn.textContent = '停止录音';
            this.recordBtn.className = 'btn recording';
        } catch (error) {
            console.error('启动录音失败:', error);
            // Do not leave the microphone or audio context open after a failed start.
            if (stream) {
                stream.getTracks().forEach((track) => track.stop());
            }
            if (this.audioContext) {
                this.audioContext.close();
                this.audioContext = null;
            }
            this.mediaStream = null;
            this.processorNode = null;
        }
    }
    /**
     * Stops capturing, releases the microphone and audio context, and submits any
     * utterance that was still in progress.
     */
    stopRecording() {
        if (this.processorNode) {
            // Detach first so a queued audio callback cannot restart the VAD.
            this.processorNode.onaudioprocess = null;
            this.processorNode.disconnect();
            this.processorNode = null;
        }
        if (this.mediaStream) {
            // Stopping the tracks releases the microphone.
            this.mediaStream.getTracks().forEach((track) => track.stop());
            this.mediaStream = null;
        }
        if (this.audioContext) {
            this.audioContext.close();
            this.audioContext = null;
        }
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
        // Flush the utterance in progress; onSpeechEnd snapshots the buffer
        // synchronously, so the resets below cannot lose it.
        if (this.isSpeaking) {
            this.onSpeechEnd().catch((error) => {
                console.error('处理音频数据失败:', error);
            });
        }
        this.isRecording = false;
        this.isSpeaking = false;
        this.audioBuffer = [];
        this.recordBtn.textContent = '开始录音';
        this.recordBtn.className = 'btn';
        console.log('录音已停止');
    }
 }
 // Application bootstrap: create the recogniser once the DOM is available.
 // If this script runs after DOMContentLoaded has already fired (for example
 // when it is injected dynamically), the listener would never be called, so
 // initialise immediately in that case.
 {
    const initRecognizer = () => {
        new HttpASRRecognizer();
        console.log('HTTP ASR识别器已初始化');
    };
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', initRecognizer, { once: true });
    } else {
        initRecognizer();
    }
 }