realtime voice capture

This commit is contained in:
parent c95e6a2552
commit d808bbfe26

322 src/audio_processor.js (Normal file)
@@ -0,0 +1,322 @@
// Audio processing module - advanced audio handling extracted from new_app.js

class AudioProcessor {
    constructor(options = {}) {
        this.audioContext = null;
        this.isRecording = false;
        this.audioChunks = [];

        // VAD (voice activity detection) state
        this.isSpeaking = false;
        this.silenceThreshold = options.silenceThreshold || 0.01;
        this.silenceTimeout = options.silenceTimeout || 1000;
        this.minSpeechDuration = options.minSpeechDuration || 300;
        this.silenceTimer = null;
        this.speechStartTime = null;
        this.audioBuffer = [];

        // API configuration
        this.apiConfig = {
            url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
            headers: {
                'X-Api-App-Key': '1988591469',
                'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
                'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
                'X-Api-Request-Id': this.generateUUID(),
                'X-Api-Sequence': '-1',
                'Content-Type': 'application/json'
            }
        };

        // Callbacks
        this.onSpeechStart = options.onSpeechStart || (() => {});
        this.onSpeechEnd = options.onSpeechEnd || (() => {});
        this.onRecognitionResult = options.onRecognitionResult || (() => {});
        this.onError = options.onError || (() => {});
        this.onStatusUpdate = options.onStatusUpdate || (() => {});
    }

    // Generate a v4-style UUID
    generateUUID() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
            const r = Math.random() * 16 | 0;
            const v = c === 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }

    // Compute audio energy (RMS volume)
    calculateAudioLevel(audioData) {
        let sum = 0;
        for (let i = 0; i < audioData.length; i++) {
            sum += audioData[i] * audioData[i];
        }
        return Math.sqrt(sum / audioData.length);
    }
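
    // Note (editorial): this is a root-mean-square (RMS) estimate over one
    // processing block. Float32 samples lie in [-1, 1], so a full-scale sine
    // has an RMS of ~0.707, while ambient silence typically sits well below
    // the 0.01 default threshold compared against in detectVoiceActivity().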

    // Voice activity detection
    detectVoiceActivity(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        const currentTime = Date.now();

        if (audioLevel > this.silenceThreshold) {
            if (!this.isSpeaking) {
                this.isSpeaking = true;
                this.speechStartTime = currentTime;
                this.audioBuffer = [];
                this.onSpeechStart();
                this.onStatusUpdate('Speech detected, recording...', 'speaking');
                console.log('Speech started');
            }

            if (this.silenceTimer) {
                clearTimeout(this.silenceTimer);
                this.silenceTimer = null;
            }

            return true;
        } else {
            if (this.isSpeaking && !this.silenceTimer) {
                this.silenceTimer = setTimeout(() => {
                    this.handleSpeechEnd();
                }, this.silenceTimeout);
            }

            return this.isSpeaking;
        }
    }

    // Handle end of speech
    async handleSpeechEnd() {
        if (this.isSpeaking) {
            const speechDuration = Date.now() - this.speechStartTime;

            if (speechDuration >= this.minSpeechDuration) {
                console.log(`Speech ended, duration: ${speechDuration}ms`);
                await this.processAudioBuffer();
                this.onStatusUpdate('Recognizing speech...', 'processing');
            } else {
                console.log('Speech too short, ignoring');
                this.onStatusUpdate('Waiting for speech input...', 'ready');
            }

            this.isSpeaking = false;
            this.speechStartTime = null;
            this.audioBuffer = [];
            this.onSpeechEnd();
        }

        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
    }

    // Merge the audio buffer and send it to the API
    async processAudioBuffer() {
        if (this.audioBuffer.length === 0) {
            return;
        }

        try {
            // Concatenate all buffered audio chunks
            const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
            const combinedBuffer = new Float32Array(totalLength);
            let offset = 0;

            for (const buffer of this.audioBuffer) {
                combinedBuffer.set(buffer, offset);
                offset += buffer.length;
            }

            // Encode as WAV, then base64
            const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
            const base64Audio = this.arrayBufferToBase64(wavBuffer);

            // Call the ASR API
            await this.callASRAPI(base64Audio);

        } catch (error) {
            console.error('Failed to process audio data:', error);
            this.onError('Failed to process audio data: ' + error.message);
        }
    }

    // Call the ASR API
    async callASRAPI(base64AudioData) {
        try {
            const requestBody = {
                user: {
                    uid: "1988591469"
                },
                audio: {
                    data: base64AudioData
                },
                request: {
                    model_name: "bigmodel"
                }
            };

            const response = await fetch(this.apiConfig.url, {
                method: 'POST',
                headers: this.apiConfig.headers,
                body: JSON.stringify(requestBody)
            });

            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }

            const result = await response.json();
            this.handleASRResponse(result);

        } catch (error) {
            console.error('ASR API call failed:', error);
            this.onError('ASR API call failed: ' + error.message);
        }
    }

    // Handle the ASR response
    handleASRResponse(response) {
        console.log('ASR response:', response);

        if (response && response.result) {
            const recognizedText = response.result.text;
            this.onRecognitionResult(recognizedText);
            this.onStatusUpdate('Recognition complete', 'completed');
        } else {
            console.log('No text recognized');
            this.onStatusUpdate('No text recognized', 'ready');
        }
    }

    // Encode Float32 samples as a 16-bit PCM WAV file
    encodeWAV(samples, sampleRate) {
        const length = samples.length;
        const buffer = new ArrayBuffer(44 + length * 2);
        const view = new DataView(buffer);

        // 44-byte WAV header
        const writeString = (offset, string) => {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        };

        writeString(0, 'RIFF');
        view.setUint32(4, 36 + length * 2, true);   // RIFF chunk size
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true);               // fmt chunk size
        view.setUint16(20, 1, true);                // audio format: PCM
        view.setUint16(22, 1, true);                // channels: mono
        view.setUint32(24, sampleRate, true);       // sample rate
        view.setUint32(28, sampleRate * 2, true);   // byte rate
        view.setUint16(32, 2, true);                // block align
        view.setUint16(34, 16, true);               // bits per sample
        writeString(36, 'data');
        view.setUint32(40, length * 2, true);       // data chunk size

        // Write samples as little-endian int16
        let offset = 44;
        for (let i = 0; i < length; i++) {
            const sample = Math.max(-1, Math.min(1, samples[i]));
            view.setInt16(offset, sample * 0x7FFF, true);
            offset += 2;
        }

        return buffer;
    }

    // ArrayBuffer to base64
    arrayBufferToBase64(buffer) {
        let binary = '';
        const bytes = new Uint8Array(buffer);
        for (let i = 0; i < bytes.byteLength; i++) {
            binary += String.fromCharCode(bytes[i]);
        }
        return btoa(binary);
    }

    // Start recording
    async startRecording() {
        try {
            const stream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    sampleRate: 16000,
                    channelCount: 1,
                    echoCancellation: true,
                    noiseSuppression: true
                }
            });

            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });

            const source = this.audioContext.createMediaStreamSource(stream);
            const processor = this.audioContext.createScriptProcessor(4096, 1, 1);

            processor.onaudioprocess = (event) => {
                const inputBuffer = event.inputBuffer;
                const inputData = inputBuffer.getChannelData(0);

                // Voice activity detection; buffer audio while speech is active
                if (this.detectVoiceActivity(inputData)) {
                    this.audioBuffer.push(new Float32Array(inputData));
                }
            };

            source.connect(processor);
            processor.connect(this.audioContext.destination);

            this.isRecording = true;
            this.onStatusUpdate('Waiting for speech input...', 'ready');

            return true;

        } catch (error) {
            console.error('Failed to start recording:', error);
            this.onError('Failed to start recording: ' + error.message);
            return false;
        }
    }

    // Stop recording
    stopRecording() {
        if (this.audioContext) {
            this.audioContext.close();
            this.audioContext = null;
        }

        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }

        // If speech is in progress, flush the last audio
        if (this.isSpeaking) {
            this.handleSpeechEnd();
        }

        this.isRecording = false;
        this.isSpeaking = false;
        this.audioBuffer = [];

        this.onStatusUpdate('Recording stopped', 'stopped');
        console.log('Recording stopped');
    }

    // Get recording status
    getRecordingStatus() {
        return {
            isRecording: this.isRecording,
            isSpeaking: this.isSpeaking,
            hasAudioContext: !!this.audioContext
        };
    }
}

// Export the module
export { AudioProcessor };
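
For orientation, a minimal usage sketch of the class above (editorial addition; assumes a browser context with microphone permission, and uses only the options and callbacks defined in the constructor):

import { AudioProcessor } from './audio_processor.js';

const processor = new AudioProcessor({
    silenceThreshold: 0.01,  // RMS level below which input counts as silence
    silenceTimeout: 1000,    // ms of silence that ends an utterance
    onRecognitionResult: (text) => console.log('ASR text:', text),
    onError: (message) => console.error(message),
    onStatusUpdate: (message, status) => console.log(`[${status}] ${message}`)
});

// startRecording() resolves to true on success, false on failure.
if (!(await processor.startRecording())) {
    console.warn('Could not start recording');
}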
@@ -6,6 +6,9 @@ import { getLLMConfig, getMinimaxiConfig, getAudioConfig, validateConfig } from

// Flag guarding against overlapping playback
let isPlaying = false;
// Audio playback queue
let audioQueue = [];
let isProcessingQueue = false;

async function chatWithAudioStream(userInput) {
  // Validate configuration
@@ -20,31 +23,22 @@ async function chatWithAudioStream(userInput) {
  const minimaxiConfig = getMinimaxiConfig();
  const audioConfig = getAudioConfig();

  // 1. Request the LLM answer
  console.log('\n=== Requesting LLM answer ===');
  const llmResponse = await requestLLMStream({
    apiKey: llmConfig.apiKey,
    model: llmConfig.model,
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: userInput },
    ],
  });
  // Clear the audio queue
  audioQueue = [];

  // Extract the LLM answer content (now returned directly)
  const llmContent = llmResponse;
  // Define the per-segment handler
  const handleSegment = async (segment) => {
    console.log('\n=== Processing text segment ===');
    console.log('Segment content:', segment);

  console.log('\n=== LLM answer ===');
  console.log("llmResponse: ", llmContent);

  // 2. Synthesize audio
  console.log('\n=== Starting audio synthesis ===');
    try {
      // Generate audio for this segment
      const audioResult = await requestMinimaxi({
        apiKey: minimaxiConfig.apiKey,
        groupId: minimaxiConfig.groupId,
        body: {
          model: audioConfig.model,
      text: llmContent,
          text: segment,
          stream: audioConfig.stream,
          language_boost: audioConfig.language_boost,
          output_format: audioConfig.output_format,
@@ -54,30 +48,70 @@ async function chatWithAudioStream(userInput) {
        stream: true,
      });

  // 3. Stream the audio playback
  console.log('\n=== Starting streamed playback ===');
  // console.log('Audio data length:', audioResult.data.audio.length);
  await playAudioStream(audioResult.data.audio);
      // Add the audio to the playback queue
      if (audioResult && audioResult.data && audioResult.data.audio) {
        audioQueue.push({
          text: segment,
          audioHex: audioResult.data.audio
        });
        console.log('Audio queued, queue length:', audioQueue.length);

        // Start draining the queue
        processAudioQueue();
      }
    } catch (error) {
      console.error('Audio generation failed:', error);
    }
  };

  // 1. Request the LLM answer, processing segments as they arrive
  console.log('\n=== Requesting LLM answer ===');
  const llmResponse = await requestLLMStream({
    apiKey: llmConfig.apiKey,
    model: llmConfig.model,
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: userInput },
    ],
    onSegment: handleSegment // per-segment callback
  });

  console.log('\n=== Full LLM answer ===');
  console.log("llmResponse: ", llmResponse);

  return {
    userInput,
    llmResponse: llmContent,
    audioResult,
    llmResponse,
    audioQueue: audioQueue.map(item => ({ text: item.text, hasAudio: !!item.audioHex }))
  };
}

// Drain the audio playback queue
async function processAudioQueue() {
  if (isProcessingQueue) return;

  isProcessingQueue = true;

  // Note: the drain loop below is disabled in this commit; chunk playback is
  // handled as audio arrives inside the minimaxi request module instead.
  // while (audioQueue.length > 0) {
  //   const audioItem = audioQueue.shift();
  //   console.log('\n=== Playing queued audio ===');
  //   console.log('Text:', audioItem.text);

  //   try {
  //     await playAudioStream(audioItem.audioHex);
  //   } catch (error) {
  //     console.error('Audio playback failed:', error);
  //   }
  // }

  isProcessingQueue = false;
}

// Stream audio playback
async function playAudioStream(audioHex) {
  if (isPlaying) {
    console.log('Audio already playing, skipping duplicate playback');
    return;
  }

  console.log('=== Starting audio playback ===');
  console.log('Audio data length:', audioHex.length);

  isPlaying = true;

  // Convert the hex string to an ArrayBuffer
  const audioBuffer = hexToArrayBuffer(audioHex);

@@ -102,13 +136,11 @@ async function playAudioStream(audioHex) {
    return new Promise((resolve) => {
      source.onended = () => {
        console.log('Audio playback finished');
        isPlaying = false;
        resolve();
      };
    });
  } catch (error) {
    console.error('Audio playback failed:', error);
    isPlaying = false;
    throw error;
  }
}
@@ -175,4 +207,6 @@ async function playAudioStreamNode(audioHex) {
  }
}

export { chatWithAudioStream, playAudioStream, playAudioStreamNode };


export { chatWithAudioStream, playAudioStream, playAudioStreamNode };
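
A usage sketch of the reworked pipeline (editorial; assumes the config module imported at the top of this file is filled in):

import { chatWithAudioStream } from './chat_with_audio.js';

// Streams the LLM answer, synthesizes audio per segment as it completes,
// and queues the clips for ordered playback.
const result = await chatWithAudioStream('Introduce yourself in one sentence');
console.log(result.llmResponse);  // full answer text
console.log(result.audioQueue);   // [{ text, hasAudio }, ...]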
@@ -16,11 +16,11 @@ export const config = {
  audio: {
    model: 'speech-02-hd',
    voiceSetting: {
      voice_id: 'yantu-qinggang',
      voice_id: 'yantu-qinggang-2',
      speed: 1,
      vol: 1,
      pitch: 0,
      emotion: 'happy',
      // emotion: 'happy',
    },
    audioSetting: {
      sample_rate: 32000,

139 src/index - 副本.html (Normal file)
@@ -0,0 +1,139 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Realtime Speech Recognition</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            background: white;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        .controls {
            text-align: center;
            margin-bottom: 30px;
        }
        .record-btn {
            background: #4CAF50;
            color: white;
            border: none;
            padding: 15px 30px;
            font-size: 18px;
            border-radius: 50px;
            cursor: pointer;
            transition: all 0.3s;
        }
        .record-btn:hover {
            background: #45a049;
        }
        .record-btn.recording {
            background: #f44336;
            animation: pulse 1s infinite;
        }
        @keyframes pulse {
            0% { transform: scale(1); }
            50% { transform: scale(1.05); }
            100% { transform: scale(1); }
        }
        .status {
            margin: 20px 0;
            padding: 10px;
            border-radius: 5px;
            text-align: center;
            font-weight: bold;
        }
        .status.connected {
            background: #d4edda;
            color: #155724;
            border: 1px solid #c3e6cb;
        }
        .status.speaking {
            background: #fff3cd;
            color: #856404;
            border: 1px solid #ffeaa7;
            animation: speaking-pulse 0.5s infinite alternate;
        }
        .status.processing {
            background: #cce7ff;
            color: #004085;
            border: 1px solid #99d6ff;
        }
        .status.disconnected {
            background: #f8d7da;
            color: #721c24;
            border: 1px solid #f5c6cb;
        }
        @keyframes speaking-pulse {
            0% { opacity: 0.7; }
            100% { opacity: 1; }
        }
        .results {
            max-height: 400px;
            overflow-y: auto;
            border: 1px solid #ddd;
            border-radius: 5px;
            padding: 15px;
            background: #fafafa;
        }
        .result-item {
            margin-bottom: 15px;
            padding: 10px;
            background: white;
            border-radius: 5px;
            border-left: 4px solid #4CAF50;
        }
        .timestamp {
            font-size: 12px;
            color: #666;
            margin-bottom: 5px;
        }
        .text {
            font-size: 16px;
            line-height: 1.4;
        }
        .help {
            margin-top: 20px;
            padding: 15px;
            background: #e3f2fd;
            border-radius: 5px;
            font-size: 14px;
            color: #1565c0;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Realtime Speech Recognition</h1>

        <div class="controls">
            <button id="recordBtn" class="record-btn">Start Recording</button>
        </div>

        <div id="status" class="status disconnected">Disconnected</div>

        <div class="help">
            <strong>How to use:</strong><br>
            1. Click "Start Recording" to turn on the microphone<br>
            2. The system detects your voice automatically and records only while you are speaking<br>
            3. When you stop speaking, the audio is sent for recognition automatically<br>
            4. Recognition results appear in the area below
        </div>

        <h3>Recognition results:</h3>
        <div id="results" class="results">
            <!-- Recognition results are rendered here -->
        </div>
    </div>

    <script src="new_app.js"></script>
</body>
</html>

93 src/index.js
@@ -1,5 +1,6 @@
// WebRTC audio/video chat application
import { chatWithAudioStream } from './chat_with_audio.js';
import { AudioProcessor } from './audio_processor.js';

class WebRTCChat {
    constructor() {
@@ -15,6 +16,30 @@ class WebRTCChat {
        this.videoStreams = new Map(); // MediaStreams for the different videos
        this.currentVideoStream = null;

        // Initialize the audio processor
        this.audioProcessor = new AudioProcessor({
            onSpeechStart: () => {
                this.voiceStatus.textContent = 'Speech detected, recording...';
                this.logMessage('Speech detected, recording...', 'info');
            },
            onSpeechEnd: () => {
                // Speech-end callback
            },
            onRecognitionResult: (text) => {
                // ASRTEXT = text;
                this.voiceStatus.textContent = 'Recognition complete';
                this.logMessage(`Speech recognition result: ${text}`, 'success');
                this.handleVoiceInput(text);
            },
            onError: (error) => {
                this.voiceStatus.textContent = 'Recognition failed';
                this.logMessage(error, 'error');
            },
            onStatusUpdate: (message, status) => {
                this.voiceStatus.textContent = message;
            }
        });

        this.initializeElements();
        this.initializeSocket();
        this.loadVideoMapping();
@@ -627,65 +652,34 @@ class WebRTCChat {
        });
    }

    // Changed: voice recording now goes through the audio processor
    async startVoiceRecording() {
        try {
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            this.mediaRecorder = new MediaRecorder(stream);
            this.audioChunks = [];

            this.mediaRecorder.ondataavailable = (event) => {
                this.audioChunks.push(event.data);
            };

            this.mediaRecorder.onstop = () => {
                const audioBlob = new Blob(this.audioChunks, { type: 'audio/wav' });
                this.processVoiceInput(audioBlob);
            };

            this.mediaRecorder.start();
            this.isRecording = true;
        const success = await this.audioProcessor.startRecording();

        if (success) {
            this.startVoiceButton.disabled = true;
            this.stopVoiceButton.disabled = false;
            this.voiceStatus.textContent = 'Recording...';
            this.startVoiceButton.classList.add('recording');

            this.logMessage('Voice recording started', 'info');
        } catch (error) {
            this.logMessage('Cannot access microphone: ' + error.message, 'error');
            this.voiceStatus.textContent = 'Waiting for speech input...';
            this.logMessage('Advanced voice recording started', 'success');
        } else {
            this.voiceStatus.textContent = 'Failed to start recording';
        }
    }

    // Changed: stop voice recording
    stopVoiceRecording() {
        if (this.mediaRecorder && this.isRecording) {
            this.mediaRecorder.stop();
            this.isRecording = false;
        this.audioProcessor.stopRecording();

        this.startVoiceButton.disabled = false;
        this.stopVoiceButton.disabled = true;
            this.voiceStatus.textContent = 'Click to start voice input';
        this.startVoiceButton.classList.remove('recording');
        this.voiceStatus.textContent = 'Click to start voice input';

            this.logMessage('Voice recording stopped', 'info');
        }
    }

    async processVoiceInput(audioBlob) {
        // A speech recognition API (Web Speech API or a third-party service) could be integrated here
        // For the demo we use a simple simulated recognition
        const mockText = this.simulateSpeechRecognition();

        this.socket.emit('voice-input', {
            audioData: audioBlob,
            text: mockText
        });

        this.logMessage(`Speech recognition result: ${mockText}`, 'info');

        // Switch the video stream based on the recognition result
        await this.handleVoiceInput(mockText);
        this.logMessage('Voice recording stopped', 'info');
    }

    // Handle the recognized voice input
    async handleVoiceInput(text) {
        // Look up the matching video for the text ('默认' is the default mapping key)
        let videoFile = this.videoMapping['默认'] || this.defaultVideo;
@@ -705,7 +699,20 @@ class WebRTCChat {
            type: 'voice', 
            text 
        });

        // Hand off to the LLM
        try {
            this.logMessage('Processing voice input, please wait...', 'info');
            const result = await chatWithAudioStream(text);
            this.logMessage(`LLM answer: ${result.llmResponse}`, 'success');
        } catch (error) {
            this.logMessage(`Failed to process voice input: ${error.message}`, 'error');
            console.error('chatWithAudioStream error:', error);
        }
    }

    // The original simple audio handling was removed:
    // processVoiceInput() and simulateSpeechRecognition() are gone

    simulateSpeechRecognition() {
        // Simulated speech recognition: returns a random preset text

@@ -1,6 +1,6 @@
// Request the LLM chat-completions endpoint in streaming mode and print the streamed content

async function requestLLMStream({ apiKey, model, messages }) {
async function requestLLMStream({ apiKey, model, messages, onSegment }) {
  const response = await fetch('https://ark.cn-beijing.volces.com/api/v3/bots/chat/completions', {
    method: 'POST',
    headers: {
@@ -26,6 +26,10 @@ async function requestLLMStream({ apiKey, model, messages }) {
  let done = false;
  let buffer = '';
  let content = '';
  let pendingText = ''; // text fragment awaiting segmentation

  // Segment delimiters (full-width and ASCII punctuation)
  const segmentDelimiters = /[,。:;!?,.:;!?]/;

  while (!done) {
    const { value, done: doneReading } = await reader.read();
@@ -47,6 +51,10 @@ async function requestLLMStream({ apiKey, model, messages }) {
          
          if (jsonStr === '[DONE]') {
            console.log('LLM SSE stream finished');
            // Flush any remaining pending text
            if (pendingText.trim() && onSegment) {
              await onSegment(pendingText.trim());
            }
            continue;
          }
          
@@ -55,7 +63,29 @@ async function requestLLMStream({ apiKey, model, messages }) {
            if (obj.choices && obj.choices[0] && obj.choices[0].delta && obj.choices[0].delta.content) {
              const deltaContent = obj.choices[0].delta.content;
              content += deltaContent;
              pendingText += deltaContent;
              console.log('LLM content fragment:', deltaContent);
              
              // Check whether the pending text contains a segment delimiter
              if (segmentDelimiters.test(pendingText)) {
                // Split the text on delimiters
                const segments = pendingText.split(segmentDelimiters);
                
                // Process the complete segments (all but the last, which may be partial)
                for (let i = 0; i < segments.length - 1; i++) {
                  const segment = segments[i].trim();
                  if (segment && onSegment) {
                    // Re-attach a delimiter (note: match() returns the first
                    // delimiter in pendingText, so every segment in this batch
                    // gets that same delimiter)
                    const delimiterMatch = pendingText.match(segmentDelimiters);
                    const segmentWithDelimiter = segment + (delimiterMatch ? delimiterMatch[0] : '');
                    console.log('Complete segment detected:', segmentWithDelimiter);
                    await onSegment(segmentWithDelimiter);
                  }
                }
                
                // Keep the last, possibly incomplete segment
                pendingText = segments[segments.length - 1] || '';
              }
            }
          } catch (e) {
            console.error('Failed to parse LLM SSE data:', e, 'raw data:', jsonStr);
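
To make the segmentation behavior concrete, a small self-contained trace (editorial sketch; the regex is the one defined in requestLLMStream):

const segmentDelimiters = /[,。:;!?,.:;!?]/;

let pending = '';
for (const delta of ['你好', ',很高兴', '见到你。']) {
  pending += delta;
  if (segmentDelimiters.test(pending)) {
    const parts = pending.split(segmentDelimiters);
    // Logs ['你好'] after the second delta and ['很高兴见到你'] after the third;
    // requestLLMStream additionally re-attaches a delimiter before calling onSegment.
    console.log('segments:', parts.slice(0, -1));
    pending = parts[parts.length - 1] || '';
  }
}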
@@ -1,5 +1,135 @@
// Request the minimaxi model endpoint in streaming or non-streaming mode and print/return the content

// Audio playback variables and helpers, added at the top of the file
let audioContext = null;
let audioQueue = []; // audio queue
let isPlaying = false;
let isProcessingQueue = false; // queue processing state
let nextStartTime = 0; // added to declare the nextStartTime variable

// Initialize the audio context
function initAudioContext() {
  if (!audioContext) {
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
  }
  return audioContext;
}

// Convert a hex string to an ArrayBuffer
function hexToArrayBuffer(hex) {
  const bytes = new Uint8Array(hex.length / 2);
  for (let i = 0; i < hex.length; i += 2) {
    bytes[i / 2] = parseInt(hex.substr(i, 2), 16);
  }
  return bytes.buffer;
}

// Add audio to the queue (does not wait for playback)
async function addAudioToQueue(audioHex) {
  if (!audioHex || audioHex.length === 0) return;
  
  try {
    const ctx = initAudioContext();
    const audioBuffer = hexToArrayBuffer(audioHex);
    const audioData = await ctx.decodeAudioData(audioBuffer);
    
    // Push the decoded audio onto the queue
    audioQueue.push({
      audioData,
      timestamp: Date.now()
    });
    
    console.log(`Audio queued, queue length: ${audioQueue.length}`);
    
    // Start the queue processor if it is not already running
    if (!isProcessingQueue) {
      processAudioQueue();
    }
    
  } catch (error) {
    console.error('Audio decoding failed:', error);
  }
}

// Queue processor - runs independently, playing audio in order
async function processAudioQueue() {
  if (isProcessingQueue) return;
  
  isProcessingQueue = true;
  console.log('Audio queue processing started');
  
  while (audioQueue.length > 0 || isPlaying) {
    // If nothing is playing and the queue has audio, play the next item
    if (!isPlaying && audioQueue.length > 0) {
      const audioItem = audioQueue.shift();
      await playAudioData(audioItem.audioData);
    } else {
      // Otherwise wait briefly before checking again
      await new Promise(resolve => setTimeout(resolve, 50));
    }
  }
  
  isProcessingQueue = false;
  console.log('Audio queue processing finished');
}

// Play a single decoded audio buffer
function playAudioData(audioData) {
  return new Promise((resolve) => {
    try {
      const ctx = initAudioContext();
      const source = ctx.createBufferSource();
      source.buffer = audioData;
      source.connect(ctx.destination);
      
      isPlaying = true;
      
      source.onended = () => {
        console.log('Audio chunk finished');
        isPlaying = false;
        resolve();
      };
      
      // Timeout guard
      setTimeout(() => {
        if (isPlaying) {
          console.log('Audio playback timed out, forcing completion');
          isPlaying = false;
          resolve();
        }
      }, (audioData.duration + 0.5) * 1000);
      
      source.start(0);
      console.log(`Playing audio chunk, duration: ${audioData.duration}s`);
      
    } catch (error) {
      console.error('Audio playback failed:', error);
      isPlaying = false;
      resolve();
    }
  });
}

// Renamed: the old playAudioChunk now delegates to addAudioToQueue
const playAudioChunk = addAudioToQueue;

// Clear the audio queue
function clearAudioQueue() {
  audioQueue.length = 0;
  console.log('Audio queue cleared');
}

// Get the queue status
function getQueueStatus() {
  return {
    queueLength: audioQueue.length,
    isPlaying,
    isProcessingQueue
  };
}

// waitForCurrentAudioToFinish was removed; it is no longer needed

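// Editorial note: decoding happens eagerly in addAudioToQueue (decodeAudioData
// requires a complete, decodable chunk), while ordering is enforced solely by
// the single processAudioQueue drain loop above. A usage sketch, where
// firstChunkHex / secondChunkHex are placeholder hex strings:
//
//   await addAudioToQueue(firstChunkHex);   // starts the drain loop
//   await addAudioToQueue(secondChunkHex);  // plays after the first chunk ends
//   console.log(getQueueStatus());          // { queueLength, isPlaying, isProcessingQueue }
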
async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
  const url = `https://api.minimaxi.com/v1/t2a_v2`;
  const reqBody = { ...body, stream };
@@ -24,7 +154,7 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
    console.log(JSON.stringify(result, null, 2));
    return result;
  } else {
    // Streaming: parse each chunk and merge the audio
    // Streaming: parse each chunk and play the audio in real time
    const reader = response.body.getReader();
    const decoder = new TextDecoder('utf-8');
    let done = false;
@@ -32,25 +162,28 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
    let audioHex = '';
    let lastFullResult = null;
    
    // Reset playback state
    nextStartTime = 0;
    if (audioContext) {
      nextStartTime = audioContext.currentTime;
    }

    while (!done) {
      const { value, done: doneReading } = await reader.read();
      done = doneReading;
      if (value) {
        const chunk = decoder.decode(value, { stream: true });
        buffer += chunk;
        // console.log('Raw chunk received:', chunk);
        
        // Handle SSE-format data (split on \n)
        let lines = buffer.split('\n');
        buffer = lines.pop(); // the last line may be incomplete; keep it for the next read
        for (const line of lines) {
          if (!line.trim()) continue;
          // console.log('Processing line:', line);
          
          // Check whether this is an SSE data line
          if (line.startsWith('data:')) {
            const jsonStr = line.substring(6); // strip the 'data: ' prefix
            // console.log('Extracted JSON string:', jsonStr);
            
            if (jsonStr.trim() === '[DONE]') {
              console.log('SSE stream finished');
@@ -59,17 +192,19 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
            
            try {
              const obj = JSON.parse(jsonStr);
              // Streaming: parse each chunk and merge the audio
              if (obj.data && obj.data.audio) {
              // Streaming: parse each chunk and play the audio in real time
              if (obj.data && obj.data.audio && obj.data.status === 1) {
                console.log('Audio data chunk received!', obj.data.audio.length);
                audioHex += obj.data.audio;
                
                // Play this audio chunk immediately
                await playAudioChunk(obj.data.audio);
              }
              // status=2 marks the last chunk; record the full structure
              if (obj.data && obj.data.status === 2) {
                lastFullResult = obj;
                console.log('Final status received');
              }
              // Print each chunk as it arrives
              console.log('Parsed successfully:', JSON.stringify(obj));
            } catch (e) {
              console.error('Failed to parse SSE data:', e, 'raw data:', jsonStr);
            }
@@ -83,7 +218,11 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
            try {
              const obj = JSON.parse(line);
              if (obj.data && obj.data.audio) {
                console.log('Audio data received without data: prefix!', obj.data.audio.length);
                audioHex += obj.data.audio;
                
                // Play this audio chunk immediately
                await playAudioChunk(obj.data.audio);
              }
              if (obj.data && obj.data.status === 2) {
                lastFullResult = obj;
@@ -109,4 +248,135 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
  }
}

export { requestMinimaxi };
// Volcano Engine TTS
async function requestVolcanTTS({ 
  appId, 
  accessKey, 
  resourceId = 'volc.service_type.10029', 
  appKey = 'aGjiRDfUWi',
  body, 
  stream = true 
}) {
  const url = 'https://openspeech.bytedance.com/api/v3/tts/unidirectional';
  
  // Generate a request ID
  const requestId = generateUUID();
  
  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'X-Api-App-Id': appId,
      'X-Api-Access-Key': accessKey,
      'X-Api-Resource-Id': resourceId,
      'X-Api-App-Key': appKey,
      'X-Api-Request-Id': requestId,
      'Content-Type': 'application/json',
      'Accept': stream ? 'text/event-stream' : 'application/json',
      'Cache-Control': 'no-cache',
    },
    body: JSON.stringify(body),
  });

  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }

  if (!stream) {
    // Non-streaming: return the JSON directly
    const result = await response.json();
    console.log('Volcano TTS non-streaming result:', JSON.stringify(result, null, 2));
    return result;
  } else {
    // Streaming: parse each chunk and merge the audio
    const reader = response.body.getReader();
    const decoder = new TextDecoder('utf-8');
    let done = false;
    let buffer = '';
    let audioBase64 = '';
    let lastFullResult = null;

    while (!done) {
      const { value, done: doneReading } = await reader.read();
      done = doneReading;
      if (value) {
        const chunk = decoder.decode(value, { stream: true });
        buffer += chunk;
        
        // Handle SSE-format data (split on \n)
        let lines = buffer.split('\n');
        buffer = lines.pop(); // the last line may be incomplete; keep it for the next read
        
        for (const line of lines) {
          if (!line.trim()) continue;
          
          // Check whether this is an SSE data line
          if (line.startsWith('data:')) {
            const jsonStr = line.substring(6); // strip the 'data: ' prefix
            
            if (jsonStr.trim() === '[DONE]') {
              console.log('Volcano TTS stream finished');
              continue;
            }
            
            try {
              const obj = JSON.parse(jsonStr);
              // Streaming: merge the base64 audio from each chunk
              if (obj.data) {
                audioBase64 += obj.data;
                lastFullResult = obj;
              }
              // Print each chunk as it arrives
              console.log('Volcano TTS parsed successfully:', JSON.stringify(obj));
            } catch (e) {
              console.error('Failed to parse Volcano TTS data:', e, 'raw data:', jsonStr);
            }
          } else if (line.startsWith('event: ') || line.startsWith('id: ') || line.startsWith('retry: ')) {
            // Ignore other SSE fields
            console.log('Ignoring SSE field:', line);
            continue;
          } else if (line.trim() && !line.startsWith('data:')) {
            // Try parsing the line directly (for non-SSE responses)
            try {
              const obj = JSON.parse(line);
              if (obj.data) {
                audioBase64 += obj.data;
                lastFullResult = obj;
              }
              console.log('Volcano TTS parsed directly:', JSON.stringify(obj));
            } catch (e) {
              console.error('Failed to parse Volcano TTS chunk:', e, line);
            }
          }
        }
      }
    }
    
    // Assemble the final structure
    console.log('Volcano TTS total audio length:', audioBase64.length);
    
    if (lastFullResult) {
      // Replace the final result's audio data with the merged audio
      lastFullResult.data = audioBase64;
      console.log('Volcano TTS final merged result:', JSON.stringify(lastFullResult, null, 2));
      return lastFullResult;
    } else {
      // No complete structure; return the merged audio
      return { 
        code: 0, 
        message: '', 
        data: audioBase64 
      };
    }
  }
}

// Helper to generate a UUID
function generateUUID() {
  return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
    const r = Math.random() * 16 | 0;
    const v = c === 'x' ? r : (r & 0x3 | 0x8);
    return v.toString(16);
  });
}

export { requestMinimaxi, requestVolcanTTS };
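
A usage sketch for the new TTS entry point (editorial; the module path is hypothetical, the header defaults come from the signature above, and the body fields are illustrative assumptions rather than a documented schema):

import { requestVolcanTTS } from './request_minimaxi.js'; // hypothetical module path

const result = await requestVolcanTTS({
  appId: 'YOUR_APP_ID',         // placeholder credentials
  accessKey: 'YOUR_ACCESS_KEY',
  body: {
    // Illustrative payload only; consult the Volcano Engine TTS docs
    // for the actual unidirectional-synthesis request schema.
    text: 'Hello from the realtime pipeline',
  },
  stream: true,
});
console.log('base64 audio length:', result.data.length);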
							
								
								
									
346 src/new_app.js (Normal file)
@@ -0,0 +1,346 @@
let ASRTEXT = ''

class HttpASRRecognizer {
    constructor() {
        this.mediaRecorder = null;
        this.audioContext = null;
        this.isRecording = false;
        this.audioChunks = [];
        
        // VAD (voice activity detection) state
        this.isSpeaking = false;
        this.silenceThreshold = 0.01;
        this.silenceTimeout = 1000;
        this.minSpeechDuration = 300;
        this.silenceTimer = null;
        this.speechStartTime = null;
        this.audioBuffer = [];
        
        // API configuration
        this.apiConfig = {
            url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
            headers: {
                'X-Api-App-Key': '1988591469',
                'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
                'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
                'X-Api-Request-Id': this.generateUUID(),
                'X-Api-Sequence': '-1',
                'Content-Type': 'application/json'
            }
        };
        
        this.recordBtn = document.getElementById('startVoiceButton');
        this.statusDiv = document.getElementById('status');
        this.resultsDiv = document.getElementById('results');
        
        this.initEventListeners();
    }
    
    initEventListeners() {
        this.recordBtn.addEventListener('click', () => {
            if (this.isRecording) {
                this.stopRecording();
            } else {
                this.startRecording();
            }
        });
    }
    
    // Generate a v4-style UUID
    generateUUID() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
            const r = Math.random() * 16 | 0;
            const v = c === 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }
    
    // Compute audio energy (RMS volume)
    calculateAudioLevel(audioData) {
        let sum = 0;
        for (let i = 0; i < audioData.length; i++) {
            sum += audioData[i] * audioData[i];
        }
        return Math.sqrt(sum / audioData.length);
    }
    
    // Voice activity detection
    detectVoiceActivity(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        const currentTime = Date.now();
        
        if (audioLevel > this.silenceThreshold) {
            if (!this.isSpeaking) {
                this.isSpeaking = true;
                this.speechStartTime = currentTime;
                this.audioBuffer = [];
                this.updateStatus('Speech detected, recording...', 'speaking');
                console.log('Speech started');
            }
            
            if (this.silenceTimer) {
                clearTimeout(this.silenceTimer);
                this.silenceTimer = null;
            }
            
            return true;
        } else {
            if (this.isSpeaking && !this.silenceTimer) {
                this.silenceTimer = setTimeout(() => {
                    this.onSpeechEnd();
                }, this.silenceTimeout);
            }
            
            return this.isSpeaking;
        }
    }
    
    // Handle end of speech
    async onSpeechEnd() {
        if (this.isSpeaking) {
            const speechDuration = Date.now() - this.speechStartTime;
            
            if (speechDuration >= this.minSpeechDuration) {
                console.log(`Speech ended, duration: ${speechDuration}ms`);
                await this.processAudioBuffer();
                // this.updateStatus('Recognizing speech...', 'processing');
                console.log('Recognizing speech')
            } else {
                console.log('Speech too short, ignoring');
                // this.updateStatus('Waiting for speech input...', 'ready');
                console.log('Waiting for speech input...')
            }
            
            this.isSpeaking = false;
            this.speechStartTime = null;
            this.audioBuffer = [];
        }
        
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
    }
    
    // Merge the audio buffer and send it to the API
    async processAudioBuffer() {
        if (this.audioBuffer.length === 0) {
            return;
        }
        
        try {
            // Concatenate all buffered audio chunks
            const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
            const combinedBuffer = new Float32Array(totalLength);
            let offset = 0;
            
            for (const buffer of this.audioBuffer) {
                combinedBuffer.set(buffer, offset);
                offset += buffer.length;
            }
            
            // Encode as WAV, then base64
            const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
            const base64Audio = this.arrayBufferToBase64(wavBuffer);
            
            // Call the ASR API
            await this.callASRAPI(base64Audio);
            
        } catch (error) {
            console.error('Failed to process audio data:', error);
            this.updateStatus('Recognition failed', 'error');
        }
    }
    
    // Call the ASR API
    async callASRAPI(base64AudioData) {
        try {
            const requestBody = {
                user: {
                    uid: "1988591469"
                },
                audio: {
                    data: base64AudioData
                },
                request: {
                    model_name: "bigmodel"
                }
            };
            
            const response = await fetch(this.apiConfig.url, {
                method: 'POST',
                headers: this.apiConfig.headers,
                body: JSON.stringify(requestBody)
            });
            
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            
            const result = await response.json();
            this.handleASRResponse(result);
            
        } catch (error) {
            console.error('ASR API call failed:', error);
            this.updateStatus('API call failed', 'error');
        }
    }
    
    // Handle the ASR response
    handleASRResponse(response) {
        console.log('ASR response:', response);
        
        if (response && response.data && response.data.result) {
            ASRTEXT = response.data.result;
            // this.displayResult(text);
            // this.updateStatus('Recognition complete', 'completed');
            console.log('Recognition complete')
        } else {
            console.log('No text recognized');
            // this.updateStatus('No text recognized', 'ready');
        }
    }
    
    // Display a recognition result
    displayResult(text) {
        const resultElement = document.createElement('div');
        resultElement.className = 'result-item';
        resultElement.innerHTML = `
            <span class="timestamp">${new Date().toLocaleTimeString()}</span>
            <span class="text">${text}</span>
        `;
        this.resultsDiv.appendChild(resultElement);
        this.resultsDiv.scrollTop = this.resultsDiv.scrollHeight;
    }
    
    // Update the status display
    updateStatus(message, status) {
        this.statusDiv.textContent = message;
        this.statusDiv.className = `status ${status}`;
    }
    
    // Encode Float32 samples as a 16-bit PCM WAV file
    encodeWAV(samples, sampleRate) {
        const length = samples.length;
        const buffer = new ArrayBuffer(44 + length * 2);
        const view = new DataView(buffer);
        
        // 44-byte WAV header
        const writeString = (offset, string) => {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        };
        
        writeString(0, 'RIFF');
        view.setUint32(4, 36 + length * 2, true);
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true);
        view.setUint16(20, 1, true);
        view.setUint16(22, 1, true);
        view.setUint32(24, sampleRate, true);
        view.setUint32(28, sampleRate * 2, true);
        view.setUint16(32, 2, true);
        view.setUint16(34, 16, true);
        writeString(36, 'data');
        view.setUint32(40, length * 2, true);
        
        // Write samples as little-endian int16
        let offset = 44;
        for (let i = 0; i < length; i++) {
            const sample = Math.max(-1, Math.min(1, samples[i]));
            view.setInt16(offset, sample * 0x7FFF, true);
            offset += 2;
        }
        
        return buffer;
    }
    
    // ArrayBuffer to base64
    arrayBufferToBase64(buffer) {
        let binary = '';
        const bytes = new Uint8Array(buffer);
        for (let i = 0; i < bytes.byteLength; i++) {
            binary += String.fromCharCode(bytes[i]);
        }
        return btoa(binary);
    }
    
    async startRecording() {
        try {
            const stream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    sampleRate: 16000,
                    channelCount: 1,
                    echoCancellation: true,
                    noiseSuppression: true
                }
            });
            
            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });
            
            const source = this.audioContext.createMediaStreamSource(stream);
            const processor = this.audioContext.createScriptProcessor(4096, 1, 1);
            
            processor.onaudioprocess = (event) => {
                const inputBuffer = event.inputBuffer;
                const inputData = inputBuffer.getChannelData(0);
                
                // Voice activity detection; buffer audio while speech is active
                if (this.detectVoiceActivity(inputData)) {
                    this.audioBuffer.push(new Float32Array(inputData));
                }
            };
            
            source.connect(processor);
            processor.connect(this.audioContext.destination);
            
            this.isRecording = true;
            this.recordBtn.textContent = 'Stop Recording';
            this.recordBtn.className = 'btn recording';
            // this.updateStatus('Waiting for speech input...', 'ready');
            
        } catch (error) {
            console.error('Failed to start recording:', error);
            // this.updateStatus('Failed to start recording', 'error');
        }
    }
    
    stopRecording() {
        if (this.audioContext) {
            this.audioContext.close();
            this.audioContext = null;
        }
        
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
        
        // If speech is in progress, flush the last audio
        if (this.isSpeaking) {
            this.onSpeechEnd();
        }
        
        this.isRecording = false;
        this.isSpeaking = false;
        this.audioBuffer = [];
        
        this.recordBtn.textContent = 'Start Recording';
        this.recordBtn.className = 'btn';
        console.log('Recording stopped');
        // this.updateStatus('Recording stopped', 'stopped');
    }
}

// Initialize the application
document.addEventListener('DOMContentLoaded', () => {
    const asrRecognizer = new HttpASRRecognizer();
    console.log('HTTP ASR recognizer initialized');
});
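
One integration caveat: the constructor above looks up #startVoiceButton, #status, and #results, while the bundled index page defines its button as #recordBtn, so the host page's ids must match. A quick editorial sanity-check sketch:

// Editorial sketch: verify the host page provides the elements the
// recognizer expects before it is constructed on DOMContentLoaded.
['startVoiceButton', 'status', 'results'].forEach((id) => {
    if (!document.getElementById(id)) {
        console.warn(`HttpASRRecognizer: missing #${id} element`);
    }
});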