realtime voice capture

This commit is contained in:
parent c95e6a2552
commit d808bbfe26

322 src/audio_processor.js (Normal file)
@@ -0,0 +1,322 @@
// Audio processing module - advanced audio handling extracted from new_app.js

class AudioProcessor {
    constructor(options = {}) {
        this.audioContext = null;
        this.isRecording = false;
        this.audioChunks = [];

        // VAD (voice activity detection) state
        this.isSpeaking = false;
        this.silenceThreshold = options.silenceThreshold || 0.01;
        this.silenceTimeout = options.silenceTimeout || 1000;
        this.minSpeechDuration = options.minSpeechDuration || 300;
        this.silenceTimer = null;
        this.speechStartTime = null;
        this.audioBuffer = [];

        // API configuration
        this.apiConfig = {
            url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
            headers: {
                'X-Api-App-Key': '1988591469',
                'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
                'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
                'X-Api-Request-Id': this.generateUUID(),
                'X-Api-Sequence': '-1',
                'Content-Type': 'application/json'
            }
        };

        // Callbacks
        this.onSpeechStart = options.onSpeechStart || (() => {});
        this.onSpeechEnd = options.onSpeechEnd || (() => {});
        this.onRecognitionResult = options.onRecognitionResult || (() => {});
        this.onError = options.onError || (() => {});
        this.onStatusUpdate = options.onStatusUpdate || (() => {});
    }

    // Generate a v4-style UUID
    generateUUID() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
            const r = Math.random() * 16 | 0;
            const v = c === 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }

    // Compute audio energy (RMS volume)
    calculateAudioLevel(audioData) {
        let sum = 0;
        for (let i = 0; i < audioData.length; i++) {
            sum += audioData[i] * audioData[i];
        }
        return Math.sqrt(sum / audioData.length);
    }
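
    // Note (editorial): this is a root-mean-square (RMS) estimate over one
    // processing block. Float32 samples lie in [-1, 1], so a full-scale sine
    // has an RMS of ~0.707, while ambient silence typically sits well below
    // the 0.01 default threshold compared against in detectVoiceActivity().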

    // Voice activity detection
    detectVoiceActivity(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        const currentTime = Date.now();

        if (audioLevel > this.silenceThreshold) {
            if (!this.isSpeaking) {
                this.isSpeaking = true;
                this.speechStartTime = currentTime;
                this.audioBuffer = [];
                this.onSpeechStart();
                this.onStatusUpdate('Speech detected, recording...', 'speaking');
                console.log('Speech started');
            }

            if (this.silenceTimer) {
                clearTimeout(this.silenceTimer);
                this.silenceTimer = null;
            }

            return true;
        } else {
            if (this.isSpeaking && !this.silenceTimer) {
                this.silenceTimer = setTimeout(() => {
                    this.handleSpeechEnd();
                }, this.silenceTimeout);
            }

            return this.isSpeaking;
        }
    }

    // Handle end of speech
    async handleSpeechEnd() {
        if (this.isSpeaking) {
            const speechDuration = Date.now() - this.speechStartTime;

            if (speechDuration >= this.minSpeechDuration) {
                console.log(`Speech ended, duration: ${speechDuration}ms`);
                await this.processAudioBuffer();
                this.onStatusUpdate('Recognizing speech...', 'processing');
            } else {
                console.log('Speech too short, ignoring');
                this.onStatusUpdate('Waiting for speech input...', 'ready');
            }

            this.isSpeaking = false;
            this.speechStartTime = null;
            this.audioBuffer = [];
            this.onSpeechEnd();
        }

        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
    }

    // Merge the audio buffer and send it to the API
    async processAudioBuffer() {
        if (this.audioBuffer.length === 0) {
            return;
        }

        try {
            // Concatenate all buffered audio chunks
            const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
            const combinedBuffer = new Float32Array(totalLength);
            let offset = 0;

            for (const buffer of this.audioBuffer) {
                combinedBuffer.set(buffer, offset);
                offset += buffer.length;
            }

            // Encode as WAV, then base64
            const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
            const base64Audio = this.arrayBufferToBase64(wavBuffer);

            // Call the ASR API
            await this.callASRAPI(base64Audio);

        } catch (error) {
            console.error('Failed to process audio data:', error);
            this.onError('Failed to process audio data: ' + error.message);
        }
    }

    // Call the ASR API
    async callASRAPI(base64AudioData) {
        try {
            const requestBody = {
                user: {
                    uid: "1988591469"
                },
                audio: {
                    data: base64AudioData
                },
                request: {
                    model_name: "bigmodel"
                }
            };

            const response = await fetch(this.apiConfig.url, {
                method: 'POST',
                headers: this.apiConfig.headers,
                body: JSON.stringify(requestBody)
            });

            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }

            const result = await response.json();
            this.handleASRResponse(result);

        } catch (error) {
            console.error('ASR API call failed:', error);
            this.onError('ASR API call failed: ' + error.message);
        }
    }

    // Handle the ASR response
    handleASRResponse(response) {
        console.log('ASR response:', response);

        if (response && response.result) {
            const recognizedText = response.result.text;
            this.onRecognitionResult(recognizedText);
            this.onStatusUpdate('Recognition complete', 'completed');
        } else {
            console.log('No text recognized');
            this.onStatusUpdate('No text recognized', 'ready');
        }
    }

    // Encode Float32 samples as a 16-bit PCM WAV file
    encodeWAV(samples, sampleRate) {
        const length = samples.length;
        const buffer = new ArrayBuffer(44 + length * 2);
        const view = new DataView(buffer);

        // 44-byte WAV header
        const writeString = (offset, string) => {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        };

        writeString(0, 'RIFF');
        view.setUint32(4, 36 + length * 2, true);   // RIFF chunk size
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true);               // fmt chunk size
        view.setUint16(20, 1, true);                // audio format: PCM
        view.setUint16(22, 1, true);                // channels: mono
        view.setUint32(24, sampleRate, true);       // sample rate
        view.setUint32(28, sampleRate * 2, true);   // byte rate
        view.setUint16(32, 2, true);                // block align
        view.setUint16(34, 16, true);               // bits per sample
        writeString(36, 'data');
        view.setUint32(40, length * 2, true);       // data chunk size

        // Write samples as little-endian int16
        let offset = 44;
        for (let i = 0; i < length; i++) {
            const sample = Math.max(-1, Math.min(1, samples[i]));
            view.setInt16(offset, sample * 0x7FFF, true);
            offset += 2;
        }

        return buffer;
    }

    // ArrayBuffer to base64
    arrayBufferToBase64(buffer) {
        let binary = '';
        const bytes = new Uint8Array(buffer);
        for (let i = 0; i < bytes.byteLength; i++) {
            binary += String.fromCharCode(bytes[i]);
        }
        return btoa(binary);
    }

    // Start recording
    async startRecording() {
        try {
            const stream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    sampleRate: 16000,
                    channelCount: 1,
                    echoCancellation: true,
                    noiseSuppression: true
                }
            });

            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });

            const source = this.audioContext.createMediaStreamSource(stream);
            const processor = this.audioContext.createScriptProcessor(4096, 1, 1);

            processor.onaudioprocess = (event) => {
                const inputBuffer = event.inputBuffer;
                const inputData = inputBuffer.getChannelData(0);

                // Voice activity detection; buffer audio while speech is active
                if (this.detectVoiceActivity(inputData)) {
                    this.audioBuffer.push(new Float32Array(inputData));
                }
            };

            source.connect(processor);
            processor.connect(this.audioContext.destination);

            this.isRecording = true;
            this.onStatusUpdate('Waiting for speech input...', 'ready');

            return true;

        } catch (error) {
            console.error('Failed to start recording:', error);
            this.onError('Failed to start recording: ' + error.message);
            return false;
        }
    }

    // Stop recording
    stopRecording() {
        if (this.audioContext) {
            this.audioContext.close();
            this.audioContext = null;
        }

        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }

        // If speech is in progress, flush the last audio
        if (this.isSpeaking) {
            this.handleSpeechEnd();
        }

        this.isRecording = false;
        this.isSpeaking = false;
        this.audioBuffer = [];

        this.onStatusUpdate('Recording stopped', 'stopped');
        console.log('Recording stopped');
    }

    // Get recording status
    getRecordingStatus() {
        return {
            isRecording: this.isRecording,
            isSpeaking: this.isSpeaking,
            hasAudioContext: !!this.audioContext
        };
    }
}

// Export the module
export { AudioProcessor };
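
For orientation, a minimal usage sketch of the class above (editorial addition; assumes a browser context with microphone permission, and uses only the options and callbacks defined in the constructor):

import { AudioProcessor } from './audio_processor.js';

const processor = new AudioProcessor({
    silenceThreshold: 0.01,  // RMS level below which input counts as silence
    silenceTimeout: 1000,    // ms of silence that ends an utterance
    onRecognitionResult: (text) => console.log('ASR text:', text),
    onError: (message) => console.error(message),
    onStatusUpdate: (message, status) => console.log(`[${status}] ${message}`)
});

// startRecording() resolves to true on success, false on failure.
if (!(await processor.startRecording())) {
    console.warn('Could not start recording');
}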
@@ -6,6 +6,9 @@ import { getLLMConfig, getMinimaxiConfig, getAudioConfig, validateConfig } from

// Flag guarding against overlapping playback
let isPlaying = false;
// Audio playback queue
let audioQueue = [];
let isProcessingQueue = false;

async function chatWithAudioStream(userInput) {
  // Validate configuration
@@ -20,31 +23,22 @@ async function chatWithAudioStream(userInput) {
  const minimaxiConfig = getMinimaxiConfig();
  const audioConfig = getAudioConfig();

  // 1. Request the LLM answer
  console.log('\n=== Requesting LLM answer ===');
  const llmResponse = await requestLLMStream({
    apiKey: llmConfig.apiKey,
    model: llmConfig.model,
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: userInput },
    ],
  });
  // Clear the audio queue
  audioQueue = [];

  // Extract the LLM answer content (now returned directly)
  const llmContent = llmResponse;
  // Define the per-segment handler
  const handleSegment = async (segment) => {
    console.log('\n=== Processing text segment ===');
    console.log('Segment content:', segment);

  console.log('\n=== LLM answer ===');
  console.log("llmResponse: ", llmContent);

  // 2. Synthesize audio
  console.log('\n=== Starting audio synthesis ===');
    try {
      // Generate audio for this segment
      const audioResult = await requestMinimaxi({
        apiKey: minimaxiConfig.apiKey,
        groupId: minimaxiConfig.groupId,
        body: {
          model: audioConfig.model,
      text: llmContent,
          text: segment,
          stream: audioConfig.stream,
          language_boost: audioConfig.language_boost,
          output_format: audioConfig.output_format,
@@ -54,30 +48,70 @@ async function chatWithAudioStream(userInput) {
        stream: true,
      });

  // 3. Stream the audio playback
  console.log('\n=== Starting streamed playback ===');
  // console.log('Audio data length:', audioResult.data.audio.length);
  await playAudioStream(audioResult.data.audio);
      // Add the audio to the playback queue
      if (audioResult && audioResult.data && audioResult.data.audio) {
        audioQueue.push({
          text: segment,
          audioHex: audioResult.data.audio
        });
        console.log('Audio queued, queue length:', audioQueue.length);

        // Start draining the queue
        processAudioQueue();
      }
    } catch (error) {
      console.error('Audio generation failed:', error);
    }
  };

  // 1. Request the LLM answer, processing segments as they arrive
  console.log('\n=== Requesting LLM answer ===');
  const llmResponse = await requestLLMStream({
    apiKey: llmConfig.apiKey,
    model: llmConfig.model,
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: userInput },
    ],
    onSegment: handleSegment // per-segment callback
  });

  console.log('\n=== Full LLM answer ===');
  console.log("llmResponse: ", llmResponse);

  return {
    userInput,
    llmResponse: llmContent,
    audioResult,
    llmResponse,
    audioQueue: audioQueue.map(item => ({ text: item.text, hasAudio: !!item.audioHex }))
  };
}

// Drain the audio playback queue
async function processAudioQueue() {
  if (isProcessingQueue) return;

  isProcessingQueue = true;

  // Note: the drain loop below is disabled in this commit; chunk playback is
  // handled as audio arrives inside the minimaxi request module instead.
  // while (audioQueue.length > 0) {
  //   const audioItem = audioQueue.shift();
  //   console.log('\n=== Playing queued audio ===');
  //   console.log('Text:', audioItem.text);

  //   try {
  //     await playAudioStream(audioItem.audioHex);
  //   } catch (error) {
  //     console.error('Audio playback failed:', error);
  //   }
  // }

  isProcessingQueue = false;
}

// Stream audio playback
async function playAudioStream(audioHex) {
  if (isPlaying) {
    console.log('Audio already playing, skipping duplicate playback');
    return;
  }

  console.log('=== Starting audio playback ===');
  console.log('Audio data length:', audioHex.length);

  isPlaying = true;

  // Convert the hex string to an ArrayBuffer
  const audioBuffer = hexToArrayBuffer(audioHex);

@@ -102,13 +136,11 @@ async function playAudioStream(audioHex) {
    return new Promise((resolve) => {
      source.onended = () => {
        console.log('Audio playback finished');
        isPlaying = false;
        resolve();
      };
    });
  } catch (error) {
    console.error('Audio playback failed:', error);
    isPlaying = false;
    throw error;
  }
}
@@ -175,4 +207,6 @@ async function playAudioStreamNode(audioHex) {
  }
}

export { chatWithAudioStream, playAudioStream, playAudioStreamNode };


export { chatWithAudioStream, playAudioStream, playAudioStreamNode };
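
A usage sketch of the reworked pipeline (editorial; assumes the config module imported at the top of this file is filled in):

import { chatWithAudioStream } from './chat_with_audio.js';

// Streams the LLM answer, synthesizes audio per segment as it completes,
// and queues the clips for ordered playback.
const result = await chatWithAudioStream('Introduce yourself in one sentence');
console.log(result.llmResponse);  // full answer text
console.log(result.audioQueue);   // [{ text, hasAudio }, ...]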
@@ -16,11 +16,11 @@ export const config = {
  audio: {
    model: 'speech-02-hd',
    voiceSetting: {
      voice_id: 'yantu-qinggang',
      voice_id: 'yantu-qinggang-2',
      speed: 1,
      vol: 1,
      pitch: 0,
      emotion: 'happy',
      // emotion: 'happy',
    },
    audioSetting: {
      sample_rate: 32000,

139 src/index - 副本.html (Normal file)
@@ -0,0 +1,139 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Realtime Speech Recognition</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            background: white;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        .controls {
            text-align: center;
            margin-bottom: 30px;
        }
        .record-btn {
            background: #4CAF50;
            color: white;
            border: none;
            padding: 15px 30px;
            font-size: 18px;
            border-radius: 50px;
            cursor: pointer;
            transition: all 0.3s;
        }
        .record-btn:hover {
            background: #45a049;
        }
        .record-btn.recording {
            background: #f44336;
            animation: pulse 1s infinite;
        }
        @keyframes pulse {
            0% { transform: scale(1); }
            50% { transform: scale(1.05); }
            100% { transform: scale(1); }
        }
        .status {
            margin: 20px 0;
            padding: 10px;
            border-radius: 5px;
            text-align: center;
            font-weight: bold;
        }
        .status.connected {
            background: #d4edda;
            color: #155724;
            border: 1px solid #c3e6cb;
        }
        .status.speaking {
            background: #fff3cd;
            color: #856404;
            border: 1px solid #ffeaa7;
            animation: speaking-pulse 0.5s infinite alternate;
        }
        .status.processing {
            background: #cce7ff;
            color: #004085;
            border: 1px solid #99d6ff;
        }
        .status.disconnected {
            background: #f8d7da;
            color: #721c24;
            border: 1px solid #f5c6cb;
        }
        @keyframes speaking-pulse {
            0% { opacity: 0.7; }
            100% { opacity: 1; }
        }
        .results {
            max-height: 400px;
            overflow-y: auto;
            border: 1px solid #ddd;
            border-radius: 5px;
            padding: 15px;
            background: #fafafa;
        }
        .result-item {
            margin-bottom: 15px;
            padding: 10px;
            background: white;
            border-radius: 5px;
            border-left: 4px solid #4CAF50;
        }
        .timestamp {
            font-size: 12px;
            color: #666;
            margin-bottom: 5px;
        }
        .text {
            font-size: 16px;
            line-height: 1.4;
        }
        .help {
            margin-top: 20px;
            padding: 15px;
            background: #e3f2fd;
            border-radius: 5px;
            font-size: 14px;
            color: #1565c0;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Realtime Speech Recognition</h1>

        <div class="controls">
            <button id="recordBtn" class="record-btn">Start Recording</button>
        </div>

        <div id="status" class="status disconnected">Disconnected</div>

        <div class="help">
            <strong>How to use:</strong><br>
            1. Click "Start Recording" to turn on the microphone<br>
            2. The system detects your voice automatically and records only while you are speaking<br>
            3. When you stop speaking, the audio is sent for recognition automatically<br>
            4. Recognition results appear in the area below
        </div>

        <h3>Recognition results:</h3>
        <div id="results" class="results">
            <!-- Recognition results are rendered here -->
        </div>
    </div>

    <script src="new_app.js"></script>
</body>
</html>

93 src/index.js
@@ -1,5 +1,6 @@
// WebRTC audio/video chat application
import { chatWithAudioStream } from './chat_with_audio.js';
import { AudioProcessor } from './audio_processor.js';

class WebRTCChat {
    constructor() {
@@ -15,6 +16,30 @@ class WebRTCChat {
        this.videoStreams = new Map(); // MediaStreams for the different videos
        this.currentVideoStream = null;

        // Initialize the audio processor
        this.audioProcessor = new AudioProcessor({
            onSpeechStart: () => {
                this.voiceStatus.textContent = 'Speech detected, recording...';
                this.logMessage('Speech detected, recording...', 'info');
            },
            onSpeechEnd: () => {
                // Speech-end callback
            },
            onRecognitionResult: (text) => {
                // ASRTEXT = text;
                this.voiceStatus.textContent = 'Recognition complete';
                this.logMessage(`Speech recognition result: ${text}`, 'success');
                this.handleVoiceInput(text);
            },
            onError: (error) => {
                this.voiceStatus.textContent = 'Recognition failed';
                this.logMessage(error, 'error');
            },
            onStatusUpdate: (message, status) => {
                this.voiceStatus.textContent = message;
            }
        });

        this.initializeElements();
        this.initializeSocket();
        this.loadVideoMapping();
@@ -627,65 +652,34 @@ class WebRTCChat {
        });
    }

    // Changed: voice recording now goes through the audio processor
    async startVoiceRecording() {
        try {
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            this.mediaRecorder = new MediaRecorder(stream);
            this.audioChunks = [];

            this.mediaRecorder.ondataavailable = (event) => {
                this.audioChunks.push(event.data);
            };

            this.mediaRecorder.onstop = () => {
                const audioBlob = new Blob(this.audioChunks, { type: 'audio/wav' });
                this.processVoiceInput(audioBlob);
            };

            this.mediaRecorder.start();
            this.isRecording = true;
        const success = await this.audioProcessor.startRecording();

        if (success) {
            this.startVoiceButton.disabled = true;
            this.stopVoiceButton.disabled = false;
            this.voiceStatus.textContent = 'Recording...';
            this.startVoiceButton.classList.add('recording');

            this.logMessage('Voice recording started', 'info');
        } catch (error) {
            this.logMessage('Cannot access microphone: ' + error.message, 'error');
            this.voiceStatus.textContent = 'Waiting for speech input...';
            this.logMessage('Advanced voice recording started', 'success');
        } else {
            this.voiceStatus.textContent = 'Failed to start recording';
        }
    }

    // Changed: stop voice recording
    stopVoiceRecording() {
        if (this.mediaRecorder && this.isRecording) {
            this.mediaRecorder.stop();
            this.isRecording = false;
        this.audioProcessor.stopRecording();

        this.startVoiceButton.disabled = false;
        this.stopVoiceButton.disabled = true;
            this.voiceStatus.textContent = 'Click to start voice input';
        this.startVoiceButton.classList.remove('recording');
        this.voiceStatus.textContent = 'Click to start voice input';

            this.logMessage('Voice recording stopped', 'info');
        }
    }

    async processVoiceInput(audioBlob) {
        // A speech recognition API (Web Speech API or a third-party service) could be integrated here
        // For the demo we use a simple simulated recognition
        const mockText = this.simulateSpeechRecognition();

        this.socket.emit('voice-input', {
            audioData: audioBlob,
            text: mockText
        });

        this.logMessage(`Speech recognition result: ${mockText}`, 'info');

        // Switch the video stream based on the recognition result
        await this.handleVoiceInput(mockText);
        this.logMessage('Voice recording stopped', 'info');
    }

    // Handle the recognized voice input
    async handleVoiceInput(text) {
        // Look up the matching video for the text ('默认' is the default mapping key)
        let videoFile = this.videoMapping['默认'] || this.defaultVideo;
@@ -705,7 +699,20 @@ class WebRTCChat {
            type: 'voice', 
            text 
        });

        // Hand off to the LLM
        try {
            this.logMessage('Processing voice input, please wait...', 'info');
            const result = await chatWithAudioStream(text);
            this.logMessage(`LLM answer: ${result.llmResponse}`, 'success');
        } catch (error) {
            this.logMessage(`Failed to process voice input: ${error.message}`, 'error');
            console.error('chatWithAudioStream error:', error);
        }
    }

    // The original simple audio handling was removed:
    // processVoiceInput() and simulateSpeechRecognition() are gone

    simulateSpeechRecognition() {
        // Simulated speech recognition: returns a random preset text

@@ -1,6 +1,6 @@
// Request the LLM chat-completions endpoint in streaming mode and print the streamed content

async function requestLLMStream({ apiKey, model, messages }) {
async function requestLLMStream({ apiKey, model, messages, onSegment }) {
  const response = await fetch('https://ark.cn-beijing.volces.com/api/v3/bots/chat/completions', {
    method: 'POST',
    headers: {
@@ -26,6 +26,10 @@ async function requestLLMStream({ apiKey, model, messages }) {
  let done = false;
  let buffer = '';
  let content = '';
  let pendingText = ''; // text fragment awaiting segmentation

  // Segment delimiters (full-width and ASCII punctuation)
  const segmentDelimiters = /[,。:;!?,.:;!?]/;

  while (!done) {
    const { value, done: doneReading } = await reader.read();
@@ -47,6 +51,10 @@ async function requestLLMStream({ apiKey, model, messages }) {
          
          if (jsonStr === '[DONE]') {
            console.log('LLM SSE stream finished');
            // Flush any remaining pending text
            if (pendingText.trim() && onSegment) {
              await onSegment(pendingText.trim());
            }
            continue;
          }
          
@@ -55,7 +63,29 @@ async function requestLLMStream({ apiKey, model, messages }) {
            if (obj.choices && obj.choices[0] && obj.choices[0].delta && obj.choices[0].delta.content) {
              const deltaContent = obj.choices[0].delta.content;
              content += deltaContent;
              pendingText += deltaContent;
              console.log('LLM content fragment:', deltaContent);
              
              // Check whether the pending text contains a segment delimiter
              if (segmentDelimiters.test(pendingText)) {
                // Split the text on delimiters
                const segments = pendingText.split(segmentDelimiters);
                
                // Process the complete segments (all but the last, which may be partial)
                for (let i = 0; i < segments.length - 1; i++) {
                  const segment = segments[i].trim();
                  if (segment && onSegment) {
                    // Re-attach a delimiter (note: match() returns the first
                    // delimiter in pendingText, so every segment in this batch
                    // gets that same delimiter)
                    const delimiterMatch = pendingText.match(segmentDelimiters);
                    const segmentWithDelimiter = segment + (delimiterMatch ? delimiterMatch[0] : '');
                    console.log('Complete segment detected:', segmentWithDelimiter);
                    await onSegment(segmentWithDelimiter);
                  }
                }
                
                // Keep the last, possibly incomplete segment
                pendingText = segments[segments.length - 1] || '';
              }
            }
          } catch (e) {
            console.error('Failed to parse LLM SSE data:', e, 'raw data:', jsonStr);
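
To make the segmentation behavior concrete, a small self-contained trace (editorial sketch; the regex is the one defined in requestLLMStream):

const segmentDelimiters = /[,。:;!?,.:;!?]/;

let pending = '';
for (const delta of ['你好', ',很高兴', '见到你。']) {
  pending += delta;
  if (segmentDelimiters.test(pending)) {
    const parts = pending.split(segmentDelimiters);
    // Logs ['你好'] after the second delta and ['很高兴见到你'] after the third;
    // requestLLMStream additionally re-attaches a delimiter before calling onSegment.
    console.log('segments:', parts.slice(0, -1));
    pending = parts[parts.length - 1] || '';
  }
}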
@@ -1,5 +1,135 @@
// Request the minimaxi model endpoint in streaming or non-streaming mode and print/return the content

// Audio playback variables and helpers, added at the top of the file
let audioContext = null;
let audioQueue = []; // audio queue
let isPlaying = false;
let isProcessingQueue = false; // queue processing state
let nextStartTime = 0; // added to declare the nextStartTime variable

// Initialize the audio context
function initAudioContext() {
  if (!audioContext) {
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
  }
  return audioContext;
}

// Convert a hex string to an ArrayBuffer
function hexToArrayBuffer(hex) {
  const bytes = new Uint8Array(hex.length / 2);
  for (let i = 0; i < hex.length; i += 2) {
    bytes[i / 2] = parseInt(hex.substr(i, 2), 16);
  }
  return bytes.buffer;
}

// Add audio to the queue (does not wait for playback)
async function addAudioToQueue(audioHex) {
  if (!audioHex || audioHex.length === 0) return;
  
  try {
    const ctx = initAudioContext();
    const audioBuffer = hexToArrayBuffer(audioHex);
    const audioData = await ctx.decodeAudioData(audioBuffer);
    
    // Push the decoded audio onto the queue
    audioQueue.push({
      audioData,
      timestamp: Date.now()
    });
    
    console.log(`Audio queued, queue length: ${audioQueue.length}`);
    
    // Start the queue processor if it is not already running
    if (!isProcessingQueue) {
      processAudioQueue();
    }
    
  } catch (error) {
    console.error('Audio decoding failed:', error);
  }
}

// Queue processor - runs independently, playing audio in order
async function processAudioQueue() {
  if (isProcessingQueue) return;
  
  isProcessingQueue = true;
  console.log('Audio queue processing started');
  
  while (audioQueue.length > 0 || isPlaying) {
    // If nothing is playing and the queue has audio, play the next item
    if (!isPlaying && audioQueue.length > 0) {
      const audioItem = audioQueue.shift();
      await playAudioData(audioItem.audioData);
    } else {
      // Otherwise wait briefly before checking again
      await new Promise(resolve => setTimeout(resolve, 50));
    }
  }
  
  isProcessingQueue = false;
  console.log('Audio queue processing finished');
}

// Play a single decoded audio buffer
function playAudioData(audioData) {
  return new Promise((resolve) => {
    try {
      const ctx = initAudioContext();
      const source = ctx.createBufferSource();
      source.buffer = audioData;
      source.connect(ctx.destination);
      
      isPlaying = true;
      
      source.onended = () => {
        console.log('Audio chunk finished');
        isPlaying = false;
        resolve();
      };
      
      // Timeout guard
      setTimeout(() => {
        if (isPlaying) {
          console.log('Audio playback timed out, forcing completion');
          isPlaying = false;
          resolve();
        }
      }, (audioData.duration + 0.5) * 1000);
      
      source.start(0);
      console.log(`Playing audio chunk, duration: ${audioData.duration}s`);
      
    } catch (error) {
      console.error('Audio playback failed:', error);
      isPlaying = false;
      resolve();
    }
  });
}

// Renamed: the old playAudioChunk now delegates to addAudioToQueue
const playAudioChunk = addAudioToQueue;

// Clear the audio queue
function clearAudioQueue() {
  audioQueue.length = 0;
  console.log('Audio queue cleared');
}

// Get the queue status
function getQueueStatus() {
  return {
    queueLength: audioQueue.length,
    isPlaying,
    isProcessingQueue
  };
}

// waitForCurrentAudioToFinish was removed; it is no longer needed

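// Editorial note: decoding happens eagerly in addAudioToQueue (decodeAudioData
// requires a complete, decodable chunk), while ordering is enforced solely by
// the single processAudioQueue drain loop above. A usage sketch, where
// firstChunkHex / secondChunkHex are placeholder hex strings:
//
//   await addAudioToQueue(firstChunkHex);   // starts the drain loop
//   await addAudioToQueue(secondChunkHex);  // plays after the first chunk ends
//   console.log(getQueueStatus());          // { queueLength, isPlaying, isProcessingQueue }
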
async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
  const url = `https://api.minimaxi.com/v1/t2a_v2`;
  const reqBody = { ...body, stream };
@@ -24,7 +154,7 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
    console.log(JSON.stringify(result, null, 2));
    return result;
  } else {
    // Streaming: parse each chunk and merge the audio
    // Streaming: parse each chunk and play the audio in real time
    const reader = response.body.getReader();
    const decoder = new TextDecoder('utf-8');
    let done = false;
@@ -32,25 +162,28 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
    let audioHex = '';
    let lastFullResult = null;
    
    // Reset playback state
    nextStartTime = 0;
    if (audioContext) {
      nextStartTime = audioContext.currentTime;
    }

    while (!done) {
      const { value, done: doneReading } = await reader.read();
      done = doneReading;
      if (value) {
        const chunk = decoder.decode(value, { stream: true });
        buffer += chunk;
        // console.log('Raw chunk received:', chunk);
        
        // Handle SSE-format data (split on \n)
        let lines = buffer.split('\n');
        buffer = lines.pop(); // the last line may be incomplete; keep it for the next read
        for (const line of lines) {
          if (!line.trim()) continue;
          // console.log('Processing line:', line);
          
          // Check whether this is an SSE data line
          if (line.startsWith('data:')) {
            const jsonStr = line.substring(6); // strip the 'data: ' prefix
            // console.log('Extracted JSON string:', jsonStr);
            
            if (jsonStr.trim() === '[DONE]') {
              console.log('SSE stream finished');
@@ -59,17 +192,19 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
            
            try {
              const obj = JSON.parse(jsonStr);
              // Streaming: parse each chunk and merge the audio
              if (obj.data && obj.data.audio) {
              // Streaming: parse each chunk and play the audio in real time
              if (obj.data && obj.data.audio && obj.data.status === 1) {
                console.log('Audio data chunk received!', obj.data.audio.length);
                audioHex += obj.data.audio;
                
                // Play this audio chunk immediately
                await playAudioChunk(obj.data.audio);
              }
              // status=2 marks the last chunk; record the full structure
              if (obj.data && obj.data.status === 2) {
                lastFullResult = obj;
                console.log('Final status received');
              }
              // Print each chunk as it arrives
              console.log('Parsed successfully:', JSON.stringify(obj));
            } catch (e) {
              console.error('Failed to parse SSE data:', e, 'raw data:', jsonStr);
            }
@@ -83,7 +218,11 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
            try {
              const obj = JSON.parse(line);
              if (obj.data && obj.data.audio) {
                console.log('Audio data received without data: prefix!', obj.data.audio.length);
                audioHex += obj.data.audio;
                
                // Play this audio chunk immediately
                await playAudioChunk(obj.data.audio);
              }
              if (obj.data && obj.data.status === 2) {
                lastFullResult = obj;
@@ -109,4 +248,135 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
  }
}

export { requestMinimaxi };
// Volcano Engine TTS
async function requestVolcanTTS({ 
  appId, 
  accessKey, 
  resourceId = 'volc.service_type.10029', 
  appKey = 'aGjiRDfUWi',
  body, 
  stream = true 
}) {
  const url = 'https://openspeech.bytedance.com/api/v3/tts/unidirectional';
  
  // Generate a request ID
  const requestId = generateUUID();
  
  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'X-Api-App-Id': appId,
      'X-Api-Access-Key': accessKey,
      'X-Api-Resource-Id': resourceId,
      'X-Api-App-Key': appKey,
      'X-Api-Request-Id': requestId,
      'Content-Type': 'application/json',
      'Accept': stream ? 'text/event-stream' : 'application/json',
      'Cache-Control': 'no-cache',
    },
    body: JSON.stringify(body),
  });

  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }

  if (!stream) {
    // Non-streaming: return the JSON directly
    const result = await response.json();
    console.log('Volcano TTS non-streaming result:', JSON.stringify(result, null, 2));
    return result;
  } else {
    // Streaming: parse each chunk and merge the audio
    const reader = response.body.getReader();
    const decoder = new TextDecoder('utf-8');
    let done = false;
    let buffer = '';
    let audioBase64 = '';
    let lastFullResult = null;

    while (!done) {
      const { value, done: doneReading } = await reader.read();
      done = doneReading;
      if (value) {
        const chunk = decoder.decode(value, { stream: true });
        buffer += chunk;
        
        // Handle SSE-format data (split on \n)
        let lines = buffer.split('\n');
        buffer = lines.pop(); // the last line may be incomplete; keep it for the next read
        
        for (const line of lines) {
          if (!line.trim()) continue;
          
          // Check whether this is an SSE data line
          if (line.startsWith('data:')) {
            const jsonStr = line.substring(6); // strip the 'data: ' prefix
            
            if (jsonStr.trim() === '[DONE]') {
              console.log('Volcano TTS stream finished');
              continue;
            }
            
            try {
              const obj = JSON.parse(jsonStr);
              // Streaming: merge the base64 audio from each chunk
              if (obj.data) {
                audioBase64 += obj.data;
                lastFullResult = obj;
              }
              // Print each chunk as it arrives
              console.log('Volcano TTS parsed successfully:', JSON.stringify(obj));
            } catch (e) {
              console.error('Failed to parse Volcano TTS data:', e, 'raw data:', jsonStr);
            }
          } else if (line.startsWith('event: ') || line.startsWith('id: ') || line.startsWith('retry: ')) {
            // Ignore other SSE fields
            console.log('Ignoring SSE field:', line);
            continue;
          } else if (line.trim() && !line.startsWith('data:')) {
            // Try parsing the line directly (for non-SSE responses)
            try {
              const obj = JSON.parse(line);
              if (obj.data) {
                audioBase64 += obj.data;
                lastFullResult = obj;
              }
              console.log('Volcano TTS parsed directly:', JSON.stringify(obj));
            } catch (e) {
              console.error('Failed to parse Volcano TTS chunk:', e, line);
            }
          }
        }
      }
    }
    
    // Assemble the final structure
    console.log('Volcano TTS total audio length:', audioBase64.length);
    
    if (lastFullResult) {
      // Replace the final result's audio data with the merged audio
      lastFullResult.data = audioBase64;
      console.log('Volcano TTS final merged result:', JSON.stringify(lastFullResult, null, 2));
      return lastFullResult;
    } else {
      // No complete structure; return the merged audio
      return { 
        code: 0, 
        message: '', 
        data: audioBase64 
      };
    }
  }
}

// Helper to generate a UUID
function generateUUID() {
  return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
    const r = Math.random() * 16 | 0;
    const v = c === 'x' ? r : (r & 0x3 | 0x8);
    return v.toString(16);
  });
}

export { requestMinimaxi, requestVolcanTTS };
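
A usage sketch for the new TTS entry point (editorial; the module path is hypothetical, the header defaults come from the signature above, and the body fields are illustrative assumptions rather than a documented schema):

import { requestVolcanTTS } from './request_minimaxi.js'; // hypothetical module path

const result = await requestVolcanTTS({
  appId: 'YOUR_APP_ID',         // placeholder credentials
  accessKey: 'YOUR_ACCESS_KEY',
  body: {
    // Illustrative payload only; consult the Volcano Engine TTS docs
    // for the actual unidirectional-synthesis request schema.
    text: 'Hello from the realtime pipeline',
  },
  stream: true,
});
console.log('base64 audio length:', result.data.length);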
							
								
								
									
346 src/new_app.js (Normal file)
@@ -0,0 +1,346 @@
let ASRTEXT = ''

class HttpASRRecognizer {
    constructor() {
        this.mediaRecorder = null;
        this.audioContext = null;
        this.isRecording = false;
        this.audioChunks = [];
        
        // VAD (voice activity detection) state
        this.isSpeaking = false;
        this.silenceThreshold = 0.01;
        this.silenceTimeout = 1000;
        this.minSpeechDuration = 300;
        this.silenceTimer = null;
        this.speechStartTime = null;
        this.audioBuffer = [];
        
        // API configuration
        this.apiConfig = {
            url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
            headers: {
                'X-Api-App-Key': '1988591469',
                'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
                'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
                'X-Api-Request-Id': this.generateUUID(),
                'X-Api-Sequence': '-1',
                'Content-Type': 'application/json'
            }
        };
        
        this.recordBtn = document.getElementById('startVoiceButton');
        this.statusDiv = document.getElementById('status');
        this.resultsDiv = document.getElementById('results');
        
        this.initEventListeners();
    }
    
    initEventListeners() {
        this.recordBtn.addEventListener('click', () => {
            if (this.isRecording) {
                this.stopRecording();
            } else {
                this.startRecording();
            }
        });
    }
    
    // Generate a v4-style UUID
    generateUUID() {
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
            const r = Math.random() * 16 | 0;
            const v = c === 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }
    
    // Compute audio energy (RMS volume)
    calculateAudioLevel(audioData) {
        let sum = 0;
        for (let i = 0; i < audioData.length; i++) {
            sum += audioData[i] * audioData[i];
        }
        return Math.sqrt(sum / audioData.length);
    }
    
    // Voice activity detection
    detectVoiceActivity(audioData) {
        const audioLevel = this.calculateAudioLevel(audioData);
        const currentTime = Date.now();
        
        if (audioLevel > this.silenceThreshold) {
            if (!this.isSpeaking) {
                this.isSpeaking = true;
                this.speechStartTime = currentTime;
                this.audioBuffer = [];
                this.updateStatus('Speech detected, recording...', 'speaking');
                console.log('Speech started');
            }
            
            if (this.silenceTimer) {
                clearTimeout(this.silenceTimer);
                this.silenceTimer = null;
            }
            
            return true;
        } else {
            if (this.isSpeaking && !this.silenceTimer) {
                this.silenceTimer = setTimeout(() => {
                    this.onSpeechEnd();
                }, this.silenceTimeout);
            }
            
            return this.isSpeaking;
        }
    }
    
    // Handle end of speech
    async onSpeechEnd() {
        if (this.isSpeaking) {
            const speechDuration = Date.now() - this.speechStartTime;
            
            if (speechDuration >= this.minSpeechDuration) {
                console.log(`Speech ended, duration: ${speechDuration}ms`);
                await this.processAudioBuffer();
                // this.updateStatus('Recognizing speech...', 'processing');
                console.log('Recognizing speech')
            } else {
                console.log('Speech too short, ignoring');
                // this.updateStatus('Waiting for speech input...', 'ready');
                console.log('Waiting for speech input...')
            }
            
            this.isSpeaking = false;
            this.speechStartTime = null;
            this.audioBuffer = [];
        }
        
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
    }
    
    // Merge the audio buffer and send it to the API
    async processAudioBuffer() {
        if (this.audioBuffer.length === 0) {
            return;
        }
        
        try {
            // Concatenate all buffered audio chunks
            const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
            const combinedBuffer = new Float32Array(totalLength);
            let offset = 0;
            
            for (const buffer of this.audioBuffer) {
                combinedBuffer.set(buffer, offset);
                offset += buffer.length;
            }
            
            // Encode as WAV, then base64
            const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
            const base64Audio = this.arrayBufferToBase64(wavBuffer);
            
            // Call the ASR API
            await this.callASRAPI(base64Audio);
            
        } catch (error) {
            console.error('Failed to process audio data:', error);
            this.updateStatus('Recognition failed', 'error');
        }
    }
    
    // Call the ASR API
    async callASRAPI(base64AudioData) {
        try {
            const requestBody = {
                user: {
                    uid: "1988591469"
                },
                audio: {
                    data: base64AudioData
                },
                request: {
                    model_name: "bigmodel"
                }
            };
            
            const response = await fetch(this.apiConfig.url, {
                method: 'POST',
                headers: this.apiConfig.headers,
                body: JSON.stringify(requestBody)
            });
            
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            
            const result = await response.json();
            this.handleASRResponse(result);
            
        } catch (error) {
            console.error('ASR API call failed:', error);
            this.updateStatus('API call failed', 'error');
        }
    }
    
    // Handle the ASR response
    handleASRResponse(response) {
        console.log('ASR response:', response);
        
        if (response && response.data && response.data.result) {
            ASRTEXT = response.data.result;
            // this.displayResult(text);
            // this.updateStatus('Recognition complete', 'completed');
            console.log('Recognition complete')
        } else {
            console.log('No text recognized');
            // this.updateStatus('No text recognized', 'ready');
        }
    }
    
    // Display a recognition result
    displayResult(text) {
        const resultElement = document.createElement('div');
        resultElement.className = 'result-item';
        resultElement.innerHTML = `
            <span class="timestamp">${new Date().toLocaleTimeString()}</span>
            <span class="text">${text}</span>
        `;
        this.resultsDiv.appendChild(resultElement);
        this.resultsDiv.scrollTop = this.resultsDiv.scrollHeight;
    }
    
    // Update the status display
    updateStatus(message, status) {
        this.statusDiv.textContent = message;
        this.statusDiv.className = `status ${status}`;
    }
    
    // Encode Float32 samples as a 16-bit PCM WAV file
    encodeWAV(samples, sampleRate) {
        const length = samples.length;
        const buffer = new ArrayBuffer(44 + length * 2);
        const view = new DataView(buffer);
        
        // 44-byte WAV header
        const writeString = (offset, string) => {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        };
        
        writeString(0, 'RIFF');
        view.setUint32(4, 36 + length * 2, true);
        writeString(8, 'WAVE');
        writeString(12, 'fmt ');
        view.setUint32(16, 16, true);
        view.setUint16(20, 1, true);
        view.setUint16(22, 1, true);
        view.setUint32(24, sampleRate, true);
        view.setUint32(28, sampleRate * 2, true);
        view.setUint16(32, 2, true);
        view.setUint16(34, 16, true);
        writeString(36, 'data');
        view.setUint32(40, length * 2, true);
        
        // Write samples as little-endian int16
        let offset = 44;
        for (let i = 0; i < length; i++) {
            const sample = Math.max(-1, Math.min(1, samples[i]));
            view.setInt16(offset, sample * 0x7FFF, true);
            offset += 2;
        }
        
        return buffer;
    }
    
    // ArrayBuffer to base64
    arrayBufferToBase64(buffer) {
        let binary = '';
        const bytes = new Uint8Array(buffer);
        for (let i = 0; i < bytes.byteLength; i++) {
            binary += String.fromCharCode(bytes[i]);
        }
        return btoa(binary);
    }
    
    async startRecording() {
        try {
            const stream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    sampleRate: 16000,
                    channelCount: 1,
                    echoCancellation: true,
                    noiseSuppression: true
                }
            });
            
            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });
            
            const source = this.audioContext.createMediaStreamSource(stream);
            const processor = this.audioContext.createScriptProcessor(4096, 1, 1);
            
            processor.onaudioprocess = (event) => {
                const inputBuffer = event.inputBuffer;
                const inputData = inputBuffer.getChannelData(0);
                
                // Voice activity detection; buffer audio while speech is active
                if (this.detectVoiceActivity(inputData)) {
                    this.audioBuffer.push(new Float32Array(inputData));
                }
            };
            
            source.connect(processor);
            processor.connect(this.audioContext.destination);
            
            this.isRecording = true;
            this.recordBtn.textContent = 'Stop Recording';
            this.recordBtn.className = 'btn recording';
            // this.updateStatus('Waiting for speech input...', 'ready');
            
        } catch (error) {
            console.error('Failed to start recording:', error);
            // this.updateStatus('Failed to start recording', 'error');
        }
    }
    
    stopRecording() {
        if (this.audioContext) {
            this.audioContext.close();
            this.audioContext = null;
        }
        
        if (this.silenceTimer) {
            clearTimeout(this.silenceTimer);
            this.silenceTimer = null;
        }
        
        // If speech is in progress, flush the last audio
        if (this.isSpeaking) {
            this.onSpeechEnd();
        }
        
        this.isRecording = false;
        this.isSpeaking = false;
        this.audioBuffer = [];
        
        this.recordBtn.textContent = 'Start Recording';
        this.recordBtn.className = 'btn';
        console.log('Recording stopped');
        // this.updateStatus('Recording stopped', 'stopped');
    }
}

// Initialize the application
document.addEventListener('DOMContentLoaded', () => {
    const asrRecognizer = new HttpASRRecognizer();
    console.log('HTTP ASR recognizer initialized');
});
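
One integration caveat: the constructor above looks up #startVoiceButton, #status, and #results, while the bundled index page defines its button as #recordBtn, so the host page's ids must match. A quick editorial sanity-check sketch:

// Editorial sketch: verify the host page provides the elements the
// recognizer expects before it is constructed on DOMContentLoaded.
['startVoiceButton', 'status', 'results'].forEach((id) => {
    if (!document.getElementById(id)) {
        console.warn(`HttpASRRecognizer: missing #${id} element`);
    }
});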