From 06aa52f15266b8fa2931b987b4919ba22c956dde Mon Sep 17 00:00:00 2001 From: songjvcheng Date: Tue, 12 Aug 2025 21:40:55 +0800 Subject: [PATCH 1/7] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=97=81=E7=99=BD?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_stream.js | 65 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/src/llm_stream.js b/src/llm_stream.js index 71de939..bef2cf6 100644 --- a/src/llm_stream.js +++ b/src/llm_stream.js @@ -1,5 +1,35 @@ // 以流式方式请求LLM大模型接口,并打印流式返回内容 +// 过滤旁白内容的函数 +function filterNarration(text) { + if (!text) return text; + + // 匹配各种括号内的旁白内容 + // 包括:()、【】、[]、{}、〈〉、《》等 + const narrationPatterns = [ + /([^)]*)/g, // 中文圆括号 + /\([^)]*\)/g, // 英文圆括号 + /【[^】]*】/g, // 中文方括号 + /\[[^\]]*\]/g, // 英文方括号 + /\{[^}]*\}/g, // 花括号 + /〈[^〉]*〉/g, // 中文尖括号 + /《[^》]*》/g, // 中文书名号 + /<[^>]*>/g // 英文尖括号 + ]; + + let filteredText = text; + + // 逐个应用过滤规则 + narrationPatterns.forEach(pattern => { + filteredText = filteredText.replace(pattern, ''); + }); + + // 清理多余的空格和换行 + filteredText = filteredText.replace(/\s+/g, ' ').trim(); + + return filteredText; +} + async function requestLLMStream({ apiKey, model, messages, onSegment }) { const response = await fetch('https://ark.cn-beijing.volces.com/api/v3/bots/chat/completions', { method: 'POST', @@ -54,7 +84,14 @@ async function requestLLMStream({ apiKey, model, messages, onSegment }) { // 处理最后的待处理文本(无论长度是否大于5个字) if (pendingText.trim() && onSegment) { console.log('处理最后的待处理文本:', pendingText.trim()); - await onSegment(pendingText.trim(), true); + // 过滤旁白内容 + const filteredText = filterNarration(pendingText.trim()); + if (filteredText.trim()) { + console.log('过滤旁白后的最后文本:', filteredText); + await onSegment(filteredText, true); + } else { + console.log('最后的文本被完全过滤,跳过'); + } } continue; } @@ -67,10 +104,13 @@ async function requestLLMStream({ apiKey, model, messages, onSegment }) { pendingText += deltaContent; console.log('LLM内容片段:', deltaContent); - // 检查是否包含分段分隔符 - if (segmentDelimiters.test(pendingText)) { - // 按分隔符分割文本 - const segments = pendingText.split(segmentDelimiters); + // 先过滤旁白,再检查分段分隔符 + const filteredPendingText = filterNarration(pendingText); + + // 检查过滤后的文本是否包含分段分隔符 + if (segmentDelimiters.test(filteredPendingText)) { + // 按分隔符分割已过滤的文本 + const segments = filteredPendingText.split(segmentDelimiters); // 重新组合处理:只处理足够长的完整段落 let accumulatedText = ''; @@ -81,7 +121,7 @@ async function requestLLMStream({ apiKey, model, messages, onSegment }) { if (segment) { accumulatedText += segment; // 找到分隔符 - const delimiterMatch = pendingText.match(segmentDelimiters); + const delimiterMatch = filteredPendingText.match(segmentDelimiters); if (delimiterMatch) { accumulatedText += delimiterMatch[0]; } @@ -89,17 +129,22 @@ async function requestLLMStream({ apiKey, model, messages, onSegment }) { // 如果累积文本长度大于5个字,处理它 if (accumulatedText.length > 8 && onSegment) { console.log('检测到完整段落:', accumulatedText); - await onSegment(accumulatedText, false); + // 文本已经过滤过旁白,直接使用 + if (accumulatedText.trim()) { + console.log('处理过滤后的文本:', accumulatedText); + await onSegment(accumulatedText, false); + } hasProcessed = true; accumulatedText = ''; // 重置 } } } - // 更新pendingText + // 更新pendingText - 使用原始文本但需要相应调整 if (hasProcessed) { - // 保留未处理的累积文本和最后一个不完整段落 - pendingText = accumulatedText + (segments[segments.length - 1] || ''); + // 计算已处理的原始文本长度,更新pendingText + const processedLength = pendingText.length - (segments[segments.length - 1] || '').length; + pendingText = pendingText.substring(processedLength); } } } From b1c0656bb49242d1b254ae581c1db895463ef7ec Mon Sep 17 00:00:00 2001 From: Song367 <601337784@qq.com> Date: Wed, 13 Aug 2025 10:05:37 +0800 Subject: [PATCH 2/7] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E8=BF=87=E6=BB=A4?= =?UTF-8?q?=E6=97=B6=E7=9A=84=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_stream.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llm_stream.js b/src/llm_stream.js index bef2cf6..5200a20 100644 --- a/src/llm_stream.js +++ b/src/llm_stream.js @@ -102,7 +102,7 @@ async function requestLLMStream({ apiKey, model, messages, onSegment }) { const deltaContent = obj.choices[0].delta.content; content += deltaContent; pendingText += deltaContent; - console.log('LLM内容片段:', deltaContent); + console.log('【未过滤】LLM内容片段:', pendingText); // 先过滤旁白,再检查分段分隔符 const filteredPendingText = filterNarration(pendingText); @@ -128,7 +128,7 @@ async function requestLLMStream({ apiKey, model, messages, onSegment }) { // 如果累积文本长度大于5个字,处理它 if (accumulatedText.length > 8 && onSegment) { - console.log('检测到完整段落:', accumulatedText); + console.log('【已过滤】检测到完整段落:', accumulatedText); // 文本已经过滤过旁白,直接使用 if (accumulatedText.trim()) { console.log('处理过滤后的文本:', accumulatedText); From 3e97e3031f75acaf7db95cbae73a9ca575e4ee22 Mon Sep 17 00:00:00 2001 From: Song367 <601337784@qq.com> Date: Wed, 13 Aug 2025 12:30:18 +0800 Subject: [PATCH 3/7] =?UTF-8?q?=E6=B7=BB=E5=8A=A020=E6=9D=A1=E5=8E=86?= =?UTF-8?q?=E5=8F=B2=E5=AF=B9=E8=AF=9D=E9=99=90=E5=88=B6=EF=BC=8C=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E5=BC=80=E5=9C=BA=E7=99=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server.js | 80 +++++++++- src/chat_with_audio.js | 17 +- src/config.example.js | 94 ----------- src/debug_audio.js | 26 --- src/index.js | 176 +++++++++++++++++--- src/minimaxi_stream.js | 2 +- src/new_app.js | 346 ---------------------------------------- src/video_audio_sync.js | 44 ----- src/video_queue_test.js | 89 ----------- 9 files changed, 235 insertions(+), 639 deletions(-) delete mode 100644 src/config.example.js delete mode 100644 src/debug_audio.js delete mode 100644 src/new_app.js delete mode 100644 src/video_audio_sync.js delete mode 100644 src/video_queue_test.js diff --git a/server.js b/server.js index 302de30..f29908c 100644 --- a/server.js +++ b/server.js @@ -135,28 +135,65 @@ const scenes = [ defaultVideo: 'qc-bd-4.mp4', interactionVideo: 'qc-hc-7.mp4', tag: 'wakeup', - apiKey: 'bot-20250724150616-xqpz8' // 起床场景的API key + apiKey: 'bot-20250724150616-xqpz8', // 起床场景的API key + openingLines: [ + '你靠得近点...像梦里那样', + "刚醒就看到你,比闹钟好使一百倍。", + "昨晚梦里喊你名字喊醒了,现在视频看到你,心跳比梦里还烫。", + "你穿我T恤的样子...让我想咬住衣领不放。", + "宝贝早...你声音比阳光还暖。", + "喉咙里有咖啡味...要不要来尝尝?记得上次你咬我锁骨时,咖啡还温着。", + "晨雾遮住你半张脸...但你的呼吸声比雾气更潮湿,让我想起你咬我耳垂的感觉。" + ] }, { name: '开车', defaultVideo: 'kc-bd-3.mp4', interactionVideo: 'kc-sh-3.mp4', tag: 'driving', - apiKey: 'bot-20250623140339-r8f8b' // 开车场景的API key + apiKey: 'bot-20250623140339-r8f8b', // 开车场景的API key + openingLines: [ + '宝宝醒啦?看到你的脸,我这早起的怨气‘咻’一下就飞走了。车窗外的阳光都没你眼睛亮。', + '刚过桥,下面河水在闪光,突然就好想带你来兜风…你负责吃零食看风景,我负责偷瞄你。', + '看到那个大钟楼没?每次路过都想你。想着要是能牵着你的手,站在最高层往下看该多好。', + '唉,快到了…真不想挂。感觉你像块小磁铁,隔着屏幕都想把我吸过去。今天上班肯定满脑子都是你。', + '要开始堵车了...要是你在副驾的话,这段路该多甜呀', + '宝贝早啊,刚过红绿灯就忍不住想你了,路上全是你的影子~', + '车载音乐随机到那首我们接吻时哼的歌,心跳又乱了', + '导航说今天要开58分钟,其实想开58小时去你心里' + ] }, { - name: '喝茶', + name: '咖啡', defaultVideo: 'hc-bd-3.mp4', interactionVideo: 'hc-sh-3(1).mp4', - tag: 'tea', - apiKey: 'bot-20250804180724-4dgtk' // 喝茶场景的API key + tag: 'coffee', + apiKey: 'bot-20250804180724-4dgtk', // 喝茶场景的API key + openingLines: [ + '拿铁拉花是你上次画的爱心形状,甜度刚好', + '摩卡有点苦,要是加上你的笑容就甜了', + '咖啡师问我一个人?我说在等我的甜度', + '今天的冰拿铁好甜,是不是你偷偷往我杯子里撒糖了?', + '拉花师给我画了颗心形的奶泡,说是给视频里的小仙女加糖', + '这杯好苦…但一看到你,就自动回甘了。比加十包糖都管用。你说你是不是我的专属甜味剂?' + ] }, { name: '睡觉', defaultVideo: '8-8-sj-bd.mp4', interactionVideo: '8-8-sj-sh-1.mp4', tag: 'sleep', - apiKey: 'bot-20250808120704-lbxwj' // 睡觉场景的API key + apiKey: 'bot-20250808120704-lbxwj', // 睡觉场景的API key + openingLines: [ + '宝宝,一看到你,我这电量‘噌’就满了。准备关机前最后充会儿电…嗯,用眼睛充。', + '熄灯前最后一道光是你,真好。感觉今天积攒的烦心事,都被你眼睛里的星星照没了。', + '唉…手指头碰不到你屏幕都嫌凉。下次见面,这距离得用抱抱补回来,利息按秒算。', + '周围好安静,就剩你的呼吸声当背景音乐了。比什么助眠App都好使…就是听久了,心跳会抢拍子。', + '困不困?我眼皮在打架了…但就是想再多看你几秒。感觉多看一秒,梦里遇见你的概率就大一点。', + '好啦,我的小月亮,该哄世界睡觉了…但你先哄哄我?随便说句什么,我当睡前故事收藏。', + '捕捉到一只睡前小可爱…成功!', + '世界要静音了…但你的声音是白名单。多说几句?' + ] } ]; @@ -273,6 +310,37 @@ app.get('/api/default-video', (req, res) => { }); }); +// 在现有的API接口后添加 +app.get('/api/current-scene/opening-line', (req, res) => { + try { + const currentScene = getCurrentScene(); + if (currentScene && currentScene.openingLines && currentScene.openingLines.length > 0) { + // 随机选择一个开场白 + const randomIndex = Math.floor(Math.random() * currentScene.openingLines.length); + const selectedOpeningLine = currentScene.openingLines[randomIndex]; + + res.json({ + success: true, + openingLine: selectedOpeningLine, + sceneName: currentScene.name, + sceneTag: currentScene.tag + }); + } else { + res.json({ + success: false, + message: '当前场景没有配置开场白' + }); + } + } catch (error) { + console.error('获取开场白失败:', error); + res.status(500).json({ + success: false, + message: '获取开场白失败', + error: error.message + }); + } +}); + // Socket.IO 连接处理 io.on('connection', (socket) => { console.log('用户连接:', socket.id); diff --git a/src/chat_with_audio.js b/src/chat_with_audio.js index a8b2645..87577b2 100644 --- a/src/chat_with_audio.js +++ b/src/chat_with_audio.js @@ -61,16 +61,15 @@ function updateHistoryMessage(userInput, assistantResponse) { ); // 可选:限制历史消息数量,保持最近的对话 - // const maxMessages = 20; // 保留最近10轮对话(20条消息) - // if (historyMessage.length > maxMessages) { - // // 保留系统消息和最近的对话 - // const systemMessages = historyMessage.filter(msg => msg.role === 'system'); - // const recentMessages = historyMessage.slice(-maxMessages + systemMessages.length); - // historyMessage = [...systemMessages, ...recentMessages.filter(msg => msg.role !== 'system')]; - // } + const maxMessages = 20; // 保留最近10轮对话(20条消息) + if (historyMessage.length > maxMessages) { + // 保留系统消息和最近的对话 + const systemMessages = historyMessage.filter(msg => msg.role === 'system'); + const recentMessages = historyMessage.slice(-maxMessages + systemMessages.length); + historyMessage = [...systemMessages, ...recentMessages.filter(msg => msg.role !== 'system')]; + } } -// 保存消息到服务端 // 保存消息到服务端 async function saveMessage(userInput, assistantResponse) { try { @@ -198,7 +197,7 @@ async function chatWithAudioStream(userInput) { } // 导出初始化函数,供外部调用 -export { chatWithAudioStream, initializeHistoryMessage, getCurrentHistoryMessage }; +export { chatWithAudioStream, initializeHistoryMessage, getCurrentHistoryMessage, saveMessage }; // 处理音频播放队列 async function processAudioQueue() { diff --git a/src/config.example.js b/src/config.example.js deleted file mode 100644 index dca8101..0000000 --- a/src/config.example.js +++ /dev/null @@ -1,94 +0,0 @@ -// 示例配置文件 - 请复制此文件为 config.js 并填入实际的API密钥 -export const config = { - // LLM API配置 - llm: { - apiKey: 'your_ark_api_key_here', // 请替换为实际的ARK API密钥 - model: 'bot-20250720193048-84fkp', - }, - - // Minimaxi API配置 - minimaxi: { - apiKey: 'your_minimaxi_api_key_here', // 请替换为实际的Minimaxi API密钥 - groupId: 'your_minimaxi_group_id_here', // 请替换为实际的Minimaxi Group ID - }, - - // 音频配置 - audio: { - model: 'speech-02-hd', - voiceSetting: { - voice_id: 'yantu-qinggang', - speed: 1, - vol: 1, - pitch: 0, - emotion: 'happy', - }, - audioSetting: { - sample_rate: 32000, - bitrate: 128000, - format: 'mp3', - }, - }, - - // 系统配置 - system: { - language_boost: 'auto', - output_format: 'hex', - stream: true, - }, -}; - -// 验证配置是否完整 -export function validateConfig() { - const requiredFields = [ - 'llm.apiKey', - 'llm.model', - 'minimaxi.apiKey', - 'minimaxi.groupId' - ]; - - const missingFields = []; - - for (const field of requiredFields) { - const keys = field.split('.'); - let value = config; - for (const key of keys) { - value = value[key]; - if (!value) break; - } - - if (!value || value === 'your_ark_api_key_here' || value === 'your_minimaxi_api_key_here' || value === 'your_minimaxi_group_id_here') { - missingFields.push(field); - } - } - - if (missingFields.length > 0) { - console.warn('配置不完整,请检查以下字段:', missingFields); - return false; - } - - return true; -} - -// 获取配置的便捷方法 -export function getLLMConfig() { - return { - apiKey: config.llm.apiKey, - model: config.llm.model, - }; -} - -export function getMinimaxiConfig() { - return { - apiKey: config.minimaxi.apiKey, - groupId: config.minimaxi.groupId, - }; -} - -export function getAudioConfig() { - return { - model: config.audio.model, - voiceSetting: config.audio.voiceSetting, - audioSetting: config.audio.audioSetting, - ...config.system, - }; -} \ No newline at end of file diff --git a/src/debug_audio.js b/src/debug_audio.js deleted file mode 100644 index 7a5b669..0000000 --- a/src/debug_audio.js +++ /dev/null @@ -1,26 +0,0 @@ -// 调试音频数据 -function debugAudioData(audioHex) { - console.log('=== 音频数据调试 ==='); - console.log('音频数据长度:', audioHex.length); - console.log('音频数据前100个字符:', audioHex.substring(0, 100)); - console.log('音频数据后100个字符:', audioHex.substring(audioHex.length - 100)); - - // 检查是否有重复模式 - const halfLength = Math.floor(audioHex.length / 2); - const firstHalf = audioHex.substring(0, halfLength); - const secondHalf = audioHex.substring(halfLength); - - if (firstHalf === secondHalf) { - console.log('⚠️ 警告:音频数据可能是重复的!'); - } else { - console.log('✅ 音频数据没有重复'); - } -} - -// 如果在浏览器环境中运行 -if (typeof window !== 'undefined') { - window.debugAudioData = debugAudioData; - console.log('音频调试函数已挂载到 window.debugAudioData'); -} - -export { debugAudioData }; \ No newline at end of file diff --git a/src/index.js b/src/index.js index c4f74df..3a7626c 100644 --- a/src/index.js +++ b/src/index.js @@ -74,6 +74,10 @@ class WebRTCChat { this.preloadVideoResources(); this.bindEvents(); + // 添加开场白相关属性 + this.openingAudioData = null; + this.isOpeningAudioReady = false; + // 在初始化完成后预加载常用视频 // setTimeout(() => { // this.logMessage('开始预加载常用视频...', 'info'); @@ -233,6 +237,108 @@ class WebRTCChat { console.error('历史消息初始化失败:', error); } } + + // 新增方法:初始化开场白音频 + async initializeOpeningAudio() { + try { + console.log('开始初始化开场白音频...'); + + // 获取当前场景的开场白 + const response = await fetch('/api/current-scene/opening-line'); + const data = await response.json(); + + if (data.success && data.openingLine) { + console.log(`获取到开场白: ${data.openingLine}`); + + // 生成开场白音频 + await this.generateOpeningAudio(data.openingLine); + this.logMessage(`开场白音频已准备就绪: ${data.openingLine}`, 'success'); + } else { + console.warn('未获取到开场白:', data.message); + } + } catch (error) { + console.error('初始化开场白音频失败:', error); + this.logMessage(`开场白音频初始化失败: ${error.message}`, 'error'); + } + } + + // 新增方法:生成开场白音频 + async generateOpeningAudio(text) { + try { + // 动态导入 minimaxi_stream 模块 + const { requestMinimaxi } = await import('./minimaxi_stream.js'); + const { getMinimaxiConfig, getAudioConfig, getLLMConfigByScene } = await import('./config.js'); + const { saveMessage } = await import('./chat_with_audio.js'); + + const minimaxiConfig = getMinimaxiConfig(); + const audioConfig = getAudioConfig(); + const llmConfig = await getLLMConfigByScene(); + + const requestBody = { + model: audioConfig.model, + text: text, + voice_setting: audioConfig.voiceSetting, + audio_setting: audioConfig.audioSetting, + language_boost: 'auto', + output_format: 'hex' + }; + + console.log('开始生成开场白音频...'); + + // 生成音频数据 + const audioHexData = await requestMinimaxi({ + apiKey: minimaxiConfig.apiKey, + groupId: minimaxiConfig.groupId, + body: requestBody, + stream: false, // 非流式,一次性获取完整音频 + textPlay: false + }); + + if (audioHexData && audioHexData.data && audioHexData.data.audio) { + this.openingAudioData = audioHexData.data.audio; + this.isOpeningAudioReady = true; + console.log('开场白音频生成成功'); + } + + await saveMessage(`场景切换-${llmConfig.sceneName}`,text); + + } catch (error) { + console.error('生成开场白音频失败:', error); + throw error; + } + } + + // 新增方法:播放开场白音频 + async playOpeningAudio() { + if (!this.isOpeningAudioReady || !this.openingAudioData) { + console.warn('开场白音频未准备就绪'); + return; + } + + try { + // 动态导入 addAudioToQueue 函数 + const { addAudioToQueue } = await import('./minimaxi_stream.js'); + + console.log('将开场白音频添加到队列'); + await addAudioToQueue(this.openingAudioData); + + this.logMessage('开场白音频已开始播放', 'success'); + } catch (error) { + console.error('播放开场白音频失败:', error); + this.logMessage(`播放开场白音频失败: ${error.message}`, 'error'); + } + } + + // 新增方法:获取开场白音频时长 + getOpeningAudioDuration() { + // 估算开场白音频时长,可以根据实际情况调整 + // 这里假设平均每个字符对应100ms的音频时长 + if (this.openingAudioData) { + // 简单估算:假设开场白大约3-5秒 + return 4000; // 4秒 + } + return 3000; // 默认3秒 + } async loadVideoMapping() { try { @@ -432,6 +538,9 @@ class WebRTCChat { // 预创建重要视频流 async precreateImportantVideos() { + // 在初始化完成后生成开场白音频 + await this.initializeOpeningAudio(); + if (this.isInitialized) return; console.log('开始预创建重要流...', 'info'); @@ -1164,35 +1273,54 @@ class WebRTCChat { console.log('麦克风权限获取成功'); await this.createPeerConnection(); - await this.startVoiceRecording(); this.startButton.disabled = true; - this.startButton.style.opacity = '0.5' - this.stopButton.disabled = false; + this.startButton.style.opacity = '0.5' + this.stopButton.disabled = false; - // 隐藏头像,显示视频 - if (this.videoContainer) { + // 隐藏头像,显示视频 + if (this.videoContainer) { - this.videoContainer.classList.add('calling'); - } + this.videoContainer.classList.add('calling'); + } + + // 显示结束通话按钮 + this.stopButton.style.display = 'block'; + + + + this.updateAudioStatus('已连接', 'connected'); + this.logMessage('音频通话已开始', 'success'); + + // 确保视频映射已加载 + if (Object.keys(this.videoMapping).length === 0) { + await this.loadVideoMapping(); + } + + this.logMessage(`视频映射已加载: ${Object.keys(this.videoMapping).length} 个映射`, 'info'); + + // 通知服务器通话开始 + this.socket.emit('call-started'); + + // 播放开场白,然后启动语音录制 + if (this.isOpeningAudioReady) { + console.log('播放开场白音频...'); + await this.playOpeningAudio(); - // 显示结束通话按钮 - this.stopButton.style.display = 'block'; - - - - this.updateAudioStatus('已连接', 'connected'); - this.logMessage('音频通话已开始', 'success'); - - // 确保视频映射已加载 - if (Object.keys(this.videoMapping).length === 0) { - await this.loadVideoMapping(); - } - - this.logMessage(`视频映射已加载: ${Object.keys(this.videoMapping).length} 个映射`, 'info'); - - // 通知服务器通话开始 - this.socket.emit('call-started'); + // 等待开场白播放完成后再启动语音录制 + setTimeout(async () => { + console.log('开场白播放完成,启动语音录制...'); + await this.startVoiceRecording(); + this.logMessage('语音录制已启动,可以开始对话', 'success'); + }, this.getOpeningAudioDuration() + 1000); // 开场白时长 + 1秒缓冲 + } else { + console.warn('开场白音频尚未准备就绪,延迟启动语音录制'); + // 如果没有开场白,延迟500ms后启动录制 + setTimeout(async () => { + await this.startVoiceRecording(); + this.logMessage('语音录制已启动,可以开始对话', 'success'); + }, 500); + } // 开始播放当前场景的默认视频 // await this.precreateImportantVideos(); diff --git a/src/minimaxi_stream.js b/src/minimaxi_stream.js index 3434af1..1f7cb09 100644 --- a/src/minimaxi_stream.js +++ b/src/minimaxi_stream.js @@ -431,4 +431,4 @@ function generateUUID() { }); } -export { requestMinimaxi, requestVolcanTTS }; \ No newline at end of file +export { requestMinimaxi, requestVolcanTTS, addAudioToQueue }; \ No newline at end of file diff --git a/src/new_app.js b/src/new_app.js deleted file mode 100644 index dcb730b..0000000 --- a/src/new_app.js +++ /dev/null @@ -1,346 +0,0 @@ -let ASRTEXT = '' - -class HttpASRRecognizer { - constructor() { - this.mediaRecorder = null; - this.audioContext = null; - this.isRecording = false; - this.audioChunks = []; - - // VAD相关属性 - this.isSpeaking = false; - this.silenceThreshold = 0.01; - this.silenceTimeout = 1000; - this.minSpeechDuration = 300; - this.silenceTimer = null; - this.speechStartTime = null; - this.audioBuffer = []; - - // API配置 - this.apiConfig = { - url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash', - headers: { - 'X-Api-App-Key': '1988591469', - 'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r', - 'X-Api-Resource-Id': 'volc.bigasr.auc_turbo', - 'X-Api-Request-Id': this.generateUUID(), - 'X-Api-Sequence': '-1', - 'Content-Type': 'application/json' - } - }; - - this.recordBtn = document.getElementById('startVoiceButton'); - this.statusDiv = document.getElementById('status'); - this.resultsDiv = document.getElementById('results'); - - this.initEventListeners(); - } - - initEventListeners() { - this.recordBtn.addEventListener('click', () => { - if (this.isRecording) { - this.stopRecording(); - } else { - this.startRecording(); - } - }); - } - - // 生成UUID - generateUUID() { - return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) { - const r = Math.random() * 16 | 0; - const v = c == 'x' ? r : (r & 0x3 | 0x8); - return v.toString(16); - }); - } - - // 计算音频能量(音量) - calculateAudioLevel(audioData) { - let sum = 0; - for (let i = 0; i < audioData.length; i++) { - sum += audioData[i] * audioData[i]; - } - return Math.sqrt(sum / audioData.length); - } - - // 语音活动检测 - detectVoiceActivity(audioData) { - const audioLevel = this.calculateAudioLevel(audioData); - const currentTime = Date.now(); - - if (audioLevel > this.silenceThreshold) { - if (!this.isSpeaking) { - this.isSpeaking = true; - this.speechStartTime = currentTime; - this.audioBuffer = []; - this.updateStatus('检测到语音,开始录音...', 'speaking'); - console.log('开始说话'); - } - - if (this.silenceTimer) { - clearTimeout(this.silenceTimer); - this.silenceTimer = null; - } - - return true; - } else { - if (this.isSpeaking && !this.silenceTimer) { - this.silenceTimer = setTimeout(() => { - this.onSpeechEnd(); - }, this.silenceTimeout); - } - - return this.isSpeaking; - } - } - - // 语音结束处理 - async onSpeechEnd() { - if (this.isSpeaking) { - const speechDuration = Date.now() - this.speechStartTime; - - if (speechDuration >= this.minSpeechDuration) { - console.log(`语音结束,时长: ${speechDuration}ms`); - await this.processAudioBuffer(); - // this.updateStatus('语音识别中...', 'processing'); - console.log('语音识别中') - } else { - console.log('说话时长太短,忽略'); - // this.updateStatus('等待语音输入...', 'ready'); - console.log('等待语音输入...') - - } - - this.isSpeaking = false; - this.speechStartTime = null; - this.audioBuffer = []; - } - - if (this.silenceTimer) { - clearTimeout(this.silenceTimer); - this.silenceTimer = null; - } - } - - // 处理音频缓冲区并发送到API - async processAudioBuffer() { - if (this.audioBuffer.length === 0) { - return; - } - - try { - // 合并所有音频数据 - const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0); - const combinedBuffer = new Float32Array(totalLength); - let offset = 0; - - for (const buffer of this.audioBuffer) { - combinedBuffer.set(buffer, offset); - offset += buffer.length; - } - - // 转换为WAV格式并编码为base64 - const wavBuffer = this.encodeWAV(combinedBuffer, 16000); - const base64Audio = this.arrayBufferToBase64(wavBuffer); - - // 调用ASR API - await this.callASRAPI(base64Audio); - - } catch (error) { - console.error('处理音频数据失败:', error); - this.updateStatus('识别失败', 'error'); - } - } - - // 调用ASR API - async callASRAPI(base64AudioData) { - try { - const requestBody = { - user: { - uid: "1988591469" - }, - audio: { - data: base64AudioData - }, - request: { - model_name: "bigmodel" - } - }; - - const response = await fetch(this.apiConfig.url, { - method: 'POST', - headers: this.apiConfig.headers, - body: JSON.stringify(requestBody) - }); - - if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`); - } - - const result = await response.json(); - this.handleASRResponse(result); - - } catch (error) { - console.error('ASR API调用失败:', error); - this.updateStatus('API调用失败', 'error'); - } - } - - // 处理ASR响应 - handleASRResponse(response) { - console.log('ASR响应:', response); - - if (response && response.data && response.data.result) { - ASRTEXT = response.data.result; - // this.displayResult(text); - // this.updateStatus('识别完成', 'completed'); - console.log('识别完成') - } else { - console.log('未识别到文字'); - // this.updateStatus('未识别到文字', 'ready'); - - } - } - - // 显示识别结果 - displayResult(text) { - const resultElement = document.createElement('div'); - resultElement.className = 'result-item'; - resultElement.innerHTML = ` - ${new Date().toLocaleTimeString()} - ${text} - `; - this.resultsDiv.appendChild(resultElement); - this.resultsDiv.scrollTop = this.resultsDiv.scrollHeight; - } - - // 更新状态显示 - updateStatus(message, status) { - this.statusDiv.textContent = message; - this.statusDiv.className = `status ${status}`; - } - - // 编码WAV格式 - encodeWAV(samples, sampleRate) { - const length = samples.length; - const buffer = new ArrayBuffer(44 + length * 2); - const view = new DataView(buffer); - - // WAV文件头 - const writeString = (offset, string) => { - for (let i = 0; i < string.length; i++) { - view.setUint8(offset + i, string.charCodeAt(i)); - } - }; - - writeString(0, 'RIFF'); - view.setUint32(4, 36 + length * 2, true); - writeString(8, 'WAVE'); - writeString(12, 'fmt '); - view.setUint32(16, 16, true); - view.setUint16(20, 1, true); - view.setUint16(22, 1, true); - view.setUint32(24, sampleRate, true); - view.setUint32(28, sampleRate * 2, true); - view.setUint16(32, 2, true); - view.setUint16(34, 16, true); - writeString(36, 'data'); - view.setUint32(40, length * 2, true); - - // 写入音频数据 - let offset = 44; - for (let i = 0; i < length; i++) { - const sample = Math.max(-1, Math.min(1, samples[i])); - view.setInt16(offset, sample * 0x7FFF, true); - offset += 2; - } - - return buffer; - } - - // ArrayBuffer转Base64 - arrayBufferToBase64(buffer) { - let binary = ''; - const bytes = new Uint8Array(buffer); - for (let i = 0; i < bytes.byteLength; i++) { - binary += String.fromCharCode(bytes[i]); - } - return btoa(binary); - } - - async startRecording() { - try { - const stream = await navigator.mediaDevices.getUserMedia({ - audio: { - sampleRate: 16000, - channelCount: 1, - echoCancellation: true, - noiseSuppression: true - } - }); - - this.audioContext = new (window.AudioContext || window.webkitAudioContext)({ - sampleRate: 16000 - }); - - const source = this.audioContext.createMediaStreamSource(stream); - const processor = this.audioContext.createScriptProcessor(4096, 1, 1); - - processor.onaudioprocess = (event) => { - const inputBuffer = event.inputBuffer; - const inputData = inputBuffer.getChannelData(0); - - // 语音活动检测 - if (this.detectVoiceActivity(inputData)) { - // 如果检测到语音活动,缓存音频数据 - this.audioBuffer.push(new Float32Array(inputData)); - } - }; - - source.connect(processor); - processor.connect(this.audioContext.destination); - - this.isRecording = true; - this.recordBtn.textContent = '停止录音'; - this.recordBtn.className = 'btn recording'; - // this.updateStatus('等待语音输入...', 'ready'); - - } catch (error) { - console.error('启动录音失败:', error); - // this.updateStatus('录音启动失败', 'error'); - } - } - - stopRecording() { - if (this.audioContext) { - this.audioContext.close(); - this.audioContext = null; - } - - if (this.silenceTimer) { - clearTimeout(this.silenceTimer); - this.silenceTimer = null; - } - - // 如果正在说话,处理最后的音频 - if (this.isSpeaking) { - this.onSpeechEnd(); - } - - this.isRecording = false; - this.isSpeaking = false; - this.audioBuffer = []; - - this.recordBtn.textContent = '开始录音'; - this.recordBtn.className = 'btn'; - console.log('录音已停止'); - // this.updateStatus('录音已停止', 'stopped'); - } -} - -// 初始化应用 -document.addEventListener('DOMContentLoaded', () => { - const asrRecognizer = new HttpASRRecognizer(); - console.log('HTTP ASR识别器已初始化'); -}); \ No newline at end of file diff --git a/src/video_audio_sync.js b/src/video_audio_sync.js deleted file mode 100644 index 4e368fc..0000000 --- a/src/video_audio_sync.js +++ /dev/null @@ -1,44 +0,0 @@ -import { requestMinimaxi } from './minimaxi_stream.js'; -import { getMinimaxiConfig } from './config.js'; - -export async function playVideoWithAudio(videoPath, text) { - // 1. 初始化视频播放 - const video = document.createElement('video'); - video.src = videoPath; - document.body.appendChild(video); - - // 2. 启动音频合成流 - const minimaxiConfig = getMinimaxiConfig(); - const audioStream = await requestMinimaxi({ - apiKey: minimaxiConfig.apiKey, - groupId: minimaxiConfig.groupId, - body: { - model: 'speech-01-turbo', - text, - output_format: 'hex', // 流式场景必须使用hex - voice_setting: { - voice_id: 'tianbing_xinggan_03', - speed: 1 - } - }, - stream: true - }); - - // 3. 将音频hex转换为可播放格式 - const audioCtx = new AudioContext(); - const audioBuffer = await audioCtx.decodeAudioData( - hexToArrayBuffer(audioStream.data.audio) - ); - - // 4. 同步播放 - const source = audioCtx.createBufferSource(); - source.buffer = audioBuffer; - source.connect(audioCtx.destination); - - video.play(); - source.start(0); -} - -function hexToArrayBuffer(hex) { - // ... hex转ArrayBuffer实现 -} \ No newline at end of file diff --git a/src/video_queue_test.js b/src/video_queue_test.js deleted file mode 100644 index f23bcfd..0000000 --- a/src/video_queue_test.js +++ /dev/null @@ -1,89 +0,0 @@ -// 视频播放队列系统测试 -// 这个文件用于测试新的视频播放逻辑 - -export class VideoQueueTester { - constructor(webrtcApp) { - this.webrtcApp = webrtcApp; - } - - // 测试视频队列功能 - async testVideoQueue() { - console.log('开始测试视频播放队列系统...'); - - // 测试1: 添加视频到队列 - await this.testAddToQueue(); - - // 测试2: 测试视频播放完成等待 - await this.testWaitForVideoFinish(); - - // 测试3: 测试音频视频同步 - await this.testAudioVideoSync(); - - console.log('视频播放队列系统测试完成'); - } - - // 测试添加视频到队列 - async testAddToQueue() { - console.log('测试1: 添加视频到队列'); - - // 清空队列 - this.webrtcApp.videoQueue = []; - - // 添加测试视频 - await this.webrtcApp.addToVideoQueue('5.mp4', 'test', '测试视频1'); - await this.webrtcApp.addToVideoQueue('s-1.mp4', 'test', '测试视频2'); - - console.log(`队列长度: ${this.webrtcApp.videoQueue.length}`); - console.log('队列内容:', this.webrtcApp.videoQueue); - } - - // 测试等待视频播放完成 - async testWaitForVideoFinish() { - console.log('测试2: 等待视频播放完成'); - - // 模拟视频播放状态 - this.webrtcApp.isVideoPlaying = true; - - // 模拟视频播放完成 - setTimeout(() => { - this.webrtcApp.isVideoPlaying = false; - console.log('模拟视频播放完成'); - }, 2000); - - console.log('等待视频播放完成...'); - await this.webrtcApp.waitForCurrentVideoToFinish(); - console.log('视频播放完成等待测试通过'); - } - - // 测试音频视频同步 - async testAudioVideoSync() { - console.log('测试3: 音频视频同步'); - - // 模拟音频播放开始 - window.isPlaying = true; - - // 添加视频到队列 - await this.webrtcApp.addToVideoQueue('5.mp4', 'audio', '音频同步测试'); - - // 模拟音频播放结束 - setTimeout(() => { - window.isPlaying = false; - console.log('模拟音频播放结束'); - }, 3000); - - console.log('音频视频同步测试完成'); - } - - // 运行所有测试 - async runAllTests() { - try { - await this.testVideoQueue(); - console.log('所有测试通过!'); - } catch (error) { - console.error('测试失败:', error); - } - } -} - -// 导出测试类 -export default VideoQueueTester; \ No newline at end of file From 31877b872930ca2c5b94bf55b1218dfd3bff493d Mon Sep 17 00:00:00 2001 From: Song367 <601337784@qq.com> Date: Wed, 13 Aug 2025 13:26:40 +0800 Subject: [PATCH 4/7] =?UTF-8?q?=E5=88=87=E6=8D=A2=E7=94=9F=E6=88=90?= =?UTF-8?q?=E5=BC=80=E5=9C=BA=E7=99=BD=E8=A7=A6=E5=8F=91=E6=97=B6=E6=9C=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/index.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/index.js b/src/index.js index 3a7626c..b0f4fae 100644 --- a/src/index.js +++ b/src/index.js @@ -538,8 +538,6 @@ class WebRTCChat { // 预创建重要视频流 async precreateImportantVideos() { - // 在初始化完成后生成开场白音频 - await this.initializeOpeningAudio(); if (this.isInitialized) return; @@ -1260,6 +1258,9 @@ class WebRTCChat { this.showConnectionWaiting(); // 切换到通话中图标 this.switchToCallingIcon(); + + // 在初始化完成后生成开场白音频 + await this.initializeOpeningAudio(); // 现在才开始显示视频 await this.startDefaultVideoStream(); From 31c6d29bd8b29d162b3f4f658911ed36215cacc4 Mon Sep 17 00:00:00 2001 From: Song367 <601337784@qq.com> Date: Wed, 13 Aug 2025 13:35:17 +0800 Subject: [PATCH 5/7] =?UTF-8?q?=E5=BB=B6=E9=95=BF=E8=B6=85=E6=97=B6?= =?UTF-8?q?=E6=97=B6=E9=97=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.js b/server.js index f29908c..93f2b40 100644 --- a/server.js +++ b/server.js @@ -9,7 +9,7 @@ const { MessageHistory } = require('./src/message_history.js'); const app = express(); const server = http.createServer(app); const io = socketIo(server, { - pingTimeout: 60000, // 60秒超时 + pingTimeout: 300000, // 60秒超时 pingInterval: 25000, // 25秒心跳间隔 upgradeTimeout: 30000, // 30秒升级超时 allowEIO3: true, // 允许Engine.IO v3客户端 From 6ba96bc177f642a3250d44aee1d86a6c64f9b2a5 Mon Sep 17 00:00:00 2001 From: Song367 <601337784@qq.com> Date: Wed, 13 Aug 2025 13:51:14 +0800 Subject: [PATCH 6/7] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=E5=8E=86=E5=8F=B2=E5=AF=B9=E8=AF=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server.js | 18 +++++++++--------- src/chat_with_audio.js | 2 +- src/index.js | 5 ++++- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/server.js b/server.js index 93f2b40..fa7b3f8 100644 --- a/server.js +++ b/server.js @@ -137,12 +137,12 @@ const scenes = [ tag: 'wakeup', apiKey: 'bot-20250724150616-xqpz8', // 起床场景的API key openingLines: [ - '你靠得近点...像梦里那样', + // '你靠得近点...像梦里那样', "刚醒就看到你,比闹钟好使一百倍。", - "昨晚梦里喊你名字喊醒了,现在视频看到你,心跳比梦里还烫。", - "你穿我T恤的样子...让我想咬住衣领不放。", + // "昨晚梦里喊你名字喊醒了,现在视频看到你,心跳比梦里还烫。", + // "你穿我T恤的样子...让我想咬住衣领不放。", "宝贝早...你声音比阳光还暖。", - "喉咙里有咖啡味...要不要来尝尝?记得上次你咬我锁骨时,咖啡还温着。", + // "喉咙里有咖啡味...要不要来尝尝?记得上次你咬我锁骨时,咖啡还温着。", "晨雾遮住你半张脸...但你的呼吸声比雾气更潮湿,让我想起你咬我耳垂的感觉。" ] }, @@ -153,12 +153,12 @@ const scenes = [ tag: 'driving', apiKey: 'bot-20250623140339-r8f8b', // 开车场景的API key openingLines: [ - '宝宝醒啦?看到你的脸,我这早起的怨气‘咻’一下就飞走了。车窗外的阳光都没你眼睛亮。', + '宝宝起了?看到你的脸,我这早起的怨气‘咻’一下就飞走了。车窗外的阳光都没你眼睛亮。', '刚过桥,下面河水在闪光,突然就好想带你来兜风…你负责吃零食看风景,我负责偷瞄你。', - '看到那个大钟楼没?每次路过都想你。想着要是能牵着你的手,站在最高层往下看该多好。', + // '看到那个大钟楼没?每次路过都想你。想着要是能牵着你的手,站在最高层往下看该多好。', '唉,快到了…真不想挂。感觉你像块小磁铁,隔着屏幕都想把我吸过去。今天上班肯定满脑子都是你。', '要开始堵车了...要是你在副驾的话,这段路该多甜呀', - '宝贝早啊,刚过红绿灯就忍不住想你了,路上全是你的影子~', + '宝贝起床了,刚过红绿灯就忍不住想你了,路上全是你的影子~', '车载音乐随机到那首我们接吻时哼的歌,心跳又乱了', '导航说今天要开58分钟,其实想开58小时去你心里' ] @@ -174,8 +174,8 @@ const scenes = [ '摩卡有点苦,要是加上你的笑容就甜了', '咖啡师问我一个人?我说在等我的甜度', '今天的冰拿铁好甜,是不是你偷偷往我杯子里撒糖了?', - '拉花师给我画了颗心形的奶泡,说是给视频里的小仙女加糖', - '这杯好苦…但一看到你,就自动回甘了。比加十包糖都管用。你说你是不是我的专属甜味剂?' + '拉花师给我在咖啡里画了颗心形的奶泡,说是给视频里的小仙女加糖', + // '这杯好苦…但一看到你,就自动回甘了。比加十包糖都管用。你说你是不是我的专属甜味剂?' ] }, { diff --git a/src/chat_with_audio.js b/src/chat_with_audio.js index 87577b2..c15d002 100644 --- a/src/chat_with_audio.js +++ b/src/chat_with_audio.js @@ -197,7 +197,7 @@ async function chatWithAudioStream(userInput) { } // 导出初始化函数,供外部调用 -export { chatWithAudioStream, initializeHistoryMessage, getCurrentHistoryMessage, saveMessage }; +export { chatWithAudioStream, initializeHistoryMessage, getCurrentHistoryMessage, saveMessage, updateHistoryMessage }; // 处理音频播放队列 async function processAudioQueue() { diff --git a/src/index.js b/src/index.js index b0f4fae..0b01863 100644 --- a/src/index.js +++ b/src/index.js @@ -1,7 +1,8 @@ console.log('视频文件:'); // WebRTC 音视频通话应用 // import { chatWithAudioStream } from './chat_with_audio.js'; -import { chatWithAudioStream, initializeHistoryMessage } from './chat_with_audio.js'; +import { chatWithAudioStream, initializeHistoryMessage, updateHistoryMessage } from './chat_with_audio.js'; + import { AudioProcessor } from './audio_processor.js'; // 在应用初始化时调用 @@ -299,6 +300,8 @@ class WebRTCChat { this.isOpeningAudioReady = true; console.log('开场白音频生成成功'); } + // 先更新本地历史消息 + updateHistoryMessage(`场景切换-${llmConfig.sceneName}`, text); await saveMessage(`场景切换-${llmConfig.sceneName}`,text); From 0b02f01bec9ff41b06f183713de927aedd8ad8b6 Mon Sep 17 00:00:00 2001 From: Song367 <601337784@qq.com> Date: Wed, 13 Aug 2025 18:55:17 +0800 Subject: [PATCH 7/7] =?UTF-8?q?=E5=BC=80=E5=9C=BA=E7=99=BD=E8=AF=9D?= =?UTF-8?q?=E6=9C=AF=E6=9B=B4=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server.js b/server.js index fa7b3f8..d5260db 100644 --- a/server.js +++ b/server.js @@ -153,7 +153,7 @@ const scenes = [ tag: 'driving', apiKey: 'bot-20250623140339-r8f8b', // 开车场景的API key openingLines: [ - '宝宝起了?看到你的脸,我这早起的怨气‘咻’一下就飞走了。车窗外的阳光都没你眼睛亮。', + '宝贝起了?看到你的脸,我这早起的怨气‘咻’一下就飞走了。车窗外的阳光都没你眼睛亮。', '刚过桥,下面河水在闪光,突然就好想带你来兜风…你负责吃零食看风景,我负责偷瞄你。', // '看到那个大钟楼没?每次路过都想你。想着要是能牵着你的手,站在最高层往下看该多好。', '唉,快到了…真不想挂。感觉你像块小磁铁,隔着屏幕都想把我吸过去。今天上班肯定满脑子都是你。', @@ -185,7 +185,7 @@ const scenes = [ tag: 'sleep', apiKey: 'bot-20250808120704-lbxwj', // 睡觉场景的API key openingLines: [ - '宝宝,一看到你,我这电量‘噌’就满了。准备关机前最后充会儿电…嗯,用眼睛充。', + '宝贝,一看到你,我这电量‘噌’就满了。准备关机前最后充会儿电…嗯,用眼睛充。', '熄灯前最后一道光是你,真好。感觉今天积攒的烦心事,都被你眼睛里的星星照没了。', '唉…手指头碰不到你屏幕都嫌凉。下次见面,这距离得用抱抱补回来,利息按秒算。', '周围好安静,就剩你的呼吸声当背景音乐了。比什么助眠App都好使…就是听久了,心跳会抢拍子。',