local initial

2025-07-25 17:00:26 +08:00 · 2025-07-25 17:00:26 +08:00 · c95e6a2552
commit c95e6a2552
parent 6f087fe874
9 changed files with 382 additions and 148 deletions
--- a/src/chat_with_audio.js
+++ b/src/chat_with_audio.js
@ -2,62 +2,61 @@
 import { requestLLMStream } from './llm_stream.js';
 import { requestMinimaxi } from './minimaxi_stream.js';
 import { getLLMConfig, getMinimaxiConfig, getAudioConfig, validateConfig } from './config.js';
 // Flag that prevents overlapping playback: playAudioStream returns early while it is true
 let isPlaying = false;
 async function chatWithAudioStream(userInput) {
  // 验证配置
  if (!validateConfig()) {
    throw new Error('配置不完整，请检查config.js文件中的API密钥设置');
  }
 async function chatWithAudioStream({ userInput, llmApiKey, llmModel, minimaxiApiKey, minimaxiGroupId }) {
  console.log('用户输入:', userInput);
  // 获取配置
  const llmConfig = getLLMConfig();
  const minimaxiConfig = getMinimaxiConfig();
  const audioConfig = getAudioConfig();
  // 1. 请求大模型回答
  console.log('\n=== 请求大模型回答 ===');
  const llmResponse = await requestLLMStream({
-    apiKey: llmApiKey,
+    apiKey: llmConfig.apiKey,
-    model: llmModel,
+    model: llmConfig.model,
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: userInput },
    ],
  });
-  // 提取大模型回答内容（假设返回的是JSON格式，包含content字段）
+  // 提取大模型回答内容（现在直接返回内容）
-  let llmContent = '';
+  const llmContent = llmResponse;
  try {
    const llmData = JSON.parse(llmResponse);
    llmContent = llmData.choices?.[0]?.message?.content || llmResponse;
  } catch (e) {
    llmContent = llmResponse;
  }
  console.log('\n=== 大模型回答 ===');
-  console.log(llmContent);
+  console.log("llmResponse: ", llmContent);
  // 2. 合成音频
  console.log('\n=== 开始合成音频 ===');
  const audioResult = await requestMinimaxi({
-    apiKey: minimaxiApiKey,
+    apiKey: minimaxiConfig.apiKey,
-    groupId: minimaxiGroupId,
+    groupId: minimaxiConfig.groupId,
    body: {
-      model: 'speech-02-hd',
+      model: audioConfig.model,
      text: llmContent,
-      stream: true,
+      stream: audioConfig.stream,
-      language_boost: 'auto',
+      language_boost: audioConfig.language_boost,
-      output_format: 'hex',
+      output_format: audioConfig.output_format,
-      voice_setting: {
+      voice_setting: audioConfig.voiceSetting,
-        voice_id: 'male-qn-qingse',
+      audio_setting: audioConfig.audioSetting,
        speed: 1,
        vol: 1,
        pitch: 0,
        emotion: 'happy',
      },
      audio_setting: {
        sample_rate: 32000,
        bitrate: 128000,
        format: 'mp3',
      },
    },
    stream: true,
  });
  // 3. 流式播放音频
  console.log('\n=== 开始流式播放音频 ===');
  // console.log('音频数据长度:', audioResult.data.audio.length);
  await playAudioStream(audioResult.data.audio);
  return {
@ -69,6 +68,16 @@ async function chatWithAudioStream({ userInput, llmApiKey, llmModel, minimaxiApi
 // 流式播放音频
 async function playAudioStream(audioHex) {
  if (isPlaying) {
    console.log('音频正在播放中，跳过重复播放');
    return;
  }
  console.log('=== 开始播放音频 ===');
  console.log('音频数据长度:', audioHex.length);
  isPlaying = true;
  // 将hex转换为ArrayBuffer
  const audioBuffer = hexToArrayBuffer(audioHex);
@ -93,11 +102,13 @@ async function playAudioStream(audioHex) {
    return new Promise((resolve) => {
      source.onended = () => {
        console.log('音频播放完成');
        isPlaying = false;
        resolve();
      };
    });
  } catch (error) {
    console.error('音频播放失败:', error);
    isPlaying = false;
    throw error;
  }
 }
@ -113,6 +124,13 @@ function hexToArrayBuffer(hex) {
 // 在Node.js环境下的音频播放（使用play-sound库）
 async function playAudioStreamNode(audioHex) {
  // 检查是否在Node.js环境中
  if (typeof window !== 'undefined') {
    console.warn('playAudioStreamNode 只能在Node.js环境中使用');
    return;
  }
  try {
    const fs = require('fs');
    const path = require('path');
@ -123,7 +141,6 @@ async function playAudioStreamNode(audioHex) {
    const tempFile = path.join(process.cwd(), 'temp_audio.mp3');
    fs.writeFileSync(tempFile, audioBuffer);
  try {
    // 使用系统默认播放器播放
    const { exec } = require('child_process');
    const platform = process.platform;
@ -158,27 +175,4 @@ async function playAudioStreamNode(audioHex) {
  }
 }
 // Example usage: runs only when this file is the CommonJS entry point.
 // `require` and `module` do not exist in ES modules or in the browser, so they
 // are probed with `typeof` first; a bare `require.main` reference would throw a
 // ReferenceError while the module is being loaded.
 if (typeof require !== 'undefined' && typeof module !== 'undefined' && require.main === module) {
  const llmApiKey = process.env.ARK_API_KEY;
  const llmModel = 'bot-20250720193048-84fkp';
  const minimaxiApiKey = process.env.MINIMAXI_API_KEY;
  const minimaxiGroupId = process.env.MINIMAXI_GROUP_ID;
  // All three credentials are required; abort with a non-zero exit code otherwise
  if (!llmApiKey || !minimaxiApiKey || !minimaxiGroupId) {
    console.error('请设置环境变量: ARK_API_KEY, MINIMAXI_API_KEY, MINIMAXI_GROUP_ID');
    process.exit(1);
  }
  // The prompt comes from the first CLI argument, with a built-in default question
  const userInput = process.argv[2] || '你好，请介绍一下人工智能的发展历程';
  chatWithAudioStream({
    userInput,
    llmApiKey,
    llmModel,
    minimaxiApiKey,
    minimaxiGroupId,
  }).catch(console.error);
 }
 export { chatWithAudioStream, playAudioStream, playAudioStreamNode }; 
--- a/src/config.example.js
+++ b/src/config.example.js
@ -0,0 +1,94 @@
 // Example configuration file - copy this file to config.js and fill in your real API keys
 export const config = {
  // LLM API settings
  llm: {
    apiKey: 'your_ark_api_key_here', // replace with your real ARK API key
    model: 'bot-20250720193048-84fkp',
  },
  // Minimaxi API settings
  minimaxi: {
    apiKey: 'your_minimaxi_api_key_here', // replace with your real Minimaxi API key
    groupId: 'your_minimaxi_group_id_here', // replace with your real Minimaxi Group ID
  },
  // Audio settings
  audio: {
    model: 'speech-02-hd',
    voiceSetting: {
      voice_id: 'yantu-qinggang',
      speed: 1,
      vol: 1,
      pitch: 0,
      emotion: 'happy',
    },
    audioSetting: {
      sample_rate: 32000,
      bitrate: 128000,
      format: 'mp3',
    },
  },
  // System settings (getAudioConfig() spreads these keys into its result)
  system: {
    language_boost: 'auto',
    output_format: 'hex',
    stream: true,
  },
 };
 /**
  * Checks that every required credential in `config` is present.
  *
  * A field counts as missing when any segment of its dotted path resolves to a
  * falsy value, or when the final value is still one of the template
  * placeholders. Missing fields are reported through console.warn.
  *
  * @returns {boolean} true when all required fields are set, false otherwise
  */
 export function validateConfig() {
  const placeholders = [
    'your_ark_api_key_here',
    'your_minimaxi_api_key_here',
    'your_minimaxi_group_id_here',
  ];
  const requiredPaths = ['llm.apiKey', 'llm.model', 'minimaxi.apiKey', 'minimaxi.groupId'];

  // Walks a dotted path through `config`, stopping at the first falsy segment
  const lookup = (dottedPath) => {
    let current = config;
    for (const segment of dottedPath.split('.')) {
      current = current[segment];
      if (!current) {
        break;
      }
    }
    return current;
  };

  const missingFields = requiredPaths.filter((dottedPath) => {
    const resolved = lookup(dottedPath);
    return !resolved || placeholders.includes(resolved);
  });

  if (missingFields.length === 0) {
    return true;
  }
  console.warn('配置不完整，请检查以下字段:', missingFields);
  return false;
 }
 // Convenience accessors for individual configuration sections
 /**
  * Returns a new object holding the LLM credentials taken from `config.llm`.
  *
  * @returns {{apiKey: *, model: *}} the API key and model name
  */
 export function getLLMConfig() {
  const { apiKey, model } = config.llm;
  return { apiKey, model };
 }
 /**
  * Returns a new object holding the Minimaxi credentials taken from `config.minimaxi`.
  *
  * @returns {{apiKey: *, groupId: *}} the API key and group id
  */
 export function getMinimaxiConfig() {
  const { apiKey, groupId } = config.minimaxi;
  return { apiKey, groupId };
 }
 /**
  * Builds the speech-synthesis settings: the model, voice and audio settings from
  * `config.audio`, followed by every key of `config.system` copied onto the
  * top level of the result (so a `system` key of the same name wins).
  *
  * @returns {object} a new flat settings object
  */
 export function getAudioConfig() {
  const { model, voiceSetting, audioSetting } = config.audio;
  const { system } = config;
  return { model, voiceSetting, audioSetting, ...system };
 }
--- a/src/config.js
+++ b/src/config.js
@ -0,0 +1,94 @@
 // Configuration management.
 //
 // SECURITY: credentials must never be hard-coded in a tracked source file.
 // Anything committed here is readable by everyone with repository access and,
 // because this module is imported by browser code, by every page visitor.
 // Secrets are read from environment variables when a Node.js `process.env` is
 // available; otherwise the placeholder values below are used, which
 // validateConfig() reports as missing. Keys that were committed to this file
 // in the past should be treated as compromised and rotated.
 const runtimeEnv = typeof process !== 'undefined' && process.env ? process.env : {};
 export const config = {
  // LLM API settings
  llm: {
    apiKey: runtimeEnv.ARK_API_KEY ?? 'your_ark_api_key_here',
    model: 'bot-20250720193048-84fkp',
  },
  // Minimaxi API settings
  minimaxi: {
    apiKey: runtimeEnv.MINIMAXI_API_KEY ?? 'your_minimaxi_api_key_here',
    groupId: runtimeEnv.MINIMAXI_GROUP_ID ?? 'your_minimaxi_group_id_here',
  },
  // Audio settings
  audio: {
    model: 'speech-02-hd',
    voiceSetting: {
      voice_id: 'yantu-qinggang',
      speed: 1,
      vol: 1,
      pitch: 0,
      emotion: 'happy',
    },
    audioSetting: {
      sample_rate: 32000,
      bitrate: 128000,
      format: 'mp3',
    },
  },
  // System settings (getAudioConfig() spreads these keys into its result)
  system: {
    language_boost: 'auto',
    output_format: 'hex',
    stream: true,
  },
 };
 /**
  * Checks that every required credential in `config` is present.
  *
  * A field counts as missing when any segment of its dotted path resolves to a
  * falsy value, or when the final value is still one of the template
  * placeholders. Missing fields are reported through console.warn.
  *
  * @returns {boolean} true when all required fields are set, false otherwise
  */
 export function validateConfig() {
  const placeholders = [
    'your_ark_api_key_here',
    'your_minimaxi_api_key_here',
    'your_minimaxi_group_id_here',
  ];
  const requiredPaths = ['llm.apiKey', 'llm.model', 'minimaxi.apiKey', 'minimaxi.groupId'];

  // Walks a dotted path through `config`, stopping at the first falsy segment
  const lookup = (dottedPath) => {
    let current = config;
    for (const segment of dottedPath.split('.')) {
      current = current[segment];
      if (!current) {
        break;
      }
    }
    return current;
  };

  const missingFields = requiredPaths.filter((dottedPath) => {
    const resolved = lookup(dottedPath);
    return !resolved || placeholders.includes(resolved);
  });

  if (missingFields.length === 0) {
    return true;
  }
  console.warn('配置不完整，请检查以下字段:', missingFields);
  return false;
 }
 // Convenience accessors for individual configuration sections
 /**
  * Returns a new object holding the LLM credentials taken from `config.llm`.
  *
  * @returns {{apiKey: *, model: *}} the API key and model name
  */
 export function getLLMConfig() {
  const { apiKey, model } = config.llm;
  return { apiKey, model };
 }
 /**
  * Returns a new object holding the Minimaxi credentials taken from `config.minimaxi`.
  *
  * @returns {{apiKey: *, groupId: *}} the API key and group id
  */
 export function getMinimaxiConfig() {
  const { apiKey, groupId } = config.minimaxi;
  return { apiKey, groupId };
 }
 /**
  * Builds the speech-synthesis settings: the model, voice and audio settings from
  * `config.audio`, followed by every key of `config.system` copied onto the
  * top level of the result (so a `system` key of the same name wins).
  *
  * @returns {object} a new flat settings object
  */
 export function getAudioConfig() {
  const { model, voiceSetting, audioSetting } = config.audio;
  const { system } = config;
  return { model, voiceSetting, audioSetting, ...system };
 }
--- a/src/debug_audio.js
+++ b/src/debug_audio.js
@ -0,0 +1,26 @@
 /**
  * Logs diagnostics for a hex-encoded audio string: its length, its first and
  * last 100 characters, and whether its first half (rounded down) is identical
  * to the remainder, which would suggest the payload was appended twice.
  *
  * @param {string} audioHex - audio data as a hex string
  */
 function debugAudioData(audioHex) {
  console.log('=== 音频数据调试 ===');
  const totalLength = audioHex.length;
  console.log('音频数据长度:', totalLength);
  const head = audioHex.substring(0, 100);
  console.log('音频数据前100个字符:', head);
  // A negative start index is clamped to 0 by substring, so short inputs print in full
  const tail = audioHex.substring(totalLength - 100);
  console.log('音频数据后100个字符:', tail);

  // Compare the two halves to detect an exactly repeated payload
  const midpoint = Math.floor(totalLength / 2);
  const halvesMatch = audioHex.substring(0, midpoint) === audioHex.substring(midpoint);
  console.log(halvesMatch ? '⚠️ 警告：音频数据可能是重复的！' : '✅ 音频数据没有重复');
 }
 // When running in a browser (a global `window` exists), expose the helper as window.debugAudioData
 if (typeof window !== 'undefined') {
  window.debugAudioData = debugAudioData;
  console.log('音频调试函数已挂载到 window.debugAudioData');
 }
 export { debugAudioData }; 
--- a/src/index.html
+++ b/src/index.html
@ -77,6 +77,6 @@
    <video id="remoteVideo" autoplay playsinline style="display: none;"></video>
    <script src="/socket.io/socket.io.js"></script>
-    <script src="index.js"></script>
+    <script type="module" src="index.js"></script>
 </body>
 </html> 
--- a/src/index.js
+++ b/src/index.js
@ -1,4 +1,6 @@
 // WebRTC 音视频通话应用
 import { chatWithAudioStream } from './chat_with_audio.js';
 class WebRTCChat {
    constructor() {
        this.socket = null;
@ -582,15 +584,25 @@ class WebRTCChat {
        }
    }
-    sendText() {
+    async sendText() {
        const text = this.textInput.value.trim();
        if (text) {
            this.socket.emit('text-input', { text });
            this.logMessage(`发送文本: ${text}`, 'info');
            this.textInput.value = '';
            try {
                // 调用chat_with_audio进行大模型回答和音频合成
                this.logMessage('正在处理文本，请稍候...', 'info');
                const result = await chatWithAudioStream(text);
                this.logMessage(`大模型回答: ${result.llmResponse}`, 'success');
                // 根据文本查找对应视频并切换
-            this.handleTextInput(text);
+                await this.handleTextInput(text);
            } catch (error) {
                this.logMessage(`处理文本失败: ${error.message}`, 'error');
                console.error('chatWithAudioStream error:', error);
            }
        }
    }
--- a/src/llm_stream.js
+++ b/src/llm_stream.js
@ -6,6 +6,8 @@ async function requestLLMStream({ apiKey, model, messages }) {
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      'Cache-Control': 'no-cache',
    },
    body: JSON.stringify({
      model,
@ -23,6 +25,7 @@ async function requestLLMStream({ apiKey, model, messages }) {
  const decoder = new TextDecoder('utf-8');
  let done = false;
  let buffer = '';
  let content = '';
  while (!done) {
    const { value, done: doneReading } = await reader.read();
@ -30,30 +33,43 @@ async function requestLLMStream({ apiKey, model, messages }) {
    if (value) {
      const chunk = decoder.decode(value, { stream: true });
      buffer += chunk;
-      // 打印每次收到的内容
+      
-      process.stdout.write(chunk);
+      // 处理SSE格式的数据
      const lines = buffer.split('\n');
      buffer = lines.pop(); // 最后一行可能是不完整的，留到下次
      for (const line of lines) {
        if (!line.trim()) continue;
        // 检查是否是SSE格式的数据行
        if (line.startsWith('data:')) {
          const jsonStr = line.substring(5).trim(); // 移除 'data:' 前缀
          if (jsonStr === '[DONE]') {
            console.log('LLM SSE流结束');
            continue;
          }
          try {
            const obj = JSON.parse(jsonStr);
            if (obj.choices && obj.choices[0] && obj.choices[0].delta && obj.choices[0].delta.content) {
              const deltaContent = obj.choices[0].delta.content;
              content += deltaContent;
              console.log('LLM内容片段:', deltaContent);
            }
          } catch (e) {
            console.error('解析LLM SSE数据失败:', e, '原始数据:', jsonStr);
          }
        } else if (line.startsWith('event: ') || line.startsWith('id: ') || line.startsWith('retry: ')) {
          // 忽略SSE的其他字段
          continue;
        }
      }
    }
  }
-  // 可选：返回完整内容
+  // 返回完整内容
-  return buffer;
+  return content;
 }
 // Example usage: runs only when this file is the CommonJS entry point.
 // `require` and `module` do not exist in ES modules or in the browser, so they
 // are probed with `typeof` first; a bare `require.main` reference would throw a
 // ReferenceError while the module is being loaded.
 if (typeof require !== 'undefined' && typeof module !== 'undefined' && require.main === module) {
  const apiKey = process.env.ARK_API_KEY;
  // The API key is required; abort with a non-zero exit code otherwise
  if (!apiKey) {
    console.error('请设置环境变量 ARK_API_KEY');
    process.exit(1);
  }
  requestLLMStream({
    apiKey,
    model: 'bot-20250720193048-84fkp',
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: 'Hello!' },
    ],
  }).catch(console.error);
 }
 export { requestLLMStream }; 
--- a/src/minimaxi_stream.js
+++ b/src/minimaxi_stream.js
@ -1,13 +1,15 @@
 // 以流式或非流式方式请求 minimaxi 大模型接口，并打印/返回内容
 async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
-  const url = `https://api.minimaxi.com/v1/t2a_v2/${groupId}`;
+  const url = `https://api.minimaxi.com/v1/t2a_v2`;
  const reqBody = { ...body, stream };
  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      'Cache-Control': 'no-cache',
    },
    body: JSON.stringify(reqBody),
  });
@ -36,29 +38,66 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
      if (value) {
        const chunk = decoder.decode(value, { stream: true });
        buffer += chunk;
-        // 处理多条JSON（以\n分割）
+        // console.log('收到原始chunk:', chunk);
        // 处理SSE格式的数据（以\n分割）
        let lines = buffer.split('\n');
        buffer = lines.pop(); // 最后一行可能是不完整的，留到下次
        for (const line of lines) {
          if (!line.trim()) continue;
          // console.log('处理行:', line);
          // 检查是否是SSE格式的数据行
          if (line.startsWith('data:')) {
            const jsonStr = line.substring(6); // 移除 'data: ' 前缀
            // console.log('提取的JSON字符串:', jsonStr);
            if (jsonStr.trim() === '[DONE]') {
              console.log('SSE流结束');
              continue;
            }
            try {
-            const obj = JSON.parse(line);
+              const obj = JSON.parse(jsonStr);
              // 流式，解析每个chunk，合并audio
              if (obj.data && obj.data.audio) {
                audioHex += obj.data.audio;
              }
              // status=2为最后一个chunk，记录完整结构
              if (obj.data && obj.data.status === 2) {
                lastFullResult = obj;
                console.log('收到最终状态');
              }
              // 实时打印每个chunk
-            console.log('chunk:', JSON.stringify(obj));
+              console.log('解析成功:', JSON.stringify(obj));
            } catch (e) {
              console.error('解析SSE数据失败:', e, '原始数据:', jsonStr);
            }
          } else if (line.startsWith('event: ') || line.startsWith('id: ') || line.startsWith('retry: ')) {
            // 忽略SSE的其他字段
            console.log('忽略SSE字段:', line);
            continue;
          } else if (line.trim() && !line.startsWith('data:')) {
            // 尝试直接解析（兼容非SSE格式，但避免重复处理）
            console.log('尝试直接解析:', line);
            try {
              const obj = JSON.parse(line);
              if (obj.data && obj.data.audio) {
                audioHex += obj.data.audio;
              }
              if (obj.data && obj.data.status === 2) {
                lastFullResult = obj;
              }
              console.log('直接解析成功:', JSON.stringify(obj));
            } catch (e) {
              console.error('解析chunk失败:', e, line);
            }
          }
        }
      }
    }
    // 合成最终结构
    console.log('音频数据总长度:', audioHex.length);
    if (lastFullResult) {
      lastFullResult.data.audio = audioHex;
      console.log('最终合成结果:', JSON.stringify(lastFullResult, null, 2));
@ -70,47 +109,4 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
  }
 }
 // Example usage: runs only when this file is the CommonJS entry point.
 // `require` and `module` do not exist in ES modules or in the browser, so they
 // are probed with `typeof` first; a bare `require.main` reference would throw a
 // ReferenceError while the module is being loaded.
 if (typeof require !== 'undefined' && typeof module !== 'undefined' && require.main === module) {
  const apiKey = process.env.MINIMAXI_API_KEY;
  const groupId = process.env.MINIMAXI_GROUP_ID;
  // Both credentials are required; abort with a non-zero exit code otherwise
  if (!apiKey || !groupId) {
    console.error('请设置环境变量 MINIMAXI_API_KEY 和 MINIMAXI_GROUP_ID');
    process.exit(1);
  }
  // Request body shared by both demo calls; only the `stream` argument differs
  const baseBody = {
    model: 'speech-02-hd',
    text: '真正的危险不是计算机开始像人一样思考，而是人开始像计算机一样思考。计算机只是可以帮我们处理一些简单事务。',
    language_boost: 'auto',
    output_format: 'hex',
    voice_setting: {
      voice_id: 'male-qn-qingse',
      speed: 1,
      vol: 1,
      pitch: 0,
      emotion: 'happy',
    },
    audio_setting: {
      sample_rate: 32000,
      bitrate: 128000,
      format: 'mp3',
    },
  };
  // Non-streaming request first
  requestMinimaxi({
    apiKey,
    groupId,
    body: baseBody,
    stream: false,
  }).then(() => {
    // Then the streaming request, started only after the first one resolves
    return requestMinimaxi({
      apiKey,
      groupId,
      body: baseBody,
      stream: true,
    });
  }).catch(console.error);
 }
 export { requestMinimaxi };
--- a/src/video_audio_sync.js
+++ b/src/video_audio_sync.js
@ -1,4 +1,5 @@
 import { requestMinimaxi } from './minimaxi_stream.js';
 import { getMinimaxiConfig } from './config.js';
 export async function playVideoWithAudio(videoPath, text) {
  // 1. 初始化视频播放
@ -7,15 +8,16 @@ export async function playVideoWithAudio(videoPath, text) {
  document.body.appendChild(video);
  // 2. 启动音频合成流
  const minimaxiConfig = getMinimaxiConfig();
  const audioStream = await requestMinimaxi({
-    apiKey: process.env.MINIMAXI_API_KEY,
+    apiKey: minimaxiConfig.apiKey,
-    groupId: process.env.MINIMAXI_GROUP_ID,
+    groupId: minimaxiConfig.groupId,
    body: {
      model: 'speech-02-hd',
      text,
-      output_format: 'hex',
+      output_format: 'hex', // 流式场景必须使用hex
      voice_setting: {
-        voice_id: 'male-qn-qingse',
+        voice_id: 'yantu-qinggang',
        speed: 1
      }
    },