new commit

2025-07-23 20:44:00 +08:00 · 2025-07-23 20:44:00 +08:00 · 6f087fe874
commit 6f087fe874
parent 7703a266bf
4 changed files with 401 additions and 0 deletions
--- a/src/chat_with_audio.js
+++ b/src/chat_with_audio.js
@ -0,0 +1,184 @@
+// 用户输入文本后，进行大模型回答，并且合成音频，流式播放
+
+import { requestLLMStream } from './llm_stream.js';
+import { requestMinimaxi } from './minimaxi_stream.js';
+
+/**
+ * Extracts the assistant's text from the raw body returned by requestLLMStream.
+ *
+ * requestLLMStream asks for `stream: true` and returns the concatenated stream
+ * text, so the body is expected to be a series of `data: {...}` event lines
+ * rather than one JSON document. Text fragments found under
+ * `choices[0].delta.content` (or `choices[0].message.content`) are joined in
+ * order. If no event line parses, the body is tried as a single JSON document
+ * and finally returned unchanged.
+ * NOTE(review): the event payload shape is assumed to be OpenAI-compatible —
+ * confirm against the LLM service's streaming reference.
+ *
+ * @param {string} rawResponse - Raw response text.
+ * @returns {string} The extracted answer text (may be empty).
+ */
+function extractLLMContent(rawResponse) {
+  const raw = String(rawResponse ?? '');
+  let sawEvent = false;
+  let text = '';
+
+  for (const line of raw.split(/\r?\n/)) {
+    const trimmed = line.trim();
+    if (!trimmed.startsWith('data:')) {
+      continue;
+    }
+    const payload = trimmed.slice('data:'.length).trim();
+    // `[DONE]` is the conventional end-of-stream sentinel, not JSON.
+    if (!payload || payload === '[DONE]') {
+      continue;
+    }
+    try {
+      const choice = JSON.parse(payload)?.choices?.[0];
+      sawEvent = true;
+      text += choice?.delta?.content ?? choice?.message?.content ?? '';
+    } catch {
+      // A malformed event line carries no usable text; skip it and keep going.
+    }
+  }
+
+  if (sawEvent) {
+    return text;
+  }
+
+  try {
+    return JSON.parse(raw)?.choices?.[0]?.message?.content || raw;
+  } catch {
+    return raw;
+  }
+}
+
+/**
+ * Sends the user's text to the LLM, synthesizes the answer as speech and plays it.
+ *
+ * @param {object} options
+ * @param {string} options.userInput - Text typed by the user.
+ * @param {string} options.llmApiKey - API key passed to requestLLMStream.
+ * @param {string} options.llmModel - Model/bot id passed to requestLLMStream.
+ * @param {string} options.minimaxiApiKey - API key passed to requestMinimaxi.
+ * @param {string} options.minimaxiGroupId - Group id passed to requestMinimaxi.
+ * @returns {Promise<{userInput: string, llmResponse: string, audioResult: object}>}
+ *   The input, the extracted answer text and the synthesis result.
+ * @throws {Error} If the LLM yields no text or the synthesis yields no audio;
+ *   errors from the underlying requests and from playback propagate unchanged.
+ */
+async function chatWithAudioStream({ userInput, llmApiKey, llmModel, minimaxiApiKey, minimaxiGroupId }) {
+  console.log('用户输入:', userInput);
+
+  // 1. Ask the LLM for an answer.
+  console.log('\n=== 请求大模型回答 ===');
+  const llmResponse = await requestLLMStream({
+    apiKey: llmApiKey,
+    model: llmModel,
+    messages: [
+      { role: 'system', content: 'You are a helpful assistant.' },
+      { role: 'user', content: userInput },
+    ],
+  });
+
+  // The raw stream must be reduced to plain text, otherwise the protocol
+  // framing itself would be read aloud by the speech synthesizer.
+  const llmContent = extractLLMContent(llmResponse);
+  if (!llmContent.trim()) {
+    throw new Error('LLM returned no text content to synthesize');
+  }
+
+  console.log('\n=== 大模型回答 ===');
+  console.log(llmContent);
+
+  // 2. Synthesize speech for the answer.
+  console.log('\n=== 开始合成音频 ===');
+  const audioResult = await requestMinimaxi({
+    apiKey: minimaxiApiKey,
+    groupId: minimaxiGroupId,
+    body: {
+      model: 'speech-02-hd',
+      text: llmContent,
+      stream: true,
+      language_boost: 'auto',
+      output_format: 'hex',
+      voice_setting: {
+        voice_id: 'male-qn-qingse',
+        speed: 1,
+        vol: 1,
+        pitch: 0,
+        emotion: 'happy',
+      },
+      audio_setting: {
+        sample_rate: 32000,
+        bitrate: 128000,
+        format: 'mp3',
+      },
+    },
+    stream: true,
+  });
+
+  const audioHex = audioResult?.data?.audio;
+  if (!audioHex) {
+    throw new Error('Speech synthesis returned no audio data');
+  }
+
+  // 3. Play the audio. The Web Audio player needs `window`, so fall back to the
+  // Node.js player when running outside a browser (as the example usage does).
+  console.log('\n=== 开始流式播放音频 ===');
+  const play = typeof window === 'undefined' ? playAudioStreamNode : playAudioStream;
+  await play(audioHex);
+
+  return {
+    userInput,
+    llmResponse: llmContent,
+    audioResult,
+  };
+}
+
+/**
+ * Decodes hex-encoded audio and plays it through the Web Audio API (browser only).
+ *
+ * Despite the name, the whole clip is decoded before playback starts; nothing
+ * is played incrementally.
+ *
+ * @param {string} audioHex - Encoded audio as a hex string.
+ * @returns {Promise<void>} Resolves once playback has ended.
+ * @throws Rethrows any decoding/playback error after logging it.
+ */
+async function playAudioStream(audioHex) {
+  const audioBuffer = hexToArrayBuffer(audioHex);
+
+  const AudioContextClass = window.AudioContext || window.webkitAudioContext;
+  const audioContext = new AudioContextClass();
+
+  try {
+    const audioData = await audioContext.decodeAudioData(audioBuffer);
+
+    const source = audioContext.createBufferSource();
+    source.buffer = audioData;
+    source.connect(audioContext.destination);
+
+    // Attach the end-of-playback handler before starting so it cannot be missed.
+    const playbackFinished = new Promise((resolve) => {
+      source.onended = () => {
+        console.log('音频播放完成');
+        resolve();
+      };
+    });
+
+    source.start(0);
+    console.log('音频播放开始，时长:', audioData.duration, '秒');
+
+    await playbackFinished;
+  } catch (error) {
+    console.error('音频播放失败:', error);
+    throw error;
+  } finally {
+    // Browsers cap the number of live AudioContexts; release this one whether
+    // playback succeeded or failed.
+    await audioContext.close?.();
+  }
+}
+
+/**
+ * Converts a hex string into an ArrayBuffer (two hex digits per byte).
+ *
+ * @param {string} hex - Hex digits, upper or lower case; may be empty.
+ * @returns {ArrayBuffer} Buffer holding the decoded bytes.
+ * @throws {TypeError} If `hex` is not a string.
+ * @throws {RangeError} If `hex` has odd length or contains non-hex characters.
+ */
+function hexToArrayBuffer(hex) {
+  if (typeof hex !== 'string') {
+    throw new TypeError('hexToArrayBuffer expects a hex string');
+  }
+  // Reject malformed input up front: otherwise a dangling digit is dropped and
+  // invalid digits are silently stored as 0, producing corrupt audio.
+  if (hex.length % 2 !== 0 || /[^0-9a-fA-F]/.test(hex)) {
+    throw new RangeError('hexToArrayBuffer expects an even number of hex digits');
+  }
+
+  const bytes = new Uint8Array(hex.length / 2);
+  for (let i = 0; i < bytes.length; i += 1) {
+    bytes[i] = Number.parseInt(hex.slice(i * 2, i * 2 + 2), 16);
+  }
+  return bytes.buffer;
+}
+
+/**
+ * Plays hex-encoded audio under Node.js by writing it to a temporary mp3 file
+ * and opening that file with the operating system's default handler.
+ *
+ * @param {string} audioHex - Encoded audio as a hex string.
+ * @returns {Promise<void>} Resolves once the launcher command has completed;
+ *   the temporary files are removed about ten seconds later.
+ * @throws Rethrows file-system or launcher errors after logging them.
+ */
+async function playAudioStreamNode(audioHex) {
+  // Loaded dynamically: `require` does not exist in an ES module, and static
+  // Node-only imports would break loading this file in a browser.
+  const [fs, os, path, { exec }] = await Promise.all([
+    import('node:fs'),
+    import('node:os'),
+    import('node:path'),
+    import('node:child_process'),
+  ]);
+
+  const audioBuffer = Buffer.from(audioHex, 'hex');
+
+  // A unique directory per call avoids concurrent calls overwriting or
+  // deleting each other's file.
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'chat-audio-'));
+  const tempFile = path.join(tempDir, 'temp_audio.mp3');
+  const removeTempDir = () => fs.rmSync(tempDir, { recursive: true, force: true });
+
+  try {
+    fs.writeFileSync(tempFile, audioBuffer);
+
+    const platform = process.platform;
+    let command;
+    if (platform === 'win32') {
+      command = `start "" "${tempFile}"`;
+    } else if (platform === 'darwin') {
+      command = `open "${tempFile}"`;
+    } else {
+      command = `xdg-open "${tempFile}"`;
+    }
+
+    // Wrap the callback API so launcher failures reach the catch block below.
+    await new Promise((resolve, reject) => {
+      exec(command, (error) => (error ? reject(error) : resolve()));
+    });
+    console.log('音频播放开始');
+  } catch (error) {
+    removeTempDir();
+    console.error('音频播放失败:', error);
+    throw error;
+  }
+
+  // Presumably the external player needs a moment to open the file before it
+  // can be removed; keep the original ten-second grace period.
+  setTimeout(removeTempDir, 10000);
+}
+
+// Example usage; runs only when this file is executed directly by Node.js.
+// `require.main === module` exists only in CommonJS: in a file that uses
+// `import`/`export` it throws a ReferenceError on load, so the entry-point
+// check compares this module's URL with the script path given to Node instead.
+if (typeof process !== 'undefined' && process.argv?.[1]) {
+  import('node:url')
+    .then(({ pathToFileURL }) => {
+      if (import.meta.url !== pathToFileURL(process.argv[1]).href) {
+        return undefined;
+      }
+
+      const llmApiKey = process.env.ARK_API_KEY;
+      const llmModel = 'bot-20250720193048-84fkp';
+      const minimaxiApiKey = process.env.MINIMAXI_API_KEY;
+      const minimaxiGroupId = process.env.MINIMAXI_GROUP_ID;
+
+      if (!llmApiKey || !minimaxiApiKey || !minimaxiGroupId) {
+        console.error('请设置环境变量: ARK_API_KEY, MINIMAXI_API_KEY, MINIMAXI_GROUP_ID');
+        process.exit(1);
+      }
+
+      const userInput = process.argv[2] || '你好，请介绍一下人工智能的发展历程';
+
+      return chatWithAudioStream({
+        userInput,
+        llmApiKey,
+        llmModel,
+        minimaxiApiKey,
+        minimaxiGroupId,
+      });
+    })
+    .catch(console.error);
+}
+
+export { chatWithAudioStream, playAudioStream, playAudioStreamNode }; 
--- a/src/llm_stream.js
+++ b/src/llm_stream.js
@ -0,0 +1,59 @@
+// 以流式方式请求LLM大模型接口，并打印流式返回内容
+
+/**
+ * Requests a streamed chat completion and returns the raw streamed text.
+ *
+ * Each decoded chunk is echoed to stdout when running under Node.js. The
+ * return value is the unparsed stream body exactly as received; callers that
+ * need the answer text must parse it themselves.
+ *
+ * @param {object} options
+ * @param {string} options.apiKey - Bearer token for the API.
+ * @param {string} options.model - Model/bot id to query.
+ * @param {Array<{role: string, content: string}>} options.messages - Chat messages.
+ * @returns {Promise<string>} The complete raw response text.
+ * @throws {Error} If the HTTP status is not OK.
+ */
+async function requestLLMStream({ apiKey, model, messages }) {
+  const response = await fetch('https://ark.cn-beijing.volces.com/api/v3/bots/chat/completions', {
+    method: 'POST',
+    headers: {
+      'Authorization': `Bearer ${apiKey}`,
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({
+      model,
+      stream: true,
+      stream_options: { include_usage: true },
+      messages,
+    }),
+  });
+
+  if (!response.ok) {
+    throw new Error(`HTTP error! status: ${response.status}`);
+  }
+
+  // `process.stdout` only exists under Node.js; skip the echo elsewhere so the
+  // function also works when called from browser code.
+  const canEcho = typeof process !== 'undefined' && typeof process.stdout?.write === 'function';
+  const echo = (text) => {
+    if (canEcho && text) {
+      process.stdout.write(text);
+    }
+  };
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder('utf-8');
+  let buffer = '';
+
+  try {
+    for (;;) {
+      const { value, done } = await reader.read();
+      if (value) {
+        const chunk = decoder.decode(value, { stream: true });
+        buffer += chunk;
+        echo(chunk);
+      }
+      if (done) {
+        break;
+      }
+    }
+
+    // Flush the decoder: bytes of an incomplete trailing character are held
+    // back while streaming and would otherwise be dropped silently.
+    const tail = decoder.decode();
+    buffer += tail;
+    echo(tail);
+  } finally {
+    reader.releaseLock();
+  }
+
+  return buffer;
+}
+
+// Example usage; runs only when this file is executed directly by Node.js.
+// `require.main === module` exists only in CommonJS: in a file that uses
+// `export` it throws a ReferenceError on load, so the entry-point check
+// compares this module's URL with the script path given to Node instead.
+if (typeof process !== 'undefined' && process.argv?.[1]) {
+  import('node:url')
+    .then(({ pathToFileURL }) => {
+      if (import.meta.url !== pathToFileURL(process.argv[1]).href) {
+        return undefined;
+      }
+
+      const apiKey = process.env.ARK_API_KEY;
+      if (!apiKey) {
+        console.error('请设置环境变量 ARK_API_KEY');
+        process.exit(1);
+      }
+
+      return requestLLMStream({
+        apiKey,
+        model: 'bot-20250720193048-84fkp',
+        messages: [
+          { role: 'system', content: 'You are a helpful assistant.' },
+          { role: 'user', content: 'Hello!' },
+        ],
+      });
+    })
+    .catch(console.error);
+}
+
+export { requestLLMStream }; 
--- a/src/minimaxi_stream.js
+++ b/src/minimaxi_stream.js
@ -0,0 +1,116 @@
+// 以流式或非流式方式请求 minimaxi 大模型接口，并打印/返回内容
+
+/**
+ * Calls the text-to-audio endpoint, either as one JSON response or as a stream
+ * whose audio fragments are concatenated into a single hex string.
+ *
+ * @param {object} options
+ * @param {string} options.apiKey - Bearer token for the API.
+ * @param {string} options.groupId - Account group id.
+ * @param {object} options.body - Request payload; its `stream` field is
+ *   overridden by the `stream` option.
+ * @param {boolean} [options.stream=true] - Whether to request a streamed response.
+ * @returns {Promise<object>} Non-streamed: the parsed JSON response. Streamed:
+ *   the chunk with `data.status === 2` with `data.audio` replaced by the merged
+ *   audio, or `{ data: { audio } }` when no such chunk was received.
+ * @throws {Error} If the HTTP status is not OK.
+ */
+async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
+  // NOTE(review): the published endpoint appears to take the group as a
+  // `GroupId` query parameter rather than a path segment — confirm against the
+  // current API reference.
+  const url = new URL('https://api.minimaxi.com/v1/t2a_v2');
+  url.searchParams.set('GroupId', groupId);
+
+  const reqBody = { ...body, stream };
+  const response = await fetch(url, {
+    method: 'POST',
+    headers: {
+      'Authorization': `Bearer ${apiKey}`,
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify(reqBody),
+  });
+
+  if (!response.ok) {
+    throw new Error(`HTTP error! status: ${response.status}`);
+  }
+
+  if (!stream) {
+    // Non-streamed: the response is a single JSON document.
+    const result = await response.json();
+    console.log(JSON.stringify(result, null, 2));
+    return result;
+  }
+
+  let audioHex = '';
+  let lastFullResult = null;
+
+  // Parses one line of the stream and folds its audio into `audioHex`.
+  const handleLine = (rawLine) => {
+    let line = rawLine.trim();
+    // Event-stream framing prefixes each JSON payload with `data:`; plain
+    // newline-delimited JSON is accepted as well.
+    if (line.startsWith('data:')) {
+      line = line.slice('data:'.length).trim();
+    }
+    if (!line) {
+      return;
+    }
+
+    let obj;
+    try {
+      obj = JSON.parse(line);
+    } catch (e) {
+      console.error('解析chunk失败:', e, rawLine);
+      return;
+    }
+
+    const data = obj?.data;
+    if (data?.status === 2) {
+      // status 2 marks the last chunk; keep it as the result envelope.
+      lastFullResult = obj;
+      // NOTE(review): the final chunk appears to repeat the complete audio
+      // rather than a new fragment — confirm. Appending an exact repeat would
+      // double the clip, so only append when it differs from what we have.
+      if (data.audio && data.audio !== audioHex) {
+        audioHex += data.audio;
+      }
+    } else if (data?.audio) {
+      audioHex += data.audio;
+    }
+
+    console.log('chunk:', JSON.stringify(obj));
+  };
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder('utf-8');
+  let pending = '';
+
+  try {
+    for (;;) {
+      const { value, done } = await reader.read();
+      if (value) {
+        pending += decoder.decode(value, { stream: true });
+        const lines = pending.split('\n');
+        // The last piece may be an incomplete line; keep it for the next read.
+        pending = lines.pop();
+        lines.forEach(handleLine);
+      }
+      if (done) {
+        break;
+      }
+    }
+
+    // Flush the decoder and parse whatever is left: a final line without a
+    // trailing newline would otherwise be lost.
+    pending += decoder.decode();
+    handleLine(pending);
+  } finally {
+    reader.releaseLock();
+  }
+
+  if (lastFullResult) {
+    lastFullResult.data.audio = audioHex;
+    console.log('最终合成结果:', JSON.stringify(lastFullResult, null, 2));
+    return lastFullResult;
+  }
+
+  // No final chunk was seen; return just the merged audio.
+  return { data: { audio: audioHex } };
+}
+
+// Example usage; runs only when this file is executed directly by Node.js.
+// `require.main === module` exists only in CommonJS: in a file that uses
+// `export` it throws a ReferenceError on load, so the entry-point check
+// compares this module's URL with the script path given to Node instead.
+if (typeof process !== 'undefined' && process.argv?.[1]) {
+  import('node:url')
+    .then(async ({ pathToFileURL }) => {
+      if (import.meta.url !== pathToFileURL(process.argv[1]).href) {
+        return;
+      }
+
+      const apiKey = process.env.MINIMAXI_API_KEY;
+      const groupId = process.env.MINIMAXI_GROUP_ID;
+      if (!apiKey || !groupId) {
+        console.error('请设置环境变量 MINIMAXI_API_KEY 和 MINIMAXI_GROUP_ID');
+        process.exit(1);
+      }
+
+      const baseBody = {
+        model: 'speech-02-hd',
+        text: '真正的危险不是计算机开始像人一样思考，而是人开始像计算机一样思考。计算机只是可以帮我们处理一些简单事务。',
+        language_boost: 'auto',
+        output_format: 'hex',
+        voice_setting: {
+          voice_id: 'male-qn-qingse',
+          speed: 1,
+          vol: 1,
+          pitch: 0,
+          emotion: 'happy',
+        },
+        audio_setting: {
+          sample_rate: 32000,
+          bitrate: 128000,
+          format: 'mp3',
+        },
+      };
+
+      // Non-streamed request first, then the streamed one.
+      await requestMinimaxi({
+        apiKey,
+        groupId,
+        body: baseBody,
+        stream: false,
+      });
+      await requestMinimaxi({
+        apiKey,
+        groupId,
+        body: baseBody,
+        stream: true,
+      });
+    })
+    .catch(console.error);
+}
+
+export { requestMinimaxi }; 
--- a/src/video_audio_sync.js
+++ b/src/video_audio_sync.js
@ -0,0 +1,42 @@
+import { requestMinimaxi } from './minimaxi_stream.js';
+
+/**
+ * Plays a video together with speech synthesized from `text` (browser only).
+ *
+ * A `<video>` element is appended to the document body, the speech is
+ * synthesized and decoded, then video and audio are started together.
+ *
+ * @param {string} videoPath - URL assigned to the video element's `src`.
+ * @param {string} text - Text to synthesize as speech.
+ * @param {object} [credentials] - Optional API credentials.
+ * @param {string} [credentials.apiKey] - Defaults to `process.env.MINIMAXI_API_KEY`.
+ * @param {string} [credentials.groupId] - Defaults to `process.env.MINIMAXI_GROUP_ID`.
+ * @returns {Promise<void>} Resolves once both video and audio have started.
+ * @throws {Error} If synthesis yields no audio; decoding and playback errors
+ *   propagate unchanged.
+ */
+export async function playVideoWithAudio(
+  videoPath,
+  text,
+  {
+    // Default initializers run only when the caller omits a value, so browser
+    // code that passes credentials never touches `process`.
+    apiKey = process.env.MINIMAXI_API_KEY,
+    groupId = process.env.MINIMAXI_GROUP_ID,
+  } = {},
+) {
+  // 1. Set up the video element.
+  const video = document.createElement('video');
+  video.src = videoPath;
+  document.body.appendChild(video);
+
+  // 2. Synthesize the speech.
+  const audioStream = await requestMinimaxi({
+    apiKey,
+    groupId,
+    body: {
+      model: 'speech-02-hd',
+      text,
+      output_format: 'hex',
+      voice_setting: {
+        voice_id: 'male-qn-qingse',
+        speed: 1
+      }
+    },
+    stream: true
+  });
+
+  const audioHex = audioStream?.data?.audio;
+  if (!audioHex) {
+    throw new Error('Speech synthesis returned no audio data');
+  }
+
+  // 3. Decode the hex audio into a playable buffer.
+  const audioCtx = new AudioContext();
+  try {
+    const audioBuffer = await audioCtx.decodeAudioData(hexToArrayBuffer(audioHex));
+
+    // 4. Start both together.
+    const source = audioCtx.createBufferSource();
+    source.buffer = audioBuffer;
+    source.connect(audioCtx.destination);
+    // Release the audio context once the clip has finished playing.
+    source.onended = () => {
+      audioCtx.close().catch(console.error);
+    };
+
+    // `play()` returns a promise that rejects when playback is blocked; wait
+    // for it so the failure is reported and audio does not start without video.
+    await video.play();
+    source.start(0);
+  } catch (error) {
+    await audioCtx.close();
+    throw error;
+  }
+}
+
+/**
+ * Converts a hex string into an ArrayBuffer (two hex digits per byte).
+ *
+ * @param {string} hex - Hex digits, upper or lower case; may be empty.
+ * @returns {ArrayBuffer} Buffer holding the decoded bytes.
+ * @throws {TypeError} If `hex` is not a string.
+ * @throws {RangeError} If `hex` has odd length or contains non-hex characters.
+ */
+function hexToArrayBuffer(hex) {
+  if (typeof hex !== 'string') {
+    throw new TypeError('hexToArrayBuffer expects a hex string');
+  }
+  if (hex.length % 2 !== 0 || /[^0-9a-fA-F]/.test(hex)) {
+    throw new RangeError('hexToArrayBuffer expects an even number of hex digits');
+  }
+
+  const bytes = new Uint8Array(hex.length / 2);
+  for (let i = 0; i < bytes.length; i += 1) {
+    bytes[i] = Number.parseInt(hex.slice(i * 2, i * 2 + 2), 16);
+  }
+  return bytes.buffer;
+}