new commit

2025-07-23 20:44:00 +08:00 · 2025-07-23 20:44:00 +08:00 · 6f087fe874
commit 6f087fe874
parent 7703a266bf
4 changed files with 401 additions and 0 deletions
--- a/src/chat_with_audio.js
+++ b/src/chat_with_audio.js
@ -0,0 +1,184 @@
 // 用户输入文本后，进行大模型回答，并且合成音频，流式播放
 import { requestLLMStream } from './llm_stream.js';
 import { requestMinimaxi } from './minimaxi_stream.js';
 /**
  * Pulls the assistant's text out of whatever `requestLLMStream` returned.
  *
  * `requestLLMStream` asks for `stream: true` and returns the raw response
  * body, so the payload is normally a sequence of `data: {...}` lines rather
  * than one JSON document. This assumes OpenAI-style chunks carrying
  * `choices[0].delta.content` (TODO confirm against the provider docs); a
  * plain JSON body and plain text are still accepted as fallbacks.
  *
  * @param {string} rawResponse - Raw body returned by the LLM request.
  * @returns {string} The assistant text, or the raw input when nothing could be extracted.
  */
 function extractLLMContent(rawResponse) {
   if (typeof rawResponse !== 'string') {
     return String(rawResponse ?? '');
   }

   let streamedText = '';
   for (const rawLine of rawResponse.split(/\r?\n/)) {
     const line = rawLine.trim();
     if (!line.startsWith('data:')) {
       continue;
     }
     const payload = line.slice('data:'.length).trim();
     if (payload === '' || payload === '[DONE]') {
       continue;
     }
     try {
       const event = JSON.parse(payload);
       const choice = event?.choices?.[0];
       streamedText += choice?.delta?.content ?? choice?.message?.content ?? '';
     } catch {
       // A malformed event should not discard the text collected so far.
     }
   }
   if (streamedText !== '') {
     return streamedText;
   }

   // Fallback: a single (non-streamed) JSON completion, or plain text.
   try {
     const parsed = JSON.parse(rawResponse);
     return parsed?.choices?.[0]?.message?.content || rawResponse;
   } catch {
     return rawResponse;
   }
 }

 /**
  * Sends the user's text to the LLM, synthesizes the answer to speech and plays it.
  *
  * @param {object} options
  * @param {string} options.userInput - Text typed by the user.
  * @param {string} options.llmApiKey - API key forwarded to `requestLLMStream`.
  * @param {string} options.llmModel - Model/bot id forwarded to `requestLLMStream`.
  * @param {string} options.minimaxiApiKey - API key forwarded to `requestMinimaxi`.
  * @param {string} options.minimaxiGroupId - Group id forwarded to `requestMinimaxi`.
  * @returns {Promise<{userInput: string, llmResponse: string, audioResult: object}>}
  * @throws {Error} When either request fails, no audio is returned, or playback fails.
  */
 async function chatWithAudioStream({ userInput, llmApiKey, llmModel, minimaxiApiKey, minimaxiGroupId }) {
   console.log('用户输入:', userInput);

   // 1. Ask the LLM.
   console.log('\n=== 请求大模型回答 ===');
   const llmResponse = await requestLLMStream({
     apiKey: llmApiKey,
     model: llmModel,
     messages: [
       { role: 'system', content: 'You are a helpful assistant.' },
       { role: 'user', content: userInput },
     ],
   });
   const llmContent = extractLLMContent(llmResponse);
   console.log('\n=== 大模型回答 ===');
   console.log(llmContent);

   // 2. Synthesize speech for the answer.
   console.log('\n=== 开始合成音频 ===');
   const audioResult = await requestMinimaxi({
     apiKey: minimaxiApiKey,
     groupId: minimaxiGroupId,
     body: {
       model: 'speech-02-hd',
       text: llmContent,
       stream: true,
       language_boost: 'auto',
       output_format: 'hex',
       voice_setting: {
         voice_id: 'male-qn-qingse',
         speed: 1,
         vol: 1,
         pitch: 0,
         emotion: 'happy',
       },
       audio_setting: {
         sample_rate: 32000,
         bitrate: 128000,
         format: 'mp3',
       },
     },
     stream: true,
   });
   const audioHex = audioResult?.data?.audio;
   if (!audioHex) {
     throw new Error('TTS synthesis returned no audio data');
   }

   // 3. Play it. The Web Audio player needs `window`; outside a browser
   // (e.g. the CLI example in this file) fall back to the Node player.
   console.log('\n=== 开始流式播放音频 ===');
   const play = typeof window !== 'undefined' ? playAudioStream : playAudioStreamNode;
   await play(audioHex);

   return {
     userInput,
     llmResponse: llmContent,
     audioResult,
   };
 }
 /**
  * Decodes hex-encoded audio and plays it through the Web Audio API.
  *
  * Browser only: relies on `window.AudioContext` (or the `webkit` prefixed
  * variant). The returned promise settles once playback has finished.
  *
  * @param {string} audioHex - Hex-encoded audio data.
  * @returns {Promise<void>} Resolves when the buffer source fires `ended`.
  * @throws Re-throws any decoding/playback error after logging it.
  */
 async function playAudioStream(audioHex) {
   // Convert the hex payload to the ArrayBuffer that decodeAudioData expects.
   const encodedAudio = hexToArrayBuffer(audioHex);
   const AudioContextClass = window.AudioContext || window.webkitAudioContext;
   const audioContext = new AudioContextClass();
   try {
     const audioData = await audioContext.decodeAudioData(encodedAudio);
     const source = audioContext.createBufferSource();
     source.buffer = audioData;
     source.connect(audioContext.destination);

     // Register the completion handler before starting so the `ended`
     // event cannot be missed.
     const playbackFinished = new Promise((resolve) => {
       source.onended = () => {
         console.log('音频播放完成');
         resolve();
       };
     });
     source.start(0);
     console.log('音频播放开始，时长:', audioData.duration, '秒');
     await playbackFinished;
   } catch (error) {
     console.error('音频播放失败:', error);
     throw error;
   } finally {
     // Each call creates its own context; release it so repeated calls do
     // not accumulate open audio contexts.
     try {
       await audioContext.close?.();
     } catch (closeError) {
       console.error('Failed to close AudioContext:', closeError);
     }
   }
 }
 /**
  * Converts a hexadecimal string into an ArrayBuffer (two hex digits per byte).
  *
  * @param {string} hex - Hex digits, upper or lower case; surrounding whitespace is ignored.
  * @returns {ArrayBuffer} Buffer holding the decoded bytes (empty for an empty string).
  * @throws {TypeError} If `hex` is not a string.
  * @throws {Error} If `hex` has an odd length or contains non-hex characters,
  *   which would otherwise silently decode to truncated or zeroed bytes.
  */
 function hexToArrayBuffer(hex) {
   if (typeof hex !== 'string') {
     throw new TypeError('hexToArrayBuffer expects a hex string');
   }
   const digits = hex.trim();
   if (digits.length % 2 !== 0 || !/^[0-9a-fA-F]*$/.test(digits)) {
     throw new Error('Invalid hex string: expected an even number of hexadecimal digits');
   }
   const bytes = new Uint8Array(digits.length / 2);
   for (let index = 0; index < bytes.length; index += 1) {
     const offset = index * 2;
     bytes[index] = Number.parseInt(digits.slice(offset, offset + 2), 16);
   }
   return bytes.buffer;
 }
 /**
  * Plays hex-encoded audio under Node.js by writing it to `temp_audio.mp3`
  * in the current working directory and opening that file with the
  * platform's default handler (`start` / `open` / `xdg-open`).
  *
  * The promise resolves as soon as the opener has been launched; it does not
  * wait for playback to finish. The temporary file is removed after 10 s.
  *
  * @param {string} audioHex - Hex-encoded audio data.
  * @returns {Promise<void>}
  * @throws Re-throws any synchronous failure (e.g. writing the file) after logging it.
  */
 async function playAudioStreamNode(audioHex) {
   // This file is an ES module, where `require` is not defined; load the
   // Node built-ins with dynamic imports instead.
   const fs = await import('node:fs');
   const path = await import('node:path');
   const { exec } = await import('node:child_process');

   const audioBuffer = Buffer.from(audioHex, 'hex');
   const tempFile = path.join(process.cwd(), 'temp_audio.mp3');
   try {
     fs.writeFileSync(tempFile, audioBuffer);

     // Pick the "open with default application" command for this platform.
     const openers = {
       win32: `start "" "${tempFile}"`,
       darwin: `open "${tempFile}"`,
     };
     const command = openers[process.platform] ?? `xdg-open "${tempFile}"`;
     exec(command, (error) => {
       if (error) {
         console.error('播放音频失败:', error);
       } else {
         console.log('音频播放开始');
       }
     });

     // Remove the temporary file after a grace period. A failure here (for
     // example the player still holding the file) must not crash the
     // process from inside a timer callback.
     setTimeout(() => {
       try {
         fs.rmSync(tempFile, { force: true });
       } catch (cleanupError) {
         console.error('Failed to remove temporary audio file:', cleanupError);
       }
     }, 10000);
   } catch (error) {
     console.error('音频播放失败:', error);
     throw error;
   }
 }
 // Example usage: runs only when this file is the process entry point.
 // ES modules have neither `require` nor `module`, so the CommonJS
 // `require.main === module` check would throw on import; compare this
 // module's URL with the entry script instead. The `process` guard keeps the
 // module importable where `process` does not exist (e.g. a browser).
 if (typeof process !== 'undefined' && process.argv?.[1]) {
   const { pathToFileURL } = await import('node:url');
   if (import.meta.url === pathToFileURL(process.argv[1]).href) {
     const llmApiKey = process.env.ARK_API_KEY;
     const llmModel = 'bot-20250720193048-84fkp';
     const minimaxiApiKey = process.env.MINIMAXI_API_KEY;
     const minimaxiGroupId = process.env.MINIMAXI_GROUP_ID;
     if (!llmApiKey || !minimaxiApiKey || !minimaxiGroupId) {
       console.error('请设置环境变量: ARK_API_KEY, MINIMAXI_API_KEY, MINIMAXI_GROUP_ID');
       process.exit(1);
     }
     // The first CLI argument is the prompt; otherwise use the sample prompt.
     const userInput = process.argv[2] || '你好，请介绍一下人工智能的发展历程';
     chatWithAudioStream({
       userInput,
       llmApiKey,
       llmModel,
       minimaxiApiKey,
       minimaxiGroupId,
     }).catch(console.error);
   }
 }
 export { chatWithAudioStream, playAudioStream, playAudioStreamNode }; 
--- a/src/llm_stream.js
+++ b/src/llm_stream.js
@ -0,0 +1,59 @@
 /**
  * Calls the Ark bot chat-completions endpoint with `stream: true`, echoes the
  * response text as it arrives and returns the complete raw response body.
  *
  * The returned string is the undecoded protocol text exactly as received
  * (not just the assistant's message); callers must parse it themselves.
  *
  * @param {object} options
  * @param {string} options.apiKey - Bearer token for the Authorization header.
  * @param {string} options.model - Model/bot id placed in the request body.
  * @param {Array<{role: string, content: string}>} options.messages - Chat messages.
  * @returns {Promise<string>} Full raw response body.
  * @throws {Error} On a non-2xx status or when the response has no readable body.
  */
 async function requestLLMStream({ apiKey, model, messages }) {
   const response = await fetch('https://ark.cn-beijing.volces.com/api/v3/bots/chat/completions', {
     method: 'POST',
     headers: {
       'Authorization': `Bearer ${apiKey}`,
       'Content-Type': 'application/json',
     },
     body: JSON.stringify({
       model,
       stream: true,
       stream_options: { include_usage: true },
       messages,
     }),
   });
   if (!response.ok) {
     throw new Error(`HTTP error! status: ${response.status}`);
   }
   if (!response.body) {
     throw new Error('LLM response has no readable body');
   }

   // Echo to stdout under Node; fall back to console.log where `process`
   // is unavailable.
   const echo = typeof process !== 'undefined' && process.stdout?.write
     ? (text) => process.stdout.write(text)
     : (text) => console.log(text);

   const reader = response.body.getReader();
   const decoder = new TextDecoder('utf-8');
   let buffer = '';
   const collect = (text) => {
     if (text === '') {
       return;
     }
     buffer += text;
     echo(text);
   };

   try {
     for (;;) {
       const { value, done } = await reader.read();
       if (value) {
         // `stream: true` keeps a multi-byte character that is split across
         // two network chunks intact.
         collect(decoder.decode(value, { stream: true }));
       }
       if (done) {
         break;
       }
     }
     // Flush bytes the streaming decoder is still holding back.
     collect(decoder.decode());
   } finally {
     reader.releaseLock();
   }
   return buffer;
 }
 // Example usage: runs only when this file is the process entry point.
 // ES modules have neither `require` nor `module`, so the CommonJS
 // `require.main === module` check would throw on import; compare this
 // module's URL with the entry script instead. The `process` guard keeps the
 // module importable where `process` does not exist (e.g. a browser).
 if (typeof process !== 'undefined' && process.argv?.[1]) {
   const { pathToFileURL } = await import('node:url');
   if (import.meta.url === pathToFileURL(process.argv[1]).href) {
     const apiKey = process.env.ARK_API_KEY;
     if (!apiKey) {
       console.error('请设置环境变量 ARK_API_KEY');
       process.exit(1);
     }
     requestLLMStream({
       apiKey,
       model: 'bot-20250720193048-84fkp',
       messages: [
         { role: 'system', content: 'You are a helpful assistant.' },
         { role: 'user', content: 'Hello!' },
       ],
     }).catch(console.error);
   }
 }
 export { requestLLMStream }; 
--- a/src/minimaxi_stream.js
+++ b/src/minimaxi_stream.js
@ -0,0 +1,116 @@
 /**
  * Calls the MiniMax text-to-audio endpoint, either as a single JSON response
  * or as a stream whose audio fragments are concatenated into one hex string.
  *
  * @param {object} options
  * @param {string} options.apiKey - Bearer token for the Authorization header.
  * @param {string} options.groupId - Group id appended to the endpoint path.
  * @param {object} options.body - Request payload; its `stream` field is overridden by `stream`.
  * @param {boolean} [options.stream=true] - Whether to request a streamed response.
  * @returns {Promise<object>} Non-stream: the parsed JSON response. Stream: the
  *   chunk with `data.status === 2` with `data.audio` replaced by the
  *   concatenated audio, or `{ data: { audio } }` when no such chunk arrived.
  * @throws {Error} On a non-2xx status or when a streamed response has no body.
  */
 async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
   // NOTE(review): the group id is sent as a path segment here; confirm
   // against the provider documentation that this is the expected URL shape.
   const url = `https://api.minimaxi.com/v1/t2a_v2/${groupId}`;
   const reqBody = { ...body, stream };
   const response = await fetch(url, {
     method: 'POST',
     headers: {
       'Authorization': `Bearer ${apiKey}`,
       'Content-Type': 'application/json',
     },
     body: JSON.stringify(reqBody),
   });
   if (!response.ok) {
     throw new Error(`HTTP error! status: ${response.status}`);
   }

   if (!stream) {
     // Non-streamed: the body is a single JSON document.
     const result = await response.json();
     console.log(JSON.stringify(result, null, 2));
     return result;
   }

   if (!response.body) {
     throw new Error('Streamed response has no readable body');
   }

   let audioHex = '';
   let lastFullResult = null;

   // Handles one line of the stream. Lines may be bare JSON or carry a
   // server-sent-events `data:` prefix; both are accepted.
   const handleLine = (rawLine) => {
     let line = rawLine.trim();
     if (line.startsWith('data:')) {
       line = line.slice('data:'.length).trim();
     }
     if (!line) {
       return;
     }
     try {
       const obj = JSON.parse(line);
       // NOTE(review): every chunk's audio is appended, including the final
       // (status 2) one; verify the final chunk does not repeat earlier audio.
       if (obj.data?.audio) {
         audioHex += obj.data.audio;
       }
       // status === 2 marks the last chunk; keep it as the result skeleton.
       if (obj.data?.status === 2) {
         lastFullResult = obj;
       }
       console.log('chunk:', JSON.stringify(obj));
     } catch (e) {
       console.error('解析chunk失败:', e, line);
     }
   };

   const reader = response.body.getReader();
   const decoder = new TextDecoder('utf-8');
   let pending = '';
   try {
     for (;;) {
       const { value, done } = await reader.read();
       if (value) {
         pending += decoder.decode(value, { stream: true });
         const lines = pending.split('\n');
         // The last piece may be an incomplete line; keep it for the next read.
         pending = lines.pop();
         lines.forEach(handleLine);
       }
       if (done) {
         break;
       }
     }
     // Flush the decoder and process a final line that had no trailing
     // newline, which would otherwise be dropped.
     pending += decoder.decode();
     handleLine(pending);
   } finally {
     reader.releaseLock();
   }

   if (lastFullResult) {
     lastFullResult.data.audio = audioHex;
     console.log('最终合成结果:', JSON.stringify(lastFullResult, null, 2));
     return lastFullResult;
   }
   // No final chunk was seen; return just the audio collected so far.
   return { data: { audio: audioHex } };
 }
 // Example usage: runs only when this file is the process entry point.
 // ES modules have neither `require` nor `module`, so the CommonJS
 // `require.main === module` check would throw on import; compare this
 // module's URL with the entry script instead. The `process` guard keeps the
 // module importable where `process` does not exist (e.g. a browser).
 if (typeof process !== 'undefined' && process.argv?.[1]) {
   const { pathToFileURL } = await import('node:url');
   if (import.meta.url === pathToFileURL(process.argv[1]).href) {
     const apiKey = process.env.MINIMAXI_API_KEY;
     const groupId = process.env.MINIMAXI_GROUP_ID;
     if (!apiKey || !groupId) {
       console.error('请设置环境变量 MINIMAXI_API_KEY 和 MINIMAXI_GROUP_ID');
       process.exit(1);
     }
     const baseBody = {
       model: 'speech-02-hd',
       text: '真正的危险不是计算机开始像人一样思考，而是人开始像计算机一样思考。计算机只是可以帮我们处理一些简单事务。',
       language_boost: 'auto',
       output_format: 'hex',
       voice_setting: {
         voice_id: 'male-qn-qingse',
         speed: 1,
         vol: 1,
         pitch: 0,
         emotion: 'happy',
       },
       audio_setting: {
         sample_rate: 32000,
         bitrate: 128000,
         format: 'mp3',
       },
     };
     // Run the non-streamed request first, then the streamed one.
     requestMinimaxi({
       apiKey,
       groupId,
       body: baseBody,
       stream: false,
     }).then(() => {
       return requestMinimaxi({
         apiKey,
         groupId,
         body: baseBody,
         stream: true,
       });
     }).catch(console.error);
   }
 }
 export { requestMinimaxi }; 
--- a/src/video_audio_sync.js
+++ b/src/video_audio_sync.js
@ -0,0 +1,42 @@
 import { requestMinimaxi } from './minimaxi_stream.js';
 /**
  * Appends a `<video>` element for `videoPath`, synthesizes `text` to speech
  * and starts video and audio playback together.
  *
  * The promise resolves once both have been started (it does not wait for
  * playback to end). The audio context is released when the audio ends.
  *
  * NOTE(review): credentials are read from `process.env`, while the rest of
  * the function uses browser DOM APIs — confirm the build injects them.
  *
  * @param {string} videoPath - URL assigned to the video element's `src`.
  * @param {string} text - Text to synthesize.
  * @returns {Promise<void>}
  * @throws {Error} When synthesis returns no audio, decoding fails, or the
  *   video cannot start playing (e.g. blocked autoplay).
  */
 export async function playVideoWithAudio(videoPath, text) {
   // 1. Set up the video element.
   const video = document.createElement('video');
   video.src = videoPath;
   document.body.appendChild(video);

   // 2. Synthesize the narration.
   const audioStream = await requestMinimaxi({
     apiKey: process.env.MINIMAXI_API_KEY,
     groupId: process.env.MINIMAXI_GROUP_ID,
     body: {
       model: 'speech-02-hd',
       text,
       output_format: 'hex',
       voice_setting: {
         voice_id: 'male-qn-qingse',
         speed: 1
       }
     },
     stream: true
   });
   const audioHex = audioStream?.data?.audio;
   if (!audioHex) {
     throw new Error('TTS synthesis returned no audio data');
   }

   // 3. Decode the hex payload into a playable buffer.
   const audioCtx = new AudioContext();
   const releaseContext = () => {
     Promise.resolve()
       .then(() => audioCtx.close?.())
       .catch((closeError) => {
         console.error('Failed to close AudioContext:', closeError);
       });
   };
   try {
     const audioBuffer = await audioCtx.decodeAudioData(hexToArrayBuffer(audioHex));
     const source = audioCtx.createBufferSource();
     source.buffer = audioBuffer;
     source.connect(audioCtx.destination);
     source.onended = releaseContext;

     // 4. Start both back to back, then await the video's play() promise so
     // a rejection reaches the caller instead of being left unhandled.
     const videoPlayback = video.play();
     source.start(0);
     await videoPlayback;
   } catch (error) {
     // Nothing will fire `ended` after a failure; release the context now.
     releaseContext();
     throw error;
   }
 }
 /**
  * Converts a hexadecimal string into an ArrayBuffer (two hex digits per byte).
  *
  * @param {string} hex - Hex digits, upper or lower case; surrounding whitespace is ignored.
  * @returns {ArrayBuffer} Buffer holding the decoded bytes (empty for an empty string).
  * @throws {TypeError} If `hex` is not a string.
  * @throws {Error} If `hex` has an odd length or contains non-hex characters.
  */
 function hexToArrayBuffer(hex) {
   if (typeof hex !== 'string') {
     throw new TypeError('hexToArrayBuffer expects a hex string');
   }
   const digits = hex.trim();
   if (digits.length % 2 !== 0 || !/^[0-9a-fA-F]*$/.test(digits)) {
     throw new Error('Invalid hex string: expected an even number of hexadecimal digits');
   }
   const bytes = new Uint8Array(digits.length / 2);
   for (let index = 0; index < bytes.length; index += 1) {
     const offset = index * 2;
     bytes[index] = Number.parseInt(digits.slice(offset, offset + 2), 16);
   }
   return bytes.buffer;
 }