new commit

This commit is contained in:
Song367 2025-07-23 20:44:00 +08:00
parent 7703a266bf
commit 6f087fe874
4 changed files with 401 additions and 0 deletions

184
src/chat_with_audio.js Normal file
View File

@ -0,0 +1,184 @@
// 用户输入文本后,进行大模型回答,并且合成音频,流式播放
import { requestLLMStream } from './llm_stream.js';
import { requestMinimaxi } from './minimaxi_stream.js';
/**
 * Pulls the assistant's text out of whatever `requestLLMStream` returned.
 *
 * Two shapes are understood:
 *  - a single JSON document with `choices[0].message.content`;
 *  - a server-sent-events transcript made of `data: {...}` lines whose
 *    `choices[0].delta.content` fragments are concatenated in order.
 * Anything else is returned unchanged, so the caller never loses the reply.
 *
 * @param {string} rawResponse - Raw text returned by the LLM request.
 * @returns {string} The extracted text, or `rawResponse` when nothing could be extracted.
 */
function extractLlmContent(rawResponse) {
  if (typeof rawResponse !== 'string') {
    return rawResponse;
  }

  try {
    const parsed = JSON.parse(rawResponse);
    return parsed?.choices?.[0]?.message?.content || rawResponse;
  } catch {
    // Not a single JSON document; fall through and read it as an SSE transcript.
  }

  let content = '';
  for (const line of rawResponse.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed.startsWith('data:')) {
      continue;
    }
    const payload = trimmed.slice('data:'.length).trim();
    if (!payload || payload === '[DONE]') {
      continue;
    }
    try {
      const event = JSON.parse(payload);
      const choice = event?.choices?.[0];
      content += choice?.delta?.content ?? choice?.message?.content ?? '';
    } catch (error) {
      // A malformed event must not discard the fragments that did parse.
      console.warn('Skipping unparsable LLM stream event:', error);
    }
  }
  return content || rawResponse;
}

/**
 * Sends the user's text to the LLM, synthesizes the answer to speech and plays it.
 *
 * @param {object} params
 * @param {string} params.userInput - Text typed by the user.
 * @param {string} params.llmApiKey - API key forwarded to `requestLLMStream`.
 * @param {string} params.llmModel - Model/bot id forwarded to `requestLLMStream`.
 * @param {string} params.minimaxiApiKey - API key forwarded to `requestMinimaxi`.
 * @param {string} params.minimaxiGroupId - Group id forwarded to `requestMinimaxi`.
 * @returns {Promise<{userInput: string, llmResponse: string, audioResult: object}>}
 *   The input, the extracted LLM text and the raw synthesis result.
 * @throws {Error} When either request fails, no audio was produced, or playback fails.
 */
async function chatWithAudioStream({ userInput, llmApiKey, llmModel, minimaxiApiKey, minimaxiGroupId }) {
  console.log('用户输入:', userInput);

  // 1. Ask the LLM for an answer.
  console.log('\n=== 请求大模型回答 ===');
  const llmResponse = await requestLLMStream({
    apiKey: llmApiKey,
    model: llmModel,
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: userInput },
    ],
  });

  // The request streams, so the raw text is normally an SSE transcript rather
  // than one JSON document; extract only the spoken text before synthesis.
  const llmContent = extractLlmContent(llmResponse);
  console.log('\n=== 大模型回答 ===');
  console.log(llmContent);

  // 2. Synthesize speech for the answer.
  console.log('\n=== 开始合成音频 ===');
  const audioResult = await requestMinimaxi({
    apiKey: minimaxiApiKey,
    groupId: minimaxiGroupId,
    body: {
      model: 'speech-02-hd',
      text: llmContent,
      stream: true,
      language_boost: 'auto',
      output_format: 'hex',
      voice_setting: {
        voice_id: 'male-qn-qingse',
        speed: 1,
        vol: 1,
        pitch: 0,
        emotion: 'happy',
      },
      audio_setting: {
        sample_rate: 32000,
        bitrate: 128000,
        format: 'mp3',
      },
    },
    stream: true,
  });

  const audioHex = audioResult?.data?.audio;
  if (!audioHex) {
    throw new Error('音频合成结果为空,无法播放');
  }

  // 3. Play the audio. The Web Audio player needs `window`; outside a browser
  // (e.g. the Node.js example at the bottom of this file) use the Node player.
  console.log('\n=== 开始流式播放音频 ===');
  const play = typeof window !== 'undefined' ? playAudioStream : playAudioStreamNode;
  await play(audioHex);

  return {
    userInput,
    llmResponse: llmContent,
    audioResult,
  };
}
/**
 * Decodes hex-encoded audio and plays it through the Web Audio API (browser only).
 *
 * The returned promise settles once playback has finished. The AudioContext
 * created for the call is always closed afterwards, because every open context
 * holds audio hardware resources.
 *
 * @param {string} audioHex - Hex-encoded audio data.
 * @returns {Promise<void>} Resolves when the `ended` event of the source fires.
 * @throws Rethrows any decoding or playback error after logging it.
 */
async function playAudioStream(audioHex) {
  // Convert the hex string to an ArrayBuffer.
  const audioBuffer = hexToArrayBuffer(audioHex);

  // `webkitAudioContext` is the prefixed constructor used as a fallback.
  const AudioContextClass = window.AudioContext || window.webkitAudioContext;
  const audioContext = new AudioContextClass();

  try {
    const audioData = await audioContext.decodeAudioData(audioBuffer);

    const source = audioContext.createBufferSource();
    source.buffer = audioData;
    source.connect(audioContext.destination);

    // Register the end-of-playback handler before starting so it cannot be missed.
    const playbackFinished = new Promise((resolve) => {
      source.onended = () => {
        console.log('音频播放完成');
        resolve();
      };
    });

    source.start(0);
    console.log('音频播放开始,时长:', audioData.duration, '秒');

    await playbackFinished;
  } catch (error) {
    console.error('音频播放失败:', error);
    throw error;
  } finally {
    // Release the context whether playback succeeded or failed.
    if (typeof audioContext.close === 'function') {
      try {
        await audioContext.close();
      } catch (closeError) {
        console.warn('Failed to close AudioContext:', closeError);
      }
    }
  }
}
/**
 * Converts a hexadecimal string into an ArrayBuffer (two hex digits per byte).
 *
 * Malformed input is rejected instead of being silently truncated or turned
 * into zero bytes, which would otherwise surface later as an opaque decoding
 * failure.
 *
 * @param {string} hex - Hex digits (upper or lower case); may be empty.
 * @returns {ArrayBuffer} Buffer holding `hex.length / 2` bytes.
 * @throws {TypeError} If `hex` is not a string.
 * @throws {RangeError} If `hex` has an odd length or contains non-hex characters.
 */
function hexToArrayBuffer(hex) {
  if (typeof hex !== 'string') {
    throw new TypeError('hexToArrayBuffer expects a hex string');
  }
  if (hex.length % 2 !== 0) {
    throw new RangeError('Hex string must contain an even number of characters');
  }
  if (!/^[0-9a-fA-F]*$/.test(hex)) {
    throw new RangeError('Hex string contains non-hexadecimal characters');
  }

  const bytes = new Uint8Array(hex.length / 2);
  for (let i = 0; i < bytes.length; i += 1) {
    bytes[i] = Number.parseInt(hex.slice(i * 2, i * 2 + 2), 16);
  }
  return bytes.buffer;
}
/**
 * Plays hex-encoded MP3 audio under Node.js by writing it to a temporary file
 * and opening that file with the operating system's default handler.
 *
 * Node built-ins are loaded with dynamic `import()` because this file is an ES
 * module (no `require`) and is also imported by browser code, which must not
 * load them eagerly.
 *
 * The player is launched without waiting for playback to end; the temporary
 * file is removed ten seconds after launch.
 *
 * @param {string} audioHex - Hex-encoded audio data.
 * @returns {Promise<void>} Resolves once the player launch has been requested.
 * @throws Rethrows errors raised while writing the file or spawning the command.
 */
async function playAudioStreamNode(audioHex) {
  const [fs, os, path, childProcess, nodeCrypto] = await Promise.all([
    import('node:fs'),
    import('node:os'),
    import('node:path'),
    import('node:child_process'),
    import('node:crypto'),
  ]);

  // Remove the temporary file without letting a cleanup failure escape.
  const removeTempFile = (filePath) => {
    try {
      if (fs.existsSync(filePath)) {
        fs.unlinkSync(filePath);
      }
    } catch (cleanupError) {
      // E.g. the player still holds the file open; a leftover temp file is harmless.
      console.warn('Failed to remove temporary audio file:', cleanupError);
    }
  };

  // Convert the hex string to bytes.
  const audioBuffer = Buffer.from(audioHex, 'hex');

  // A unique name in the OS temp directory keeps concurrent calls from
  // overwriting each other and avoids littering the working directory.
  const tempFile = path.join(os.tmpdir(), `temp_audio_${nodeCrypto.randomUUID()}.mp3`);
  fs.writeFileSync(tempFile, audioBuffer);

  try {
    // Open the file with the platform's default player.
    const platform = process.platform;
    let command;
    if (platform === 'win32') {
      command = `start "" "${tempFile}"`;
    } else if (platform === 'darwin') {
      command = `open "${tempFile}"`;
    } else {
      command = `xdg-open "${tempFile}"`;
    }

    childProcess.exec(command, (error) => {
      if (error) {
        console.error('播放音频失败:', error);
      } else {
        console.log('音频播放开始');
      }
    });

    // Give the player time to open the file, then delete it.
    setTimeout(() => removeTempFile(tempFile), 10000);
  } catch (error) {
    removeTempFile(tempFile);
    console.error('音频播放失败:', error);
    throw error;
  }
}
// Example usage: runs only when this file is launched directly with Node.js
// (e.g. `node src/chat_with_audio.js "question"`), never when it is imported.
// This file is an ES module, so the CommonJS check `require.main === module`
// would throw a ReferenceError at load time; instead the entry script path is
// compared with this module's own URL. No Node-only import is used for the
// comparison because the module is also loaded by browser code.
{
  let isDirectRun = false;
  if (typeof process !== 'undefined' && process.argv?.[1]) {
    try {
      // Strip the leading slash that file URLs put in front of Windows drive letters.
      const modulePath = decodeURIComponent(new URL(import.meta.url).pathname)
        .replace(/^\/([A-Za-z]:)/, '$1');
      isDirectRun = modulePath === process.argv[1].replace(/\\/g, '/');
    } catch (error) {
      // Module URL could not be parsed; behave as if the file was imported.
      isDirectRun = false;
    }
  }

  if (isDirectRun) {
    const llmApiKey = process.env.ARK_API_KEY;
    const llmModel = 'bot-20250720193048-84fkp';
    const minimaxiApiKey = process.env.MINIMAXI_API_KEY;
    const minimaxiGroupId = process.env.MINIMAXI_GROUP_ID;
    if (!llmApiKey || !minimaxiApiKey || !minimaxiGroupId) {
      console.error('请设置环境变量: ARK_API_KEY, MINIMAXI_API_KEY, MINIMAXI_GROUP_ID');
      process.exit(1);
    }
    // The first CLI argument is the question; fall back to a sample prompt.
    const userInput = process.argv[2] || '你好,请介绍一下人工智能的发展历程';
    chatWithAudioStream({
      userInput,
      llmApiKey,
      llmModel,
      minimaxiApiKey,
      minimaxiGroupId,
    }).catch((error) => {
      console.error(error);
      // Report the failure to the shell instead of exiting with status 0.
      process.exitCode = 1;
    });
  }
}
export { chatWithAudioStream, playAudioStream, playAudioStreamNode };

59
src/llm_stream.js Normal file
View File

@ -0,0 +1,59 @@
/**
 * Calls the Ark bot chat-completions endpoint in streaming mode, echoing each
 * received piece of text as it arrives and returning the complete raw text.
 *
 * The returned string is the unparsed response stream exactly as received;
 * extracting the message content is left to the caller.
 *
 * @param {object} params
 * @param {string} params.apiKey - Bearer token for the Authorization header.
 * @param {string} params.model - Model/bot id sent in the request body.
 * @param {Array<{role: string, content: string}>} params.messages - Chat messages.
 * @returns {Promise<string>} The full raw response text.
 * @throws {Error} On a non-2xx HTTP status or when the response has no body.
 */
async function requestLLMStream({ apiKey, model, messages }) {
  const response = await fetch('https://ark.cn-beijing.volces.com/api/v3/bots/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model,
      stream: true,
      stream_options: { include_usage: true },
      messages,
    }),
  });
  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }
  if (!response.body) {
    throw new Error('LLM response has no readable body');
  }

  // `process.stdout` only exists under Node.js; this module is also imported
  // by browser code, where the console is the only available sink.
  const hasStdout = typeof process !== 'undefined' && typeof process.stdout?.write === 'function';
  const echo = (text) => {
    if (!text) {
      return;
    }
    if (hasStdout) {
      process.stdout.write(text);
    } else {
      console.log(text);
    }
  };

  const reader = response.body.getReader();
  const decoder = new TextDecoder('utf-8');
  let buffer = '';
  for (;;) {
    const { value, done } = await reader.read();
    if (value) {
      // `stream: true` keeps a multi-byte character split across reads intact.
      const chunk = decoder.decode(value, { stream: true });
      buffer += chunk;
      echo(chunk);
    }
    if (done) {
      break;
    }
  }

  // Flush any bytes the decoder was still holding back.
  const tail = decoder.decode();
  buffer += tail;
  echo(tail);

  return buffer;
}
// Example usage: runs only when this file is launched directly with Node.js,
// never when it is imported. This file is an ES module, so the CommonJS check
// `require.main === module` would throw a ReferenceError at load time; instead
// the entry script path is compared with this module's own URL. No Node-only
// import is used for the comparison so the module stays loadable elsewhere.
{
  let isDirectRun = false;
  if (typeof process !== 'undefined' && process.argv?.[1]) {
    try {
      // Strip the leading slash that file URLs put in front of Windows drive letters.
      const modulePath = decodeURIComponent(new URL(import.meta.url).pathname)
        .replace(/^\/([A-Za-z]:)/, '$1');
      isDirectRun = modulePath === process.argv[1].replace(/\\/g, '/');
    } catch (error) {
      // Module URL could not be parsed; behave as if the file was imported.
      isDirectRun = false;
    }
  }

  if (isDirectRun) {
    const apiKey = process.env.ARK_API_KEY;
    if (!apiKey) {
      console.error('请设置环境变量 ARK_API_KEY');
      process.exit(1);
    }
    requestLLMStream({
      apiKey,
      model: 'bot-20250720193048-84fkp',
      messages: [
        { role: 'system', content: 'You are a helpful assistant.' },
        { role: 'user', content: 'Hello!' },
      ],
    }).catch((error) => {
      console.error(error);
      // Report the failure to the shell instead of exiting with status 0.
      process.exitCode = 1;
    });
  }
}
export { requestLLMStream };

116
src/minimaxi_stream.js Normal file
View File

@ -0,0 +1,116 @@
/**
 * Parses one line of the streamed synthesis response.
 *
 * Accepts both bare JSON lines and server-sent-event lines (`data: {...}`).
 * Blank lines, SSE comments/field lines and `[DONE]` markers yield `null`.
 *
 * @param {string} line - A single line of the response stream.
 * @returns {object|null} The parsed object, or `null` if the line carries none.
 */
function parseMinimaxiStreamLine(line) {
  const trimmed = line.trim();
  if (!trimmed || trimmed.startsWith(':') || /^(event|id|retry):/.test(trimmed)) {
    return null;
  }
  const payload = trimmed.startsWith('data:') ? trimmed.slice('data:'.length).trim() : trimmed;
  if (!payload || payload === '[DONE]') {
    return null;
  }
  try {
    const parsed = JSON.parse(payload);
    return parsed !== null && typeof parsed === 'object' ? parsed : null;
  } catch (e) {
    console.error('解析chunk失败:', e, line);
    return null;
  }
}

/**
 * Requests speech synthesis from the minimaxi `t2a_v2` endpoint, in streaming
 * or non-streaming mode, logging what is received and returning the result.
 *
 * In streaming mode the hex audio of all chunks is concatenated. If a chunk
 * with `data.status === 2` was seen, that object is returned with `data.audio`
 * replaced by the concatenated audio; otherwise `{ data: { audio } }` is returned.
 *
 * @param {object} params
 * @param {string} params.apiKey - Bearer token for the Authorization header.
 * @param {string} params.groupId - Group id appended to the endpoint URL.
 * @param {object} params.body - Request payload; its `stream` field is overridden by `stream`.
 * @param {boolean} [params.stream=true] - Whether to request a streamed response.
 * @returns {Promise<object>} Parsed JSON (non-streaming) or the merged streaming result.
 * @throws {Error} On a non-2xx HTTP status.
 */
async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
  const url = `https://api.minimaxi.com/v1/t2a_v2/${groupId}`;
  const reqBody = { ...body, stream };
  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(reqBody),
  });
  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }

  if (!stream) {
    // Non-streaming: the whole result is a single JSON document.
    const result = await response.json();
    console.log(JSON.stringify(result, null, 2));
    return result;
  }

  // Streaming: parse line by line and merge the audio of every chunk.
  const reader = response.body.getReader();
  const decoder = new TextDecoder('utf-8');
  let pending = '';
  let audioHex = '';
  let lastFullResult = null;

  const handleLine = (line) => {
    const obj = parseMinimaxiStreamLine(line);
    if (!obj) {
      return;
    }
    const chunkAudio = obj.data?.audio;
    const isFinalChunk = obj.data?.status === 2;
    if (chunkAudio) {
      // NOTE(review): the final chunk appears to repeat the complete audio
      // rather than carry a new segment — confirm against the API docs. It is
      // only skipped when it is byte-identical to what was already collected,
      // so a genuinely new final segment is still appended.
      const repeatsCollectedAudio = isFinalChunk && audioHex !== '' && chunkAudio === audioHex;
      if (!repeatsCollectedAudio) {
        audioHex += chunkAudio;
      }
    }
    // status === 2 marks the last chunk; keep its full structure for the result.
    if (isFinalChunk) {
      lastFullResult = obj;
    }
    console.log('chunk:', JSON.stringify(obj));
  };

  for (;;) {
    const { value, done } = await reader.read();
    if (value) {
      pending += decoder.decode(value, { stream: true });
      const lines = pending.split('\n');
      // The last piece may be an incomplete line; keep it for the next read.
      pending = lines.pop();
      lines.forEach(handleLine);
    }
    if (done) {
      break;
    }
  }

  // The stream may end without a trailing newline; do not drop that last line.
  pending += decoder.decode();
  if (pending.trim()) {
    handleLine(pending);
  }

  if (lastFullResult) {
    lastFullResult.data.audio = audioHex;
    console.log('最终合成结果:', JSON.stringify(lastFullResult, null, 2));
    return lastFullResult;
  }
  // No final chunk was seen; return just the merged audio.
  return { data: { audio: audioHex } };
}
// Example usage: runs only when this file is launched directly with Node.js,
// never when it is imported. This file is an ES module, so the CommonJS check
// `require.main === module` would throw a ReferenceError at load time; instead
// the entry script path is compared with this module's own URL. No Node-only
// import is used for the comparison because the module is also loaded by
// browser code.
{
  let isDirectRun = false;
  if (typeof process !== 'undefined' && process.argv?.[1]) {
    try {
      // Strip the leading slash that file URLs put in front of Windows drive letters.
      const modulePath = decodeURIComponent(new URL(import.meta.url).pathname)
        .replace(/^\/([A-Za-z]:)/, '$1');
      isDirectRun = modulePath === process.argv[1].replace(/\\/g, '/');
    } catch (error) {
      // Module URL could not be parsed; behave as if the file was imported.
      isDirectRun = false;
    }
  }

  if (isDirectRun) {
    const apiKey = process.env.MINIMAXI_API_KEY;
    const groupId = process.env.MINIMAXI_GROUP_ID;
    if (!apiKey || !groupId) {
      console.error('请设置环境变量 MINIMAXI_API_KEY 和 MINIMAXI_GROUP_ID');
      process.exit(1);
    }
    const baseBody = {
      model: 'speech-02-hd',
      text: '真正的危险不是计算机开始像人一样思考,而是人开始像计算机一样思考。计算机只是可以帮我们处理一些简单事务。',
      language_boost: 'auto',
      output_format: 'hex',
      voice_setting: {
        voice_id: 'male-qn-qingse',
        speed: 1,
        vol: 1,
        pitch: 0,
        emotion: 'happy',
      },
      audio_setting: {
        sample_rate: 32000,
        bitrate: 128000,
        format: 'mp3',
      },
    };
    // Run the non-streaming request first, then the streaming one.
    requestMinimaxi({
      apiKey,
      groupId,
      body: baseBody,
      stream: false,
    }).then(() => {
      return requestMinimaxi({
        apiKey,
        groupId,
        body: baseBody,
        stream: true,
      });
    }).catch((error) => {
      console.error(error);
      // Report the failure to the shell instead of exiting with status 0.
      process.exitCode = 1;
    });
  }
}
export { requestMinimaxi };

42
src/video_audio_sync.js Normal file
View File

@ -0,0 +1,42 @@
import { requestMinimaxi } from './minimaxi_stream.js';
/**
 * Appends a <video> element for `videoPath`, synthesizes speech for `text`
 * and starts video and audio playback together.
 *
 * @param {string} videoPath - URL assigned to the video element's `src`.
 * @param {string} text - Text to synthesize.
 * @param {object} [credentials] - Optional credentials; when omitted, the
 *   `MINIMAXI_API_KEY` / `MINIMAXI_GROUP_ID` environment values are used.
 * @param {string} [credentials.apiKey] - API key for the synthesis request.
 * @param {string} [credentials.groupId] - Group id for the synthesis request.
 * @returns {Promise<void>} Resolves once both video and audio have started.
 * @throws {Error} When credentials are missing, or synthesis, decoding or
 *   starting the video fails.
 */
export async function playVideoWithAudio(videoPath, text, { apiKey, groupId } = {}) {
  // `process` does not exist in a plain browser. The literal `process.env.X`
  // expressions are kept (instead of a `typeof process` guard) so bundlers
  // that substitute them at build time still work; the ReferenceError raised
  // when nothing substitutes them is turned into `undefined`.
  const readEnv = (read) => {
    try {
      return read();
    } catch {
      return undefined;
    }
  };
  const resolvedApiKey = apiKey ?? readEnv(() => process.env.MINIMAXI_API_KEY);
  const resolvedGroupId = groupId ?? readEnv(() => process.env.MINIMAXI_GROUP_ID);
  if (!resolvedApiKey || !resolvedGroupId) {
    throw new Error('Missing MiniMax credentials: pass { apiKey, groupId } or set MINIMAXI_API_KEY and MINIMAXI_GROUP_ID');
  }

  // 1. Set up the video element.
  const video = document.createElement('video');
  video.src = videoPath;
  document.body.appendChild(video);

  // 2. Synthesize the audio.
  const audioStream = await requestMinimaxi({
    apiKey: resolvedApiKey,
    groupId: resolvedGroupId,
    body: {
      model: 'speech-02-hd',
      text,
      output_format: 'hex',
      voice_setting: {
        voice_id: 'male-qn-qingse',
        speed: 1,
      },
    },
    stream: true,
  });

  // 3. Decode the hex audio into a playable buffer.
  const audioCtx = new AudioContext();
  const closeContext = async () => {
    try {
      await audioCtx.close();
    } catch (closeError) {
      console.warn('Failed to close AudioContext:', closeError);
    }
  };

  try {
    const audioBuffer = await audioCtx.decodeAudioData(
      hexToArrayBuffer(audioStream.data.audio)
    );

    // 4. Start both together.
    const source = audioCtx.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(audioCtx.destination);
    // Release the audio context once the clip has finished.
    source.onended = () => {
      closeContext();
    };

    const videoStarted = video.play();
    source.start(0);
    try {
      // `play()` rejects when the browser refuses playback; surface that and
      // stop the audio so it does not run without its video.
      await videoStarted;
    } catch (playError) {
      source.stop();
      throw playError;
    }
  } catch (error) {
    await closeContext();
    throw error;
  }
}
/**
 * Converts a hexadecimal string into an ArrayBuffer (two hex digits per byte).
 *
 * @param {string} hex - Hex digits (upper or lower case); may be empty.
 * @returns {ArrayBuffer} Buffer holding `hex.length / 2` bytes.
 * @throws {TypeError} If `hex` is not a string.
 * @throws {RangeError} If `hex` has an odd length or contains non-hex characters.
 */
function hexToArrayBuffer(hex) {
  if (typeof hex !== 'string') {
    throw new TypeError('hexToArrayBuffer expects a hex string');
  }
  if (hex.length % 2 !== 0) {
    throw new RangeError('Hex string must contain an even number of characters');
  }
  if (!/^[0-9a-fA-F]*$/.test(hex)) {
    throw new RangeError('Hex string contains non-hexadecimal characters');
  }

  const bytes = new Uint8Array(hex.length / 2);
  for (let i = 0; i < bytes.length; i += 1) {
    bytes[i] = Number.parseInt(hex.slice(i * 2, i * 2 + 2), 16);
  }
  return bytes.buffer;
}