Commit c95e6a2552 — "local initial"
Author: Song367, 2025-07-25 17:00:26 +08:00
Parent: 6f087fe874
9 changed files with 382 additions and 148 deletions

src/chat_with_audio.js

@@ -2,62 +2,61 @@
 import { requestLLMStream } from './llm_stream.js';
 import { requestMinimaxi } from './minimaxi_stream.js';
+import { getLLMConfig, getMinimaxiConfig, getAudioConfig, validateConfig } from './config.js';
-async function chatWithAudioStream({ userInput, llmApiKey, llmModel, minimaxiApiKey, minimaxiGroupId }) {
+// Flag that prevents duplicate playback
+let isPlaying = false;
+async function chatWithAudioStream(userInput) {
+  // Validate the configuration
+  if (!validateConfig()) {
+    throw new Error('Configuration is incomplete. Please check the API key settings in config.js');
+  }
   console.log('User input:', userInput);
+  // Load the configuration
+  const llmConfig = getLLMConfig();
+  const minimaxiConfig = getMinimaxiConfig();
+  const audioConfig = getAudioConfig();
   // 1. Request the LLM answer
   console.log('\n=== Requesting LLM answer ===');
   const llmResponse = await requestLLMStream({
-    apiKey: llmApiKey,
+    apiKey: llmConfig.apiKey,
-    model: llmModel,
+    model: llmConfig.model,
     messages: [
       { role: 'system', content: 'You are a helpful assistant.' },
       { role: 'user', content: userInput },
     ],
   });
-  // Extract the LLM answer (assumes a JSON response containing a content field)
+  // Extract the LLM answer (the content is now returned directly)
-  let llmContent = '';
+  const llmContent = llmResponse;
-  try {
-    const llmData = JSON.parse(llmResponse);
-    llmContent = llmData.choices?.[0]?.message?.content || llmResponse;
-  } catch (e) {
-    llmContent = llmResponse;
-  }
   console.log('\n=== LLM answer ===');
-  console.log(llmContent);
+  console.log("llmResponse: ", llmContent);
   // 2. Synthesize the audio
   console.log('\n=== Starting audio synthesis ===');
   const audioResult = await requestMinimaxi({
-    apiKey: minimaxiApiKey,
+    apiKey: minimaxiConfig.apiKey,
-    groupId: minimaxiGroupId,
+    groupId: minimaxiConfig.groupId,
     body: {
-      model: 'speech-02-hd',
+      model: audioConfig.model,
       text: llmContent,
-      stream: true,
+      stream: audioConfig.stream,
-      language_boost: 'auto',
+      language_boost: audioConfig.language_boost,
-      output_format: 'hex',
+      output_format: audioConfig.output_format,
-      voice_setting: {
+      voice_setting: audioConfig.voiceSetting,
-        voice_id: 'male-qn-qingse',
+      audio_setting: audioConfig.audioSetting,
-        speed: 1,
-        vol: 1,
-        pitch: 0,
-        emotion: 'happy',
-      },
-      audio_setting: {
-        sample_rate: 32000,
-        bitrate: 128000,
-        format: 'mp3',
-      },
     },
     stream: true,
   });
   // 3. Stream the audio playback
   console.log('\n=== Starting streamed audio playback ===');
   // console.log('Audio data length:', audioResult.data.audio.length);
   await playAudioStream(audioResult.data.audio);
   return {
@@ -69,6 +68,16 @@ async function chatWithAudioStream({ userInput, llmApiKey, llmModel, minimaxiApi
 // Stream audio playback
 async function playAudioStream(audioHex) {
+  if (isPlaying) {
+    console.log('Audio is already playing; skipping duplicate playback');
+    return;
+  }
+  console.log('=== Starting audio playback ===');
+  console.log('Audio data length:', audioHex.length);
+  isPlaying = true;
   // Convert hex to an ArrayBuffer
   const audioBuffer = hexToArrayBuffer(audioHex);
@@ -93,11 +102,13 @@ async function playAudioStream(audioHex) {
     return new Promise((resolve) => {
       source.onended = () => {
         console.log('Audio playback finished');
+        isPlaying = false;
         resolve();
       };
     });
   } catch (error) {
     console.error('Audio playback failed:', error);
+    isPlaying = false;
     throw error;
   }
 }
@@ -113,17 +124,23 @@ function hexToArrayBuffer(hex) {
 // Audio playback in a Node.js environment (using the play-sound library)
 async function playAudioStreamNode(audioHex) {
-  const fs = require('fs');
-  const path = require('path');
+  // Check whether we are running in Node.js
+  if (typeof window !== 'undefined') {
+    console.warn('playAudioStreamNode can only be used in a Node.js environment');
+    return;
+  }
-  // Convert hex to a Buffer
-  const audioBuffer = Buffer.from(audioHex, 'hex');
-  // Save to a temporary file
-  const tempFile = path.join(process.cwd(), 'temp_audio.mp3');
-  fs.writeFileSync(tempFile, audioBuffer);
   try {
+    const fs = require('fs');
+    const path = require('path');
+    // Convert hex to a Buffer
+    const audioBuffer = Buffer.from(audioHex, 'hex');
+    // Save to a temporary file
+    const tempFile = path.join(process.cwd(), 'temp_audio.mp3');
+    fs.writeFileSync(tempFile, audioBuffer);
     // Play with the system default player
     const { exec } = require('child_process');
     const platform = process.platform;
@@ -158,27 +175,4 @@ async function playAudioStreamNode(audioHex) {
   }
 }
-// Example usage
-if (require.main === module) {
-  const llmApiKey = process.env.ARK_API_KEY;
-  const llmModel = 'bot-20250720193048-84fkp';
-  const minimaxiApiKey = process.env.MINIMAXI_API_KEY;
-  const minimaxiGroupId = process.env.MINIMAXI_GROUP_ID;
-  if (!llmApiKey || !minimaxiApiKey || !minimaxiGroupId) {
-    console.error('Please set the environment variables: ARK_API_KEY, MINIMAXI_API_KEY, MINIMAXI_GROUP_ID');
-    process.exit(1);
-  }
-  const userInput = process.argv[2] || 'Hello, please give an overview of the history of artificial intelligence';
-  chatWithAudioStream({
-    userInput,
-    llmApiKey,
-    llmModel,
-    minimaxiApiKey,
-    minimaxiGroupId,
-  }).catch(console.error);
-}
 export { chatWithAudioStream, playAudioStream, playAudioStreamNode };
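With the CLI example block removed, chatWithAudioStream is now driven from the browser page. A minimal usage sketch (illustrative, not part of the commit; it assumes src/config.js has been created from config.example.js and filled with real keys, and that it runs in browser code where AudioContext is available):

import { chatWithAudioStream } from './chat_with_audio.js';

// The new signature takes only the user text; API keys now come from config.js.
chatWithAudioStream('Give me a one-sentence introduction to WebRTC.')
  .then((result) => {
    // index.js reads result.llmResponse the same way after sendText()
    console.log('LLM answer:', result.llmResponse);
  })
  .catch(console.error);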

src/config.example.js (new file, 94 lines)

@@ -0,0 +1,94 @@
// Example configuration file - copy this file to config.js and fill in your real API keys
export const config = {
// LLM API configuration
llm: {
apiKey: 'your_ark_api_key_here', // Replace with your real ARK API key
model: 'bot-20250720193048-84fkp',
},
// Minimaxi API configuration
minimaxi: {
apiKey: 'your_minimaxi_api_key_here', // Replace with your real Minimaxi API key
groupId: 'your_minimaxi_group_id_here', // Replace with your real Minimaxi Group ID
},
// Audio configuration
audio: {
model: 'speech-02-hd',
voiceSetting: {
voice_id: 'yantu-qinggang',
speed: 1,
vol: 1,
pitch: 0,
emotion: 'happy',
},
audioSetting: {
sample_rate: 32000,
bitrate: 128000,
format: 'mp3',
},
},
// System configuration
system: {
language_boost: 'auto',
output_format: 'hex',
stream: true,
},
};
// Check that the configuration is complete
export function validateConfig() {
const requiredFields = [
'llm.apiKey',
'llm.model',
'minimaxi.apiKey',
'minimaxi.groupId'
];
const missingFields = [];
for (const field of requiredFields) {
const keys = field.split('.');
let value = config;
for (const key of keys) {
value = value[key];
if (!value) break;
}
if (!value || value === 'your_ark_api_key_here' || value === 'your_minimaxi_api_key_here' || value === 'your_minimaxi_group_id_here') {
missingFields.push(field);
}
}
if (missingFields.length > 0) {
console.warn('Configuration is incomplete; please check the following fields:', missingFields);
return false;
}
return true;
}
// Convenience accessors for the configuration
export function getLLMConfig() {
return {
apiKey: config.llm.apiKey,
model: config.llm.model,
};
}
export function getMinimaxiConfig() {
return {
apiKey: config.minimaxi.apiKey,
groupId: config.minimaxi.groupId,
};
}
export function getAudioConfig() {
return {
model: config.audio.model,
voiceSetting: config.audio.voiceSetting,
audioSetting: config.audio.audioSetting,
...config.system,
};
}
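For illustration (a sketch, not part of the file): getAudioConfig() flattens the audio and system sections into the single object that chat_with_audio.js forwards to the TTS request. With the placeholder values above it yields:

import { getAudioConfig } from './config.js';

const audioConfig = getAudioConfig();
// {
//   model: 'speech-02-hd',
//   voiceSetting: { voice_id: 'yantu-qinggang', speed: 1, vol: 1, pitch: 0, emotion: 'happy' },
//   audioSetting: { sample_rate: 32000, bitrate: 128000, format: 'mp3' },
//   language_boost: 'auto',
//   output_format: 'hex',
//   stream: true,
// }
console.log(audioConfig.output_format); // 'hex'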

src/config.js (new file, 94 lines)

@@ -0,0 +1,94 @@
// Configuration management file
export const config = {
// LLM API configuration
llm: {
apiKey: 'd012651b-a65b-4b13-8ff3-cc4ff3a29783', // Replace with your real API key
model: 'bot-20250720193048-84fkp',
},
// Minimaxi API configuration
minimaxi: {
apiKey: 'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJHcm91cE5hbWUiOiLkuIrmtbfpopzpgJTnp5HmioDmnInpmZDlhazlj7giLCJVc2VyTmFtZSI6IuadqOmqpSIsIkFjY291bnQiOiIiLCJTdWJqZWN0SUQiOiIxNzI4NzEyMzI0OTc5NjI2ODM5IiwiUGhvbmUiOiIxMzM4MTU1OTYxOCIsIkdyb3VwSUQiOiIxNzI4NzEyMzI0OTcxMjM4MjMxIiwiUGFnZU5hbWUiOiIiLCJNYWlsIjoiIiwiQ3JlYXRlVGltZSI6IjIwMjUtMDYtMTYgMTY6Mjk6NTkiLCJUb2tlblR5cGUiOjEsImlzcyI6Im1pbmltYXgifQ.D_JF0-nO89NdMZCYq4ocEyqxtZ9SeEdtMvbeSkZTWspt0XfX2QpPAVh-DI3MCPZTeSmjNWLf4fA_Th2zpVrj4UxWMbGKBeLZWLulNpwAHGMUTdqenuih3daCDPCzs0duhlFyQnZgGcEOGQ476HL72N2klujP8BUy_vfAh_Zv0po-aujQa5RxardDSOsbs49NTPEw0SQEXwaJ5bVmiZ5s-ysJ9pZWSEiyJ6SX9z3JeZHKj9DxHdOw5roZR8izo54e4IoqyLlzEfhOMW7P15-ffDH3M6HGiEmeBaGRYGAIciELjZS19ONNMKsTj-wXNGWtKG-sjAB1uuqkkT5Ul9Dunw', // Replace with your real API key
groupId: '1728712324971238231', // Replace with your real Group ID
},
// Audio configuration
audio: {
model: 'speech-02-hd',
voiceSetting: {
voice_id: 'yantu-qinggang',
speed: 1,
vol: 1,
pitch: 0,
emotion: 'happy',
},
audioSetting: {
sample_rate: 32000,
bitrate: 128000,
format: 'mp3',
},
},
// System configuration
system: {
language_boost: 'auto',
output_format: 'hex',
stream: true,
},
};
// Check that the configuration is complete
export function validateConfig() {
const requiredFields = [
'llm.apiKey',
'llm.model',
'minimaxi.apiKey',
'minimaxi.groupId'
];
const missingFields = [];
for (const field of requiredFields) {
const keys = field.split('.');
let value = config;
for (const key of keys) {
value = value[key];
if (!value) break;
}
if (!value || value === 'your_ark_api_key_here' || value === 'your_minimaxi_api_key_here' || value === 'your_minimaxi_group_id_here') {
missingFields.push(field);
}
}
if (missingFields.length > 0) {
console.warn('Configuration is incomplete; please check the following fields:', missingFields);
return false;
}
return true;
}
// Convenience accessors for the configuration
export function getLLMConfig() {
return {
apiKey: config.llm.apiKey,
model: config.llm.model,
};
}
export function getMinimaxiConfig() {
return {
apiKey: config.minimaxi.apiKey,
groupId: config.minimaxi.groupId,
};
}
export function getAudioConfig() {
return {
model: config.audio.model,
voiceSetting: config.audio.voiceSetting,
audioSetting: config.audio.audioSetting,
...config.system,
};
}

src/debug_audio.js (new file, 26 lines)

@@ -0,0 +1,26 @@
// Debug audio data
function debugAudioData(audioHex) {
console.log('=== Audio data debug ===');
console.log('Audio data length:', audioHex.length);
console.log('First 100 characters of the audio data:', audioHex.substring(0, 100));
console.log('Last 100 characters of the audio data:', audioHex.substring(audioHex.length - 100));
// Check for a repeated pattern
const halfLength = Math.floor(audioHex.length / 2);
const firstHalf = audioHex.substring(0, halfLength);
const secondHalf = audioHex.substring(halfLength);
if (firstHalf === secondHalf) {
console.log('⚠️ Warning: the audio data may be duplicated!');
} else {
console.log('✅ No duplication in the audio data');
}
}
// When running in a browser environment
if (typeof window !== 'undefined') {
window.debugAudioData = debugAudioData;
console.log('Audio debug function attached to window.debugAudioData');
}
export { debugAudioData };
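A quick usage sketch for the duplicate check above (the hex strings are made-up placeholders):

import { debugAudioData } from './debug_audio.js';

// A payload whose second half repeats the first half triggers the duplicate warning.
debugAudioData('fffb9064' + 'fffb9064');
// A normal, non-repeating payload logs the "no duplication" branch instead.
debugAudioData('fffb906400112233');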


@@ -77,6 +77,6 @@
     <video id="remoteVideo" autoplay playsinline style="display: none;"></video>
     <script src="/socket.io/socket.io.js"></script>
-    <script src="index.js"></script>
+    <script type="module" src="index.js"></script>
   </body>
 </html>


@@ -1,4 +1,6 @@
 // WebRTC audio/video call application
+import { chatWithAudioStream } from './chat_with_audio.js';
 class WebRTCChat {
   constructor() {
     this.socket = null;
@@ -582,15 +584,25 @@ class WebRTCChat {
     }
   }
-  sendText() {
+  async sendText() {
     const text = this.textInput.value.trim();
     if (text) {
       this.socket.emit('text-input', { text });
       this.logMessage(`Sent text: ${text}`, 'info');
       this.textInput.value = '';
-      // Find the matching video for the text and switch to it
-      this.handleTextInput(text);
+      try {
+        // Call chat_with_audio to get the LLM answer and synthesize the audio
+        this.logMessage('Processing text, please wait...', 'info');
+        const result = await chatWithAudioStream(text);
+        this.logMessage(`LLM answer: ${result.llmResponse}`, 'success');
+        // Find the matching video for the text and switch to it
+        await this.handleTextInput(text);
+      } catch (error) {
+        this.logMessage(`Failed to process text: ${error.message}`, 'error');
+        console.error('chatWithAudioStream error:', error);
+      }
     }
   }

src/llm_stream.js

@@ -6,6 +6,8 @@ async function requestLLMStream({ apiKey, model, messages }) {
     headers: {
       'Authorization': `Bearer ${apiKey}`,
       'Content-Type': 'application/json',
+      'Accept': 'text/event-stream',
+      'Cache-Control': 'no-cache',
     },
     body: JSON.stringify({
       model,
@@ -23,6 +25,7 @@ async function requestLLMStream({ apiKey, model, messages }) {
   const decoder = new TextDecoder('utf-8');
   let done = false;
   let buffer = '';
+  let content = '';
   while (!done) {
     const { value, done: doneReading } = await reader.read();
@@ -30,30 +33,43 @@ async function requestLLMStream({ apiKey, model, messages }) {
     if (value) {
       const chunk = decoder.decode(value, { stream: true });
       buffer += chunk;
-      // Print each received chunk
-      process.stdout.write(chunk);
+      // Handle SSE-formatted data
+      const lines = buffer.split('\n');
+      buffer = lines.pop(); // the last line may be incomplete; keep it for the next read
+      for (const line of lines) {
+        if (!line.trim()) continue;
+        // Check whether this is an SSE data line
+        if (line.startsWith('data:')) {
+          const jsonStr = line.substring(5).trim(); // strip the 'data:' prefix
+          if (jsonStr === '[DONE]') {
+            console.log('LLM SSE stream finished');
+            continue;
+          }
+          try {
+            const obj = JSON.parse(jsonStr);
+            if (obj.choices && obj.choices[0] && obj.choices[0].delta && obj.choices[0].delta.content) {
+              const deltaContent = obj.choices[0].delta.content;
+              content += deltaContent;
+              console.log('LLM content fragment:', deltaContent);
+            }
+          } catch (e) {
+            console.error('Failed to parse LLM SSE data:', e, 'raw data:', jsonStr);
+          }
+        } else if (line.startsWith('event: ') || line.startsWith('id: ') || line.startsWith('retry: ')) {
+          // Ignore other SSE fields
+          continue;
+        }
+      }
     }
   }
-  // Optional: return the full content
-  return buffer;
+  // Return the full content
+  return content;
 }
-// Example usage
-if (require.main === module) {
-  const apiKey = process.env.ARK_API_KEY;
-  if (!apiKey) {
-    console.error('Please set the environment variable ARK_API_KEY');
-    process.exit(1);
-  }
-  requestLLMStream({
-    apiKey,
-    model: 'bot-20250720193048-84fkp',
-    messages: [
-      { role: 'system', content: 'You are a helpful assistant.' },
-      { role: 'user', content: 'Hello!' },
-    ],
-  }).catch(console.error);
-}
 export { requestLLMStream };
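For reference, a sketch of the SSE framing this parser assumes (illustrative payload, not a captured response). Running the same data:/[DONE] handling over it accumulates the delta contents:

// Each chunk is a "data:" line carrying one JSON object; the stream ends with "data: [DONE]".
const sampleChunk =
  'data: {"choices":[{"delta":{"content":"Hello"}}]}\n' +
  'data: {"choices":[{"delta":{"content":" world"}}]}\n' +
  'data: [DONE]\n';

let content = '';
for (const line of sampleChunk.split('\n')) {
  if (!line.startsWith('data:')) continue;
  const jsonStr = line.substring(5).trim();
  if (jsonStr === '[DONE]') break;
  const obj = JSON.parse(jsonStr);
  const delta = obj.choices?.[0]?.delta?.content;
  if (delta) content += delta;
}
console.log(content); // "Hello world"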

src/minimaxi_stream.js

@@ -1,13 +1,15 @@
 // Call the minimaxi model API in streaming or non-streaming mode, and print/return the content
 async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
-  const url = `https://api.minimaxi.com/v1/t2a_v2/${groupId}`;
+  const url = `https://api.minimaxi.com/v1/t2a_v2`;
   const reqBody = { ...body, stream };
   const response = await fetch(url, {
     method: 'POST',
     headers: {
       'Authorization': `Bearer ${apiKey}`,
       'Content-Type': 'application/json',
+      'Accept': 'text/event-stream',
+      'Cache-Control': 'no-cache',
     },
     body: JSON.stringify(reqBody),
   });
@@ -36,29 +38,66 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
     if (value) {
       const chunk = decoder.decode(value, { stream: true });
       buffer += chunk;
-      // Handle multiple JSON objects, split on \n
+      // console.log('Received raw chunk:', chunk);
+      // Handle SSE-formatted data, split on \n
       let lines = buffer.split('\n');
       buffer = lines.pop(); // the last line may be incomplete; keep it for the next read
       for (const line of lines) {
         if (!line.trim()) continue;
-        try {
-          const obj = JSON.parse(line);
-          if (obj.data && obj.data.audio) {
-            audioHex += obj.data.audio;
-          }
-          // status=2 marks the last chunk; keep the full structure
-          if (obj.data && obj.data.status === 2) {
-            lastFullResult = obj;
-          }
-          // Print each chunk in real time
-          console.log('chunk:', JSON.stringify(obj));
-        } catch (e) {
-          console.error('Failed to parse chunk:', e, line);
-        }
+        // console.log('Processing line:', line);
+        // Check whether this is an SSE data line
+        if (line.startsWith('data:')) {
+          const jsonStr = line.substring(6); // strip the 'data: ' prefix
+          // console.log('Extracted JSON string:', jsonStr);
+          if (jsonStr.trim() === '[DONE]') {
+            console.log('SSE stream finished');
+            continue;
+          }
+          try {
+            const obj = JSON.parse(jsonStr);
+            // Parse each streamed chunk and merge the audio
+            if (obj.data && obj.data.audio) {
+              audioHex += obj.data.audio;
+            }
+            // status=2 marks the last chunk; keep the full structure
+            if (obj.data && obj.data.status === 2) {
+              lastFullResult = obj;
+              console.log('Received final status');
+            }
+            // Print each chunk as it arrives
+            console.log('Parsed successfully:', JSON.stringify(obj));
+          } catch (e) {
+            console.error('Failed to parse SSE data:', e, 'raw data:', jsonStr);
+          }
+        } else if (line.startsWith('event: ') || line.startsWith('id: ') || line.startsWith('retry: ')) {
+          // Ignore other SSE fields
+          console.log('Ignoring SSE field:', line);
+          continue;
+        } else if (line.trim() && !line.startsWith('data:')) {
+          // Try parsing the line directly (non-SSE compatibility), avoiding duplicate handling
+          console.log('Trying direct parse:', line);
+          try {
+            const obj = JSON.parse(line);
+            if (obj.data && obj.data.audio) {
+              audioHex += obj.data.audio;
+            }
+            if (obj.data && obj.data.status === 2) {
+              lastFullResult = obj;
+            }
+            console.log('Direct parse succeeded:', JSON.stringify(obj));
+          } catch (e) {
+            console.error('Failed to parse chunk:', e, line);
+          }
+        }
       }
     }
   }
   // Assemble the final structure
+  console.log('Total audio data length:', audioHex.length);
   if (lastFullResult) {
     lastFullResult.data.audio = audioHex;
     console.log('Final assembled result:', JSON.stringify(lastFullResult, null, 2));
@@ -70,47 +109,4 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
   }
 }
-// Example usage
-if (require.main === module) {
-  const apiKey = process.env.MINIMAXI_API_KEY;
-  const groupId = process.env.MINIMAXI_GROUP_ID;
-  if (!apiKey || !groupId) {
-    console.error('Please set the environment variables MINIMAXI_API_KEY and MINIMAXI_GROUP_ID');
-    process.exit(1);
-  }
-  const baseBody = {
-    model: 'speech-02-hd',
-    text: 'The real danger is not that computers will start to think like humans, but that humans will start to think like computers. Computers can only help us handle simple tasks.',
-    language_boost: 'auto',
-    output_format: 'hex',
-    voice_setting: {
-      voice_id: 'male-qn-qingse',
-      speed: 1,
-      vol: 1,
-      pitch: 0,
-      emotion: 'happy',
-    },
-    audio_setting: {
-      sample_rate: 32000,
-      bitrate: 128000,
-      format: 'mp3',
-    },
-  };
-  // Non-streaming
-  requestMinimaxi({
-    apiKey,
-    groupId,
-    body: baseBody,
-    stream: false,
-  }).then(() => {
-    // Streaming
-    return requestMinimaxi({
-      apiKey,
-      groupId,
-      body: baseBody,
-      stream: true,
-    });
-  }).catch(console.error);
-}
 export { requestMinimaxi };
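To make the merge logic above concrete, a sketch with made-up chunks (the hex values are placeholders): every chunk's data.audio is appended, and the chunk with data.status === 2 supplies the envelope that receives the merged hex:

const chunks = [
  { data: { audio: 'fffb9064', status: 1 } },
  { data: { audio: '000aabbc', status: 1 } },
  { data: { audio: '', status: 2 } },
];

let audioHex = '';
let lastFullResult = null;
for (const obj of chunks) {
  if (obj.data && obj.data.audio) audioHex += obj.data.audio;
  if (obj.data && obj.data.status === 2) lastFullResult = obj;
}
if (lastFullResult) lastFullResult.data.audio = audioHex;
console.log(lastFullResult.data.audio); // 'fffb9064000aabbc'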


@@ -1,4 +1,5 @@
 import { requestMinimaxi } from './minimaxi_stream.js';
+import { getMinimaxiConfig } from './config.js';
 export async function playVideoWithAudio(videoPath, text) {
   // 1. Initialize video playback
@@ -7,15 +8,16 @@ export async function playVideoWithAudio(videoPath, text) {
   document.body.appendChild(video);
   // 2. Start the audio synthesis stream
+  const minimaxiConfig = getMinimaxiConfig();
   const audioStream = await requestMinimaxi({
-    apiKey: process.env.MINIMAXI_API_KEY,
+    apiKey: minimaxiConfig.apiKey,
-    groupId: process.env.MINIMAXI_GROUP_ID,
+    groupId: minimaxiConfig.groupId,
     body: {
       model: 'speech-02-hd',
       text,
-      output_format: 'hex',
+      output_format: 'hex', // hex output is required for the streaming case
       voice_setting: {
-        voice_id: 'male-qn-qingse',
+        voice_id: 'yantu-qinggang',
         speed: 1
       }
     },