WebRtc_QingGan/src/llm_stream.js
songjvcheng 2d08a3963f
All checks were successful
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 3m58s
llm 旁白解决
2025-08-13 10:09:52 +08:00

166 lines
6.0 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

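// Requests the LLM chat-completions endpoint in streaming (SSE) mode and hands the streamed text, segment by segment, to a caller-supplied callback.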
/**
 * Removes narration (stage directions) that the model wraps in brackets.
 *
 * Every complete bracket pair of the supported kinds is deleted together with
 * its content; afterwards runs of whitespace are collapsed to a single space
 * and the result is trimmed. An opening bracket without its closing partner is
 * left untouched, because none of the patterns can match it.
 *
 * @param {string} text - Raw model output. Falsy values are returned unchanged.
 * @returns {string} The text without bracketed narration.
 */
function filterNarration(text) {
  if (!text) return text;

  const narrationPatterns = [
    // Full-width parentheses, written as escapes (U+FF08 / U+FF09) so the
    // pattern cannot silently degrade into `[^]*`, which matches everything.
    /\uFF08[^\uFF09]*\uFF09/g,
    /\([^)]*\)/g, // ASCII parentheses
    /【[^】]*】/g, // CJK lenticular brackets
    /\[[^\]]*\]/g, // ASCII square brackets
    /\{[^}]*\}/g, // curly braces
    /〈[^〉]*〉/g, // CJK angle brackets
    /《[^》]*》/g, // CJK title marks
    /<[^>]*>/g, // ASCII angle brackets
  ];

  // Apply the patterns one after another, in the order listed above.
  const withoutNarration = narrationPatterns.reduce(
    (current, pattern) => current.replace(pattern, ''),
    text,
  );

  // Removing a bracket pair can leave doubled spaces or stray line breaks.
  return withoutNarration.replace(/\s+/g, ' ').trim();
}
// One or more consecutive sentence-level punctuation marks. The full-width CJK
// comma, full stop, colon, semicolon, exclamation mark, question mark and the
// ellipsis character are written as escapes next to their ASCII counterparts.
const SEGMENT_DELIMITER_RUN = /[\uFF0C\u3002\uFF1A\uFF1B\uFF01\uFF1F\u2026,.:;!?]+/g;

/**
 * Cuts already-filtered text into segments that are complete enough to deliver.
 *
 * A sentence is the trimmed text in front of a delimiter run plus that run.
 * Sentences are concatenated until the accumulated text is longer than
 * `minLength`; only then does it become a ready segment.
 *
 * @param {string} text - Text with narration already removed.
 * @param {number} minLength - A segment is ready once it is longer than this.
 * @returns {{ready: string[], remainder: string}} The ready segments and the
 *   untouched rest of `text` (short sentences not yet emitted plus the
 *   unterminated tail).
 */
function splitReadySegments(text, minLength) {
  const ready = [];
  let accumulated = '';
  let sentenceStart = 0; // where the sentence currently being scanned begins
  let consumedUpTo = 0; // end of the last sentence that went into `ready`

  for (const match of text.matchAll(SEGMENT_DELIMITER_RUN)) {
    const sentence = text.slice(sentenceStart, match.index).trim();
    sentenceStart = match.index + match[0].length;
    if (!sentence) continue; // punctuation with nothing in front of it

    // Append the delimiter that actually follows this sentence.
    accumulated += sentence + match[0];
    if (accumulated.length > minLength) {
      ready.push(accumulated);
      accumulated = '';
      consumedUpTo = sentenceStart;
    }
  }

  return { ready, remainder: text.slice(consumedUpTo) };
}

/**
 * Sends a streaming chat-completion request and forwards the generated text to
 * `onSegment` in sentence-sized pieces with bracketed narration removed.
 *
 * The response is read as server-sent events. Every `data:` line carrying
 * `choices[0].delta.content` is appended to the full content and to a pending
 * buffer. After each delta the pending buffer is passed through
 * `filterNarration` and cut at punctuation; each segment longer than
 * `minSegmentLength` is delivered with `isLast === false`. Whatever is left
 * when `[DONE]` arrives or the stream ends is delivered with `isLast === true`.
 *
 * @param {object} options
 * @param {string} options.apiKey - Sent as a Bearer token.
 * @param {string} options.model - Forwarded as `model` in the request body.
 * @param {Array} options.messages - Forwarded as `messages` in the request body.
 * @param {(text: string, isLast: boolean) => (void|Promise<void>)} [options.onSegment]
 *   Awaited for every segment. Without it nothing is segmented and only the
 *   full content is returned.
 * @param {number} [options.minSegmentLength=8] - A segment is delivered once it
 *   is longer than this many UTF-16 code units.
 * @returns {Promise<string>} The complete, unfiltered generated text.
 * @throws {Error} When the HTTP status is not ok. An error thrown by
 *   `onSegment` for the final segment also propagates; errors for intermediate
 *   segments are logged and the text is retried with the next delta.
 */
async function requestLLMStream({ apiKey, model, messages, onSegment, minSegmentLength = 8 }) {
  const response = await fetch('https://ark.cn-beijing.volces.com/api/v3/bots/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      'Cache-Control': 'no-cache',
    },
    body: JSON.stringify({
      model,
      stream: true,
      stream_options: { include_usage: true },
      messages,
    }),
  });
  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder('utf-8');
  let buffer = ''; // undecoded tail of the SSE stream (an incomplete line)
  let content = ''; // everything the model produced, unfiltered
  let pendingText = ''; // text received but not yet delivered to onSegment

  // Delivers every segment that is complete and keeps the rest pending.
  const emitReadySegments = async () => {
    if (!onSegment) return;
    const filtered = filterNarration(pendingText);
    const { ready, remainder } = splitReadySegments(filtered, minSegmentLength);
    if (ready.length === 0) return;

    // filterNarration trims its result; remember a trailing blank so the next
    // delta is not glued onto the last pending word.
    const trailingBlank = /\s$/.test(pendingText) ? ' ' : '';

    let delivered = 0;
    try {
      while (delivered < ready.length) {
        console.log('【已过滤】检测到完整段落:', ready[delivered]);
        await onSegment(ready[delivered], false);
        delivered += 1;
      }
    } catch (err) {
      // Keep the stream alive; undelivered segments stay pending for a retry.
      console.error('处理LLM文本片段失败:', err);
    }
    if (delivered > 0) {
      pendingText = ready.slice(delivered).join('') + remainder + trailingBlank;
    }
  };

  // Delivers whatever is still pending as the final segment.
  const flushPending = async () => {
    const leftover = pendingText.trim();
    pendingText = '';
    if (!leftover || !onSegment) return;

    console.log('处理最后的待处理文本:', leftover);
    const filtered = filterNarration(leftover);
    if (!filtered.trim()) {
      console.log('最后的文本被完全过滤,跳过');
      return;
    }
    console.log('过滤旁白后的最后文本:', filtered);
    await onSegment(filtered, true);
  };

  // Handles one SSE line; anything that is not a `data:` field is ignored.
  const handleLine = async (line) => {
    if (!line.trim() || !line.startsWith('data:')) return;

    const payload = line.slice(5).trim();
    if (payload === '[DONE]') {
      console.log('LLM SSE流结束');
      await flushPending();
      return;
    }

    let event;
    try {
      event = JSON.parse(payload);
    } catch (e) {
      console.error('解析LLM SSE数据失败:', e, '原始数据:', payload);
      return;
    }

    const deltaContent = event?.choices?.[0]?.delta?.content;
    if (!deltaContent) return;

    content += deltaContent;
    pendingText += deltaContent;
    console.log('【未过滤】LLM内容片段:', pendingText);
    await emitReadySegments();
  };

  let streamEnded = false;
  while (!streamEnded) {
    const { value, done } = await reader.read();
    streamEnded = done;
    if (value) {
      buffer += decoder.decode(value, { stream: true });
    }
    if (streamEnded) {
      buffer += decoder.decode(); // flush bytes of a split multi-byte character
    }

    const lines = buffer.split('\n');
    // The last piece may be an incomplete line unless the stream is over.
    buffer = streamEnded ? '' : lines.pop();
    for (const line of lines) {
      await handleLine(line);
    }
  }

  // Covers streams that close without sending `[DONE]`.
  await flushPending();
  return content;
}
export { requestLLMStream };