// Request the LLM chat API in streaming mode and print the streamed response content.

// Filter narration (stage directions) out of a text fragment.
function filterNarration(text) {
  if (!text) return text;

  // Match narration wrapped in various kinds of brackets,
  // including （）, (), 【】, [], {}, 〈〉, 《》, and <>.
  const narrationPatterns = [
    /（[^）]*）/g,  // full-width (Chinese) parentheses
    /\([^)]*\)/g,   // half-width parentheses
    /【[^】]*】/g,  // Chinese square brackets
    /\[[^\]]*\]/g,  // half-width square brackets
    /\{[^}]*\}/g,   // curly braces
    /〈[^〉]*〉/g,  // Chinese angle brackets
    /《[^》]*》/g,  // Chinese title marks
    /<[^>]*>/g      // half-width angle brackets
  ];

  let filteredText = text;

  // Apply each filter pattern in turn.
  narrationPatterns.forEach(pattern => {
    filteredText = filteredText.replace(pattern, '');
  });

  // Collapse redundant whitespace and line breaks.
  filteredText = filteredText.replace(/\s+/g, ' ').trim();

  return filteredText;
}
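
// Illustrative example (placeholder input):
//   filterNarration('Hello (smiles) there [aside]')  // => 'Hello there'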

async function requestLLMStream({ apiKey, model, messages, onSegment }) {
  const response = await fetch('https://ark.cn-beijing.volces.com/api/v3/bots/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      'Cache-Control': 'no-cache',
    },
    body: JSON.stringify({
      model,
      stream: true,
      stream_options: { include_usage: true },
      messages,
    }),
  });

  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder('utf-8');
  let done = false;
  let buffer = '';
  let content = '';
  let pendingText = ''; // text fragment waiting to be segmented

  // Segment delimiters: full-width and half-width sentence punctuation, plus ellipses.
  const segmentDelimiters = /[，。：；！？,.:;!?]|\.{3,}|……|…/;
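
  // The parsing loop below assumes OpenAI-compatible SSE frames from this endpoint, e.g.
  //   data: {"choices":[{"delta":{"content":"..."}}], ...}
  // with a terminating
  //   data: [DONE]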

  while (!done) {
    const { value, done: doneReading } = await reader.read();
    done = doneReading;
    if (value) {
      const chunk = decoder.decode(value, { stream: true });
      buffer += chunk;

      // Process the SSE-formatted data.
      const lines = buffer.split('\n');
      buffer = lines.pop(); // the last line may be incomplete, keep it for the next chunk

      for (const line of lines) {
        if (!line.trim()) continue;

        // Check whether this is an SSE data line.
        if (line.startsWith('data:')) {
          const jsonStr = line.substring(5).trim(); // strip the 'data:' prefix

          if (jsonStr === '[DONE]') {
            console.log('LLM SSE stream finished');
            // Flush the remaining pending text, regardless of its length.
            if (pendingText.trim() && onSegment) {
              console.log('Processing final pending text:', pendingText.trim());
              // Filter out narration content.
              const filteredText = filterNarration(pendingText.trim());
              if (filteredText.trim()) {
                console.log('Final text after narration filtering:', filteredText);
                await onSegment(filteredText, true);
              } else {
                console.log('Final text was filtered out entirely, skipping');
              }
            }
            continue;
          }

          try {
            const obj = JSON.parse(jsonStr);
            if (obj.choices && obj.choices[0] && obj.choices[0].delta && obj.choices[0].delta.content) {
              const deltaContent = obj.choices[0].delta.content;
              content += deltaContent;
              pendingText += deltaContent;
              console.log('[unfiltered] LLM content fragment:', pendingText);

              // Filter narration first, then look for segment delimiters.
              const filteredPendingText = filterNarration(pendingText);

              // Check whether the filtered text contains a segment delimiter.
              if (segmentDelimiters.test(filteredPendingText)) {
                // Split the filtered text on the delimiters.
                const segments = filteredPendingText.split(segmentDelimiters);

                // Recombine the pieces and only emit segments that are long enough.
                let accumulatedText = '';
                let hasProcessed = false;

                for (let i = 0; i < segments.length - 1; i++) {
                  const segment = segments[i].trim();
                  if (segment) {
                    accumulatedText += segment;
                    // Re-attach a delimiter (the first one found in the filtered text).
                    const delimiterMatch = filteredPendingText.match(segmentDelimiters);
                    if (delimiterMatch) {
                      accumulatedText += delimiterMatch[0];
                    }

                    // Once the accumulated text is longer than 8 characters, emit it.
                    if (accumulatedText.length > 8 && onSegment) {
                      console.log('[filtered] Detected complete segment:', accumulatedText);
                      // The text has already been narration-filtered, so use it directly.
                      if (accumulatedText.trim()) {
                        console.log('Processing filtered text:', accumulatedText);
                        await onSegment(accumulatedText, false);
                      }
                      hasProcessed = true;
                      accumulatedText = ''; // reset
                    }
                  }
                }

                // Update pendingText, using the raw text with a corresponding adjustment.
                if (hasProcessed) {
                  // Estimate how much of the raw text has been handled and drop it from pendingText.
                  const processedLength = pendingText.length - (segments[segments.length - 1] || '').length;
                  pendingText = pendingText.substring(processedLength);
                }
              }
            }
          } catch (e) {
            console.error('Failed to parse LLM SSE data:', e, 'raw data:', jsonStr);
          }
        } else if (line.startsWith('event: ') || line.startsWith('id: ') || line.startsWith('retry: ')) {
          // Ignore other SSE fields.
          continue;
        }
      }
    }
  }

  // Return the complete content.
  return content;
}

export { requestLLMStream };
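
// Usage sketch (illustrative; the API key source and bot ID below are placeholders).
// onSegment receives each narration-filtered segment and a flag that is true only for
// the final flush at [DONE]; the resolved value is the full unfiltered response text:
//
// const fullText = await requestLLMStream({
//   apiKey: process.env.ARK_API_KEY,              // placeholder: however the key is provided
//   model: 'bot-xxxxxxxx',                        // placeholder bot ID for the bots/chat endpoint
//   messages: [{ role: 'user', content: 'Hello' }],
//   onSegment: async (segment, isFinal) => {
//     console.log(isFinal ? '[final]' : '[segment]', segment);
//   },
// });
// console.log('Full response:', fullText);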