video_translate/src/server/videoSubtitleGeneration.test.ts
2026-03-19 22:56:53 +08:00

189 lines
6.3 KiB
TypeScript

import fs from 'fs';
import { afterEach, describe, expect, it, vi } from 'vitest';
import { generateSubtitlesFromVideo } from './videoSubtitleGeneration';
/**
 * Tests for `generateSubtitlesFromVideo` using the `doubao` provider config.
 *
 * Every case hands the function a `vi.fn` stub as `fetchImpl`. The stub answers
 * with an HTTP 200 JSON body shaped as `{ output: [{ content: [{ text }] }] }`,
 * where `text` is whatever the model is pretending to have replied.
 *
 * NOTE(review): each call casts its options with `as any`; presumably the declared
 * option type does not accept this exact shape — confirm, and replace the cast
 * with a typed fixture if it does.
 */
describe('generateSubtitlesFromVideo', () => {
  const DOUBAO_ENDPOINT = 'https://ark.cn-beijing.volces.com/api/v3/responses';

  /** Returns a fresh provider config so no object instance is shared between cases. */
  const createDoubaoProviderConfig = () => ({
    provider: 'doubao',
    apiKey: 'ark-key',
    model: 'doubao-seed-2-0-pro-260215',
    baseUrl: DOUBAO_ENDPOINT,
    timeoutMs: 600000,
  });

  /** Serialized single-subtitle payload used as a bare-JSON model reply. */
  const createSubtitleJson = () =>
    JSON.stringify({
      sourceLanguage: 'zh',
      subtitles: [
        {
          originalText: 'hello there',
          translatedText: 'Hello',
          ttsText: 'Bonjour',
          ttsLanguage: 'fr',
          startTime: 0,
          endTime: 1,
        },
      ],
    });

  /**
   * Creates a `fetch` stub whose every call resolves to a new 200 JSON response
   * wrapping `modelText` in the `output[0].content[0].text` envelope.
   *
   * A new `Response` is built per call because a response body can only be
   * consumed once.
   */
  const createFetchStub = (modelText: string) =>
    vi.fn<typeof fetch>(async () =>
      new Response(
        JSON.stringify({
          output: [
            {
              content: [
                {
                  text: modelText,
                },
              ],
            },
          ],
        }),
        {
          status: 200,
          headers: { 'Content-Type': 'application/json' },
        },
      ),
    );

  afterEach(() => {
    // Undo the `fs.readFileSync` spy (and any other spies) between cases.
    vi.restoreAllMocks();
  });

  it('passes the configured doubao timeout to fetch', async () => {
    // The video is supplied by path here, so the file read is stubbed out.
    vi.spyOn(fs, 'readFileSync').mockReturnValue(Buffer.from('video-bytes'));
    const fetchImpl = createFetchStub(createSubtitleJson());

    await generateSubtitlesFromVideo({
      providerConfig: createDoubaoProviderConfig(),
      videoPath: 'clip.mp4',
      targetLanguage: 'English',
      ttsLanguage: 'fr',
      fetchImpl,
    } as any);

    // Only the presence of an AbortSignal is checked; the 600000 ms value itself
    // is not asserted by this test.
    expect(fetchImpl).toHaveBeenCalledWith(
      DOUBAO_ENDPOINT,
      expect.objectContaining({
        method: 'POST',
        signal: expect.any(AbortSignal),
      }),
    );
  });

  it('uses ark file ids for doubao requests when available', async () => {
    const fetchImpl = createFetchStub(createSubtitleJson());

    await generateSubtitlesFromVideo({
      providerConfig: createDoubaoProviderConfig(),
      fileId: 'file-123',
      targetLanguage: 'English',
      ttsLanguage: 'fr',
      fetchImpl,
    } as any);

    // Fail with a readable assertion instead of a destructuring TypeError when
    // the stub was never invoked.
    expect(fetchImpl).toHaveBeenCalled();
    const [, request] = fetchImpl.mock.calls[0] as [string, RequestInit];
    const payload = JSON.parse(String(request.body));
    const [systemMessage, userMessage] = payload.input;

    // System message: a single text part carrying the prompt.
    expect(systemMessage.role).toBe('system');
    expect(systemMessage.content[0].type).toBe('input_text');
    const systemPrompt = systemMessage.content[0].text;
    const expectedPromptFragments = [
      '# Role',
      'Voice Selection',
      '# Output Contract',
      'The first character of your response must be {.',
      'The last character of your response must be }.',
      '"ttsText":',
      '"ttsLanguage":',
      'translatedText must always be English',
      'originalText must be a faithful transcription of the actually audible speech',
      'Do not rewrite, summarize, polish, correct, or paraphrase',
      'Do not infer hidden dialogue from context, visuals, plot, or likely meaning',
    ];
    for (const fragment of expectedPromptFragments) {
      expect(systemPrompt).toContain(fragment);
    }

    // User message: the video referenced by file id, followed by a text part.
    expect(userMessage.role).toBe('user');
    expect(userMessage.content[0]).toEqual({
      type: 'input_video',
      file_id: 'file-123',
    });
    expect(userMessage.content[1].type).toBe('input_text');
    const userPrompt = userMessage.content[1].text;
    expect(userPrompt).toContain('Subtitle language: English');
    expect(userPrompt).toContain('TTS language: fr');
    expect(userPrompt).toContain('Available voices for the TTS language');
  });

  it('extracts tts fields when doubao returns prose around a fenced payload', async () => {
    // Model reply with chatter before and after a ```json fenced block.
    const fetchImpl = createFetchStub(
      'Here is the JSON result:\n```json\n{"sourceLanguage":"zh","subtitles":[{"originalText":"hello there","translatedText":"Hello","ttsText":"Bonjour","ttsLanguage":"fr","startTime":0,"endTime":1,"speaker":"Speaker 1","voiceId":"male-qn-qingse"}]}\n```\nUse it directly.',
    );

    const result = await generateSubtitlesFromVideo({
      providerConfig: createDoubaoProviderConfig(),
      fileId: 'file-123',
      targetLanguage: 'English',
      ttsLanguage: 'fr',
      fetchImpl,
    } as any);

    expect(result.sourceLanguage).toBe('zh');
    expect(result.subtitles).toHaveLength(1);
    expect(result.subtitles[0]).toMatchObject({
      originalText: 'hello there',
      translatedText: 'Hello',
      ttsText: 'Bonjour',
      ttsLanguage: 'fr',
      speaker: 'Speaker 1',
      voiceId: 'male-qn-qingse',
    });
  });
});