import fs from 'fs';
import { afterEach, describe, expect, it, vi } from 'vitest';
import { generateSubtitlesFromVideo } from './videoSubtitleGeneration';

/** Endpoint every test in this file points the doubao provider at. */
const ARK_RESPONSES_URL = 'https://ark.cn-beijing.volces.com/api/v3/responses';

/** Provider settings shared by all tests; copied per call so no test can leak mutations. */
const DOUBAO_PROVIDER_CONFIG = {
  provider: 'doubao',
  apiKey: 'ark-key',
  model: 'doubao-seed-2-0-pro-260215',
  baseUrl: ARK_RESPONSES_URL,
  timeoutMs: 600000,
};

/** The single subtitle entry the mocked model returns in every test. */
const BASE_SUBTITLE = {
  originalText: 'hello there',
  translatedText: 'Hello',
  ttsText: 'Bonjour',
  ttsLanguage: 'fr',
  startTime: 0,
  endTime: 1,
};

/** Model output consisting of nothing but the JSON document. */
const PLAIN_JSON_REPLY = JSON.stringify({
  sourceLanguage: 'zh',
  subtitles: [BASE_SUBTITLE],
});

/** Model output where the JSON document is wrapped in prose and a Markdown code fence. */
const FENCED_JSON_REPLY =
  'Here is the JSON result:\n```json\n' +
  JSON.stringify({
    sourceLanguage: 'zh',
    subtitles: [{ ...BASE_SUBTITLE, speaker: 'Speaker 1', voiceId: 'male-qn-qingse' }],
  }) +
  '\n```\nUse it directly.';

/**
 * Builds a fetch stand-in that answers every call with HTTP 200 and a JSON
 * envelope of the shape `{ output: [{ content: [{ text }] }] }`.
 *
 * A new `Response` is constructed on each invocation because a response body
 * can only be consumed once.
 *
 * @param modelText - Raw text placed in the envelope's `text` field.
 * @returns A `vi.fn` mock suitable for the `fetchImpl` option.
 */
function createFetchMock(modelText: string) {
  return vi.fn(
    async () =>
      new Response(JSON.stringify({ output: [{ content: [{ text: modelText }] }] }), {
        status: 200,
        headers: { 'Content-Type': 'application/json' },
      }),
  );
}

/**
 * Invokes `generateSubtitlesFromVideo` with the shared doubao configuration.
 *
 * @param source - Either a local `videoPath` or an already-uploaded `fileId`.
 * @param fetchImpl - Fetch mock injected into the function under test.
 * @returns Whatever `generateSubtitlesFromVideo` resolves to.
 */
function runDoubaoGeneration(
  source: { videoPath: string } | { fileId: string },
  fetchImpl: ReturnType<typeof createFetchMock>,
) {
  // The options type is declared outside this file, so the original `as any`
  // cast is kept, but confined to this one call site.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  return generateSubtitlesFromVideo({
    providerConfig: { ...DOUBAO_PROVIDER_CONFIG },
    ...source,
    targetLanguage: 'English',
    ttsLanguage: 'fr',
    fetchImpl,
  } as any);
}

describe('generateSubtitlesFromVideo', () => {
  afterEach(() => {
    vi.restoreAllMocks();
  });

  it('passes the configured doubao timeout to fetch', async () => {
    // Stub the synchronous file read so no real clip.mp4 is needed on disk.
    vi.spyOn(fs, 'readFileSync').mockReturnValue(Buffer.from('video-bytes'));
    const fetchImpl = createFetchMock(PLAIN_JSON_REPLY);

    await runDoubaoGeneration({ videoPath: 'clip.mp4' }, fetchImpl);

    // NOTE(review): this only proves that *an* AbortSignal reaches fetch; it
    // does not check that the signal is tied to the 600000 ms timeoutMs.
    expect(fetchImpl).toHaveBeenCalledWith(
      ARK_RESPONSES_URL,
      expect.objectContaining({
        method: 'POST',
        signal: expect.any(AbortSignal),
      }),
    );
  });

  it('uses ark file ids for doubao requests when available', async () => {
    const fetchImpl = createFetchMock(PLAIN_JSON_REPLY);

    await runDoubaoGeneration({ fileId: 'file-123' }, fetchImpl);

    const [, request] = fetchImpl.mock.calls[0] as [string, RequestInit];
    const payload = JSON.parse(String(request.body));

    // First message: the system prompt carrying the role and output contract.
    const systemMessage = payload.input[0];
    expect(systemMessage.role).toBe('system');
    expect(systemMessage.content[0].type).toBe('input_text');

    const systemPrompt = systemMessage.content[0].text;
    const expectedSystemFragments = [
      '# Role',
      'Voice Selection',
      '# Output Contract',
      'The first character of your response must be {.',
      'The last character of your response must be }.',
      '"ttsText":',
      '"ttsLanguage":',
      'translatedText must always be English',
      'originalText must be a faithful transcription of the actually audible speech',
      'Do not rewrite, summarize, polish, correct, or paraphrase',
      'Do not infer hidden dialogue from context, visuals, plot, or likely meaning',
    ];
    for (const fragment of expectedSystemFragments) {
      expect(systemPrompt).toContain(fragment);
    }

    // Second message: the video reference by file id, followed by the task text.
    const userMessage = payload.input[1];
    expect(userMessage.role).toBe('user');
    expect(userMessage.content[0]).toEqual({
      type: 'input_video',
      file_id: 'file-123',
    });
    expect(userMessage.content[1].type).toBe('input_text');

    const userPrompt = userMessage.content[1].text;
    const expectedUserFragments = [
      'Subtitle language: English',
      'TTS language: fr',
      'Available voices for the TTS language',
    ];
    for (const fragment of expectedUserFragments) {
      expect(userPrompt).toContain(fragment);
    }
  });

  it('extracts tts fields when doubao returns prose around a fenced payload', async () => {
    const fetchImpl = createFetchMock(FENCED_JSON_REPLY);

    const result = await runDoubaoGeneration({ fileId: 'file-123' }, fetchImpl);

    expect(result.sourceLanguage).toBe('zh');
    expect(result.subtitles).toHaveLength(1);
    expect(result.subtitles[0]).toMatchObject({
      originalText: 'hello there',
      translatedText: 'Hello',
      ttsText: 'Bonjour',
      ttsLanguage: 'fr',
      speaker: 'Speaker 1',
      voiceId: 'male-qn-qingse',
    });
  });
});