// Vitest suite for generateSubtitlesFromVideo (imported from ./videoSubtitleGeneration).
import fs from 'fs';
|
|
import { afterEach, describe, expect, it, vi } from 'vitest';
|
|
import { generateSubtitlesFromVideo } from './videoSubtitleGeneration';
|
|
|
|
describe('generateSubtitlesFromVideo', () => {
  /** Endpoint that every test in this suite configures as the provider `baseUrl`. */
  const ARK_RESPONSES_URL = 'https://ark.cn-beijing.volces.com/api/v3/responses';

  /**
   * Plain-JSON model output shared by the tests that only inspect the outgoing
   * request and therefore do not care about the parsed result.
   */
  const STRUCTURED_MODEL_TEXT = JSON.stringify({
    sourceLanguage: 'zh',
    subtitles: [
      {
        originalText: 'hello there',
        translatedText: 'Hello',
        ttsText: 'Bonjour',
        ttsLanguage: 'fr',
        startTime: 0,
        endTime: 1,
      },
    ],
  });

  /**
   * Builds the doubao provider config used by every test.
   *
   * A fresh object is returned on each call so no state can leak between tests
   * through a shared reference.
   */
  function doubaoProviderConfig() {
    return {
      provider: 'doubao',
      apiKey: 'ark-key',
      model: 'doubao-seed-2-0-pro-260215',
      baseUrl: ARK_RESPONSES_URL,
      timeoutMs: 600000,
    };
  }

  /**
   * Creates a mocked `fetch` that answers every call with an HTTP 200 JSON
   * response whose body is `{ output: [{ content: [{ text: modelText }] }] }`.
   *
   * A new `Response` is constructed per call because a response body can only
   * be consumed once.
   *
   * @param modelText - Raw text placed in the single content entry of the response.
   * @returns The vitest mock, so tests can inspect the recorded calls.
   */
  function stubFetch(modelText: string) {
    return vi.fn<typeof fetch>(
      async () =>
        new Response(
          JSON.stringify({ output: [{ content: [{ text: modelText }] }] }),
          {
            status: 200,
            headers: { 'Content-Type': 'application/json' },
          },
        ),
    );
  }

  afterEach(() => {
    // Undo spies (e.g. on fs.readFileSync) so they do not leak into other tests.
    vi.restoreAllMocks();
  });

  it('passes the configured doubao timeout to fetch', async () => {
    // This test passes a videoPath, so stub the file read to avoid touching disk.
    vi.spyOn(fs, 'readFileSync').mockReturnValue(Buffer.from('video-bytes'));
    const fetchImpl = stubFetch(STRUCTURED_MODEL_TEXT);

    // The `as any` cast is kept from the original test.
    // NOTE(review): presumably the literal does not match the declared options type — confirm.
    await generateSubtitlesFromVideo({
      providerConfig: doubaoProviderConfig(),
      videoPath: 'clip.mp4',
      targetLanguage: 'English',
      ttsLanguage: 'fr',
      fetchImpl,
    } as any);

    // NOTE(review): this only proves that *an* AbortSignal was supplied; it does
    // not check that the signal is tied to the configured 600000 ms timeout.
    expect(fetchImpl).toHaveBeenCalledWith(
      ARK_RESPONSES_URL,
      expect.objectContaining({
        method: 'POST',
        signal: expect.any(AbortSignal),
      }),
    );
  });

  it('uses ark file ids for doubao requests when available', async () => {
    const fetchImpl = stubFetch(STRUCTURED_MODEL_TEXT);

    await generateSubtitlesFromVideo({
      providerConfig: doubaoProviderConfig(),
      fileId: 'file-123',
      targetLanguage: 'English',
      ttsLanguage: 'fr',
      fetchImpl,
    } as any);

    // Fail with a readable message instead of a destructuring TypeError when
    // fetch was never invoked.
    expect(fetchImpl).toHaveBeenCalled();
    const [, request] = fetchImpl.mock.calls[0] as [string, RequestInit];
    const payload = JSON.parse(String(request.body));

    // First input entry: the system prompt.
    const systemMessage = payload.input[0];
    expect(systemMessage.role).toBe('system');
    expect(systemMessage.content[0].type).toBe('input_text');

    const expectedPromptSnippets = [
      '# Role',
      'Voice Selection',
      '# Output Contract',
      'The first character of your response must be {.',
      'The last character of your response must be }.',
      '"ttsText":',
      '"ttsLanguage":',
      'translatedText must always be English',
      'originalText must be a faithful transcription of the actually audible speech',
      'Do not rewrite, summarize, polish, correct, or paraphrase',
      'Do not infer hidden dialogue from context, visuals, plot, or likely meaning',
    ];
    for (const snippet of expectedPromptSnippets) {
      expect(systemMessage.content[0].text).toContain(snippet);
    }

    // Second input entry: the user turn, referencing the video by file id
    // followed by the textual instructions.
    const userMessage = payload.input[1];
    expect(userMessage.role).toBe('user');
    expect(userMessage.content[0]).toEqual({
      type: 'input_video',
      file_id: 'file-123',
    });
    expect(userMessage.content[1].type).toBe('input_text');

    const expectedUserSnippets = [
      'Subtitle language: English',
      'TTS language: fr',
      'Available voices for the TTS language',
    ];
    for (const snippet of expectedUserSnippets) {
      expect(userMessage.content[1].text).toContain(snippet);
    }
  });

  it('extracts tts fields when doubao returns prose around a fenced payload', async () => {
    // The JSON payload is wrapped in a ```json fence with prose before and after it.
    const fetchImpl = stubFetch(
      'Here is the JSON result:\n```json\n{"sourceLanguage":"zh","subtitles":[{"originalText":"hello there","translatedText":"Hello","ttsText":"Bonjour","ttsLanguage":"fr","startTime":0,"endTime":1,"speaker":"Speaker 1","voiceId":"male-qn-qingse"}]}\n```\nUse it directly.',
    );

    const result = await generateSubtitlesFromVideo({
      providerConfig: doubaoProviderConfig(),
      fileId: 'file-123',
      targetLanguage: 'English',
      ttsLanguage: 'fr',
      fetchImpl,
    } as any);

    expect(result.sourceLanguage).toBe('zh');
    expect(result.subtitles).toHaveLength(1);
    expect(result.subtitles[0]).toMatchObject({
      originalText: 'hello there',
      translatedText: 'Hello',
      ttsText: 'Bonjour',
      ttsLanguage: 'fr',
      speaker: 'Speaker 1',
      voiceId: 'male-qn-qingse',
    });
  });
});
|