This commit is contained in:
parent
9ddcdc9ec6
commit
a0c1dc6ad5
2
.env
2
.env
@ -3,3 +3,5 @@ ARK_API_KEY="e96194a9-8eda-4a90-a211-6db288045bdc"
|
||||
MINIMAX_API_KEY="eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJHcm91cE5hbWUiOiLkuIrmtbfpopzpgJTnp5HmioDmnInpmZDlhazlj7giLCJVc2VyTmFtZSI6IuadqOmqpSIsIkFjY291bnQiOiIiLCJTdWJqZWN0SUQiOiIxNzI4NzEyMzI0OTc5NjI2ODM5IiwiUGhvbmUiOiIxMzM4MTU1OTYxOCIsIkdyb3VwSUQiOiIxNzI4NzEyMzI0OTcxMjM4MjMxIiwiUGFnZU5hbWUiOiIiLCJNYWlsIjoiIiwiQ3JlYXRlVGltZSI6IjIwMjUtMDYtMDYgMTU6MDU6NTUiLCJUb2tlblR5cGUiOjEsImlzcyI6Im1pbmltYXgifQ.aw1AUJnBYxXerJ4qNUaXM3DqPTd94WSVHWRiIpnjImhuCia3Ta1AyANTQTx__2CF5eByHOaHJFHhBCg6KgHUEaR6TiWFn0fWwXaU7XgnHwbvD4pNAmF_uYxMKbi-a6IyIGNyFdEMy22V5JEqfY4okAco5U96cnSOQZH7lyIBpvOsesjZU6L9q6Tf2jvlcnO9QG8GPg2DVpeL8Q3zLuYWezN4Wk6N-ISwQmZUwBYL3BhYamsFqCdSEyMd_uYQ_aQJa5tmlQqpimtALiutFshPUXB6VsvXEO6q-lCZ6Tg8QWwlFHkmEtUMQw4pWoX25d7Us06VFUhvV6pOzvM7yqCaWw"
|
||||
VITE_BASE_URL=/video_translate/
|
||||
VITE_API_BASE_PATH=/video_translate/api
|
||||
DOUBAO_TIMEOUT_MS=900000
|
||||
VITE_ARK_API_KEY="e96194a9-8eda-4a90-a211-6db288045bdc"
|
||||
|
||||
12
.env.example
12
.env.example
@ -4,6 +4,10 @@ GEMINI_API_KEY="MY_GEMINI_API_KEY"
|
||||
# ARK_API_KEY: Required when the editor LLM is set to Doubao.
|
||||
ARK_API_KEY="YOUR_ARK_API_KEY"
|
||||
|
||||
# VITE_ARK_API_KEY: Required only if the browser uploads videos directly to Ark Files API.
|
||||
# This exposes the key to the frontend and should only be used in trusted environments.
|
||||
# VITE_ARK_API_KEY="YOUR_ARK_API_KEY"
|
||||
|
||||
# DEFAULT_LLM_PROVIDER: Optional editor default. Supported values: doubao, gemini.
|
||||
# Defaults to doubao.
|
||||
DEFAULT_LLM_PROVIDER="doubao"
|
||||
@ -12,11 +16,19 @@ DEFAULT_LLM_PROVIDER="doubao"
|
||||
# Defaults to doubao-seed-2-0-pro-260215.
|
||||
DOUBAO_MODEL="doubao-seed-2-0-pro-260215"
|
||||
|
||||
# DOUBAO_TIMEOUT_MS: Optional timeout for Doubao subtitle requests in milliseconds.
|
||||
# Defaults to 600000 (10 minutes).
|
||||
# DOUBAO_TIMEOUT_MS="600000"
|
||||
|
||||
# VITE_API_BASE_PATH: Optional frontend API base path.
|
||||
# Defaults to /api.
|
||||
# Set to /video_translate/api when the app is served under /video_translate.
|
||||
# VITE_API_BASE_PATH="/video_translate/api"
|
||||
|
||||
# VITE_ALLOWED_HOSTS: Optional comma-separated hostnames allowed by the Vite dev server.
|
||||
# Useful when exposing the dev server through a tunnel such as cpolar.
|
||||
# VITE_ALLOWED_HOSTS="ced4302.r20.vip.cpolar.cn"
|
||||
|
||||
# MINIMAX_API_KEY: Required for MiniMax TTS API calls.
|
||||
# Use a MiniMax API secret key that has TTS access enabled.
|
||||
MINIMAX_API_KEY="YOUR_MINIMAX_API_KEY"
|
||||
|
||||
62
docs/plans/2026-03-18-ubuntu-start-script.md
Normal file
62
docs/plans/2026-03-18-ubuntu-start-script.md
Normal file
@ -0,0 +1,62 @@
|
||||
# Ubuntu Start Script Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Add Ubuntu development scripts that can start the app in the background and stop it later without requiring the caller to `cd` first.
|
||||
|
||||
**Architecture:** Keep Bash entrypoints in the repository root. `start-dev.sh` resolves the project directory, creates a `run/` working area, launches `npm run dev` in a dedicated process group, and records the group leader PID and log path. `stop.sh` reads the recorded PID, stops the whole process group, and removes stale state.
|
||||
|
||||
**Tech Stack:** Bash, npm
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Add Ubuntu start and stop scripts
|
||||
|
||||
**Files:**
|
||||
- Modify: `E:\Downloads\ai-video-dubbing-&-translation\start-dev.sh`
|
||||
- Create: `E:\Downloads\ai-video-dubbing-&-translation\stop.sh`
|
||||
- Create: `E:\Downloads\ai-video-dubbing-&-translation\docs\plans\2026-03-18-ubuntu-start-script.md`
|
||||
|
||||
**Step 1: Define the verification target**
|
||||
|
||||
Run: `bash -n ./start-dev.sh`
|
||||
Expected: exit code 0 after the script is updated
|
||||
|
||||
Run: `bash -n ./stop.sh`
|
||||
Expected: exit code 0 after the script is added
|
||||
|
||||
**Step 2: Write the minimal implementation**
|
||||
|
||||
Update `start-dev.sh` so it:
|
||||
- uses `#!/usr/bin/env bash`
|
||||
- enables `set -euo pipefail`
|
||||
- resolves the script directory
|
||||
- changes into that directory
|
||||
- creates `run/`
|
||||
- starts `npm run dev` in the background as its own process group
|
||||
- writes the process id to `run/dev.pid`
|
||||
- writes logs to `run/dev.log`
|
||||
- refuses to start a second copy if the PID is still alive
|
||||
|
||||
Create `stop.sh` so it:
|
||||
- resolves the script directory
|
||||
- reads `run/dev.pid`
|
||||
- sends `TERM` to the whole process group if it is running
|
||||
- waits briefly and escalates to `KILL` only if needed
|
||||
- removes stale `run/dev.pid`
|
||||
|
||||
**Step 3: Run syntax verification**
|
||||
|
||||
Run: `bash -n ./start-dev.sh`
|
||||
Expected: exit code 0 with no syntax errors
|
||||
|
||||
Run: `bash -n ./stop.sh`
|
||||
Expected: exit code 0 with no syntax errors
|
||||
|
||||
**Step 4: Run an execution smoke check**
|
||||
|
||||
Run: `bash ./start-dev.sh`
|
||||
Expected: npm starts the development server in the background and prints the PID/log location
|
||||
|
||||
Run: `bash ./stop.sh`
|
||||
Expected: the background dev process stops and the PID file is removed
|
||||
156
docs/plans/2026-03-19-doubao-file-id-frontend-design.md
Normal file
156
docs/plans/2026-03-19-doubao-file-id-frontend-design.md
Normal file
@ -0,0 +1,156 @@
|
||||
# Doubao Frontend File ID Upload Design
|
||||
|
||||
**Goal:** Let the browser upload videos to Volcengine Ark Files API, then send the returned `file_id` to this app's backend so Doubao subtitle generation can use `Responses API` with `file_id` instead of inline base64 video payloads.
|
||||
|
||||
## Context
|
||||
|
||||
The current subtitle flow uploads the full video to this app's backend, then the backend reads the file and sends a `data:video/mp4;base64,...` payload to Doubao. That works for smaller files, but it inherits request body size limits and repeats the full video upload on every subtitle generation request.
|
||||
|
||||
The user wants a staged flow:
|
||||
|
||||
1. Frontend uploads the selected video directly to Ark Files API.
|
||||
2. Frontend receives a `file_id`.
|
||||
3. Frontend calls this app's `/api/generate-subtitles` endpoint with that `file_id`.
|
||||
4. Backend keeps ownership of the Doubao `Responses API` request, logging, normalization, and subtitle result shaping.
|
||||
|
||||
## Approaches Considered
|
||||
|
||||
### Option A: Frontend uploads to Files API, backend uses `file_id` for Doubao
|
||||
|
||||
This keeps the current app architecture mostly intact. Only the upload stage moves to the browser. The backend still handles provider selection, subtitle parsing, error mapping, and normalized response shaping.
|
||||
|
||||
**Pros**
|
||||
- Smallest architectural change
|
||||
- Keeps existing backend logging and response normalization
|
||||
- Preserves the existing `/api/generate-subtitles` contract with a backward-compatible extension
|
||||
- Allows a gradual rollout because base64 upload can remain as fallback
|
||||
|
||||
**Cons**
|
||||
- Frontend gains Ark-specific upload logic
|
||||
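- The browser now coordinates two network calls for Doubao
- The browser needs its own Ark credential (`VITE_ARK_API_KEY`), which exposes the key to the frontend and is only acceptable in trusted environments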
|
||||
|
||||
### Option B: Frontend uploads to Files API and also calls Doubao `Responses API`
|
||||
|
||||
This removes backend involvement for Doubao subtitle generation, but it pushes subtitle parsing and normalization into the browser.
|
||||
|
||||
**Pros**
|
||||
- Shorter network path for Doubao
|
||||
|
||||
**Cons**
|
||||
- Large frontend refactor
|
||||
- Duplicates provider logic across frontend and backend
|
||||
- Loses centralized logging and error handling
|
||||
- Makes Gemini and Doubao flows diverge more sharply
|
||||
|
||||
### Recommendation
|
||||
|
||||
Use **Option A**. It solves the request-size problem without discarding the backend subtitle pipeline that already exists.
|
||||
|
||||
## Architecture
|
||||
|
||||
### Frontend
|
||||
|
||||
Add a small Ark upload helper that:
|
||||
|
||||
1. Accepts the selected `File`
|
||||
2. Sends `FormData` to `https://ark.cn-beijing.volces.com/api/v3/files`
|
||||
3. Includes:
|
||||
- `purpose=user_data`
|
||||
- `file=@<video>`
|
||||
- `preprocess_configs[video][fps]=1`
|
||||
4. Reads the response JSON and returns the Ark `file_id`
|
||||
|
||||
`generateSubtitlePipeline(...)` will gain an optional `options` object. When the provider is `doubao`, it will:
|
||||
|
||||
1. Upload the file to Ark
|
||||
2. Call this app's `/api/generate-subtitles` with `fileId`, `provider`, `targetLanguage`, and optional `trimRange`
|
||||
|
||||
For `gemini`, it will keep the current multipart upload path unchanged.
|
||||
|
||||
### Backend
|
||||
|
||||
The `/api/generate-subtitles` endpoint will support two request shapes:
|
||||
|
||||
1. Existing multipart upload with `video`
|
||||
2. New JSON or urlencoded body with `fileId`
|
||||
|
||||
The subtitle request parser will be extended to accept optional `fileId`.
|
||||
|
||||
The video subtitle generation pipeline will accept either:
|
||||
|
||||
1. `videoPath`
|
||||
2. `fileId`
|
||||
|
||||
For Doubao:
|
||||
|
||||
- If `fileId` is present, send:
|
||||
- `type: "input_video"`
|
||||
- `file_id: "<ark-file-id>"`
|
||||
- `fps: 1`
|
||||
- If `fileId` is absent, preserve the current base64 fallback path
|
||||
|
||||
For Gemini:
|
||||
|
||||
- Continue requiring a local uploaded file path
|
||||
- Return a clear error if Gemini is requested without `video`
|
||||
|
||||
## Data Flow
|
||||
|
||||
### Doubao Path
|
||||
|
||||
1. User selects video in the browser
|
||||
2. `EditorScreen` triggers subtitle generation
|
||||
3. Frontend uploads the `File` to Ark Files API
|
||||
4. Frontend receives `file_id`
|
||||
5. Frontend posts `fileId` to `/api/generate-subtitles`
|
||||
6. Backend resolves Doubao provider config
|
||||
7. Backend calls Ark `Responses API` with `file_id`
|
||||
8. Backend parses and normalizes subtitle JSON
|
||||
9. Frontend renders normalized subtitles
|
||||
|
||||
### Gemini Path
|
||||
|
||||
1. User selects video in the browser
|
||||
2. Frontend posts multipart form data with `video`
|
||||
3. Backend sends inline video bytes to Gemini as today
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Frontend Upload Errors
|
||||
|
||||
If Ark Files API fails, the frontend should surface a direct upload error and avoid calling this app's backend. The user should see the upstream message when possible.
|
||||
|
||||
### Backend Request Validation
|
||||
|
||||
The backend should reject requests when:
|
||||
|
||||
- Neither `video` nor `fileId` is provided
|
||||
- `targetLanguage` is missing
|
||||
- `gemini` is requested with `fileId` only
|
||||
|
||||
### Provider-Specific Behavior
|
||||
|
||||
- `doubao + fileId` uses the new Ark file reference path
|
||||
- `doubao + video` remains supported as fallback
|
||||
- `gemini + video` remains unchanged
|
||||
- `gemini + fileId` returns a clear validation error
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Frontend
|
||||
|
||||
- Unit test Ark file upload helper request shape
|
||||
- Unit test `generateSubtitlePipeline` uses `fileId` for Doubao and skips multipart video upload to this app's backend
|
||||
- Unit test `generateSubtitlePipeline` keeps multipart upload for Gemini
|
||||
- UI test `EditorScreen` still passes the selected provider through subtitle generation
|
||||
|
||||
### Backend
|
||||
|
||||
- Unit test subtitle request parsing with `fileId`
|
||||
- Unit test Doubao video generation uses `file_id` when present
|
||||
- Unit test base64 fallback remains intact
|
||||
- Unit test Gemini path rejects `fileId`-only requests
|
||||
|
||||
## Rollout Notes
|
||||
|
||||
Keep the base64 Doubao fallback during this change. That makes the new flow additive instead of a risky cutover and keeps local tests simpler while the frontend upload path settles.
|
||||
172
docs/plans/2026-03-19-doubao-file-id-frontend.md
Normal file
172
docs/plans/2026-03-19-doubao-file-id-frontend.md
Normal file
@ -0,0 +1,172 @@
|
||||
# Doubao Frontend File ID Upload Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Allow the frontend to upload a video to Ark Files API, receive a `file_id`, and send that `file_id` to this app's backend so Doubao subtitle generation uses `Responses API` file references instead of inline base64 video payloads.
|
||||
|
||||
**Architecture:** Add a frontend Ark upload helper plus a backward-compatible extension to the subtitle request contract. The backend will accept either an uploaded `video` file or a `fileId`, and the Doubao path will prefer `file_id` while Gemini stays on the current multipart upload flow.
|
||||
|
||||
**Tech Stack:** React, TypeScript, Express, Vitest, Fetch API, Volcengine Ark Files API, Volcengine Ark Responses API
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Add failing frontend tests for the new Doubao request flow
|
||||
|
||||
**Files:**
|
||||
- Modify: `E:\Downloads\ai-video-dubbing-&-translation\src\services\subtitleService.test.ts`
|
||||
|
||||
**Step 1: Write the failing tests**
|
||||
|
||||
Add tests that verify:
|
||||
- Doubao first uploads the file to Ark Files API and then posts `fileId` to `/api/generate-subtitles`
|
||||
- Gemini still uploads multipart form data with `video`
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `npm.cmd run test -- src/services/subtitleService.test.ts`
|
||||
Expected: FAIL because the service does not yet upload to Ark or send `fileId`
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Update the frontend subtitle service to support an Ark upload step and dual request modes.
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `npm.cmd run test -- src/services/subtitleService.test.ts`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/services/subtitleService.test.ts src/services/subtitleService.ts
|
||||
git commit -m "feat: add frontend doubao file id upload flow"
|
||||
```
|
||||
|
||||
### Task 2: Add failing backend tests for `fileId` parsing and Doubao request shape
|
||||
|
||||
**Files:**
|
||||
- Modify: `E:\Downloads\ai-video-dubbing-&-translation\src\server\subtitleRequest.test.ts`
|
||||
- Modify: `E:\Downloads\ai-video-dubbing-&-translation\src\server\videoSubtitleGeneration.test.ts`
|
||||
|
||||
**Step 1: Write the failing tests**
|
||||
|
||||
Add tests that verify:
|
||||
- Subtitle request parsing accepts `fileId`
|
||||
- Doubao `Responses API` request uses `file_id` and `fps`
|
||||
- Gemini rejects requests that provide only `fileId`
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `npm.cmd run test -- src/server/subtitleRequest.test.ts src/server/videoSubtitleGeneration.test.ts`
|
||||
Expected: FAIL because parsing and generation do not yet support `fileId`
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Extend server request parsing and video generation to accept `fileId`.
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `npm.cmd run test -- src/server/subtitleRequest.test.ts src/server/videoSubtitleGeneration.test.ts`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/server/subtitleRequest.test.ts src/server/videoSubtitleGeneration.test.ts src/server/subtitleRequest.ts src/server/videoSubtitleGeneration.ts
|
||||
git commit -m "feat: support doubao file id subtitle requests"
|
||||
```
|
||||
|
||||
### Task 3: Wire the backend route to accept `fileId` without a multipart video upload
|
||||
|
||||
**Files:**
|
||||
- Modify: `E:\Downloads\ai-video-dubbing-&-translation\server.ts`
|
||||
- Modify: `E:\Downloads\ai-video-dubbing-&-translation\src\server\subtitleGeneration.ts`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
If route-level behavior is already covered by unit tests of the underlying pipeline rather than by HTTP-level tests, add or update a pipeline test (instead of a new route test) that proves `fileId` can flow into subtitle generation without `videoPath`.
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `npm.cmd run test -- src/server/subtitleGeneration.test.ts`
|
||||
Expected: FAIL because the pipeline still assumes a local video path
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Allow subtitle generation to receive either:
|
||||
- `videoPath`
|
||||
- `fileId`
|
||||
|
||||
Require at least one, and keep backend cleanup safe when no uploaded file exists.
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `npm.cmd run test -- src/server/subtitleGeneration.test.ts`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add server.ts src/server/subtitleGeneration.ts src/server/subtitleGeneration.test.ts
|
||||
git commit -m "feat: accept file id subtitle generation requests"
|
||||
```
|
||||
|
||||
### Task 4: Verify editor behavior still works with provider switching
|
||||
|
||||
**Files:**
|
||||
- Modify: `E:\Downloads\ai-video-dubbing-&-translation\src\components\EditorScreen.test.tsx`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Add or update coverage so the editor still calls subtitle generation correctly after the service signature change.
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `npm.cmd run test -- src/components/EditorScreen.test.tsx`
|
||||
Expected: FAIL because mocks or call signatures need updating
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
Adjust the editor or tests so the new service contract is reflected without changing the visible UX.
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `npm.cmd run test -- src/components/EditorScreen.test.tsx`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add src/components/EditorScreen.test.tsx src/components/EditorScreen.tsx
|
||||
git commit -m "test: align editor subtitle generation with file id flow"
|
||||
```
|
||||
|
||||
### Task 5: Run focused regression coverage
|
||||
|
||||
**Files:**
|
||||
- Test: `E:\Downloads\ai-video-dubbing-&-translation\src\services\subtitleService.test.ts`
|
||||
- Test: `E:\Downloads\ai-video-dubbing-&-translation\src\server\subtitleRequest.test.ts`
|
||||
- Test: `E:\Downloads\ai-video-dubbing-&-translation\src\server\subtitleGeneration.test.ts`
|
||||
- Test: `E:\Downloads\ai-video-dubbing-&-translation\src\server\videoSubtitleGeneration.test.ts`
|
||||
- Test: `E:\Downloads\ai-video-dubbing-&-translation\src\components\EditorScreen.test.tsx`
|
||||
|
||||
**Step 1: Run the focused suite**
|
||||
|
||||
Run: `npm.cmd run test -- src/services/subtitleService.test.ts src/server/subtitleRequest.test.ts src/server/subtitleGeneration.test.ts src/server/videoSubtitleGeneration.test.ts src/components/EditorScreen.test.tsx`
|
||||
Expected: PASS
|
||||
|
||||
**Step 2: Fix any regressions**
|
||||
|
||||
Make only the minimal changes required to keep Doubao and Gemini flows working.
|
||||
|
||||
**Step 3: Re-run the focused suite**
|
||||
|
||||
Run: `npm.cmd run test -- src/services/subtitleService.test.ts src/server/subtitleRequest.test.ts src/server/subtitleGeneration.test.ts src/server/videoSubtitleGeneration.test.ts src/components/EditorScreen.test.tsx`
|
||||
Expected: PASS
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add src/services/subtitleService.ts src/services/subtitleService.test.ts src/server/subtitleRequest.ts src/server/subtitleRequest.test.ts src/server/subtitleGeneration.ts src/server/subtitleGeneration.test.ts src/server/videoSubtitleGeneration.ts src/server/videoSubtitleGeneration.test.ts src/components/EditorScreen.test.tsx server.ts docs/plans/2026-03-19-doubao-file-id-frontend-design.md docs/plans/2026-03-19-doubao-file-id-frontend.md
|
||||
git commit -m "feat: use ark file ids for doubao subtitle generation"
|
||||
```
|
||||
58
server.ts
58
server.ts
@ -20,6 +20,7 @@ import {
|
||||
DEFAULT_EXPORT_TEXT_STYLES,
|
||||
shiftSubtitlesToExportTimeline,
|
||||
} from './src/server/exportVideo';
|
||||
import { formatLogContext, serializeError } from './src/server/errorLogging';
|
||||
import { TextStyles } from './src/types';
|
||||
|
||||
const upload = multer({
|
||||
@ -49,6 +50,7 @@ if (ffprobePath) {
|
||||
async function startServer() {
|
||||
const app = express();
|
||||
const PORT = 3000;
|
||||
const subtitleUpload = upload.single('video');
|
||||
|
||||
app.use(cors());
|
||||
app.use(express.json({ limit: '500mb' }));
|
||||
@ -191,26 +193,55 @@ async function startServer() {
|
||||
}
|
||||
});
|
||||
|
||||
app.post('/api/generate-subtitles', upload.single('video'), async (req, res) => {
|
||||
const videoPath = req.file?.path;
|
||||
|
||||
try {
|
||||
if (!videoPath) {
|
||||
return res.status(400).json({ error: 'No video file provided' });
|
||||
app.post('/api/generate-subtitles', (req, res, next) => {
|
||||
if ((req.headers['content-type'] || '').includes('multipart/form-data')) {
|
||||
return subtitleUpload(req, res, next);
|
||||
}
|
||||
|
||||
const { provider, targetLanguage } = parseSubtitleRequest(req.body);
|
||||
next();
|
||||
}, async (req, res) => {
|
||||
const videoPath = req.file?.path;
|
||||
const requestId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
||||
const startedAt = Date.now();
|
||||
|
||||
try {
|
||||
const { provider, targetLanguage, fileId } = parseSubtitleRequest(req.body);
|
||||
if (!videoPath && !fileId) {
|
||||
return res.status(400).json({ error: 'No video file provided' });
|
||||
}
|
||||
console.info(
|
||||
`[subtitle] request started ${formatLogContext({
|
||||
requestId,
|
||||
provider,
|
||||
targetLanguage,
|
||||
fileName: req.file?.originalname,
|
||||
fileSize: req.file?.size,
|
||||
})}`,
|
||||
);
|
||||
|
||||
const result = await generateSubtitlePipeline({
|
||||
videoPath,
|
||||
fileId,
|
||||
provider,
|
||||
targetLanguage,
|
||||
env: process.env,
|
||||
requestId,
|
||||
});
|
||||
|
||||
console.info(
|
||||
`[subtitle] request succeeded ${formatLogContext({
|
||||
requestId,
|
||||
provider,
|
||||
targetLanguage,
|
||||
durationMs: Date.now() - startedAt,
|
||||
subtitleCount: result.subtitles.length,
|
||||
})}`,
|
||||
);
|
||||
|
||||
res.json({
|
||||
...result,
|
||||
provider,
|
||||
requestId,
|
||||
});
|
||||
} catch (error: any) {
|
||||
const message = error instanceof Error ? error.message : 'Failed to generate subtitles';
|
||||
@ -228,8 +259,17 @@ async function startServer() {
|
||||
? 401
|
||||
: 502;
|
||||
|
||||
console.error('Subtitle Generation Error:', error);
|
||||
res.status(status).json({ error: message });
|
||||
console.error(
|
||||
`[subtitle] request failed ${formatLogContext({
|
||||
requestId,
|
||||
durationMs: Date.now() - startedAt,
|
||||
fileName: req.file?.originalname,
|
||||
fileSize: req.file?.size,
|
||||
status,
|
||||
})}`,
|
||||
serializeError(error),
|
||||
);
|
||||
res.status(status).json({ error: message, requestId });
|
||||
} finally {
|
||||
if (videoPath && fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
|
||||
}
|
||||
|
||||
@ -95,4 +95,21 @@ describe('EditorScreen', () => {
|
||||
),
|
||||
);
|
||||
});
|
||||
|
||||
// Renders the editor inside React.StrictMode and expects the subtitle
// pipeline to be invoked exactly once for the given props.
it('only auto-generates subtitles once in StrictMode', async () => {
  const clip = new File(['video'], 'clip.mp4', { type: 'video/mp4' });
  const handleBack = () => {};

  render(
    <React.StrictMode>
      <EditorScreen
        videoFile={clip}
        targetLanguage="en"
        trimRange={null}
        onBack={handleBack}
      />
    </React.StrictMode>,
  );

  // Wait until generation has started, then make sure it was not repeated.
  await waitFor(() => expect(generateSubtitlePipelineMock).toHaveBeenCalled());

  expect(generateSubtitlePipelineMock).toHaveBeenCalledTimes(1);
});
|
||||
});
|
||||
|
||||
@ -39,6 +39,7 @@ export default function EditorScreen({ videoFile, targetLanguage, trimRange, onB
|
||||
const [videoAspectRatio, setVideoAspectRatio] = useState<number>(16/9);
|
||||
const containerRef = useRef<HTMLDivElement>(null);
|
||||
const [renderedVideoWidth, setRenderedVideoWidth] = useState<number | '100%'>('100%');
|
||||
const autoGenerationKeyRef = useRef<string | null>(null);
|
||||
|
||||
// Timeline Dragging State
|
||||
const [draggingId, setDraggingId] = useState<string | null>(null);
|
||||
@ -137,8 +138,21 @@ export default function EditorScreen({ videoFile, targetLanguage, trimRange, onB
|
||||
|
||||
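// Auto-generate subtitles once per unique combination of file name/size, target language, trim range and provider; the key ref skips repeat runs of this effect for the same inputs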
|
||||
useEffect(() => {
|
||||
const autoGenerationKey = JSON.stringify({
|
||||
fileName: videoFile?.name || '',
|
||||
fileSize: videoFile?.size || 0,
|
||||
targetLanguage,
|
||||
trimRange,
|
||||
llmProvider,
|
||||
});
|
||||
|
||||
if (autoGenerationKeyRef.current === autoGenerationKey) {
|
||||
return;
|
||||
}
|
||||
|
||||
autoGenerationKeyRef.current = autoGenerationKey;
|
||||
fetchSubtitles();
|
||||
}, [fetchSubtitles]);
|
||||
}, [fetchSubtitles, videoFile, targetLanguage, trimRange, llmProvider]);
|
||||
|
||||
const [textStyles, setTextStyles] = useState<TextStyles>({
|
||||
fontFamily: 'MiSans-Late',
|
||||
|
||||
36
src/server/errorLogging.test.ts
Normal file
36
src/server/errorLogging.test.ts
Normal file
@ -0,0 +1,36 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { formatLogContext, serializeError } from './errorLogging';
|
||||
|
||||
describe('errorLogging', () => {
  // serializeError keeps the error's own code and reduces its cause to
  // message/name/code.
  it('serializes nested causes and error codes', () => {
    const rootCause = new Error('Headers Timeout Error');
    const failure = Object.assign(new TypeError('fetch failed', { cause: rootCause }), {
      code: 'UND_ERR_HEADERS_TIMEOUT',
    });

    const serialized = serializeError(failure);

    expect(serialized).toEqual(
      expect.objectContaining({
        message: 'fetch failed',
        name: 'TypeError',
        code: 'UND_ERR_HEADERS_TIMEOUT',
        cause: {
          message: 'Headers Timeout Error',
          name: 'Error',
          code: undefined,
        },
      }),
    );
  });

  // formatLogContext drops undefined values and keeps insertion order.
  it('formats context as stable key-value pairs', () => {
    const formatted = formatLogContext({
      requestId: 'req-1',
      provider: 'doubao',
      durationMs: 1234,
      ignored: undefined,
    });

    expect(formatted).toBe('requestId=req-1 provider=doubao durationMs=1234');
  });
});
|
||||
51
src/server/errorLogging.ts
Normal file
51
src/server/errorLogging.ts
Normal file
@ -0,0 +1,51 @@
|
||||
/**
 * Plain-object description of a thrown value, as produced by `serializeError`.
 * Only `message` is always present; the remaining fields are filled in when
 * the thrown value is an `Error` that carries them.
 */
export interface ErrorLogDetails {
  /** Error message, or a string rendering of a non-Error thrown value. */
  message: string;
  /** `Error.name` of the thrown error. */
  name?: string;
  /** `code` property of the thrown error, when it has a truthy one. */
  code?: string;
  /** Stack trace of the thrown error. */
  stack?: string;
  /** Serialized form of the error's `cause`, when one is set. */
  cause?: unknown;
}
|
||||
|
||||
/**
 * Best-effort string form of a thrown value that is not an `Error`.
 * Never throws, so it is safe to call from inside a `catch` block.
 */
const describeNonErrorValue = (value: unknown): string => {
  if (typeof value === 'string') {
    return value;
  }

  try {
    // JSON.stringify yields undefined (not a string) for undefined,
    // functions and symbols, so the result has to be checked.
    const json: string | undefined = JSON.stringify(value);
    if (typeof json === 'string') {
      return json;
    }
  } catch {
    // Circular structures and BigInt values make JSON.stringify throw.
  }

  try {
    return String(value);
  } catch {
    // Values that cannot be converted to a primitive (e.g. null-prototype objects).
    return Object.prototype.toString.call(value);
  }
};

/**
 * Converts an arbitrary thrown value into a plain object suitable for logging.
 *
 * For `Error` instances the message, name and stack are copied; `code` is
 * added only when it is truthy and `cause` only when it is not `undefined`.
 * Any other thrown value is reduced to a `message` string: strings are used
 * as-is, other values are JSON-encoded when possible and otherwise rendered
 * with `String(...)`.
 *
 * @param error - The value caught by a `catch` block.
 * @returns Log details whose `message` is always a string.
 */
export const serializeError = (error: unknown): ErrorLogDetails => {
  if (!(error instanceof Error)) {
    return { message: describeNonErrorValue(error) };
  }

  const details: ErrorLogDetails = {
    message: error.message,
    name: error.name,
    stack: error.stack,
  };

  // `code` is not part of the standard Error type, hence the widened view.
  const errorWithCode = error as Error & { code?: string; cause?: unknown };
  if (errorWithCode.code) {
    details.code = errorWithCode.code;
  }

  if (errorWithCode.cause !== undefined) {
    details.cause = serializeCause(errorWithCode.cause);
  }

  return details;
};
|
||||
|
||||
/**
 * Renders a log context as space-separated `key=value` pairs.
 *
 * Entries whose value is `undefined` are omitted; every other value is
 * rendered with `String(...)`. Pairs appear in the object's own entry order.
 *
 * @param context - Key/value pairs to include in the log line.
 * @returns The formatted pairs, or an empty string when nothing remains.
 */
export const formatLogContext = (context: Record<string, unknown>) => {
  const pairs: string[] = [];

  for (const [key, value] of Object.entries(context)) {
    if (value === undefined) {
      continue;
    }
    pairs.push(`${key}=${String(value)}`);
  }

  return pairs.join(' ');
};
|
||||
|
||||
/**
 * Produces a compact summary of an error's `cause` for logging.
 *
 * An `Error` cause is reduced to its `message`, `name` and `code` (`code` is
 * `undefined` unless the error carries a truthy one). Any other cause value
 * is returned unchanged.
 *
 * The fields are read directly from the cause instead of running a full
 * recursive serialization whose extra output would be discarded; this also
 * keeps cyclic `cause` chains from recursing without bound.
 *
 * @param cause - The `cause` value attached to an error.
 * @returns The summarized cause, or the original value when it is not an Error.
 */
const serializeCause = (cause: unknown): unknown => {
  if (!(cause instanceof Error)) {
    return cause;
  }

  // `code` is not part of the standard Error type, hence the widened view.
  const { code } = cause as Error & { code?: string };

  return {
    message: cause.message,
    name: cause.name,
    code: code ? code : undefined,
  };
};
|
||||
@ -1,6 +1,7 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
DEFAULT_DOUBAO_MODEL,
|
||||
DEFAULT_DOUBAO_TIMEOUT_MS,
|
||||
DEFAULT_LLM_PROVIDER,
|
||||
normalizeLlmProvider,
|
||||
resolveLlmProviderConfig,
|
||||
@ -30,6 +31,22 @@ describe('llmProvider', () => {
|
||||
apiKey: 'ark-key',
|
||||
model: DEFAULT_DOUBAO_MODEL,
|
||||
baseUrl: 'https://ark.cn-beijing.volces.com/api/v3/responses',
|
||||
timeoutMs: DEFAULT_DOUBAO_TIMEOUT_MS,
|
||||
});
|
||||
});
|
||||
|
||||
// A DOUBAO_TIMEOUT_MS value in the environment should override the default timeout.
it('reads a custom doubao timeout from env', () => {
  const env = {
    ARK_API_KEY: 'ark-key',
    DOUBAO_TIMEOUT_MS: '600000',
  };

  const config = resolveLlmProviderConfig('doubao', env);

  expect(config).toEqual({
    provider: 'doubao',
    apiKey: 'ark-key',
    model: DEFAULT_DOUBAO_MODEL,
    baseUrl: 'https://ark.cn-beijing.volces.com/api/v3/responses',
    timeoutMs: 600000,
  });
});
|
||||
|
||||
|
||||
@ -2,6 +2,7 @@ export const DEFAULT_LLM_PROVIDER = 'doubao';
|
||||
export const DEFAULT_DOUBAO_MODEL = 'doubao-seed-2-0-pro-260215';
|
||||
export const DEFAULT_GEMINI_MODEL = 'gemini-2.5-flash';
|
||||
export const DEFAULT_DOUBAO_RESPONSES_URL = 'https://ark.cn-beijing.volces.com/api/v3/responses';
|
||||
export const DEFAULT_DOUBAO_TIMEOUT_MS = 600000;
|
||||
|
||||
export type LlmProvider = 'doubao' | 'gemini';
|
||||
|
||||
@ -10,6 +11,7 @@ export interface DoubaoProviderConfig {
|
||||
apiKey: string;
|
||||
model: string;
|
||||
baseUrl: string;
|
||||
timeoutMs: number;
|
||||
}
|
||||
|
||||
export interface GeminiProviderConfig {
|
||||
@ -20,6 +22,15 @@ export interface GeminiProviderConfig {
|
||||
|
||||
export type LlmProviderConfig = DoubaoProviderConfig | GeminiProviderConfig;
|
||||
|
||||
/**
 * Parses the Doubao request timeout (in milliseconds) from its raw
 * environment-variable string.
 *
 * The value is trimmed, converted with `Number(...)` and truncated to whole
 * milliseconds. Validation happens AFTER truncation so that fractional
 * values below 1 (e.g. "0.5") fall back to the default instead of
 * producing a 0 ms timeout.
 *
 * @param value - Raw setting, typically `env.DOUBAO_TIMEOUT_MS`; may be absent.
 * @returns A positive whole number of milliseconds, or
 *   `DEFAULT_DOUBAO_TIMEOUT_MS` when the value is missing, non-numeric,
 *   non-finite, or not at least 1 after truncation.
 */
const resolveDoubaoTimeoutMs = (value?: string) => {
  const timeoutMs = Math.floor(Number(value?.trim()));

  if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
    return DEFAULT_DOUBAO_TIMEOUT_MS;
  }

  return timeoutMs;
};
|
||||
|
||||
export const normalizeLlmProvider = (value?: string | null): LlmProvider => {
|
||||
if (!value) {
|
||||
return DEFAULT_LLM_PROVIDER;
|
||||
@ -48,6 +59,7 @@ export const resolveLlmProviderConfig = (
|
||||
apiKey,
|
||||
model: env.DOUBAO_MODEL?.trim() || DEFAULT_DOUBAO_MODEL,
|
||||
baseUrl: (env.DOUBAO_BASE_URL?.trim() || DEFAULT_DOUBAO_RESPONSES_URL).replace(/\/+$/, ''),
|
||||
timeoutMs: resolveDoubaoTimeoutMs(env.DOUBAO_TIMEOUT_MS),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@ -16,6 +16,7 @@ describe('createSentenceTranslator', () => {
|
||||
apiKey: 'ark-key',
|
||||
model: 'doubao-seed-2-0-pro-260215',
|
||||
baseUrl: 'https://ark.cn-beijing.volces.com/api/v3/responses',
|
||||
timeoutMs: 600000,
|
||||
});
|
||||
|
||||
expect(translator).toBe('doubao-translator');
|
||||
|
||||
@ -66,6 +66,7 @@ describe('generateSubtitlePipeline', () => {
|
||||
apiKey: 'ark-key',
|
||||
model: 'doubao-seed-2-0-pro-260215',
|
||||
baseUrl: 'https://ark.cn-beijing.volces.com/api/v3/responses',
|
||||
timeoutMs: 600000,
|
||||
},
|
||||
}),
|
||||
);
|
||||
@ -89,6 +90,7 @@ describe('generateSubtitlePipeline', () => {
|
||||
ARK_API_KEY: 'ark-key',
|
||||
},
|
||||
fetchImpl,
|
||||
requestId: 'req-123',
|
||||
deps: {
|
||||
generateSubtitlesFromVideo,
|
||||
},
|
||||
@ -97,6 +99,36 @@ describe('generateSubtitlePipeline', () => {
|
||||
expect(generateSubtitlesFromVideo).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
fetchImpl,
|
||||
requestId: 'req-123',
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
// When only a fileId is supplied, the pipeline should forward it to the
// video subtitle generator and leave videoPath undefined.
it('passes file id through to video subtitle generation', async () => {
  const emptyResult: SubtitlePipelineResult = {
    subtitles: [],
    speakers: [],
    quality: 'fallback',
    targetLanguage: 'English',
  };
  const generateSubtitlesFromVideo = vi.fn(async () => emptyResult);

  await generateSubtitlePipeline({
    fileId: 'file-123',
    targetLanguage: 'English',
    provider: 'doubao',
    env: { ARK_API_KEY: 'ark-key' },
    deps: { generateSubtitlesFromVideo },
  });

  const expectedArgs = expect.objectContaining({
    fileId: 'file-123',
    videoPath: undefined,
  });
  expect(generateSubtitlesFromVideo).toHaveBeenCalledWith(expectedArgs);
});
|
||||
|
||||
@ -3,11 +3,13 @@ import { resolveLlmProviderConfig, normalizeLlmProvider } from './llmProvider';
|
||||
import { generateSubtitlesFromVideo as defaultGenerateSubtitlesFromVideo } from './videoSubtitleGeneration';
|
||||
|
||||
export interface GenerateSubtitlePipelineOptions {
|
||||
videoPath: string;
|
||||
videoPath?: string;
|
||||
fileId?: string;
|
||||
targetLanguage: string;
|
||||
provider?: string | null;
|
||||
env: NodeJS.ProcessEnv;
|
||||
fetchImpl?: typeof fetch;
|
||||
requestId?: string;
|
||||
deps?: {
|
||||
generateSubtitlesFromVideo?: typeof defaultGenerateSubtitlesFromVideo;
|
||||
};
|
||||
@ -15,12 +17,18 @@ export interface GenerateSubtitlePipelineOptions {
|
||||
|
||||
export const generateSubtitlePipeline = async ({
|
||||
videoPath,
|
||||
fileId,
|
||||
targetLanguage,
|
||||
provider,
|
||||
env,
|
||||
fetchImpl,
|
||||
requestId,
|
||||
deps,
|
||||
}: GenerateSubtitlePipelineOptions) => {
|
||||
if (!videoPath && !fileId) {
|
||||
throw new Error('A video upload or fileId is required.');
|
||||
}
|
||||
|
||||
const audioPipelineConfig = resolveAudioPipelineConfig(env);
|
||||
const selectedProvider = provider
|
||||
? normalizeLlmProvider(provider)
|
||||
@ -32,7 +40,9 @@ export const generateSubtitlePipeline = async ({
|
||||
return generateSubtitlesFromVideo({
|
||||
providerConfig,
|
||||
videoPath,
|
||||
fileId,
|
||||
targetLanguage,
|
||||
requestId,
|
||||
...(fetchImpl ? { fetchImpl } : {}),
|
||||
});
|
||||
};
|
||||
|
||||
@ -26,4 +26,17 @@ describe('parseSubtitleRequest', () => {
|
||||
/target language/i,
|
||||
);
|
||||
});
|
||||
|
||||
it('preserves a file id when provided', () => {
|
||||
expect(
|
||||
parseSubtitleRequest({
|
||||
targetLanguage: 'English',
|
||||
fileId: 'file-123',
|
||||
}),
|
||||
).toEqual({
|
||||
provider: 'doubao',
|
||||
targetLanguage: 'English',
|
||||
fileId: 'file-123',
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@ -3,11 +3,13 @@ import { LlmProvider, normalizeLlmProvider } from './llmProvider';
|
||||
/**
 * Shape of the raw, not-yet-validated subtitle request body.
 * Every field is optional and may be null; parseSubtitleRequest turns this
 * into a ParsedSubtitleRequest.
 */
export interface SubtitleRequestBody {
|
||||
// Requested LLM provider name; passed through normalizeLlmProvider when parsed.
provider?: string | null;
|
||||
// Language the subtitles should be translated into.
targetLanguage?: string | null;
|
||||
// Identifier of an already-uploaded file; kept only when non-blank after trimming.
fileId?: string | null;
|
||||
}
|
||||
|
||||
/**
 * Result of parseSubtitleRequest: the subtitle request after normalization.
 */
export interface ParsedSubtitleRequest {
|
||||
// Provider as returned by normalizeLlmProvider.
provider: LlmProvider;
|
||||
// Target language for the translated subtitles.
targetLanguage: string;
|
||||
// Trimmed file id; the key is omitted when the request carried no non-blank fileId.
fileId?: string;
|
||||
}
|
||||
|
||||
export const parseSubtitleRequest = (
|
||||
@ -21,5 +23,6 @@ export const parseSubtitleRequest = (
|
||||
return {
|
||||
provider: normalizeLlmProvider(body.provider),
|
||||
targetLanguage,
|
||||
...(body.fileId?.trim() ? { fileId: body.fileId.trim() } : {}),
|
||||
};
|
||||
};
|
||||
|
||||
126
src/server/videoSubtitleGeneration.test.ts
Normal file
126
src/server/videoSubtitleGeneration.test.ts
Normal file
@ -0,0 +1,126 @@
|
||||
import fs from 'fs';
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||||
import { generateSubtitlesFromVideo } from './videoSubtitleGeneration';
|
||||
|
||||
describe('generateSubtitlesFromVideo', () => {
  // Doubao provider config shared by every case in this suite.
  const doubaoConfig = {
    provider: 'doubao' as const,
    apiKey: 'ark-key',
    model: 'doubao-seed-2-0-pro-260215',
    baseUrl: 'https://ark.cn-beijing.volces.com/api/v3/responses',
    timeoutMs: 600000,
  };

  // Builds a fetch mock that answers every call with a minimal successful
  // Doubao payload: one output item whose text is the JSON-encoded subtitle result.
  const createDoubaoFetchMock = () =>
    vi.fn<typeof fetch>(async () =>
      new Response(
        JSON.stringify({
          output: [
            {
              content: [
                {
                  text: JSON.stringify({
                    sourceLanguage: 'zh',
                    subtitles: [
                      {
                        originalText: '你好',
                        translatedText: 'Hello',
                        startTime: 0,
                        endTime: 1,
                      },
                    ],
                  }),
                },
              ],
            },
          ],
        }),
        {
          status: 200,
          headers: { 'Content-Type': 'application/json' },
        },
      ),
    );

  afterEach(() => {
    // Undo the fs and AbortSignal spies installed by individual cases.
    vi.restoreAllMocks();
  });

  it('passes the configured doubao timeout to fetch', async () => {
    vi.spyOn(fs, 'readFileSync').mockReturnValue(Buffer.from('video-bytes'));
    // Spy (calling through) so the timeout VALUE can be asserted; checking only
    // that some AbortSignal reached fetch would pass for any timeout.
    const timeoutSpy = vi.spyOn(AbortSignal, 'timeout');
    const fetchImpl = createDoubaoFetchMock();

    await generateSubtitlesFromVideo({
      providerConfig: doubaoConfig,
      videoPath: 'clip.mp4',
      targetLanguage: 'English',
      fetchImpl,
    });

    expect(timeoutSpy).toHaveBeenCalledWith(doubaoConfig.timeoutMs);
    expect(fetchImpl).toHaveBeenCalledWith(
      'https://ark.cn-beijing.volces.com/api/v3/responses',
      expect.objectContaining({
        method: 'POST',
        signal: expect.any(AbortSignal),
      }),
    );
  });

  it('uses ark file ids for doubao requests when available', async () => {
    const fetchImpl = createDoubaoFetchMock();

    await generateSubtitlesFromVideo({
      providerConfig: doubaoConfig,
      fileId: 'file-123',
      targetLanguage: 'English',
      fetchImpl,
    });

    const [, request] = fetchImpl.mock.calls[0] as [string, RequestInit];
    const payload = JSON.parse(String(request.body));

    // First message: the system prompt.
    expect(payload.input[0].role).toBe('system');
    expect(payload.input[0].content[0].type).toBe('input_text');
    expect(payload.input[0].content[0].text).toContain('# Role');
    expect(payload.input[0].content[0].text).toContain('Voice Selection');

    // Second message: the video referenced by file id, then the user prompt.
    expect(payload.input[1].role).toBe('user');
    expect(payload.input[1].content[0]).toEqual({
      type: 'input_video',
      file_id: 'file-123',
    });
    expect(payload.input[1].content[1].type).toBe('input_text');
    expect(payload.input[1].content[1].text).toContain('Target language: English');
    expect(payload.input[1].content[1].text).toContain('Available voices');
    expect(payload.input[1].content[1].text).toContain('Sweet_Girl');
  });
});
|
||||
@ -1,6 +1,8 @@
|
||||
import fs from 'fs';
|
||||
import { GoogleGenAI } from '@google/genai';
|
||||
import { SubtitlePipelineResult } from '../types';
|
||||
import { MINIMAX_VOICES } from '../voices';
|
||||
import { formatLogContext, serializeError } from './errorLogging';
|
||||
import { DoubaoProviderConfig, GeminiProviderConfig, LlmProviderConfig } from './llmProvider';
|
||||
|
||||
interface RawModelSubtitle {
|
||||
@ -19,13 +21,7 @@ interface RawModelResponse {
|
||||
}
|
||||
|
||||
const DEFAULT_VOICE_ID = 'male-qn-qingse';
|
||||
const SUPPORTED_VOICE_IDS = new Set([
|
||||
DEFAULT_VOICE_ID,
|
||||
'female-shaonv',
|
||||
'female-yujie',
|
||||
'male-qn-jingying',
|
||||
'male-qn-badao',
|
||||
]);
|
||||
const SUPPORTED_VOICE_IDS = new Set(MINIMAX_VOICES.map((voice) => voice.id));
|
||||
|
||||
const stripJsonFences = (text: string) => text.replace(/```json\n?|\n?```/g, '').trim();
|
||||
|
||||
@ -60,10 +56,85 @@ const sanitizeVoiceId = (value: unknown) => {
|
||||
return SUPPORTED_VOICE_IDS.has(value) ? value : DEFAULT_VOICE_ID;
|
||||
};
|
||||
|
||||
const createPrompt = (targetLanguage: string) => `You are a subtitle localization engine.
|
||||
Analyze the input video and output STRICT JSON only.
|
||||
// Lookup table from a lowercase language name or code to the language code that
// normalizeTargetLanguageCode returns. Keys must be lowercase because the lookup
// key is trimmed and lowercased before indexing.
const LANGUAGE_ALIASES: Record<string, string> = {
|
||||
zh: 'zh',
|
||||
chinese: 'zh',
|
||||
mandarin: 'zh',
|
||||
'chinese mandarin': 'zh',
|
||||
english: 'en',
|
||||
en: 'en',
|
||||
french: 'fr',
|
||||
fr: 'fr',
|
||||
indonesian: 'id',
|
||||
id: 'id',
|
||||
german: 'de',
|
||||
de: 'de',
|
||||
filipino: 'fil',
|
||||
fil: 'fil',
|
||||
cantonese: 'yue',
|
||||
yue: 'yue',
|
||||
};
|
||||
|
||||
Return an object:
|
||||
/**
 * Converts a target language name or code into the code used for voice matching.
 *
 * @param targetLanguage Language name or code in any casing, with optional
 *   surrounding whitespace.
 * @returns The code registered in LANGUAGE_ALIASES, or the trimmed, lowercased
 *   input itself when no alias is registered.
 */
const normalizeTargetLanguageCode = (targetLanguage: string) => {
  const key = targetLanguage.trim().toLowerCase();

  // Own-property check: a bare index lookup would also resolve inherited keys
  // such as "constructor" or "__proto__" and return a non-string value.
  return Object.prototype.hasOwnProperty.call(LANGUAGE_ALIASES, key)
    ? LANGUAGE_ALIASES[key]
    : key;
};
|
||||
|
||||
/**
 * Renders the voice catalog as a bullet list for inclusion in the prompt.
 *
 * Only voices whose language equals the normalized target language are listed;
 * when none match, the whole MINIMAX_VOICES catalog is listed instead.
 *
 * @param targetLanguage Target language name or code.
 * @returns One "- voiceId: ..." line per voice, joined with newlines.
 */
const formatVoiceCatalogForPrompt = (targetLanguage: string) => {
  const wantedCode = normalizeTargetLanguageCode(targetLanguage);
  const sameLanguage = MINIMAX_VOICES.filter(({ language }) => language === wantedCode);
  // Fall back to the full catalog rather than an empty list.
  const catalog = sameLanguage.length === 0 ? MINIMAX_VOICES : sameLanguage;

  const lines: string[] = [];
  for (const { id, gender, language, tag, name } of catalog) {
    lines.push(
      `- voiceId: ${id} | gender: ${gender} | language: ${language} | tag: ${tag} | name: ${name}`,
    );
  }

  return lines.join('\n');
};
|
||||
|
||||
const createSystemPrompt = () => `# Role
|
||||
You are a senior film and TV subtitle expert and an advanced localization translator.
|
||||
You deeply understand screen reading experience.
|
||||
Subtitles must be short, easy to read, precisely timed to the visuals and speech rhythm, and must never cause viewer reading fatigue.
|
||||
|
||||
# Task
|
||||
Listen to and watch the user-provided audio or video.
|
||||
Transcribe the spoken content and translate it into the target language specified by the user.
|
||||
Extract highly accurate start and end timestamps, speaker labels, and speaker gender.
|
||||
Select the most suitable voiceId for each subtitle item by matching the speaker's gender, tone, style, and delivery to the voice options provided by the user.
|
||||
Return the result strictly in the required JSON format.
|
||||
|
||||
# Constraints
|
||||
1. Strict Subtitle Splitting:
|
||||
Absolutely do not accumulate or merge long sentences into oversized subtitle lines.
|
||||
You must split subtitles according to the speaker's actual breathing, pauses, commas, short hesitations, and natural phrasing.
|
||||
|
||||
2. Screen-Friendly Length:
|
||||
Each subtitle item must be short.
|
||||
Chinese text should ideally stay within 15 to 20 characters.
|
||||
English text should ideally stay within 7 to 10 words.
|
||||
If a sentence is too long, you must split it into multiple subtitle objects with consecutive timestamps.
|
||||
|
||||
3. Highly Precise Timestamps:
|
||||
Timestamps must align closely with the actual speech.
|
||||
Use floating-point seconds.
|
||||
The duration of a single subtitle item should usually not exceed 3 to 5 seconds.
|
||||
|
||||
4. Speaker and Gender:
|
||||
Accurately identify the speaker label and speaker gender.
|
||||
Gender must be either "male" or "female".
|
||||
|
||||
5. Voice Selection:
|
||||
The user will provide the target language and a list of available voices.
|
||||
Each voice includes a voiceId and descriptive metadata.
|
||||
You must analyze the user-provided voice list and choose the best matching voiceId for each subtitle item.
|
||||
Only return a voiceId that exists in the user-provided voice list.
|
||||
Do not invent new voiceId values.
|
||||
|
||||
6. Output Format:
|
||||
Return only valid JSON.
|
||||
Do not output markdown, code fences, explanations, or any extra text.
|
||||
|
||||
Return an object with this exact structure:
|
||||
{
|
||||
"sourceLanguage": "detected language code",
|
||||
"subtitles": [
|
||||
@ -72,19 +143,29 @@ Return an object:
|
||||
"startTime": 0.0,
|
||||
"endTime": 1.2,
|
||||
"originalText": "source dialogue",
|
||||
"translatedText": "translated dialogue in ${targetLanguage}",
|
||||
"translatedText": "translated dialogue in the target language",
|
||||
"speaker": "short speaker label",
|
||||
"voiceId": "one of: male-qn-qingse, female-shaonv, female-yujie, male-qn-jingying, male-qn-badao"
|
||||
"gender": "male or female",
|
||||
"voiceId": "one of the user-provided voice ids"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Rules:
|
||||
1. Use video timeline seconds for startTime/endTime.
|
||||
Additional rules:
|
||||
1. Use video timeline seconds for startTime and endTime.
|
||||
2. Keep subtitles chronological and non-overlapping.
|
||||
3. Do not invent dialogue if not audible.
|
||||
4. translatedText must be in ${targetLanguage}.
|
||||
5. Do not include markdown. JSON only.`;
|
||||
3. Do not invent dialogue if it is not actually audible.
|
||||
4. Preserve meaning naturally while keeping subtitle lines short and readable.
|
||||
5. If a long utterance must be split, preserve continuity across consecutive subtitle items.
|
||||
6. Output JSON only.`;
|
||||
|
||||
/**
 * Builds the per-request user prompt text: the target language, the voice list
 * produced by formatVoiceCatalogForPrompt for that language, and the instruction
 * to transcribe, translate, and assign a voiceId to each subtitle item.
 *
 * @param targetLanguage Language the dialogue should be translated into.
 * @returns The prompt text.
 */
const createUserPrompt = (targetLanguage: string) => `Target language: ${targetLanguage}
|
||||
|
||||
Available voices:
|
||||
${formatVoiceCatalogForPrompt(targetLanguage)}
|
||||
|
||||
Please watch and listen to the provided video.
|
||||
Transcribe the dialogue, translate it into ${targetLanguage}, and assign the best matching voiceId from the available voices for each subtitle item.`;
|
||||
|
||||
const normalizeSubtitles = (raw: RawModelSubtitle[]) => {
|
||||
let lastEnd = 0;
|
||||
@ -135,16 +216,33 @@ const extractDoubaoTextOutput = (payload: any): string => {
|
||||
const generateWithDoubao = async ({
|
||||
config,
|
||||
videoDataUrl,
|
||||
fileId,
|
||||
targetLanguage,
|
||||
fetchImpl = fetch,
|
||||
requestId,
|
||||
}: {
|
||||
config: DoubaoProviderConfig;
|
||||
videoDataUrl: string;
|
||||
videoDataUrl?: string;
|
||||
fileId?: string;
|
||||
targetLanguage: string;
|
||||
fetchImpl?: typeof fetch;
|
||||
requestId?: string;
|
||||
}) => {
|
||||
const response = await fetchImpl(config.baseUrl, {
|
||||
const startedAt = Date.now();
|
||||
const logContext = formatLogContext({
|
||||
requestId,
|
||||
provider: 'doubao',
|
||||
timeoutMs: config.timeoutMs,
|
||||
targetLanguage,
|
||||
});
|
||||
|
||||
console.info(`[subtitle] doubao request started ${logContext}`);
|
||||
|
||||
let response: Response;
|
||||
try {
|
||||
response = await fetchImpl(config.baseUrl, {
|
||||
method: 'POST',
|
||||
signal: AbortSignal.timeout(config.timeoutMs),
|
||||
headers: {
|
||||
Authorization: `Bearer ${config.apiKey}`,
|
||||
'Content-Type': 'application/json',
|
||||
@ -152,23 +250,57 @@ const generateWithDoubao = async ({
|
||||
body: JSON.stringify({
|
||||
model: config.model,
|
||||
input: [
|
||||
{
|
||||
role: 'system',
|
||||
content: [
|
||||
{ type: 'input_text', text: createSystemPrompt() },
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'input_video', video_url: videoDataUrl },
|
||||
{ type: 'input_text', text: createPrompt(targetLanguage) },
|
||||
fileId
|
||||
? { type: 'input_video', file_id: fileId }
|
||||
: { type: 'input_video', video_url: videoDataUrl },
|
||||
{ type: 'input_text', text: createUserPrompt(targetLanguage) },
|
||||
],
|
||||
},
|
||||
],
|
||||
}),
|
||||
});
|
||||
} catch (error) {
|
||||
console.error(
|
||||
`[subtitle] doubao request failed ${formatLogContext({
|
||||
requestId,
|
||||
provider: 'doubao',
|
||||
timeoutMs: config.timeoutMs,
|
||||
durationMs: Date.now() - startedAt,
|
||||
})}`,
|
||||
serializeError(error),
|
||||
);
|
||||
throw error;
|
||||
}
|
||||
|
||||
if (!response.ok) {
|
||||
const payload = await response.text();
|
||||
console.error(
|
||||
`[subtitle] doubao request returned non-200 ${formatLogContext({
|
||||
requestId,
|
||||
status: response.status,
|
||||
durationMs: Date.now() - startedAt,
|
||||
})}`,
|
||||
payload,
|
||||
);
|
||||
throw new Error(`Doubao subtitle request failed (${response.status}): ${payload}`);
|
||||
}
|
||||
|
||||
const payload = await response.json();
|
||||
console.info(
|
||||
`[subtitle] doubao request finished ${formatLogContext({
|
||||
requestId,
|
||||
durationMs: Date.now() - startedAt,
|
||||
})}`,
|
||||
);
|
||||
const text = extractDoubaoTextOutput(payload);
|
||||
return extractJson(text);
|
||||
};
|
||||
@ -195,7 +327,7 @@ const generateWithGemini = async ({
|
||||
data: videoBase64,
|
||||
},
|
||||
},
|
||||
{ text: createPrompt(targetLanguage) },
|
||||
{ text: `${createSystemPrompt()}\n\n${createUserPrompt(targetLanguage)}` },
|
||||
],
|
||||
},
|
||||
],
|
||||
@ -207,29 +339,39 @@ const generateWithGemini = async ({
|
||||
export const generateSubtitlesFromVideo = async ({
|
||||
providerConfig,
|
||||
videoPath,
|
||||
fileId,
|
||||
targetLanguage,
|
||||
fetchImpl = fetch,
|
||||
requestId,
|
||||
}: {
|
||||
providerConfig: LlmProviderConfig;
|
||||
videoPath: string;
|
||||
videoPath?: string;
|
||||
fileId?: string;
|
||||
targetLanguage: string;
|
||||
fetchImpl?: typeof fetch;
|
||||
requestId?: string;
|
||||
}): Promise<SubtitlePipelineResult> => {
|
||||
const videoBuffer = fs.readFileSync(videoPath);
|
||||
const videoBase64 = videoBuffer.toString('base64');
|
||||
const videoDataUrl = `data:video/mp4;base64,${videoBase64}`;
|
||||
if (providerConfig.provider === 'gemini' && !videoPath) {
|
||||
throw new Error('Gemini subtitle generation requires an uploaded video file.');
|
||||
}
|
||||
|
||||
const videoBuffer = videoPath ? fs.readFileSync(videoPath) : null;
|
||||
const videoBase64 = videoBuffer?.toString('base64');
|
||||
const videoDataUrl = videoBase64 ? `data:video/mp4;base64,${videoBase64}` : undefined;
|
||||
|
||||
const raw =
|
||||
providerConfig.provider === 'doubao'
|
||||
? await generateWithDoubao({
|
||||
config: providerConfig,
|
||||
videoDataUrl,
|
||||
fileId,
|
||||
targetLanguage,
|
||||
fetchImpl,
|
||||
requestId,
|
||||
})
|
||||
: await generateWithGemini({
|
||||
config: providerConfig,
|
||||
videoBase64,
|
||||
videoBase64: videoBase64!,
|
||||
targetLanguage,
|
||||
});
|
||||
|
||||
|
||||
@ -1,10 +1,19 @@
|
||||
// @vitest-environment jsdom
|
||||
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { generateSubtitlePipeline } from './subtitleService';
|
||||
|
||||
describe('generateSubtitlePipeline', () => {
|
||||
it('posts the selected provider to the server', async () => {
|
||||
beforeEach(() => {
|
||||
vi.stubEnv('VITE_ARK_API_KEY', 'ark-key');
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllEnvs();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it('posts the selected provider to the server for gemini', async () => {
|
||||
const fetchMock = vi.fn(async () =>
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
@ -24,7 +33,7 @@ describe('generateSubtitlePipeline', () => {
|
||||
await generateSubtitlePipeline(
|
||||
new File(['video'], 'clip.mp4', { type: 'video/mp4' }),
|
||||
'English',
|
||||
'doubao',
|
||||
'gemini',
|
||||
null,
|
||||
fetchMock as unknown as typeof fetch,
|
||||
);
|
||||
@ -40,6 +49,210 @@ describe('generateSubtitlePipeline', () => {
|
||||
const [, requestInit] = fetchMock.mock.calls[0] as unknown as [string, RequestInit];
|
||||
const formData = requestInit.body as FormData;
|
||||
expect(formData.get('targetLanguage')).toBe('English');
|
||||
expect(formData.get('provider')).toBe('doubao');
|
||||
expect(formData.get('provider')).toBe('gemini');
|
||||
});
|
||||
|
||||
it('uploads doubao videos to ark files before requesting subtitles', async () => {
|
||||
vi.useFakeTimers();
|
||||
const fetchMock = vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
id: 'file-123',
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
),
|
||||
)
|
||||
.mockResolvedValueOnce(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
id: 'file-123',
|
||||
status: 'processing',
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
),
|
||||
)
|
||||
.mockResolvedValueOnce(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
id: 'file-123',
|
||||
status: 'active',
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
),
|
||||
)
|
||||
.mockResolvedValueOnce(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
subtitles: [],
|
||||
speakers: [],
|
||||
quality: 'fallback',
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
),
|
||||
);
|
||||
|
||||
const promise = generateSubtitlePipeline(
|
||||
new File(['video'], 'clip.mp4', { type: 'video/mp4' }),
|
||||
'English',
|
||||
'doubao',
|
||||
null,
|
||||
fetchMock as unknown as typeof fetch,
|
||||
);
|
||||
await vi.runAllTimersAsync();
|
||||
await promise;
|
||||
|
||||
expect(fetchMock).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
'https://ark.cn-beijing.volces.com/api/v3/files',
|
||||
expect.objectContaining({
|
||||
method: 'POST',
|
||||
body: expect.any(FormData),
|
||||
}),
|
||||
);
|
||||
|
||||
expect(fetchMock).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
'https://ark.cn-beijing.volces.com/api/v3/files/file-123',
|
||||
expect.objectContaining({
|
||||
method: 'GET',
|
||||
headers: {
|
||||
Authorization: 'Bearer ark-key',
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
expect(fetchMock).toHaveBeenNthCalledWith(
|
||||
3,
|
||||
'https://ark.cn-beijing.volces.com/api/v3/files/file-123',
|
||||
expect.objectContaining({
|
||||
method: 'GET',
|
||||
headers: {
|
||||
Authorization: 'Bearer ark-key',
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const [, subtitleRequest] = fetchMock.mock.calls[3] as unknown as [string, RequestInit];
|
||||
const subtitleBody = JSON.parse(String(subtitleRequest.body));
|
||||
|
||||
expect(fetchMock).toHaveBeenNthCalledWith(
|
||||
4,
|
||||
'/api/generate-subtitles',
|
||||
expect.objectContaining({
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
}),
|
||||
);
|
||||
expect(subtitleBody).toEqual({
|
||||
fileId: 'file-123',
|
||||
provider: 'doubao',
|
||||
targetLanguage: 'English',
|
||||
});
|
||||
});
|
||||
|
||||
it('stops when ark reports file preprocessing failure', async () => {
|
||||
const fetchMock = vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
id: 'file-123',
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
),
|
||||
)
|
||||
.mockResolvedValueOnce(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
id: 'file-123',
|
||||
status: 'failed',
|
||||
error: {
|
||||
message: 'video preprocess failed',
|
||||
},
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
),
|
||||
);
|
||||
|
||||
await expect(
|
||||
generateSubtitlePipeline(
|
||||
new File(['video'], 'clip.mp4', { type: 'video/mp4' }),
|
||||
'English',
|
||||
'doubao',
|
||||
null,
|
||||
fetchMock as unknown as typeof fetch,
|
||||
),
|
||||
).rejects.toThrow(/video preprocess failed/i);
|
||||
|
||||
expect(fetchMock).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it('keeps multipart uploads for gemini requests', async () => {
|
||||
const fetchMock = vi.fn(async () =>
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
subtitles: [],
|
||||
speakers: [],
|
||||
quality: 'fallback',
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
},
|
||||
),
|
||||
);
|
||||
|
||||
await generateSubtitlePipeline(
|
||||
new File(['video'], 'clip.mp4', { type: 'video/mp4' }),
|
||||
'English',
|
||||
'gemini',
|
||||
null,
|
||||
fetchMock as unknown as typeof fetch,
|
||||
);
|
||||
|
||||
expect(fetchMock).toHaveBeenCalledTimes(1);
|
||||
expect(fetchMock).toHaveBeenCalledWith(
|
||||
'/api/generate-subtitles',
|
||||
expect.objectContaining({
|
||||
method: 'POST',
|
||||
body: expect.any(FormData),
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@ -5,6 +5,10 @@ type JsonResponseResult<T> =
|
||||
| { ok: true; status: number; data: T }
|
||||
| { ok: false; status: number; error: string };
|
||||
|
||||
// Ark Files API endpoint: videos are POSTed here and `${ARK_FILES_URL}/<fileId>` is polled for status.
const ARK_FILES_URL = 'https://ark.cn-beijing.volces.com/api/v3/files';
|
||||
// Delay between two file status polls, in milliseconds.
const ARK_FILE_STATUS_POLL_INTERVAL_MS = 1000;
|
||||
// Maximum time to wait for an uploaded file to become active, in milliseconds.
const ARK_FILE_STATUS_TIMEOUT_MS = 120000;
|
||||
|
||||
const normalizePipelineQuality = (value: unknown): PipelineQuality => {
|
||||
if (value === 'full' || value === 'partial' || value === 'fallback') {
|
||||
return value;
|
||||
@ -38,6 +42,90 @@ const readJsonResponseOnce = async <T>(resp: Response): Promise<JsonResponseResu
|
||||
};
|
||||
};
|
||||
|
||||
/**
 * Uploads a video to the Ark Files API from the browser.
 *
 * NOTE(review): the key is read from VITE_ARK_API_KEY, which is exposed to the
 * frontend — only use this path in trusted environments.
 *
 * @param videoFile Video to upload.
 * @param fetchImpl fetch implementation used for the request.
 * @returns The id Ark assigned to the file and the API key that was used, so
 *   the caller can reuse the key for follow-up requests.
 * @throws Error when the key is not configured, when the response is reported
 *   as not ok, or when the response carries no file id.
 */
const uploadDoubaoVideoFile = async (
  videoFile: File,
  fetchImpl: typeof fetch,
): Promise<{ fileId: string; apiKey: string }> => {
  const apiKey = import.meta.env.VITE_ARK_API_KEY?.trim();
  if (!apiKey) {
    throw new Error('VITE_ARK_API_KEY is required for frontend Doubao file uploads.');
  }

  // Multipart payload; fields are appended in the order purpose, file, fps.
  const uploadForm = new FormData();
  uploadForm.append('purpose', 'user_data');
  uploadForm.append('file', videoFile);
  uploadForm.append('preprocess_configs[video][fps]', '1');

  const authHeaders = { Authorization: `Bearer ${apiKey}` };
  const uploadResponse = await fetchImpl(ARK_FILES_URL, {
    method: 'POST',
    headers: authHeaders,
    body: uploadForm,
  });

  const uploadResult = await readJsonResponseOnce<{ id?: string }>(uploadResponse);
  if (uploadResult.ok === false) {
    throw new Error(uploadResult.error);
  }

  const fileId = uploadResult.data.id?.trim();
  if (fileId) {
    return { fileId, apiKey };
  }

  throw new Error('Ark Files API did not return a file id.');
};
|
||||
|
||||
/**
 * Returns a promise that resolves after the given delay.
 *
 * @param durationMs Delay in milliseconds, handed to setTimeout.
 */
const sleep = (durationMs: number) =>
  new Promise((wake) => setTimeout(wake, durationMs));
|
||||
|
||||
/**
 * Polls the Ark Files API until the uploaded file reports status "active".
 *
 * @param fileId Id returned by the Ark upload.
 * @param apiKey Ark API key sent as a Bearer token.
 * @param fetchImpl fetch implementation used for each poll.
 * @returns Resolves once the file status is "active".
 * @throws Error when a poll response is reported as not ok, when the status is
 *   "failed" (using the message Ark reported, if any), or when
 *   ARK_FILE_STATUS_TIMEOUT_MS elapses first.
 */
const waitForArkFileToBecomeActive = async (
  fileId: string,
  apiKey: string,
  fetchImpl: typeof fetch,
): Promise<void> => {
  const deadline = Date.now() + ARK_FILE_STATUS_TIMEOUT_MS;
  // Encode the id so characters such as "/" or "?" cannot alter the request path.
  const statusUrl = `${ARK_FILES_URL}/${encodeURIComponent(fileId)}`;

  while (true) {
    const resp = await fetchImpl(statusUrl, {
      method: 'GET',
      headers: {
        Authorization: `Bearer ${apiKey}`,
      },
    });

    const parsed = await readJsonResponseOnce<{
      status?: string;
      error?: { message?: string } | string;
    }>(resp);
    if (parsed.ok === false) {
      throw new Error(parsed.error);
    }

    const status = parsed.data.status?.trim().toLowerCase();
    if (status === 'active') {
      return;
    }

    if (status === 'failed') {
      const { error } = parsed.data;
      const reportedMessage = typeof error === 'string' ? error : error?.message;
      // An empty reported message also falls back, so the Error is never blank.
      throw new Error(reportedMessage || 'Ark file preprocessing failed.');
    }

    // Any other status (or none) means "keep waiting" until the deadline passes.
    const remainingMs = deadline - Date.now();
    if (remainingMs <= 0) {
      throw new Error('Timed out while waiting for Ark file preprocessing to complete.');
    }

    // Never sleep past the deadline; the last poll happens right at it.
    await sleep(Math.min(ARK_FILE_STATUS_POLL_INTERVAL_MS, remainingMs));
  }
};
|
||||
|
||||
export const generateSubtitlePipeline = async (
|
||||
videoFile: File,
|
||||
targetLanguage: string,
|
||||
@ -49,6 +137,41 @@ export const generateSubtitlePipeline = async (
|
||||
throw new Error('Target language is required.');
|
||||
}
|
||||
|
||||
if (provider === 'doubao') {
|
||||
const { fileId, apiKey } = await uploadDoubaoVideoFile(videoFile, fetchImpl);
|
||||
await waitForArkFileToBecomeActive(fileId, apiKey, fetchImpl);
|
||||
const resp = await fetchImpl(apiUrl('/generate-subtitles'), {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
fileId,
|
||||
targetLanguage,
|
||||
provider,
|
||||
...(trimRange ? { trimRange } : {}),
|
||||
}),
|
||||
});
|
||||
|
||||
const parsed = await readJsonResponseOnce<Partial<SubtitlePipelineResult>>(resp);
|
||||
if (parsed.ok === false) {
|
||||
const error = new Error(parsed.error);
|
||||
(error as any).status = resp.status;
|
||||
throw error;
|
||||
}
|
||||
|
||||
return {
|
||||
subtitles: Array.isArray(parsed.data.subtitles) ? parsed.data.subtitles : [],
|
||||
speakers: Array.isArray(parsed.data.speakers) ? parsed.data.speakers : [],
|
||||
quality: normalizePipelineQuality(parsed.data.quality),
|
||||
sourceLanguage: parsed.data.sourceLanguage,
|
||||
targetLanguage: parsed.data.targetLanguage || targetLanguage,
|
||||
duration:
|
||||
typeof parsed.data.duration === 'number' ? parsed.data.duration : undefined,
|
||||
alignmentEngine: parsed.data.alignmentEngine,
|
||||
};
|
||||
}
|
||||
|
||||
const formData = new FormData();
|
||||
formData.append('video', videoFile);
|
||||
formData.append('targetLanguage', targetLanguage);
|
||||
|
||||
1
src/vite-env.d.ts
vendored
Normal file
1
src/vite-env.d.ts
vendored
Normal file
@ -0,0 +1 @@
|
||||
/// <reference types="vite/client" />
|
||||
@ -1,4 +0,0 @@
|
||||
@echo off
|
||||
setlocal
|
||||
cd /d "%~dp0"
|
||||
node ".\node_modules\tsx\dist\cli.mjs" server.ts
|
||||
@ -5,6 +5,10 @@ import { defineConfig, loadEnv } from 'vite';
|
||||
|
||||
export default defineConfig(({ mode }) => {
|
||||
const env = loadEnv(mode, '.', '');
|
||||
const allowedHosts = env.VITE_ALLOWED_HOSTS
|
||||
? env.VITE_ALLOWED_HOSTS.split(',').map((host) => host.trim()).filter(Boolean)
|
||||
: ['ced4302.r20.vip.cpolar.cn'];
|
||||
|
||||
return {
|
||||
base: env.VITE_BASE_URL || '/',
|
||||
plugins: [react(), tailwindcss()],
|
||||
@ -17,10 +21,10 @@ export default defineConfig(({ mode }) => {
|
||||
},
|
||||
},
|
||||
server: {
|
||||
allowedHosts,
|
||||
// HMR is disabled in AI Studio via DISABLE_HMR env var.
|
||||
// Do not modifyâfile watching is disabled to prevent flickering during agent edits.
|
||||
// Do not modify. File watching is disabled to prevent flickering during agent edits.
|
||||
hmr: process.env.DISABLE_HMR !== 'true',
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user