video_translate/server.ts
Song367 04072dc94b
All checks were successful
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 1m6s
commit code
2026-03-19 20:13:24 +08:00

649 lines
21 KiB
TypeScript

import express from 'express';
import cors from 'cors';
import dotenv from 'dotenv';
import { createServer as createViteServer } from 'vite';
import path from 'path';
import fs from 'fs';
import ffmpeg from 'fluent-ffmpeg';
import axios from 'axios';
import multer from 'multer';
import {
createMiniMaxTtsUrl,
getMiniMaxTtsHttpStatus,
resolveMiniMaxTtsConfig,
} from './src/server/minimaxTts';
import { generateSubtitlePipeline } from './src/server/subtitleGeneration';
import { parseSubtitleRequest } from './src/server/subtitleRequest';
import {
buildAssSubtitleContent,
buildExportAudioPlan,
DEFAULT_EXPORT_TEXT_STYLES,
shiftSubtitlesToExportTimeline,
} from './src/server/exportVideo';
import { formatLogContext, logEvent, serializeError } from './src/server/errorLogging';
import {
createSubtitleJob,
createSubtitleJobStore,
getSubtitleJob,
pruneExpiredSubtitleJobs,
toSubtitleJobResponse,
updateSubtitleJob,
} from './src/server/subtitleJobs';
import { TextStyles } from './src/types';
// Multer instance for large video uploads. Files are spooled to disk under
// ./uploads rather than buffered in memory.
const upload = multer({
  dest: 'uploads/',
  limits: {
    fileSize: 1024 * 1024 * 1024, // 1GB file limit
    fieldSize: 1024 * 1024 * 500 // 500MB field limit for base64 strings
  }
});
// Ensure the upload spool directory exists. `recursive: true` makes the call
// idempotent (no error when the directory is already present), avoiding the
// check-then-create race of the former existsSync + mkdirSync pair.
fs.mkdirSync('uploads', { recursive: true });
// Project only the diagnostically useful request headers into a plain object
// suitable for structured logging (keeps log events small and avoids dumping
// auth headers).
const summarizeRequestHeaders = (headers: express.Request['headers']) => {
  const interesting = [
    'host',
    'content-type',
    'content-length',
    'user-agent',
    'x-forwarded-for',
    'x-forwarded-proto',
  ] as const;
  const summary: { [key: string]: unknown } = {};
  for (const name of interesting) {
    summary[name] = headers[name];
  }
  return summary;
};
// Subtitle jobs (and their results) are retained for one hour before pruning.
const SUBTITLE_JOB_TTL_MS = 60 * 60 * 1000;
// In-memory store for async subtitle-generation jobs polled via
// GET /api/generate-subtitles/:jobId. Not shared across processes.
const subtitleJobStore = createSubtitleJobStore();
// Map a subtitle-pipeline error message to an HTTP status code:
// 400 for caller/configuration problems, 401 for upstream auth failures,
// 502 for everything else (treated as an upstream/provider fault).
// Matching is case-insensitive; the 400 markers take precedence over 401.
const deriveSubtitleErrorStatus = (message: string): number => {
  const normalized = message.toLowerCase();
  const badRequestMarkers = [
    'target language',
    'unsupported llm provider',
    '_api_key is required',
    'studio project fallback is disabled',
  ];
  const unauthorizedMarkers = [
    'unauthorized',
    'authentication',
    'auth fail',
    'status 401',
  ];
  if (badRequestMarkers.some((marker) => normalized.includes(marker))) {
    return 400;
  }
  if (unauthorizedMarkers.some((marker) => normalized.includes(marker))) {
    return 401;
  }
  return 502;
};
// Load .env BEFORE reading the FFMPEG_PATH/FFPROBE_PATH overrides below.
dotenv.config();
// Allow deployments to point fluent-ffmpeg at explicit binaries (e.g. when
// ffmpeg/ffprobe are not on PATH); unset/blank values keep the defaults.
const ffmpegPath = process.env.FFMPEG_PATH?.trim();
if (ffmpegPath) {
  ffmpeg.setFfmpegPath(ffmpegPath);
}
const ffprobePath = process.env.FFPROBE_PATH?.trim();
if (ffprobePath) {
  ffmpeg.setFfprobePath(ffprobePath);
}
// Boots the Express app, registers all API routes, and (dev) mounts Vite
// middleware or (prod) serves the built SPA. Listens on port 3000.
async function startServer() {
  const app = express();
  const PORT = 3000;
  // Shared multer middleware for the subtitle endpoint (multipart field "video").
  const subtitleUpload = upload.single('video');
  app.use(cors());
  // Generous body limits: clients may POST base64-encoded audio/video payloads.
  app.use(express.json({ limit: '500mb' }));
  app.use(express.urlencoded({ limit: '500mb', extended: true }));
// MiniMax TTS Endpoint
// POST /api/tts — synthesizes speech for `text` via the MiniMax HTTP API.
// Body: { text: string, voiceId?: string }. Responds { audio: <base64 mp3> }.
app.post('/api/tts', async (req, res) => {
  try {
    const { text, voiceId } = req.body;
    if (!text) return res.status(400).json({ error: 'No text provided' });
    // Throws when MINIMAX_API_KEY (or host) is missing — handled in catch below.
    const { apiHost, apiKey } = resolveMiniMaxTtsConfig(process.env);
    const response = await axios.post(
      createMiniMaxTtsUrl(apiHost),
      {
        model: "speech-2.8-hd",
        text: text,
        stream: false,
        // "hex" makes MiniMax return the audio bytes hex-encoded in JSON.
        output_format: "hex",
        voice_setting: {
          voice_id: voiceId || 'male-qn-qingse',
          speed: 1.0,
          vol: 1.0,
          pitch: 0
        },
        audio_setting: {
          sample_rate: 32000,
          bitrate: 128000,
          format: "mp3",
          channel: 1,
        }
      },
      {
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'Content-Type': 'application/json'
        }
      }
    );
    // MiniMax reports failures in-band: a non-zero base_resp.status_code means
    // the TTS failed even though the HTTP call itself succeeded.
    if (response.data?.base_resp?.status_code !== 0) {
      console.error('MiniMax API Error:', response.data?.base_resp);
      return res
        .status(getMiniMaxTtsHttpStatus(response.data?.base_resp))
        .json({ error: response.data?.base_resp?.status_msg || 'MiniMax TTS failed' });
    }
    // Re-encode hex -> base64 so the client can build a data URL directly.
    const hexAudio = response.data.data.audio;
    const audioBuffer = Buffer.from(hexAudio, 'hex');
    const audioBase64 = audioBuffer.toString('base64');
    res.json({ audio: audioBase64 });
  } catch (error: any) {
    // Missing/invalid server-side TTS configuration is surfaced as a 400.
    if (error instanceof Error && error.message.includes('MINIMAX_API_KEY')) {
      console.error('TTS Config Error:', error.message);
      return res.status(400).json({ error: error.message });
    }
    console.error('TTS Error:', error.response?.data || error.message);
    res
      .status(getMiniMaxTtsHttpStatus(error.response?.data?.base_resp))
      .json({ error: error.response?.data?.base_resp?.status_msg || error.message || 'Failed to generate TTS' });
  }
});
// Vocal Separation Endpoint
// POST /api/separate-vocal — accepts an uploaded video (multipart field "video")
// and returns { instrumental: <base64 mp3> } with vocals attenuated.
app.post('/api/separate-vocal', upload.single('video'), async (req, res) => {
  const videoPath = req.file?.path;
  const timestamp = Date.now();
  const instrumentalPath = path.join(process.cwd(), `temp_instrumental_${timestamp}.mp3`);
  try {
    if (!videoPath) return res.status(400).json({ error: 'No video file provided' });
    // Simple vocal reduction using FFmpeg (center-panned vocal removal trick).
    // This is a basic fallback as true AI separation requires specialized models.
    // NOTE(review): the pan filter subtracts the two channels, so it presumably
    // requires a stereo source — behaviour on mono audio should be confirmed.
    await new Promise((resolve, reject) => {
      ffmpeg(videoPath)
        .noVideo()
        .audioFilters('pan=stereo|c0=c0-c1|c1=c1-c0') // Basic vocal reduction
        .format('mp3')
        .on('end', resolve)
        .on('error', reject)
        .save(instrumentalPath);
    });
    const instrumentalBuffer = fs.readFileSync(instrumentalPath);
    res.json({ instrumental: instrumentalBuffer.toString('base64') });
  } catch (error: any) {
    console.error('Vocal Separation Error:', error);
    res.status(500).json({ error: error.message || 'Failed to separate vocals' });
  } finally {
    // Single cleanup site for both temp files. (The original also unlinked them
    // inside the try block, which was redundant with this finally.)
    if (instrumentalPath && fs.existsSync(instrumentalPath)) fs.unlinkSync(instrumentalPath);
    if (videoPath && fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
  }
});
// POST /api/process-audio-pipeline — extracts the audio track of an uploaded
// video as 16kHz mono WAV (the format ASR backends typically expect) and
// returns it as { audioBase64 }.
app.post('/api/process-audio-pipeline', upload.single('video'), async (req, res) => {
  const videoPath = req.file?.path;
  const timestamp = Date.now();
  const audioPath = path.join(process.cwd(), `temp_audio_${timestamp}.wav`);
  try {
    if (!videoPath) return res.status(400).json({ error: 'No video file provided' });
    // 1. Extract Audio (16kHz, Mono, WAV)
    await new Promise((resolve, reject) => {
      ffmpeg(videoPath)
        .noVideo()
        .audioFrequency(16000)
        .audioChannels(1)
        .format('wav')
        .on('end', resolve)
        .on('error', reject)
        .save(audioPath);
    });
    const audioFile = fs.readFileSync(audioPath);
    res.json({ audioBase64: audioFile.toString('base64') });
  } catch (error: any) {
    console.error('Audio Extraction Error:', error);
    res.status(500).json({ error: error.message || 'Failed to extract audio' });
  } finally {
    // Single cleanup site for both temp files. (The original also unlinked them
    // inside the try block, which was redundant with this finally.)
    if (audioPath && fs.existsSync(audioPath)) fs.unlinkSync(audioPath);
    if (videoPath && fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
  }
});
// POST /api/generate-subtitles — starts an async subtitle-generation job.
// Source is either a multipart upload (field "video") or a JSON body carrying a
// `fileId` reference. Responds 202 with a job descriptor; clients poll
// GET /api/generate-subtitles/:jobId for progress and the final result.
app.post('/api/generate-subtitles', (req, res, next) => {
  // Run multer only for multipart bodies; JSON bodies (fileId reuse) skip it so
  // the express.json() parsed body is left intact.
  if ((req.headers['content-type'] || '').includes('multipart/form-data')) {
    return subtitleUpload(req, res, next);
  }
  next();
}, async (req, res) => {
  const videoPath = req.file?.path;
  // Correlation id threaded through every log event for this request.
  const requestId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  const startedAt = Date.now();
  // Base logging context reused by all subsequent log events.
  const requestContext = {
    requestId,
    method: req.method,
    path: req.originalUrl,
    fileName: req.file?.originalname,
    fileSize: req.file?.size,
    contentType: req.headers['content-type'],
    contentLength: req.headers['content-length'],
    host: req.headers.host,
  };
  try {
    logEvent({
      level: 'info',
      message: '[subtitle] request received',
      context: requestContext,
      details: {
        headers: summarizeRequestHeaders(req.headers),
        // Only key names (capped at 20) — never log body values.
        bodyKeys: Object.keys(req.body || {}).slice(0, 20),
        hasUploadFile: Boolean(req.file),
      },
    });
    const { provider, targetLanguage, ttsLanguage, fileId } = parseSubtitleRequest(req.body);
    // A video source is mandatory: either a fresh upload or a fileId reference.
    if (!videoPath && !fileId) {
      logEvent({
        level: 'warn',
        message: '[subtitle] request rejected: missing video source',
        context: {
          ...requestContext,
          durationMs: Date.now() - startedAt,
        },
        details: {
          fileId,
          parsedProvider: provider,
          targetLanguage,
          ttsLanguage,
        },
      });
      return res.status(400).json({ error: 'No video file provided' });
    }
    // Evict expired jobs before registering a new one.
    pruneExpiredSubtitleJobs(subtitleJobStore, SUBTITLE_JOB_TTL_MS);
    const subtitleJob = createSubtitleJob(subtitleJobStore, {
      requestId,
      provider,
      targetLanguage,
      ttsLanguage,
      fileId,
      filePath: videoPath,
    });
    // Seed initial progress: uploads start further along than fileId reuse.
    updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
      status: 'running',
      stage: videoPath ? 'upload_received' : 'queued',
      progress: videoPath ? 15 : 5,
      message: videoPath ? 'Upload received' : 'Queued',
    });
    logEvent({
      level: 'info',
      message: `[subtitle] job accepted ${formatLogContext({
        jobId: subtitleJob.id,
        requestId,
        provider,
        targetLanguage,
        ttsLanguage,
        fileName: req.file?.originalname,
        fileSize: req.file?.size,
        fileId,
      })}`,
      context: {
        jobId: subtitleJob.id,
        requestId,
        provider,
        targetLanguage,
        ttsLanguage,
        fileName: req.file?.originalname,
        fileSize: req.file?.size,
        fileId,
      },
    });
    // Respond immediately (202 Accepted); the heavy pipeline runs below in the
    // background and clients poll the job endpoint for completion.
    res.status(202).json(toSubtitleJobResponse(getSubtitleJob(subtitleJobStore, subtitleJob.id)!));
    // Fire-and-forget background worker; `void` marks the promise as
    // intentionally unawaited. All errors are captured into the job record.
    void (async () => {
      try {
        const result = await generateSubtitlePipeline({
          videoPath,
          fileId,
          provider,
          targetLanguage,
          ttsLanguage,
          env: process.env,
          requestId,
          // Mirror pipeline progress into the job store for polling clients.
          onProgress: (progress) => {
            updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
              status: progress.status,
              stage: progress.stage,
              progress: progress.progress,
              message: progress.message,
            });
          },
        });
        updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
          status: 'succeeded',
          stage: 'succeeded',
          progress: 100,
          message: 'Subtitle generation completed',
          result: {
            ...result,
            provider,
            requestId,
          },
        });
        logEvent({
          level: 'info',
          message: `[subtitle] background job succeeded ${formatLogContext({
            jobId: subtitleJob.id,
            requestId,
            provider,
            targetLanguage,
            durationMs: Date.now() - startedAt,
            subtitleCount: result.subtitles.length,
          })}`,
          context: {
            jobId: subtitleJob.id,
            requestId,
            provider,
            targetLanguage,
            durationMs: Date.now() - startedAt,
            subtitleCount: result.subtitles.length,
            quality: result.quality,
            alignmentEngine: result.alignmentEngine,
          },
        });
      } catch (error: any) {
        const message = error instanceof Error ? error.message : 'Failed to generate subtitles';
        // Status is recorded for logging only — the 202 response already went out.
        const status = deriveSubtitleErrorStatus(message);
        updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
          status: 'failed',
          stage: 'failed',
          message,
          error: message,
        });
        logEvent({
          level: 'error',
          message: `[subtitle] background job failed ${formatLogContext({
            jobId: subtitleJob.id,
            requestId,
            durationMs: Date.now() - startedAt,
            fileName: req.file?.originalname,
            fileSize: req.file?.size,
            status,
          })}`,
          context: {
            ...requestContext,
            jobId: subtitleJob.id,
            durationMs: Date.now() - startedAt,
            status,
          },
          details: {
            error: serializeError(error),
            headers: summarizeRequestHeaders(req.headers),
            bodyKeys: Object.keys(req.body || {}).slice(0, 20),
          },
        });
      } finally {
        // Remove the uploaded temp file (success or failure) and clear the
        // job's filePath so the store never references a deleted file.
        if (videoPath && fs.existsSync(videoPath)) {
          fs.unlinkSync(videoPath);
          updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
            filePath: undefined,
          });
          logEvent({
            level: 'info',
            message: '[subtitle] uploaded temp file cleaned',
            context: {
              jobId: subtitleJob.id,
              requestId,
              videoPath,
              durationMs: Date.now() - startedAt,
            },
          });
        }
      }
    })();
  } catch (error: any) {
    // Synchronous failures (e.g. parseSubtitleRequest) — no 202 was sent yet,
    // so respond with a mapped error status.
    const message = error instanceof Error ? error.message : 'Failed to generate subtitles';
    const status = deriveSubtitleErrorStatus(message);
    logEvent({
      level: 'error',
      message: `[subtitle] request failed ${formatLogContext({
        requestId,
        durationMs: Date.now() - startedAt,
        fileName: req.file?.originalname,
        fileSize: req.file?.size,
        status,
      })}`,
      context: {
        ...requestContext,
        durationMs: Date.now() - startedAt,
        status,
      },
      details: {
        error: serializeError(error),
        headers: summarizeRequestHeaders(req.headers),
        bodyKeys: Object.keys(req.body || {}).slice(0, 20),
      },
    });
    res.status(status).json({ error: message, requestId });
  }
});
app.get('/api/generate-subtitles/:jobId', (req, res) => {
pruneExpiredSubtitleJobs(subtitleJobStore, SUBTITLE_JOB_TTL_MS);
const subtitleJob = getSubtitleJob(subtitleJobStore, req.params.jobId);
if (!subtitleJob) {
return res.status(404).json({ error: 'Subtitle job not found.' });
}
return res.json(toSubtitleJobResponse(subtitleJob));
});
// POST /api/export-video — burns subtitles into an uploaded video and mixes the
// audio tracks (source audio, optional BGM, per-subtitle TTS clips) via a
// single ffmpeg filter graph. Multipart field "video" plus JSON-string fields
// `subtitles`, `bgmBase64`, `trimRange`, `textStyles`. Responds with the whole
// MP4 as a base64 data URL.
app.post('/api/export-video', upload.single('video'), async (req, res) => {
  // Every temp path created below is pushed here and removed in the finally.
  const tempFiles: string[] = [];
  try {
    const { subtitles: subtitlesStr, bgmBase64, trimRange: trimRangeStr, textStyles: textStylesStr } = req.body;
    const videoFile = req.file;
    if (!videoFile) return res.status(400).json({ error: 'No video file provided' });
    // Form fields arrive as JSON strings; parse with safe fallbacks.
    const subtitles = subtitlesStr ? JSON.parse(subtitlesStr) : [];
    const trimRange = trimRangeStr ? JSON.parse(trimRangeStr) : null;
    const textStyles: TextStyles = textStylesStr
      ? { ...DEFAULT_EXPORT_TEXT_STYLES, ...JSON.parse(textStylesStr) }
      : DEFAULT_EXPORT_TEXT_STYLES;
    const timestamp = Date.now();
    const inputPath = videoFile.path;
    const outputPath = path.join(process.cwd(), `output_${timestamp}.mp4`);
    const subtitlePath = path.join(process.cwd(), `subs_${timestamp}.ass`);
    tempFiles.push(subtitlePath, outputPath, inputPath);
    // Probe the input so we know whether it has an audio stream and what the
    // video dimensions are (needed for subtitle layout).
    const probeData: any = await new Promise((resolve, reject) => {
      ffmpeg.ffprobe(inputPath, (err, metadata) => {
        if (err) reject(err);
        else resolve(metadata);
      });
    });
    const hasAudio = probeData.streams.some((s: any) => s.codec_type === 'audio');
    const videoStream = probeData.streams.find((s: any) => s.codec_type === 'video');
    // Fall back to a 1080x1920 portrait canvas when probing yields no dimensions.
    const videoWidth = videoStream?.width || 1080;
    const videoHeight = videoStream?.height || 1920;
    // Re-base subtitle timestamps onto the (possibly trimmed) export timeline.
    const exportSubtitles = shiftSubtitlesToExportTimeline(subtitles || [], trimRange);
    const hasSubtitles = exportSubtitles.length > 0;
    if (hasSubtitles) {
      // Write the styled subtitles to an .ass file for ffmpeg's subtitles filter.
      const assContent = buildAssSubtitleContent({
        subtitles: exportSubtitles,
        textStyles,
        videoWidth,
        videoHeight,
      });
      fs.writeFileSync(subtitlePath, assContent);
    }
    let command = ffmpeg(inputPath);
    const filterComplexParts: string[] = [];
    const audioMixInputs: string[] = [];
    // ffmpeg input index 0 is the video; extra audio inputs start at 1.
    let inputIndex = 1;
    // Decide which audio sources participate in the mix and at what volumes.
    const audioPlan = buildExportAudioPlan({
      hasSourceAudio: hasAudio,
      hasBgm: Boolean(bgmBase64),
      subtitles: exportSubtitles,
    });
    if (bgmBase64) {
      // Materialize BGM to disk and add it as an ffmpeg input with its own volume.
      const bgmPath = path.join(process.cwd(), `bgm_${timestamp}.mp3`);
      fs.writeFileSync(bgmPath, Buffer.from(bgmBase64, 'base64'));
      command = command.input(bgmPath);
      tempFiles.push(bgmPath);
      filterComplexParts.push(`[${inputIndex}:a]volume=${audioPlan.bgmVolume ?? 0.5}[bgm]`);
      audioMixInputs.push('[bgm]');
      inputIndex++;
    }
    if (audioPlan.includeSourceAudio) {
      // Duck the original audio under BGM/TTS tracks.
      filterComplexParts.push(`[0:a]volume=${audioPlan.sourceAudioVolume ?? 0.3}[sourcea]`);
      audioMixInputs.push('[sourcea]');
    }
    // One delayed, volume-adjusted input per TTS clip (data-URL payloads).
    for (let i = 0; i < audioPlan.ttsTracks.length; i++) {
      const track = audioPlan.ttsTracks[i];
      if (track.audioUrl) {
        const base64Data = track.audioUrl.split(',')[1];
        const isWav = track.audioUrl.includes('audio/wav');
        const ext = isWav ? 'wav' : 'mp3';
        const ttsPath = path.join(process.cwd(), `tts_${timestamp}_${i}.${ext}`);
        fs.writeFileSync(ttsPath, Buffer.from(base64Data, 'base64'));
        command = command.input(ttsPath);
        tempFiles.push(ttsPath);
        // adelay takes per-channel delays; repeat for both stereo channels.
        filterComplexParts.push(
          `[${inputIndex}:a]volume=${track.volume},adelay=${track.delayMs}|${track.delayMs}[tts${i}]`,
        );
        audioMixInputs.push(`[tts${i}]`);
        inputIndex++;
      }
    }
    // Escape the path for ffmpeg filter syntax (forward slashes; ':' escaped).
    // NOTE(review): presumably this targets Windows drive-letter paths — confirm
    // the quoting holds for paths containing quotes.
    const escapedSubtitlePath = subtitlePath.replace(/\\/g, '/').replace(/:/g, '\\:');
    if (hasSubtitles) {
      filterComplexParts.push(`[0:v]subtitles='${escapedSubtitlePath}'[vout]`);
    }
    let audioMap: string | null = null;
    if (audioMixInputs.length > 1) {
      // Mix all audio branches; output length follows the first (video-derived) input.
      filterComplexParts.push(
        `${audioMixInputs.join('')}amix=inputs=${audioMixInputs.length}:duration=first:dropout_transition=2[aout]`,
      );
      audioMap = '[aout]';
    } else if (audioMixInputs.length === 1) {
      // Single branch: map its label directly, no amix needed.
      audioMap = audioMixInputs[0];
    }
    if (filterComplexParts.length > 0) {
      command = command.complexFilter(filterComplexParts);
    }
    // Map the filtered video (or raw stream 0 video) plus the chosen audio label.
    const outputMaps = [`-map ${hasSubtitles ? '[vout]' : '0:v'}`];
    if (audioMap) {
      outputMaps.push(`-map ${audioMap}`);
    }
    command = command.outputOptions(outputMaps);
    if (trimRange) {
      // Output-side seek/duration: applied after filtering, keeps sync with the
      // already-shifted subtitle timeline.
      command = command.outputOptions([
        `-ss ${trimRange.start}`,
        `-t ${trimRange.end - trimRange.start}`
      ]);
    }
    await new Promise((resolve, reject) => {
      command
        .output(outputPath)
        .on('end', resolve)
        .on('error', (err, stdout, stderr) => {
          // Include stderr in the rejection — ffmpeg's real diagnostics live there.
          console.error('FFmpeg export error:', err);
          console.error('FFmpeg stderr:', stderr);
          reject(new Error(`FFmpeg error: ${err.message}. Stderr: ${stderr}`));
        })
        .run();
    });
    if (!fs.existsSync(outputPath)) {
      throw new Error('FFmpeg finished but output file was not created');
    }
    // The full MP4 is buffered and base64-encoded into the JSON response.
    const outputBuffer = fs.readFileSync(outputPath);
    console.log(`Exported video size: ${outputBuffer.length} bytes`);
    const outputBase64 = outputBuffer.toString('base64');
    const dataUrl = `data:video/mp4;base64,${outputBase64}`;
    res.json({ videoUrl: dataUrl });
  } catch (error: any) {
    console.error('Export Error:', error);
    res.status(500).json({ error: error.message || 'Failed to export video' });
  } finally {
    // Cleanup: best-effort removal of every temp file created above.
    for (const file of tempFiles) {
      if (fs.existsSync(file)) {
        try {
          fs.unlinkSync(file);
        } catch (e) {
          console.error(`Failed to delete temp file ${file}:`, e);
        }
      }
    }
  }
});
// Dev: mount Vite in middleware mode (HMR, on-the-fly transforms).
// Prod: serve the built SPA from dist/ with an index.html fallback so
// client-side routes resolve.
if (process.env.NODE_ENV !== 'production') {
  const vite = await createViteServer({
    server: { middlewareMode: true },
    appType: 'spa',
  });
  app.use(vite.middlewares);
} else {
  const distPath = path.join(process.cwd(), 'dist');
  app.use(express.static(distPath));
  app.get('*', (req, res) => {
    res.sendFile(path.join(distPath, 'index.html'));
  });
}
// Bind on all interfaces so the server is reachable inside containers.
app.listen(PORT, '0.0.0.0', () => {
  console.log(`Server running on http://localhost:${PORT}`);
});
}
startServer();