video_translate/server.ts
Song367 4031d21dcc
All checks were successful
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 28s
优化package
2026-03-18 15:22:37 +08:00

417 lines
14 KiB
TypeScript

import express from 'express';
import cors from 'cors';
import dotenv from 'dotenv';
import { createServer as createViteServer } from 'vite';
import path from 'path';
import fs from 'fs';
import ffmpeg from 'fluent-ffmpeg';
import axios from 'axios';
import multer from 'multer';
import {
createMiniMaxTtsUrl,
getMiniMaxTtsHttpStatus,
resolveMiniMaxTtsConfig,
} from './src/server/minimaxTts';
import { generateSubtitlePipeline } from './src/server/subtitleGeneration';
import { parseSubtitleRequest } from './src/server/subtitleRequest';
import {
buildAssSubtitleContent,
buildExportAudioPlan,
DEFAULT_EXPORT_TEXT_STYLES,
shiftSubtitlesToExportTimeline,
} from './src/server/exportVideo';
import { TextStyles } from './src/types';
// Multer handles multipart uploads (video files) plus very large text fields
// (base64-encoded audio/subtitle payloads posted alongside the file).
const upload = multer({
  dest: 'uploads/',
  limits: {
    fileSize: 1024 * 1024 * 1024, // 1GB file limit
    fieldSize: 1024 * 1024 * 500 // 500MB field limit for base64 strings
  }
});
// Ensure the multer destination directory exists before any upload arrives.
// `recursive: true` is idempotent, so this replaces the previous
// existsSync/mkdirSync pair and removes its check-then-act race.
fs.mkdirSync('uploads', { recursive: true });
// Load .env before reading any configuration from process.env.
dotenv.config();

// Optional overrides for the ffmpeg/ffprobe binary locations. Blank or
// whitespace-only values are treated as unset.
const applyBinaryOverride = (raw: string | undefined, apply: (binPath: string) => void): void => {
  const binPath = raw?.trim();
  if (binPath) {
    apply(binPath);
  }
};
applyBinaryOverride(process.env.FFMPEG_PATH, (p) => ffmpeg.setFfmpegPath(p));
applyBinaryOverride(process.env.FFPROBE_PATH, (p) => ffmpeg.setFfprobePath(p));
/**
 * Boot the Express application: register middleware, the five API routes
 * (TTS, vocal separation, audio extraction, subtitle generation, video
 * export), then either the Vite dev middleware or static `dist/` serving,
 * and finally start listening on port 3000.
 */
async function startServer() {
  const app = express();
  const PORT = 3000;
  app.use(cors());
  // Large body limits: clients post base64-encoded media blobs in JSON.
  app.use(express.json({ limit: '500mb' }));
  app.use(express.urlencoded({ limit: '500mb', extended: true }));

  // MiniMax TTS Endpoint
  // Synthesizes speech for `text` via the MiniMax HTTP API and returns the
  // audio as base64 in the JSON response body.
  app.post('/api/tts', async (req, res) => {
    try {
      const { text, voiceId } = req.body;
      if (!text) return res.status(400).json({ error: 'No text provided' });
      // Throws when MINIMAX_API_KEY is missing — handled in the catch below.
      const { apiHost, apiKey } = resolveMiniMaxTtsConfig(process.env);
      const response = await axios.post(
        createMiniMaxTtsUrl(apiHost),
        {
          model: "speech-2.8-hd",
          text: text,
          stream: false,
          output_format: "hex", // audio arrives hex-encoded inside the JSON body
          voice_setting: {
            voice_id: voiceId || 'male-qn-qingse', // default voice when none requested
            speed: 1.0,
            vol: 1.0,
            pitch: 0
          },
          audio_setting: {
            sample_rate: 32000,
            bitrate: 128000,
            format: "mp3",
            channel: 1,
          }
        },
        {
          headers: {
            'Authorization': `Bearer ${apiKey}`,
            'Content-Type': 'application/json'
          }
        }
      );
      // MiniMax reports failures in-band via base_resp.status_code (0 = success).
      if (response.data?.base_resp?.status_code !== 0) {
        console.error('MiniMax API Error:', response.data?.base_resp);
        return res
          .status(getMiniMaxTtsHttpStatus(response.data?.base_resp))
          .json({ error: response.data?.base_resp?.status_msg || 'MiniMax TTS failed' });
      }
      // Re-encode hex -> base64 so the client can build a data URL directly.
      const hexAudio = response.data.data.audio;
      const audioBuffer = Buffer.from(hexAudio, 'hex');
      const audioBase64 = audioBuffer.toString('base64');
      res.json({ audio: audioBase64 });
    } catch (error: any) {
      // Misconfiguration (missing MINIMAX_API_KEY) surfaces as a 400 with the
      // config error message; everything else maps through the MiniMax helper.
      if (error instanceof Error && error.message.includes('MINIMAX_API_KEY')) {
        console.error('TTS Config Error:', error.message);
        return res.status(400).json({ error: error.message });
      }
      console.error('TTS Error:', error.response?.data || error.message);
      res
        .status(getMiniMaxTtsHttpStatus(error.response?.data?.base_resp))
        .json({ error: error.response?.data?.base_resp?.status_msg || error.message || 'Failed to generate TTS' });
    }
  });

  // Vocal Separation Endpoint
  // Produces an "instrumental" track from the uploaded video's audio and
  // returns it as a base64-encoded MP3.
  app.post('/api/separate-vocal', upload.single('video'), async (req, res) => {
    const videoPath = req.file?.path;
    const timestamp = Date.now();
    const instrumentalPath = path.join(process.cwd(), `temp_instrumental_${timestamp}.mp3`);
    try {
      if (!videoPath) return res.status(400).json({ error: 'No video file provided' });
      // Simple vocal reduction using FFmpeg (Center-panned vocal removal trick)
      // This is a basic fallback as true AI separation requires specialized models.
      await new Promise((resolve, reject) => {
        ffmpeg(videoPath)
          .noVideo()
          .audioFilters('pan=stereo|c0=c0-c1|c1=c1-c0') // Basic vocal reduction
          .format('mp3')
          .on('end', resolve)
          .on('error', reject)
          .save(instrumentalPath);
      });
      const instrumentalBuffer = fs.readFileSync(instrumentalPath);
      const instrumentalBase64 = instrumentalBuffer.toString('base64');
      // Cleanup
      if (fs.existsSync(instrumentalPath)) fs.unlinkSync(instrumentalPath);
      if (fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
      res.json({ instrumental: instrumentalBase64 });
    } catch (error: any) {
      console.error('Vocal Separation Error:', error);
      res.status(500).json({ error: error.message || 'Failed to separate vocals' });
    } finally {
      // Cleanup
      // (Safety net for the error path; the success path unlinked both above.)
      if (instrumentalPath && fs.existsSync(instrumentalPath)) fs.unlinkSync(instrumentalPath);
      if (videoPath && fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
    }
  });

  // Extracts the uploaded video's audio as 16 kHz mono WAV and returns it
  // base64-encoded. (Presumably feeds a speech-recognition step on the
  // caller's side — TODO confirm against the client.)
  app.post('/api/process-audio-pipeline', upload.single('video'), async (req, res) => {
    const videoPath = req.file?.path;
    const timestamp = Date.now();
    const audioPath = path.join(process.cwd(), `temp_audio_${timestamp}.wav`);
    try {
      if (!videoPath) return res.status(400).json({ error: 'No video file provided' });
      // 1. Extract Audio (16kHz, Mono, WAV)
      await new Promise((resolve, reject) => {
        ffmpeg(videoPath)
          .noVideo()
          .audioFrequency(16000)
          .audioChannels(1)
          .format('wav')
          .on('end', resolve)
          .on('error', reject)
          .save(audioPath);
      });
      const audioFile = fs.readFileSync(audioPath);
      const audioBase64 = audioFile.toString('base64');
      // Cleanup
      if (fs.existsSync(audioPath)) fs.unlinkSync(audioPath);
      if (fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
      res.json({ audioBase64 });
    } catch (error: any) {
      console.error('Audio Extraction Error:', error);
      res.status(500).json({ error: error.message || 'Failed to extract audio' });
    } finally {
      // Cleanup
      // (Safety net for the error path; the success path unlinked both above.)
      if (audioPath && fs.existsSync(audioPath)) fs.unlinkSync(audioPath);
      if (videoPath && fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
    }
  });

  // Runs the subtitle pipeline (in ./src/server/subtitleGeneration) on the
  // uploaded video with the provider/language parsed from the request body.
  app.post('/api/generate-subtitles', upload.single('video'), async (req, res) => {
    const videoPath = req.file?.path;
    try {
      if (!videoPath) {
        return res.status(400).json({ error: 'No video file provided' });
      }
      const { provider, targetLanguage } = parseSubtitleRequest(req.body);
      const result = await generateSubtitlePipeline({
        videoPath,
        provider,
        targetLanguage,
        env: process.env,
      });
      res.json({
        ...result,
        provider,
      });
    } catch (error: any) {
      const message = error instanceof Error ? error.message : 'Failed to generate subtitles';
      const lowerMessage = message.toLowerCase();
      // Map known failure messages onto HTTP statuses: request/config problems
      // -> 400, upstream auth failures -> 401, everything else -> 502.
      // NOTE(review): this is string matching on error text, so it is coupled
      // to the exact messages thrown by the pipeline — keep them in sync.
      const status =
        lowerMessage.includes('target language') ||
        lowerMessage.includes('unsupported llm provider') ||
        lowerMessage.includes('_api_key is required') ||
        lowerMessage.includes('studio project fallback is disabled')
          ? 400
          : lowerMessage.includes('unauthorized') ||
            lowerMessage.includes('authentication') ||
            lowerMessage.includes('auth fail') ||
            lowerMessage.includes('status 401')
          ? 401
          : 502;
      console.error('Subtitle Generation Error:', error);
      res.status(status).json({ error: message });
    } finally {
      if (videoPath && fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
    }
  });

  // Renders the final export: burns ASS subtitles, mixes BGM / source audio /
  // per-subtitle TTS tracks, optionally trims, and returns the MP4 as a
  // base64 data URL.
  app.post('/api/export-video', upload.single('video'), async (req, res) => {
    // Every temp file created below is registered here and removed in finally.
    const tempFiles: string[] = [];
    try {
      const { subtitles: subtitlesStr, bgmBase64, trimRange: trimRangeStr, textStyles: textStylesStr } = req.body;
      const videoFile = req.file;
      if (!videoFile) return res.status(400).json({ error: 'No video file provided' });
      // Multipart fields arrive as strings; decode the JSON-encoded ones.
      const subtitles = subtitlesStr ? JSON.parse(subtitlesStr) : [];
      const trimRange = trimRangeStr ? JSON.parse(trimRangeStr) : null;
      const textStyles: TextStyles = textStylesStr
        ? { ...DEFAULT_EXPORT_TEXT_STYLES, ...JSON.parse(textStylesStr) }
        : DEFAULT_EXPORT_TEXT_STYLES;
      const timestamp = Date.now();
      const inputPath = videoFile.path;
      const outputPath = path.join(process.cwd(), `output_${timestamp}.mp4`);
      const subtitlePath = path.join(process.cwd(), `subs_${timestamp}.ass`);
      tempFiles.push(subtitlePath, outputPath, inputPath);
      // 2. Prepare Audio Filters
      // Probe the input so we know whether it has an audio stream and what the
      // video dimensions are (used for subtitle layout).
      const probeData: any = await new Promise((resolve, reject) => {
        ffmpeg.ffprobe(inputPath, (err, metadata) => {
          if (err) reject(err);
          else resolve(metadata);
        });
      });
      const hasAudio = probeData.streams.some((s: any) => s.codec_type === 'audio');
      const videoStream = probeData.streams.find((s: any) => s.codec_type === 'video');
      // Fallback canvas when probing yields no dimensions (9:16 portrait).
      const videoWidth = videoStream?.width || 1080;
      const videoHeight = videoStream?.height || 1920;
      // Re-time subtitles so they are relative to the (possibly trimmed) export.
      const exportSubtitles = shiftSubtitlesToExportTimeline(subtitles || [], trimRange);
      const hasSubtitles = exportSubtitles.length > 0;
      if (hasSubtitles) {
        const assContent = buildAssSubtitleContent({
          subtitles: exportSubtitles,
          textStyles,
          videoWidth,
          videoHeight,
        });
        fs.writeFileSync(subtitlePath, assContent);
      }
      let command = ffmpeg(inputPath);
      const filterComplexParts: string[] = [];
      const audioMixInputs: string[] = [];
      // Input 0 is the video; extra audio inputs are appended starting at 1,
      // so this counter must advance in lockstep with each command.input().
      let inputIndex = 1;
      const audioPlan = buildExportAudioPlan({
        hasSourceAudio: hasAudio,
        hasBgm: Boolean(bgmBase64),
        subtitles: exportSubtitles,
      });
      if (bgmBase64) {
        const bgmPath = path.join(process.cwd(), `bgm_${timestamp}.mp3`);
        fs.writeFileSync(bgmPath, Buffer.from(bgmBase64, 'base64'));
        command = command.input(bgmPath);
        tempFiles.push(bgmPath);
        filterComplexParts.push(`[${inputIndex}:a]volume=${audioPlan.bgmVolume ?? 0.5}[bgm]`);
        audioMixInputs.push('[bgm]');
        inputIndex++;
      }
      if (audioPlan.includeSourceAudio) {
        filterComplexParts.push(`[0:a]volume=${audioPlan.sourceAudioVolume ?? 0.3}[sourcea]`);
        audioMixInputs.push('[sourcea]');
      }
      // One delayed, volume-adjusted ffmpeg input per TTS clip. The clip comes
      // in as a data URL; the base64 payload after the comma is written to a
      // temp file so ffmpeg can read it.
      for (let i = 0; i < audioPlan.ttsTracks.length; i++) {
        const track = audioPlan.ttsTracks[i];
        if (track.audioUrl) {
          const base64Data = track.audioUrl.split(',')[1];
          const isWav = track.audioUrl.includes('audio/wav');
          const ext = isWav ? 'wav' : 'mp3';
          const ttsPath = path.join(process.cwd(), `tts_${timestamp}_${i}.${ext}`);
          fs.writeFileSync(ttsPath, Buffer.from(base64Data, 'base64'));
          command = command.input(ttsPath);
          tempFiles.push(ttsPath);
          // adelay takes one delay per channel; the value is duplicated so a
          // stereo clip gets both channels delayed equally.
          filterComplexParts.push(
            `[${inputIndex}:a]volume=${track.volume},adelay=${track.delayMs}|${track.delayMs}[tts${i}]`,
          );
          audioMixInputs.push(`[tts${i}]`);
          inputIndex++;
        }
      }
      // The subtitles filter treats ':' as an option separator, so escape it;
      // backslashes are normalized to '/' for Windows-style paths.
      const escapedSubtitlePath = subtitlePath.replace(/\\/g, '/').replace(/:/g, '\\:');
      if (hasSubtitles) {
        filterComplexParts.push(`[0:v]subtitles='${escapedSubtitlePath}'[vout]`);
      }
      let audioMap: string | null = null;
      if (audioMixInputs.length > 1) {
        // duration=first: the mixed track ends with the first listed input.
        filterComplexParts.push(
          `${audioMixInputs.join('')}amix=inputs=${audioMixInputs.length}:duration=first:dropout_transition=2[aout]`,
        );
        audioMap = '[aout]';
      } else if (audioMixInputs.length === 1) {
        // Single audio source: map it directly, no amix needed.
        audioMap = audioMixInputs[0];
      }
      if (filterComplexParts.length > 0) {
        command = command.complexFilter(filterComplexParts);
      }
      // Map the filtered video (or the raw stream if no subtitles) plus the
      // selected audio label, if any.
      const outputMaps = [`-map ${hasSubtitles ? '[vout]' : '0:v'}`];
      if (audioMap) {
        outputMaps.push(`-map ${audioMap}`);
      }
      command = command.outputOptions(outputMaps);
      if (trimRange) {
        // NOTE(review): -ss/-t are output options here, so trimming happens
        // after the filter graph runs, while the subtitles were already
        // shifted to the trimmed timeline above — confirm the two agree.
        command = command.outputOptions([
          `-ss ${trimRange.start}`,
          `-t ${trimRange.end - trimRange.start}`
        ]);
      }
      await new Promise((resolve, reject) => {
        command
          .output(outputPath)
          .on('end', resolve)
          .on('error', (err, stdout, stderr) => {
            // Include stderr in the rejection so the client error is actionable.
            console.error('FFmpeg export error:', err);
            console.error('FFmpeg stderr:', stderr);
            reject(new Error(`FFmpeg error: ${err.message}. Stderr: ${stderr}`));
          })
          .run();
      });
      if (!fs.existsSync(outputPath)) {
        throw new Error('FFmpeg finished but output file was not created');
      }
      // The whole export is buffered and returned as a base64 data URL —
      // memory-heavy for large files, but keeps the client simple.
      const outputBuffer = fs.readFileSync(outputPath);
      console.log(`Exported video size: ${outputBuffer.length} bytes`);
      const outputBase64 = outputBuffer.toString('base64');
      const dataUrl = `data:video/mp4;base64,${outputBase64}`;
      res.json({ videoUrl: dataUrl });
    } catch (error: any) {
      console.error('Export Error:', error);
      res.status(500).json({ error: error.message || 'Failed to export video' });
    } finally {
      // Cleanup
      for (const file of tempFiles) {
        if (fs.existsSync(file)) {
          try {
            fs.unlinkSync(file);
          } catch (e) {
            // Best-effort: log and continue deleting the remaining files.
            console.error(`Failed to delete temp file ${file}:`, e);
          }
        }
      }
    }
  });

  if (process.env.NODE_ENV !== 'production') {
    // Dev: mount Vite in middleware mode so the SPA is served with HMR.
    const vite = await createViteServer({
      server: { middlewareMode: true },
      appType: 'spa',
    });
    app.use(vite.middlewares);
  } else {
    // Prod: serve the built SPA from dist/ with an index.html fallback route.
    const distPath = path.join(process.cwd(), 'dist');
    app.use(express.static(distPath));
    app.get('*', (req, res) => {
      res.sendFile(path.join(distPath, 'index.html'));
    });
  }
  app.listen(PORT, '0.0.0.0', () => {
    console.log(`Server running on http://localhost:${PORT}`);
  });
}
startServer();