import express from 'express';
import cors from 'cors';
import dotenv from 'dotenv';
import { createServer as createViteServer } from 'vite';
import path from 'path';
import fs from 'fs';
import ffmpeg from 'fluent-ffmpeg';
import axios from 'axios';
import multer from 'multer';
import {
  createMiniMaxTtsUrl,
  getMiniMaxTtsHttpStatus,
  resolveMiniMaxTtsConfig,
} from './src/server/minimaxTts';
import { generateSubtitlePipeline } from './src/server/subtitleGeneration';
import { resolveLlmProviderConfig } from './src/server/llmProvider';
import { parseSubtitleRequest } from './src/server/subtitleRequest';
import {
  buildAssSubtitleContent,
  buildExportAudioPlan,
  DEFAULT_EXPORT_TEXT_STYLES,
  shiftSubtitlesToExportTimeline,
} from './src/server/exportVideo';
import { formatLogContext, logEvent, serializeError } from './src/server/errorLogging';
import {
  createSubtitleJob,
  createSubtitleJobStore,
  getSubtitleJob,
  pruneExpiredSubtitleJobs,
  toSubtitleJobResponse,
  updateSubtitleJob,
} from './src/server/subtitleJobs';
import { TextStyles } from './src/types';

/** Multer instance shared by every upload route; uploaded files are written under `uploads/`. */
const upload = multer({
  dest: 'uploads/',
  limits: {
    fileSize: 1024 * 1024 * 1024, // 1GB file limit
    fieldSize: 1024 * 1024 * 500, // 500MB field limit for base64 strings
  },
});

// `recursive: true` makes this a no-op when the directory already exists.
fs.mkdirSync('uploads', { recursive: true });

/** Picks the handful of request headers that are included in subtitle log events. */
const summarizeRequestHeaders = (headers: express.Request['headers']) => ({
  host: headers.host,
  'content-type': headers['content-type'],
  'content-length': headers['content-length'],
  'user-agent': headers['user-agent'],
  'x-forwarded-for': headers['x-forwarded-for'],
  'x-forwarded-proto': headers['x-forwarded-proto'],
});

/** Age passed to `pruneExpiredSubtitleJobs` (one hour). */
const SUBTITLE_JOB_TTL_MS = 60 * 60 * 1000;
const subtitleJobStore = createSubtitleJobStore();

/** Message fragments (lower-case) that map a subtitle failure to HTTP 400. */
const SUBTITLE_BAD_REQUEST_MARKERS = [
  'target language',
  'unsupported llm provider',
  '_api_key is required',
  'studio project fallback is disabled',
];

/** Message fragments (lower-case) that map a subtitle failure to HTTP 401. */
const SUBTITLE_UNAUTHORIZED_MARKERS = ['unauthorized', 'authentication', 'auth fail', 'status 401'];

/**
 * Derives an HTTP status code from a subtitle error message.
 *
 * @param message - Error message; matched case-insensitively.
 * @returns 400 when a bad-request marker matches, otherwise 401 when an
 *   auth marker matches, otherwise 502.
 */
const deriveSubtitleErrorStatus = (message: string): number => {
  const lowerMessage = message.toLowerCase();
  const mentions = (marker: string) => lowerMessage.includes(marker);
  if (SUBTITLE_BAD_REQUEST_MARKERS.some(mentions)) return 400;
  if (SUBTITLE_UNAUTHORIZED_MARKERS.some(mentions)) return 401;
  return 502;
};

dotenv.config();

const ffmpegPath = process.env.FFMPEG_PATH?.trim();
if (ffmpegPath) {
  ffmpeg.setFfmpegPath(ffmpegPath);
}

const ffprobePath = process.env.FFPROBE_PATH?.trim();
if (ffprobePath) {
  ffmpeg.setFfprobePath(ffprobePath);
}

/** Command object returned by calling `ffmpeg(...)`. */
type FfmpegCommand = ReturnType<typeof ffmpeg>;

/** The stream fields of an ffprobe result that the export route reads. */
interface ProbedStream {
  codec_type?: string;
  width?: number;
  height?: number;
}

/** The part of an ffprobe result that the export route reads. */
interface ProbeResult {
  streams: ProbedStream[];
}

/** Thrown by request validation in the export route; mapped to HTTP 400 by its catch block. */
class BadRequestError extends Error {
  constructor(message: string) {
    super(message);
    this.name = 'BadRequestError';
  }
}

/**
 * Builds an identifier from the current time plus a random base-36 suffix.
 * Used for request ids and temp-file names so that two requests arriving in
 * the same millisecond do not share a name.
 */
const createUniqueId = (): string => `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

/**
 * Returns `error.message` when it is a non-empty string, otherwise `fallback`.
 */
const getErrorMessage = (error: unknown, fallback: string): string => {
  const message = (error as { message?: unknown } | null | undefined)?.message;
  return typeof message === 'string' && message ? message : fallback;
};

/**
 * Deletes a temp file if it exists. Never throws: a failed delete is logged
 * so that cleanup inside `finally` cannot escape an async handler.
 *
 * @returns `true` only when a file existed and was deleted.
 */
const removeTempFile = (filePath: string | undefined): boolean => {
  if (!filePath) return false;
  try {
    if (!fs.existsSync(filePath)) return false;
    fs.unlinkSync(filePath);
    return true;
  } catch (error) {
    console.error(`Failed to delete temp file ${filePath}:`, error);
    return false;
  }
};

/** Runs `command`, writing to `outputPath`; settles on ffmpeg's `end` / `error` event. */
const saveWithFfmpeg = (command: FfmpegCommand, outputPath: string): Promise<void> =>
  new Promise<void>((resolve, reject) => {
    command
      .on('end', () => resolve())
      .on('error', reject)
      .save(outputPath);
  });

/**
 * Parses a JSON-encoded form field of the export request.
 *
 * @throws BadRequestError when the field is not valid JSON.
 */
// The parsed payload is handed to project helpers whose parameter types are
// declared elsewhere, so it stays `any` exactly as a bare `JSON.parse` would.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const parseJsonField = (raw: unknown, fieldName: string): any => {
  try {
    return JSON.parse(String(raw));
  } catch {
    throw new BadRequestError(`Invalid JSON in "${fieldName}" field`);
  }
};

/** True for finite numbers and for non-blank strings that convert to a finite number. */
const isFiniteNumberLike = (value: unknown): boolean =>
  (typeof value === 'number' || (typeof value === 'string' && value.trim() !== '')) &&
  Number.isFinite(Number(value));

/**
 * Registers `POST /api/tts`.
 *
 * Sends `text` (and optional `voiceId`) from the JSON body to the MiniMax TTS
 * endpoint and responds with `{ audio }`, the returned hex audio re-encoded
 * as base64. Responds 400 when `text` is missing or when the thrown error
 * message mentions `MINIMAX_API_KEY`.
 */
function registerTtsRoute(app: express.Express): void {
  // MiniMax TTS Endpoint
  app.post('/api/tts', async (req, res) => {
    try {
      const { text, voiceId } = req.body ?? {};
      if (!text) return res.status(400).json({ error: 'No text provided' });

      const { apiHost, apiKey } = resolveMiniMaxTtsConfig(process.env);

      const response = await axios.post(
        createMiniMaxTtsUrl(apiHost),
        {
          model: 'speech-2.8-hd',
          text: text,
          stream: false,
          output_format: 'hex',
          voice_setting: {
            voice_id: voiceId || 'male-qn-qingse',
            speed: 1.0,
            vol: 1.0,
            pitch: 0,
          },
          audio_setting: {
            sample_rate: 32000,
            bitrate: 128000,
            format: 'mp3',
            channel: 1,
          },
        },
        {
          headers: {
            Authorization: `Bearer ${apiKey}`,
            'Content-Type': 'application/json',
          },
        },
      );

      const baseResp = response.data?.base_resp;
      if (baseResp?.status_code !== 0) {
        console.error('MiniMax API Error:', baseResp);
        return res
          .status(getMiniMaxTtsHttpStatus(baseResp))
          .json({ error: baseResp?.status_msg || 'MiniMax TTS failed' });
      }

      const hexAudio = response.data?.data?.audio;
      if (typeof hexAudio !== 'string') {
        // A success status without an audio payload would otherwise surface
        // as an opaque TypeError from Buffer.from.
        console.error('MiniMax API Error: response contained no audio payload');
        return res.status(502).json({ error: 'MiniMax TTS returned no audio data' });
      }

      res.json({ audio: Buffer.from(hexAudio, 'hex').toString('base64') });
      // `any` is kept here: the error is forwarded to getMiniMaxTtsHttpStatus,
      // whose parameter type is declared in another module.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
    } catch (error: any) {
      if (error instanceof Error && error.message.includes('MINIMAX_API_KEY')) {
        console.error('TTS Config Error:', error.message);
        return res.status(400).json({ error: error.message });
      }
      console.error('TTS Error:', error.response?.data || error.message);
      res
        .status(getMiniMaxTtsHttpStatus(error.response?.data?.base_resp))
        .json({
          error:
            error.response?.data?.base_resp?.status_msg ||
            error.message ||
            'Failed to generate TTS',
        });
    }
  });
}

/**
 * Registers the two ffmpeg audio routes, both taking a multipart `video` upload:
 *
 * - `POST /api/separate-vocal` responds with `{ instrumental }` (base64 MP3).
 * - `POST /api/process-audio-pipeline` responds with `{ audioBase64 }`
 *   (base64 16 kHz mono WAV).
 *
 * Both respond 400 without an upload and 500 when ffmpeg fails; the upload
 * and the intermediate file are deleted in `finally`.
 */
function registerAudioRoutes(app: express.Express): void {
  // Vocal Separation Endpoint
  app.post('/api/separate-vocal', upload.single('video'), async (req, res) => {
    const videoPath = req.file?.path;
    const instrumentalPath = path.join(
      process.cwd(),
      `temp_instrumental_${createUniqueId()}.mp3`,
    );

    try {
      if (!videoPath) return res.status(400).json({ error: 'No video file provided' });

      // Simple vocal reduction using FFmpeg (center-panned vocal removal trick).
      // This is a basic fallback as true AI separation requires specialized models.
      await saveWithFfmpeg(
        ffmpeg(videoPath)
          .noVideo()
          .audioFilters('pan=stereo|c0=c0-c1|c1=c1-c0') // Basic vocal reduction
          .format('mp3'),
        instrumentalPath,
      );

      const instrumentalBuffer = await fs.promises.readFile(instrumentalPath);
      res.json({ instrumental: instrumentalBuffer.toString('base64') });
    } catch (error: unknown) {
      console.error('Vocal Separation Error:', error);
      res.status(500).json({ error: getErrorMessage(error, 'Failed to separate vocals') });
    } finally {
      // Cleanup
      removeTempFile(instrumentalPath);
      removeTempFile(videoPath);
    }
  });

  app.post('/api/process-audio-pipeline', upload.single('video'), async (req, res) => {
    const videoPath = req.file?.path;
    const audioPath = path.join(process.cwd(), `temp_audio_${createUniqueId()}.wav`);

    try {
      if (!videoPath) return res.status(400).json({ error: 'No video file provided' });

      // 1. Extract Audio (16kHz, Mono, WAV)
      await saveWithFfmpeg(
        ffmpeg(videoPath).noVideo().audioFrequency(16000).audioChannels(1).format('wav'),
        audioPath,
      );

      const audioFile = await fs.promises.readFile(audioPath);
      res.json({ audioBase64: audioFile.toString('base64') });
    } catch (error: unknown) {
      console.error('Audio Extraction Error:', error);
      res.status(500).json({ error: getErrorMessage(error, 'Failed to extract audio') });
    } finally {
      // Cleanup
      removeTempFile(audioPath);
      removeTempFile(videoPath);
    }
  });
}

/**
 * Registers the subtitle job routes.
 *
 * - `POST /api/generate-subtitles` validates the request, creates a job in
 *   `subtitleJobStore`, responds 202 with the job snapshot, then runs
 *   `generateSubtitlePipeline` in the background and records progress,
 *   success or failure on the job.
 * - `GET /api/generate-subtitles/:jobId` responds with the job snapshot, or
 *   404 when the job is not in the store.
 */
function registerSubtitleRoutes(app: express.Express): void {
  const subtitleUpload = upload.single('video');

  app.post(
    '/api/generate-subtitles',
    (req, res, next) => {
      // Multer only runs for multipart requests; any other request must
      // supply `fileId` in its body or it is rejected below.
      if ((req.headers['content-type'] || '').includes('multipart/form-data')) {
        return subtitleUpload(req, res, next);
      }
      next();
    },
    async (req, res) => {
      const videoPath = req.file?.path;
      const requestId = createUniqueId();
      const startedAt = Date.now();
      const requestContext = {
        requestId,
        method: req.method,
        path: req.originalUrl,
        fileName: req.file?.originalname,
        fileSize: req.file?.size,
        contentType: req.headers['content-type'],
        contentLength: req.headers['content-length'],
        host: req.headers.host,
      };
      // Becomes true once the background job owns `videoPath`. Until then a
      // failure in this handler must delete the upload itself.
      let uploadHandedOff = false;

      try {
        logEvent({
          level: 'info',
          message: '[subtitle] request received',
          context: requestContext,
          details: {
            headers: summarizeRequestHeaders(req.headers),
            bodyKeys: Object.keys(req.body || {}).slice(0, 20),
            hasUploadFile: Boolean(req.file),
          },
        });

        const { provider, targetLanguage, ttsLanguage, fileId } = parseSubtitleRequest(req.body);
        const providerConfig = resolveLlmProviderConfig(provider, process.env);
        const pollTimeoutMs =
          providerConfig.provider === 'doubao' ? providerConfig.timeoutMs : undefined;

        if (!videoPath && !fileId) {
          logEvent({
            level: 'warn',
            message: '[subtitle] request rejected: missing video source',
            context: {
              ...requestContext,
              durationMs: Date.now() - startedAt,
            },
            details: {
              fileId,
              parsedProvider: provider,
              targetLanguage,
              ttsLanguage,
            },
          });
          return res.status(400).json({ error: 'No video file provided' });
        }

        pruneExpiredSubtitleJobs(subtitleJobStore, SUBTITLE_JOB_TTL_MS);
        const subtitleJob = createSubtitleJob(subtitleJobStore, {
          requestId,
          provider,
          targetLanguage,
          ttsLanguage,
          pollTimeoutMs,
          fileId,
          filePath: videoPath,
        });

        updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
          status: 'running',
          stage: videoPath ? 'upload_received' : 'queued',
          progress: videoPath ? 15 : 5,
          message: videoPath ? 'Upload received' : 'Queued',
        });

        const acceptedContext = {
          jobId: subtitleJob.id,
          requestId,
          provider,
          targetLanguage,
          ttsLanguage,
          fileName: req.file?.originalname,
          fileSize: req.file?.size,
          fileId,
        };
        logEvent({
          level: 'info',
          message: `[subtitle] job accepted ${formatLogContext(acceptedContext)}`,
          context: acceptedContext,
        });

        res
          .status(202)
          .json(toSubtitleJobResponse(getSubtitleJob(subtitleJobStore, subtitleJob.id)!));

        uploadHandedOff = true;
        void (async () => {
          try {
            const result = await generateSubtitlePipeline({
              videoPath,
              fileId,
              provider,
              targetLanguage,
              ttsLanguage,
              env: process.env,
              requestId,
              onProgress: (progress) => {
                updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
                  status: progress.status,
                  stage: progress.stage,
                  progress: progress.progress,
                  message: progress.message,
                });
              },
            });

            updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
              status: 'succeeded',
              stage: 'succeeded',
              progress: 100,
              message: 'Subtitle generation completed',
              result: {
                ...result,
                provider,
                requestId,
              },
            });

            const successSummary = {
              jobId: subtitleJob.id,
              requestId,
              provider,
              targetLanguage,
              durationMs: Date.now() - startedAt,
              subtitleCount: result.subtitles.length,
            };
            logEvent({
              level: 'info',
              message: `[subtitle] background job succeeded ${formatLogContext(successSummary)}`,
              context: {
                ...successSummary,
                quality: result.quality,
                alignmentEngine: result.alignmentEngine,
              },
            });
            // `any` is kept: the value is forwarded to serializeError, whose
            // parameter type is declared in another module.
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
          } catch (error: any) {
            const message =
              error instanceof Error ? error.message : 'Failed to generate subtitles';
            const status = deriveSubtitleErrorStatus(message);
            const durationMs = Date.now() - startedAt;

            updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
              status: 'failed',
              stage: 'failed',
              message,
              error: message,
            });

            logEvent({
              level: 'error',
              message: `[subtitle] background job failed ${formatLogContext({
                jobId: subtitleJob.id,
                requestId,
                durationMs,
                fileName: req.file?.originalname,
                fileSize: req.file?.size,
                status,
              })}`,
              context: {
                ...requestContext,
                jobId: subtitleJob.id,
                durationMs,
                status,
              },
              details: {
                error: serializeError(error),
                headers: summarizeRequestHeaders(req.headers),
                bodyKeys: Object.keys(req.body || {}).slice(0, 20),
              },
            });
          } finally {
            if (removeTempFile(videoPath)) {
              updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
                filePath: undefined,
              });
              logEvent({
                level: 'info',
                message: '[subtitle] uploaded temp file cleaned',
                context: {
                  jobId: subtitleJob.id,
                  requestId,
                  videoPath,
                  durationMs: Date.now() - startedAt,
                },
              });
            }
          }
        })().catch((error: unknown) => {
          // Safety net: the job bookkeeping in the catch/finally above can
          // itself throw, and nothing awaits this promise.
          console.error(`[subtitle] background job crashed (requestId=${requestId}):`, error);
        });
        // `any` is kept: the value is forwarded to serializeError, whose
        // parameter type is declared in another module.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
      } catch (error: any) {
        const message = error instanceof Error ? error.message : 'Failed to generate subtitles';
        const status = deriveSubtitleErrorStatus(message);
        const durationMs = Date.now() - startedAt;

        logEvent({
          level: 'error',
          message: `[subtitle] request failed ${formatLogContext({
            requestId,
            durationMs,
            fileName: req.file?.originalname,
            fileSize: req.file?.size,
            status,
          })}`,
          context: {
            ...requestContext,
            durationMs,
            status,
          },
          details: {
            error: serializeError(error),
            headers: summarizeRequestHeaders(req.headers),
            bodyKeys: Object.keys(req.body || {}).slice(0, 20),
          },
        });

        // The request was rejected before a background job took ownership of
        // the upload, so nobody else will delete it.
        if (!uploadHandedOff) {
          removeTempFile(videoPath);
        }
        if (!res.headersSent) {
          res.status(status).json({ error: message, requestId });
        }
      }
    },
  );

  app.get('/api/generate-subtitles/:jobId', (req, res) => {
    pruneExpiredSubtitleJobs(subtitleJobStore, SUBTITLE_JOB_TTL_MS);
    const subtitleJob = getSubtitleJob(subtitleJobStore, req.params.jobId);

    if (!subtitleJob) {
      return res.status(404).json({ error: 'Subtitle job not found.' });
    }

    return res.json(toSubtitleJobResponse(subtitleJob));
  });
}

/**
 * Registers `POST /api/export-video`.
 *
 * Takes a multipart `video` upload plus optional form fields `subtitles`,
 * `trimRange` and `textStyles` (JSON strings) and `bgmBase64`. Burns the
 * subtitles in through an ASS file, mixes the audio inputs described by
 * `buildExportAudioPlan`, optionally trims, and responds with
 * `{ videoUrl }`, an MP4 `data:` URL.
 *
 * Responds 400 for a missing upload or invalid request fields and 500 for
 * any other failure. Every temp file, including the upload, is deleted in
 * `finally`.
 */
function registerExportRoute(app: express.Express): void {
  app.post('/api/export-video', upload.single('video'), async (req, res) => {
    const tempFiles: string[] = [];

    try {
      const videoFile = req.file;
      if (!videoFile) return res.status(400).json({ error: 'No video file provided' });

      const inputPath = videoFile.path;
      // Registered before any validation so the upload is removed even when
      // the request is rejected below.
      tempFiles.push(inputPath);

      const {
        subtitles: subtitlesStr,
        bgmBase64,
        trimRange: trimRangeStr,
        textStyles: textStylesStr,
      } = req.body;

      const subtitles = subtitlesStr ? parseJsonField(subtitlesStr, 'subtitles') : [];
      const trimRange = trimRangeStr ? parseJsonField(trimRangeStr, 'trimRange') : null;
      const textStyles: TextStyles = textStylesStr
        ? { ...DEFAULT_EXPORT_TEXT_STYLES, ...parseJsonField(textStylesStr, 'textStyles') }
        : DEFAULT_EXPORT_TEXT_STYLES;

      // The trim values are interpolated into ffmpeg options further down.
      if (
        trimRange &&
        !(isFiniteNumberLike(trimRange.start) && isFiniteNumberLike(trimRange.end))
      ) {
        throw new BadRequestError('trimRange must contain numeric "start" and "end" values');
      }

      const exportId = createUniqueId();
      const outputPath = path.join(process.cwd(), `output_${exportId}.mp4`);
      const subtitlePath = path.join(process.cwd(), `subs_${exportId}.ass`);
      tempFiles.push(subtitlePath, outputPath);

      // Probe the source for its audio stream and video dimensions.
      const probeData = await new Promise<ProbeResult>((resolve, reject) => {
        ffmpeg.ffprobe(inputPath, (err, metadata) => {
          if (err) reject(err);
          else resolve(metadata);
        });
      });

      const hasAudio = probeData.streams.some((stream) => stream.codec_type === 'audio');
      const videoStream = probeData.streams.find((stream) => stream.codec_type === 'video');
      const videoWidth = videoStream?.width || 1080;
      const videoHeight = videoStream?.height || 1920;

      const exportSubtitles = shiftSubtitlesToExportTimeline(subtitles || [], trimRange);
      const hasSubtitles = exportSubtitles.length > 0;

      if (hasSubtitles) {
        const assContent = buildAssSubtitleContent({
          subtitles: exportSubtitles,
          textStyles,
          videoWidth,
          videoHeight,
        });
        await fs.promises.writeFile(subtitlePath, assContent);
      }

      // 2. Prepare Audio Filters
      let command = ffmpeg(inputPath);
      const filterComplexParts: string[] = [];
      const audioMixInputs: string[] = [];
      // Input 0 is the source video; each extra audio file takes the next index.
      let inputIndex = 1;

      const audioPlan = buildExportAudioPlan({
        hasSourceAudio: hasAudio,
        hasBgm: Boolean(bgmBase64),
        subtitles: exportSubtitles,
      });

      if (bgmBase64) {
        const bgmPath = path.join(process.cwd(), `bgm_${exportId}.mp3`);
        tempFiles.push(bgmPath);
        await fs.promises.writeFile(bgmPath, Buffer.from(bgmBase64, 'base64'));
        command = command.input(bgmPath);
        filterComplexParts.push(`[${inputIndex}:a]volume=${audioPlan.bgmVolume ?? 0.5}[bgm]`);
        audioMixInputs.push('[bgm]');
        inputIndex++;
      }

      if (audioPlan.includeSourceAudio) {
        filterComplexParts.push(
          `[0:a]volume=${audioPlan.sourceAudioVolume ?? 0.3}[sourcea]`,
        );
        audioMixInputs.push('[sourcea]');
      }

      for (let i = 0; i < audioPlan.ttsTracks.length; i++) {
        const track = audioPlan.ttsTracks[i];
        if (!track.audioUrl) continue;

        // The payload is read as everything after the first comma.
        const base64Data = track.audioUrl.split(',')[1];
        if (!base64Data) {
          throw new Error(`Invalid audio data URL for TTS track ${i}`);
        }

        const ext = track.audioUrl.includes('audio/wav') ? 'wav' : 'mp3';
        const ttsPath = path.join(process.cwd(), `tts_${exportId}_${i}.${ext}`);
        tempFiles.push(ttsPath);
        await fs.promises.writeFile(ttsPath, Buffer.from(base64Data, 'base64'));
        command = command.input(ttsPath);

        filterComplexParts.push(
          `[${inputIndex}:a]volume=${track.volume},adelay=${track.delayMs}|${track.delayMs}[tts${i}]`,
        );
        audioMixInputs.push(`[tts${i}]`);
        inputIndex++;
      }

      if (hasSubtitles) {
        // Forward slashes and escaped colons keep the path intact inside the filter string.
        const escapedSubtitlePath = subtitlePath.replace(/\\/g, '/').replace(/:/g, '\\:');
        filterComplexParts.push(`[0:v]subtitles='${escapedSubtitlePath}'[vout]`);
      }

      let audioMap: string | null = null;
      if (audioMixInputs.length > 1) {
        filterComplexParts.push(
          `${audioMixInputs.join('')}amix=inputs=${audioMixInputs.length}:duration=first:dropout_transition=2[aout]`,
        );
        audioMap = '[aout]';
      } else if (audioMixInputs.length === 1) {
        audioMap = audioMixInputs[0];
      }

      if (filterComplexParts.length > 0) {
        command = command.complexFilter(filterComplexParts);
      }

      const outputMaps = [`-map ${hasSubtitles ? '[vout]' : '0:v'}`];
      if (audioMap) {
        outputMaps.push(`-map ${audioMap}`);
      }
      command = command.outputOptions(outputMaps);

      if (trimRange) {
        command = command.outputOptions([
          `-ss ${trimRange.start}`,
          `-t ${trimRange.end - trimRange.start}`,
        ]);
      }

      await new Promise<void>((resolve, reject) => {
        command
          .output(outputPath)
          .on('end', () => resolve())
          .on('error', (err, stdout, stderr) => {
            console.error('FFmpeg export error:', err);
            console.error('FFmpeg stderr:', stderr);
            reject(new Error(`FFmpeg error: ${err.message}. Stderr: ${stderr}`));
          })
          .run();
      });

      if (!fs.existsSync(outputPath)) {
        throw new Error('FFmpeg finished but output file was not created');
      }

      const outputBuffer = await fs.promises.readFile(outputPath);
      console.log(`Exported video size: ${outputBuffer.length} bytes`);

      const outputBase64 = outputBuffer.toString('base64');
      res.json({ videoUrl: `data:video/mp4;base64,${outputBase64}` });
    } catch (error: unknown) {
      console.error('Export Error:', error);
      const status = error instanceof BadRequestError ? 400 : 500;
      res.status(status).json({ error: getErrorMessage(error, 'Failed to export video') });
    } finally {
      // Cleanup
      for (const file of tempFiles) {
        removeTempFile(file);
      }
    }
  });
}

/**
 * Attaches the front-end: Vite middleware unless `NODE_ENV` is
 * `production`, otherwise the static `dist/` build with `index.html`
 * returned for every unmatched GET.
 */
async function registerFrontend(app: express.Express): Promise<void> {
  if (process.env.NODE_ENV !== 'production') {
    const vite = await createViteServer({
      server: { middlewareMode: true },
      appType: 'spa',
    });
    app.use(vite.middlewares);
    return;
  }

  const distPath = path.join(process.cwd(), 'dist');
  app.use(express.static(distPath));
  app.get('*', (req, res) => {
    res.sendFile(path.join(distPath, 'index.html'));
  });
}

/**
 * Builds the Express app, registers every API route and the front-end
 * middleware, and starts listening on port 3000 on all interfaces.
 */
async function startServer(): Promise<void> {
  const app = express();
  const PORT = 3000;

  app.use(cors());
  app.use(express.json({ limit: '500mb' }));
  app.use(express.urlencoded({ limit: '500mb', extended: true }));

  registerTtsRoute(app);
  registerAudioRoutes(app);
  registerSubtitleRoutes(app);
  registerExportRoute(app);
  await registerFrontend(app);

  app.listen(PORT, '0.0.0.0', () => {
    console.log(`Server running on http://localhost:${PORT}`);
  });
}

startServer().catch((error: unknown) => {
  // Without this handler a start-up failure is an unhandled rejection.
  console.error('Failed to start server:', error);
  process.exit(1);
});