import express from 'express';
import cors from 'cors';
import dotenv from 'dotenv';
import { createServer as createViteServer } from 'vite';
import path from 'path';
import fs from 'fs';
import ffmpeg from 'fluent-ffmpeg';
import ffmpegInstaller from 'ffmpeg-static';
import ffprobeInstaller from 'ffprobe-static';
import axios from 'axios';
import multer from 'multer';
import {
  createMiniMaxTtsUrl,
  getMiniMaxTtsHttpStatus,
  resolveMiniMaxTtsConfig,
} from './src/server/minimaxTts';
import { generateSubtitlePipeline } from './src/server/subtitleGeneration';
import { parseSubtitleRequest } from './src/server/subtitleRequest';
import {
  buildAssSubtitleContent,
  buildExportAudioPlan,
  DEFAULT_EXPORT_TEXT_STYLES,
  shiftSubtitlesToExportTimeline,
} from './src/server/exportVideo';
import { TextStyles } from './src/types';

// Multipart uploads are spooled to ./uploads by multer.
const upload = multer({
  dest: 'uploads/',
  limits: {
    fileSize: 1024 * 1024 * 1024, // 1GB file limit
    fieldSize: 1024 * 1024 * 500, // 500MB field limit for base64 strings
  },
});

// `recursive: true` makes this a no-op when the directory already exists.
fs.mkdirSync('uploads', { recursive: true });

if (ffmpegInstaller) {
  ffmpeg.setFfmpegPath(ffmpegInstaller);
}
if (ffprobeInstaller.path) {
  ffmpeg.setFfprobePath(ffprobeInstaller.path);
}

dotenv.config();

/** Error raised for malformed client input; route handlers map it to HTTP 400. */
class BadRequestError extends Error {
  constructor(message: string) {
    super(message);
    this.name = 'BadRequestError';
  }
}

/**
 * Returns a suffix for temp file names. The random part keeps two requests
 * that arrive in the same millisecond from writing to the same file.
 */
function createTempSuffix(): string {
  return `${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
}

/**
 * Best-effort deletion of temp files. Missing entries are skipped and
 * failures are logged rather than thrown, so it is safe inside `finally`.
 *
 * @param files Paths to delete; `undefined` entries are ignored.
 */
function removeTempFiles(files: ReadonlyArray<string | undefined>): void {
  for (const file of files) {
    if (!file || !fs.existsSync(file)) continue;
    try {
      fs.unlinkSync(file);
    } catch (e: unknown) {
      console.error(`Failed to delete temp file ${file}:`, e);
    }
  }
}

/** Returns `error.message` when `error` is an Error with a message, else `fallback`. */
function errorMessage(error: unknown, fallback: string): string {
  return error instanceof Error && error.message ? error.message : fallback;
}

/**
 * Parses a JSON-encoded multipart text field.
 *
 * @param raw Field value as received in `req.body`.
 * @param field Field name, used in the error message.
 * @returns The parsed value, or `undefined` when the field is absent, empty or not a string.
 * @throws BadRequestError when the field is present but is not valid JSON.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- mirrors JSON.parse; shape is checked by the caller
function parseJsonField(raw: unknown, field: string): any {
  if (typeof raw !== 'string' || raw === '') return undefined;
  try {
    return JSON.parse(raw);
  } catch {
    throw new BadRequestError(`Invalid JSON in "${field}"`);
  }
}

/**
 * Maps a subtitle-pipeline error message to an HTTP status:
 * 400 for request/configuration problems, 401 for auth failures, 502 otherwise.
 */
function subtitleErrorStatus(message: string): number {
  const lower = message.toLowerCase();
  const badRequestMarkers = [
    'target language',
    'unsupported llm provider',
    '_api_key is required',
    'studio project fallback is disabled',
  ];
  const unauthorizedMarkers = ['unauthorized', 'authentication', 'auth fail', 'status 401'];
  if (badRequestMarkers.some((marker) => lower.includes(marker))) return 400;
  if (unauthorizedMarkers.some((marker) => lower.includes(marker))) return 401;
  return 502;
}

/**
 * Builds the Express app, registers the API routes, attaches either the Vite
 * dev middleware (non-production) or the static `dist` bundle (production),
 * and starts listening on port 3000.
 */
async function startServer() {
  const app = express();
  const PORT = 3000;

  app.use(cors());
  app.use(express.json({ limit: '500mb' }));
  app.use(express.urlencoded({ limit: '500mb', extended: true }));

  // MiniMax TTS Endpoint: { text, voiceId? } -> { audio: <base64 mp3> }
  app.post('/api/tts', async (req, res) => {
    try {
      const { text, voiceId } = req.body;
      if (!text) {
        res.status(400).json({ error: 'No text provided' });
        return;
      }

      const { apiHost, apiKey } = resolveMiniMaxTtsConfig(process.env);

      const response = await axios.post(
        createMiniMaxTtsUrl(apiHost),
        {
          model: "speech-2.8-hd",
          text: text,
          stream: false,
          output_format: "hex",
          voice_setting: {
            voice_id: voiceId || 'male-qn-qingse',
            speed: 1.0,
            vol: 1.0,
            pitch: 0,
          },
          audio_setting: {
            sample_rate: 32000,
            bitrate: 128000,
            format: "mp3",
            channel: 1,
          },
        },
        {
          headers: {
            'Authorization': `Bearer ${apiKey}`,
            'Content-Type': 'application/json',
          },
        },
      );

      if (response.data?.base_resp?.status_code !== 0) {
        console.error('MiniMax API Error:', response.data?.base_resp);
        res
          .status(getMiniMaxTtsHttpStatus(response.data?.base_resp))
          .json({ error: response.data?.base_resp?.status_msg || 'MiniMax TTS failed' });
        return;
      }

      // Guard against a success status with no audio payload, which would
      // otherwise surface as an opaque Buffer.from TypeError.
      const hexAudio: unknown = response.data?.data?.audio;
      if (typeof hexAudio !== 'string' || hexAudio.length === 0) {
        console.error('MiniMax API Error: response contained no audio data');
        res.status(502).json({ error: 'MiniMax TTS returned no audio data' });
        return;
      }

      // The API was asked for hex output; the client expects base64.
      res.json({ audio: Buffer.from(hexAudio, 'hex').toString('base64') });
    } catch (error: unknown) {
      if (error instanceof Error && error.message.includes('MINIMAX_API_KEY')) {
        console.error('TTS Config Error:', error.message);
        res.status(400).json({ error: error.message });
        return;
      }

      const upstreamBody = axios.isAxiosError(error) ? error.response?.data : undefined;
      const message = errorMessage(error, 'Failed to generate TTS');
      console.error('TTS Error:', upstreamBody || message);
      res
        .status(getMiniMaxTtsHttpStatus(upstreamBody?.base_resp))
        .json({ error: upstreamBody?.base_resp?.status_msg || message });
    }
  });

  // Vocal Separation Endpoint: multipart `video` -> { instrumental: <base64 mp3> }
  app.post('/api/separate-vocal', upload.single('video'), async (req, res) => {
    const videoPath = req.file?.path;
    const instrumentalPath = path.join(
      process.cwd(),
      `temp_instrumental_${createTempSuffix()}.mp3`,
    );

    try {
      if (!videoPath) {
        res.status(400).json({ error: 'No video file provided' });
        return;
      }

      // Simple vocal reduction using FFmpeg (Center-panned vocal removal trick)
      // This is a basic fallback as true AI separation requires specialized models.
      await new Promise<void>((resolve, reject) => {
        ffmpeg(videoPath)
          .noVideo()
          .audioFilters('pan=stereo|c0=c0-c1|c1=c1-c0') // Basic vocal reduction
          .format('mp3')
          .on('end', () => resolve())
          .on('error', reject)
          .save(instrumentalPath);
      });

      // Async read so a large file does not block the event loop.
      const instrumentalBuffer = await fs.promises.readFile(instrumentalPath);
      res.json({ instrumental: instrumentalBuffer.toString('base64') });
    } catch (error: unknown) {
      console.error('Vocal Separation Error:', error);
      res.status(500).json({ error: errorMessage(error, 'Failed to separate vocals') });
    } finally {
      // Single cleanup point for both the success and the failure path.
      removeTempFiles([instrumentalPath, videoPath]);
    }
  });

  // Audio extraction: multipart `video` -> { audioBase64: <base64 wav> }
  app.post('/api/process-audio-pipeline', upload.single('video'), async (req, res) => {
    const videoPath = req.file?.path;
    const audioPath = path.join(process.cwd(), `temp_audio_${createTempSuffix()}.wav`);

    try {
      if (!videoPath) {
        res.status(400).json({ error: 'No video file provided' });
        return;
      }

      // 1. Extract Audio (16kHz, Mono, WAV)
      await new Promise<void>((resolve, reject) => {
        ffmpeg(videoPath)
          .noVideo()
          .audioFrequency(16000)
          .audioChannels(1)
          .format('wav')
          .on('end', () => resolve())
          .on('error', reject)
          .save(audioPath);
      });

      const audioFile = await fs.promises.readFile(audioPath);
      res.json({ audioBase64: audioFile.toString('base64') });
    } catch (error: unknown) {
      console.error('Audio Extraction Error:', error);
      res.status(500).json({ error: errorMessage(error, 'Failed to extract audio') });
    } finally {
      removeTempFiles([audioPath, videoPath]);
    }
  });

  // Subtitle generation: multipart `video` plus provider/target-language fields.
  app.post('/api/generate-subtitles', upload.single('video'), async (req, res) => {
    const videoPath = req.file?.path;

    try {
      if (!videoPath) {
        res.status(400).json({ error: 'No video file provided' });
        return;
      }

      const { provider, targetLanguage } = parseSubtitleRequest(req.body);
      const result = await generateSubtitlePipeline({
        videoPath,
        provider,
        targetLanguage,
        env: process.env,
      });

      res.json({
        ...result,
        provider,
      });
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : 'Failed to generate subtitles';
      console.error('Subtitle Generation Error:', error);
      res.status(subtitleErrorStatus(message)).json({ error: message });
    } finally {
      removeTempFiles([videoPath]);
    }
  });

  // Video export: burns subtitles in, mixes source audio / BGM / TTS tracks,
  // optionally trims, and returns the result as a base64 data URL.
  app.post('/api/export-video', upload.single('video'), async (req, res) => {
    const tempFiles: string[] = [];

    try {
      const videoFile = req.file;
      if (!videoFile) {
        res.status(400).json({ error: 'No video file provided' });
        return;
      }

      // Register the upload for cleanup before anything below can throw.
      const inputPath = videoFile.path;
      tempFiles.push(inputPath);

      const {
        subtitles: subtitlesStr,
        bgmBase64,
        trimRange: trimRangeStr,
        textStyles: textStylesStr,
      } = req.body;

      const subtitles = parseJsonField(subtitlesStr, 'subtitles') ?? [];
      if (!Array.isArray(subtitles)) {
        throw new BadRequestError('"subtitles" must be a JSON array');
      }

      const trimRange = parseJsonField(trimRangeStr, 'trimRange') ?? null;
      let trimStart = 0;
      let trimDuration = 0;
      if (trimRange) {
        // Coerce to numbers before they reach the ffmpeg option strings:
        // fluent-ffmpeg splits option strings on whitespace, so an unchecked
        // string value could smuggle in extra ffmpeg arguments.
        trimStart = Number(trimRange.start);
        const trimEnd = Number(trimRange.end);
        if (!Number.isFinite(trimStart) || !Number.isFinite(trimEnd) || trimEnd <= trimStart) {
          throw new BadRequestError('"trimRange" must have numeric start and end with end > start');
        }
        trimDuration = trimEnd - trimStart;
      }

      const styleOverrides = parseJsonField(textStylesStr, 'textStyles');
      const textStyles: TextStyles = styleOverrides
        ? { ...DEFAULT_EXPORT_TEXT_STYLES, ...styleOverrides }
        : DEFAULT_EXPORT_TEXT_STYLES;

      const suffix = createTempSuffix();
      const outputPath = path.join(process.cwd(), `output_${suffix}.mp4`);
      const subtitlePath = path.join(process.cwd(), `subs_${suffix}.ass`);
      tempFiles.push(subtitlePath, outputPath);

      // Probe the upload for an audio stream and the video dimensions.
      const probeData: any = await new Promise((resolve, reject) => {
        ffmpeg.ffprobe(inputPath, (err, metadata) => {
          if (err) reject(err);
          else resolve(metadata);
        });
      });

      const hasAudio = probeData.streams.some((s: any) => s.codec_type === 'audio');
      const videoStream = probeData.streams.find((s: any) => s.codec_type === 'video');
      const videoWidth = videoStream?.width || 1080;
      const videoHeight = videoStream?.height || 1920;

      const exportSubtitles = shiftSubtitlesToExportTimeline(subtitles, trimRange);
      const hasSubtitles = exportSubtitles.length > 0;

      if (hasSubtitles) {
        const assContent = buildAssSubtitleContent({
          subtitles: exportSubtitles,
          textStyles,
          videoWidth,
          videoHeight,
        });
        fs.writeFileSync(subtitlePath, assContent);
      }

      // 2. Prepare Audio Filters
      let command = ffmpeg(inputPath);
      const filterComplexParts: string[] = [];
      const audioMixInputs: string[] = [];
      let inputIndex = 1; // input 0 is the uploaded video

      const audioPlan = buildExportAudioPlan({
        hasSourceAudio: hasAudio,
        hasBgm: Boolean(bgmBase64),
        subtitles: exportSubtitles,
      });

      if (bgmBase64) {
        const bgmPath = path.join(process.cwd(), `bgm_${suffix}.mp3`);
        tempFiles.push(bgmPath); // register first so a failed write is still cleaned up
        fs.writeFileSync(bgmPath, Buffer.from(bgmBase64, 'base64'));
        command = command.input(bgmPath);
        filterComplexParts.push(`[${inputIndex}:a]volume=${audioPlan.bgmVolume ?? 0.5}[bgm]`);
        audioMixInputs.push('[bgm]');
        inputIndex++;
      }

      if (audioPlan.includeSourceAudio) {
        filterComplexParts.push(`[0:a]volume=${audioPlan.sourceAudioVolume ?? 0.3}[sourcea]`);
        audioMixInputs.push('[sourcea]');
      }

      for (let i = 0; i < audioPlan.ttsTracks.length; i++) {
        const track = audioPlan.ttsTracks[i];
        if (!track.audioUrl) continue;

        // audioUrl is read as a data URL: everything after the first comma is base64.
        const base64Data = track.audioUrl.split(',')[1];
        if (!base64Data) {
          throw new BadRequestError(`TTS track ${i} has a malformed audio data URL`);
        }

        const ext = track.audioUrl.includes('audio/wav') ? 'wav' : 'mp3';
        const ttsPath = path.join(process.cwd(), `tts_${suffix}_${i}.${ext}`);
        tempFiles.push(ttsPath);
        fs.writeFileSync(ttsPath, Buffer.from(base64Data, 'base64'));
        command = command.input(ttsPath);

        filterComplexParts.push(
          `[${inputIndex}:a]volume=${track.volume},adelay=${track.delayMs}|${track.delayMs}[tts${i}]`,
        );
        audioMixInputs.push(`[tts${i}]`);
        inputIndex++;
      }

      if (hasSubtitles) {
        // Forward slashes and escaped colons keep Windows paths valid inside the filter string.
        const escapedSubtitlePath = subtitlePath.replace(/\\/g, '/').replace(/:/g, '\\:');
        filterComplexParts.push(`[0:v]subtitles='${escapedSubtitlePath}'[vout]`);
      }

      let audioMap: string | null = null;
      if (audioMixInputs.length > 1) {
        filterComplexParts.push(
          `${audioMixInputs.join('')}amix=inputs=${audioMixInputs.length}:duration=first:dropout_transition=2[aout]`,
        );
        audioMap = '[aout]';
      } else if (audioMixInputs.length === 1) {
        audioMap = audioMixInputs[0];
      }

      if (filterComplexParts.length > 0) {
        command = command.complexFilter(filterComplexParts);
      }

      const outputMaps = [`-map ${hasSubtitles ? '[vout]' : '0:v'}`];
      if (audioMap) {
        outputMaps.push(`-map ${audioMap}`);
      }
      command = command.outputOptions(outputMaps);

      if (trimRange) {
        // NOTE(review): -ss/-t are applied as output options here, while the
        // subtitles and TTS delays were computed for the export timeline —
        // confirm against shiftSubtitlesToExportTimeline / buildExportAudioPlan
        // that the two timelines line up.
        command = command.outputOptions([`-ss ${trimStart}`, `-t ${trimDuration}`]);
      }

      await new Promise<void>((resolve, reject) => {
        command
          .output(outputPath)
          .on('end', () => resolve())
          .on('error', (err, _stdout, stderr) => {
            console.error('FFmpeg export error:', err);
            console.error('FFmpeg stderr:', stderr);
            reject(new Error(`FFmpeg error: ${err.message}. Stderr: ${stderr}`));
          })
          .run();
      });

      if (!fs.existsSync(outputPath)) {
        throw new Error('FFmpeg finished but output file was not created');
      }

      const outputBuffer = await fs.promises.readFile(outputPath);
      console.log(`Exported video size: ${outputBuffer.length} bytes`);

      const dataUrl = `data:video/mp4;base64,${outputBuffer.toString('base64')}`;
      res.json({ videoUrl: dataUrl });
    } catch (error: unknown) {
      console.error('Export Error:', error);
      const status = error instanceof BadRequestError ? 400 : 500;
      res.status(status).json({ error: errorMessage(error, 'Failed to export video') });
    } finally {
      // Cleanup
      removeTempFiles(tempFiles);
    }
  });

  if (process.env.NODE_ENV !== 'production') {
    // Development: let Vite serve and transform the front end.
    const vite = await createViteServer({
      server: { middlewareMode: true },
      appType: 'spa',
    });
    app.use(vite.middlewares);
  } else {
    // Production: serve the built bundle and fall back to index.html for client routes.
    const distPath = path.join(process.cwd(), 'dist');
    app.use(express.static(distPath));
    app.get('*', (req, res) => {
      res.sendFile(path.join(distPath, 'index.html'));
    });
  }

  app.listen(PORT, '0.0.0.0', () => {
    console.log(`Server running on http://localhost:${PORT}`);
  });
}

// Surface startup failures (e.g. Vite failing to initialise) instead of
// leaving a floating promise rejection.
startServer().catch((error: unknown) => {
  console.error('Failed to start server:', error);
  process.exit(1);
});