video_translate/server.ts
Song367 4031d21dcc
All checks were successful
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 28s
优化package
2026-03-18 15:22:37 +08:00

417 lines
14 KiB
TypeScript

import express from 'express';
import cors from 'cors';
import dotenv from 'dotenv';
import { createServer as createViteServer } from 'vite';
import path from 'path';
import fs from 'fs';
import ffmpeg from 'fluent-ffmpeg';
import axios from 'axios';
import multer from 'multer';
import {
createMiniMaxTtsUrl,
getMiniMaxTtsHttpStatus,
resolveMiniMaxTtsConfig,
} from './src/server/minimaxTts';
import { generateSubtitlePipeline } from './src/server/subtitleGeneration';
import { parseSubtitleRequest } from './src/server/subtitleRequest';
import {
buildAssSubtitleContent,
buildExportAudioPlan,
DEFAULT_EXPORT_TEXT_STYLES,
shiftSubtitlesToExportTimeline,
} from './src/server/exportVideo';
import { TextStyles } from './src/types';
// Multer handles multipart uploads (video files) plus very large text fields
// (base64-encoded audio/subtitle payloads posted alongside the file).
const upload = multer({
  dest: 'uploads/',
  limits: {
    fileSize: 1024 * 1024 * 1024, // 1GB file limit
    fieldSize: 1024 * 1024 * 500 // 500MB field limit for base64 strings
  }
});
// Ensure the multer destination directory exists before any upload arrives.
// `recursive: true` is idempotent, so this replaces the previous
// existsSync/mkdirSync pair and removes its check-then-act race.
fs.mkdirSync('uploads', { recursive: true });
// Load .env before reading any configuration from process.env.
dotenv.config();

// Optional overrides for the ffmpeg/ffprobe binary locations. Blank or
// whitespace-only values are treated as unset.
const applyBinaryOverride = (raw: string | undefined, apply: (binPath: string) => void): void => {
  const binPath = raw?.trim();
  if (binPath) {
    apply(binPath);
  }
};
applyBinaryOverride(process.env.FFMPEG_PATH, (p) => ffmpeg.setFfmpegPath(p));
applyBinaryOverride(process.env.FFPROBE_PATH, (p) => ffmpeg.setFfprobePath(p));
/**
 * Boot the Express application: register middleware, the five API routes
 * (TTS, vocal separation, audio extraction, subtitle generation, video
 * export), then either the Vite dev middleware or static `dist/` serving,
 * and finally start listening on port 3000.
 */
async function startServer() {
  const app = express();
  const PORT = 3000;
  app.use(cors());
  // Large body limits: clients post base64-encoded media blobs in JSON.
  app.use(express.json({ limit: '500mb' }));
  app.use(express.urlencoded({ limit: '500mb', extended: true }));

  // MiniMax TTS Endpoint
  // Synthesizes speech for `text` via the MiniMax HTTP API and returns the
  // audio as base64 in the JSON response body.
  app.post('/api/tts', async (req, res) => {
    try {
      const { text, voiceId } = req.body;
      if (!text) return res.status(400).json({ error: 'No text provided' });
      // Throws when MINIMAX_API_KEY is missing — handled in the catch below.
      const { apiHost, apiKey } = resolveMiniMaxTtsConfig(process.env);
      const response = await axios.post(
        createMiniMaxTtsUrl(apiHost),
        {
          model: "speech-2.8-hd",
          text: text,
          stream: false,
          output_format: "hex", // audio arrives hex-encoded inside the JSON body
          voice_setting: {
            voice_id: voiceId || 'male-qn-qingse', // default voice when none requested
            speed: 1.0,
            vol: 1.0,
            pitch: 0
          },
          audio_setting: {
            sample_rate: 32000,
            bitrate: 128000,
            format: "mp3",
            channel: 1,
          }
        },
        {
          headers: {
            'Authorization': `Bearer ${apiKey}`,
            'Content-Type': 'application/json'
          }
        }
      );
      // MiniMax reports failures in-band via base_resp.status_code (0 = success).
      if (response.data?.base_resp?.status_code !== 0) {
        console.error('MiniMax API Error:', response.data?.base_resp);
        return res
          .status(getMiniMaxTtsHttpStatus(response.data?.base_resp))
          .json({ error: response.data?.base_resp?.status_msg || 'MiniMax TTS failed' });
      }
      // Re-encode hex -> base64 so the client can build a data URL directly.
      const hexAudio = response.data.data.audio;
      const audioBuffer = Buffer.from(hexAudio, 'hex');
      const audioBase64 = audioBuffer.toString('base64');
      res.json({ audio: audioBase64 });
    } catch (error: any) {
      // Misconfiguration (missing MINIMAX_API_KEY) surfaces as a 400 with the
      // config error message; everything else maps through the MiniMax helper.
      if (error instanceof Error && error.message.includes('MINIMAX_API_KEY')) {
        console.error('TTS Config Error:', error.message);
        return res.status(400).json({ error: error.message });
      }
      console.error('TTS Error:', error.response?.data || error.message);
      res
        .status(getMiniMaxTtsHttpStatus(error.response?.data?.base_resp))
        .json({ error: error.response?.data?.base_resp?.status_msg || error.message || 'Failed to generate TTS' });
    }
  });

  // Vocal Separation Endpoint
  // Produces an "instrumental" track from the uploaded video's audio and
  // returns it as a base64-encoded MP3.
  app.post('/api/separate-vocal', upload.single('video'), async (req, res) => {
    const videoPath = req.file?.path;
    const timestamp = Date.now();
    const instrumentalPath = path.join(process.cwd(), `temp_instrumental_${timestamp}.mp3`);
    try {
      if (!videoPath) return res.status(400).json({ error: 'No video file provided' });
      // Simple vocal reduction using FFmpeg (Center-panned vocal removal trick)
      // This is a basic fallback as true AI separation requires specialized models.
      await new Promise((resolve, reject) => {
        ffmpeg(videoPath)
          .noVideo()
          .audioFilters('pan=stereo|c0=c0-c1|c1=c1-c0') // Basic vocal reduction
          .format('mp3')
          .on('end', resolve)
          .on('error', reject)
          .save(instrumentalPath);
      });
      const instrumentalBuffer = fs.readFileSync(instrumentalPath);
      const instrumentalBase64 = instrumentalBuffer.toString('base64');
      // Cleanup
      if (fs.existsSync(instrumentalPath)) fs.unlinkSync(instrumentalPath);
      if (fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
      res.json({ instrumental: instrumentalBase64 });
    } catch (error: any) {
      console.error('Vocal Separation Error:', error);
      res.status(500).json({ error: error.message || 'Failed to separate vocals' });
    } finally {
      // Cleanup
      // (Safety net for the error path; the success path unlinked both above.)
      if (instrumentalPath && fs.existsSync(instrumentalPath)) fs.unlinkSync(instrumentalPath);
      if (videoPath && fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
    }
  });

  // Extracts the uploaded video's audio as 16 kHz mono WAV and returns it
  // base64-encoded. (Presumably feeds a speech-recognition step on the
  // caller's side — TODO confirm against the client.)
  app.post('/api/process-audio-pipeline', upload.single('video'), async (req, res) => {
    const videoPath = req.file?.path;
    const timestamp = Date.now();
    const audioPath = path.join(process.cwd(), `temp_audio_${timestamp}.wav`);
    try {
      if (!videoPath) return res.status(400).json({ error: 'No video file provided' });
      // 1. Extract Audio (16kHz, Mono, WAV)
      await new Promise((resolve, reject) => {
        ffmpeg(videoPath)
          .noVideo()
          .audioFrequency(16000)
          .audioChannels(1)
          .format('wav')
          .on('end', resolve)
          .on('error', reject)
          .save(audioPath);
      });
      const audioFile = fs.readFileSync(audioPath);
      const audioBase64 = audioFile.toString('base64');
      // Cleanup
      if (fs.existsSync(audioPath)) fs.unlinkSync(audioPath);
      if (fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
      res.json({ audioBase64 });
    } catch (error: any) {
      console.error('Audio Extraction Error:', error);
      res.status(500).json({ error: error.message || 'Failed to extract audio' });
    } finally {
      // Cleanup
      // (Safety net for the error path; the success path unlinked both above.)
      if (audioPath && fs.existsSync(audioPath)) fs.unlinkSync(audioPath);
      if (videoPath && fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
    }
  });

  // Runs the subtitle pipeline (in ./src/server/subtitleGeneration) on the
  // uploaded video with the provider/language parsed from the request body.
  app.post('/api/generate-subtitles', upload.single('video'), async (req, res) => {
    const videoPath = req.file?.path;
    try {
      if (!videoPath) {
        return res.status(400).json({ error: 'No video file provided' });
      }
      const { provider, targetLanguage } = parseSubtitleRequest(req.body);
      const result = await generateSubtitlePipeline({
        videoPath,
        provider,
        targetLanguage,
        env: process.env,
      });
      res.json({
        ...result,
        provider,
      });
    } catch (error: any) {
      const message = error instanceof Error ? error.message : 'Failed to generate subtitles';
      const lowerMessage = message.toLowerCase();
      // Map known failure messages onto HTTP statuses: request/config problems
      // -> 400, upstream auth failures -> 401, everything else -> 502.
      // NOTE(review): this is string matching on error text, so it is coupled
      // to the exact messages thrown by the pipeline — keep them in sync.
      const status =
        lowerMessage.includes('target language') ||
        lowerMessage.includes('unsupported llm provider') ||
        lowerMessage.includes('_api_key is required') ||
        lowerMessage.includes('studio project fallback is disabled')
          ? 400
          : lowerMessage.includes('unauthorized') ||
            lowerMessage.includes('authentication') ||
            lowerMessage.includes('auth fail') ||
            lowerMessage.includes('status 401')
          ? 401
          : 502;
      console.error('Subtitle Generation Error:', error);
      res.status(status).json({ error: message });
    } finally {
      if (videoPath && fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
    }
  });

  // Renders the final export: burns ASS subtitles, mixes BGM / source audio /
  // per-subtitle TTS tracks, optionally trims, and returns the MP4 as a
  // base64 data URL.
  app.post('/api/export-video', upload.single('video'), async (req, res) => {
    // Every temp file created below is registered here and removed in finally.
    const tempFiles: string[] = [];
    try {
      const { subtitles: subtitlesStr, bgmBase64, trimRange: trimRangeStr, textStyles: textStylesStr } = req.body;
      const videoFile = req.file;
      if (!videoFile) return res.status(400).json({ error: 'No video file provided' });
      // Multipart fields arrive as strings; decode the JSON-encoded ones.
      const subtitles = subtitlesStr ? JSON.parse(subtitlesStr) : [];
      const trimRange = trimRangeStr ? JSON.parse(trimRangeStr) : null;
      const textStyles: TextStyles = textStylesStr
        ? { ...DEFAULT_EXPORT_TEXT_STYLES, ...JSON.parse(textStylesStr) }
        : DEFAULT_EXPORT_TEXT_STYLES;
      const timestamp = Date.now();
      const inputPath = videoFile.path;
      const outputPath = path.join(process.cwd(), `output_${timestamp}.mp4`);
      const subtitlePath = path.join(process.cwd(), `subs_${timestamp}.ass`);
      tempFiles.push(subtitlePath, outputPath, inputPath);
      // 2. Prepare Audio Filters
      // Probe the input so we know whether it has an audio stream and what the
      // video dimensions are (used for subtitle layout).
      const probeData: any = await new Promise((resolve, reject) => {
        ffmpeg.ffprobe(inputPath, (err, metadata) => {
          if (err) reject(err);
          else resolve(metadata);
        });
      });
      const hasAudio = probeData.streams.some((s: any) => s.codec_type === 'audio');
      const videoStream = probeData.streams.find((s: any) => s.codec_type === 'video');
      // Fallback canvas when probing yields no dimensions (9:16 portrait).
      const videoWidth = videoStream?.width || 1080;
      const videoHeight = videoStream?.height || 1920;
      // Re-time subtitles so they are relative to the (possibly trimmed) export.
      const exportSubtitles = shiftSubtitlesToExportTimeline(subtitles || [], trimRange);
      const hasSubtitles = exportSubtitles.length > 0;
      if (hasSubtitles) {
        const assContent = buildAssSubtitleContent({
          subtitles: exportSubtitles,
          textStyles,
          videoWidth,
          videoHeight,
        });
        fs.writeFileSync(subtitlePath, assContent);
      }
      let command = ffmpeg(inputPath);
      const filterComplexParts: string[] = [];
      const audioMixInputs: string[] = [];
      // Input 0 is the video; extra audio inputs are appended starting at 1,
      // so this counter must advance in lockstep with each command.input().
      let inputIndex = 1;
      const audioPlan = buildExportAudioPlan({
        hasSourceAudio: hasAudio,
        hasBgm: Boolean(bgmBase64),
        subtitles: exportSubtitles,
      });
      if (bgmBase64) {
        const bgmPath = path.join(process.cwd(), `bgm_${timestamp}.mp3`);
        fs.writeFileSync(bgmPath, Buffer.from(bgmBase64, 'base64'));
        command = command.input(bgmPath);
        tempFiles.push(bgmPath);
        filterComplexParts.push(`[${inputIndex}:a]volume=${audioPlan.bgmVolume ?? 0.5}[bgm]`);
        audioMixInputs.push('[bgm]');
        inputIndex++;
      }
      if (audioPlan.includeSourceAudio) {
        filterComplexParts.push(`[0:a]volume=${audioPlan.sourceAudioVolume ?? 0.3}[sourcea]`);
        audioMixInputs.push('[sourcea]');
      }
      // One delayed, volume-adjusted ffmpeg input per TTS clip. The clip comes
      // in as a data URL; the base64 payload after the comma is written to a
      // temp file so ffmpeg can read it.
      for (let i = 0; i < audioPlan.ttsTracks.length; i++) {
        const track = audioPlan.ttsTracks[i];
        if (track.audioUrl) {
          const base64Data = track.audioUrl.split(',')[1];
          const isWav = track.audioUrl.includes('audio/wav');
          const ext = isWav ? 'wav' : 'mp3';
          const ttsPath = path.join(process.cwd(), `tts_${timestamp}_${i}.${ext}`);
          fs.writeFileSync(ttsPath, Buffer.from(base64Data, 'base64'));
          command = command.input(ttsPath);
          tempFiles.push(ttsPath);
          // adelay takes one delay per channel; the value is duplicated so a
          // stereo clip gets both channels delayed equally.
          filterComplexParts.push(
            `[${inputIndex}:a]volume=${track.volume},adelay=${track.delayMs}|${track.delayMs}[tts${i}]`,
          );
          audioMixInputs.push(`[tts${i}]`);
          inputIndex++;
        }
      }
      // The subtitles filter treats ':' as an option separator, so escape it;
      // backslashes are normalized to '/' for Windows-style paths.
      const escapedSubtitlePath = subtitlePath.replace(/\\/g, '/').replace(/:/g, '\\:');
      if (hasSubtitles) {
        filterComplexParts.push(`[0:v]subtitles='${escapedSubtitlePath}'[vout]`);
      }
      let audioMap: string | null = null;
      if (audioMixInputs.length > 1) {
        // duration=first: the mixed track ends with the first listed input.
        filterComplexParts.push(
          `${audioMixInputs.join('')}amix=inputs=${audioMixInputs.length}:duration=first:dropout_transition=2[aout]`,
        );
        audioMap = '[aout]';
      } else if (audioMixInputs.length === 1) {
        // Single audio source: map it directly, no amix needed.
        audioMap = audioMixInputs[0];
      }
      if (filterComplexParts.length > 0) {
        command = command.complexFilter(filterComplexParts);
      }
      // Map the filtered video (or the raw stream if no subtitles) plus the
      // selected audio label, if any.
      const outputMaps = [`-map ${hasSubtitles ? '[vout]' : '0:v'}`];
      if (audioMap) {
        outputMaps.push(`-map ${audioMap}`);
      }
      command = command.outputOptions(outputMaps);
      if (trimRange) {
        // NOTE(review): -ss/-t are output options here, so trimming happens
        // after the filter graph runs, while the subtitles were already
        // shifted to the trimmed timeline above — confirm the two agree.
        command = command.outputOptions([
          `-ss ${trimRange.start}`,
          `-t ${trimRange.end - trimRange.start}`
        ]);
      }
      await new Promise((resolve, reject) => {
        command
          .output(outputPath)
          .on('end', resolve)
          .on('error', (err, stdout, stderr) => {
            // Include stderr in the rejection so the client error is actionable.
            console.error('FFmpeg export error:', err);
            console.error('FFmpeg stderr:', stderr);
            reject(new Error(`FFmpeg error: ${err.message}. Stderr: ${stderr}`));
          })
          .run();
      });
      if (!fs.existsSync(outputPath)) {
        throw new Error('FFmpeg finished but output file was not created');
      }
      // The whole export is buffered and returned as a base64 data URL —
      // memory-heavy for large files, but keeps the client simple.
      const outputBuffer = fs.readFileSync(outputPath);
      console.log(`Exported video size: ${outputBuffer.length} bytes`);
      const outputBase64 = outputBuffer.toString('base64');
      const dataUrl = `data:video/mp4;base64,${outputBase64}`;
      res.json({ videoUrl: dataUrl });
    } catch (error: any) {
      console.error('Export Error:', error);
      res.status(500).json({ error: error.message || 'Failed to export video' });
    } finally {
      // Cleanup
      for (const file of tempFiles) {
        if (fs.existsSync(file)) {
          try {
            fs.unlinkSync(file);
          } catch (e) {
            // Best-effort: log and continue deleting the remaining files.
            console.error(`Failed to delete temp file ${file}:`, e);
          }
        }
      }
    }
  });

  if (process.env.NODE_ENV !== 'production') {
    // Dev: mount Vite in middleware mode so the SPA is served with HMR.
    const vite = await createViteServer({
      server: { middlewareMode: true },
      appType: 'spa',
    });
    app.use(vite.middlewares);
  } else {
    // Prod: serve the built SPA from dist/ with an index.html fallback route.
    const distPath = path.join(process.cwd(), 'dist');
    app.use(express.static(distPath));
    app.get('*', (req, res) => {
      res.sendFile(path.join(distPath, 'index.html'));
    });
  }
  app.listen(PORT, '0.0.0.0', () => {
    console.log(`Server running on http://localhost:${PORT}`);
  });
}
startServer();