video_translate/server.ts
Song367 04072dc94b
All checks were successful
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 1m6s
commit code
2026-03-19 20:13:24 +08:00

649 lines
21 KiB
TypeScript

import express from 'express';
import cors from 'cors';
import dotenv from 'dotenv';
import { createServer as createViteServer } from 'vite';
import path from 'path';
import fs from 'fs';
import ffmpeg from 'fluent-ffmpeg';
import axios from 'axios';
import multer from 'multer';
import {
createMiniMaxTtsUrl,
getMiniMaxTtsHttpStatus,
resolveMiniMaxTtsConfig,
} from './src/server/minimaxTts';
import { generateSubtitlePipeline } from './src/server/subtitleGeneration';
import { parseSubtitleRequest } from './src/server/subtitleRequest';
import {
buildAssSubtitleContent,
buildExportAudioPlan,
DEFAULT_EXPORT_TEXT_STYLES,
shiftSubtitlesToExportTimeline,
} from './src/server/exportVideo';
import { formatLogContext, logEvent, serializeError } from './src/server/errorLogging';
import {
createSubtitleJob,
createSubtitleJobStore,
getSubtitleJob,
pruneExpiredSubtitleJobs,
toSubtitleJobResponse,
updateSubtitleJob,
} from './src/server/subtitleJobs';
import { TextStyles } from './src/types';
// Multer instance for large video uploads. Files are spooled to disk under
// ./uploads rather than buffered in memory.
const upload = multer({
  dest: 'uploads/',
  limits: {
    fileSize: 1024 * 1024 * 1024, // 1GB file limit
    fieldSize: 1024 * 1024 * 500 // 500MB field limit for base64 strings
  }
});
// Ensure the upload spool directory exists. `recursive: true` makes the call
// idempotent (no error when the directory is already present), avoiding the
// check-then-create race of the former existsSync + mkdirSync pair.
fs.mkdirSync('uploads', { recursive: true });
// Project only the diagnostically useful request headers into a plain object
// suitable for structured logging (keeps log events small and avoids dumping
// auth headers).
const summarizeRequestHeaders = (headers: express.Request['headers']) => {
  const interesting = [
    'host',
    'content-type',
    'content-length',
    'user-agent',
    'x-forwarded-for',
    'x-forwarded-proto',
  ] as const;
  const summary: { [key: string]: unknown } = {};
  for (const name of interesting) {
    summary[name] = headers[name];
  }
  return summary;
};
// Subtitle jobs (and their results) are retained for one hour before pruning.
const SUBTITLE_JOB_TTL_MS = 60 * 60 * 1000;
// In-memory store for async subtitle-generation jobs polled via
// GET /api/generate-subtitles/:jobId. Not shared across processes.
const subtitleJobStore = createSubtitleJobStore();
// Map a subtitle-pipeline error message to an HTTP status code:
// 400 for caller/configuration problems, 401 for upstream auth failures,
// 502 for everything else (treated as an upstream/provider fault).
// Matching is case-insensitive; the 400 markers take precedence over 401.
const deriveSubtitleErrorStatus = (message: string): number => {
  const normalized = message.toLowerCase();
  const badRequestMarkers = [
    'target language',
    'unsupported llm provider',
    '_api_key is required',
    'studio project fallback is disabled',
  ];
  const unauthorizedMarkers = [
    'unauthorized',
    'authentication',
    'auth fail',
    'status 401',
  ];
  if (badRequestMarkers.some((marker) => normalized.includes(marker))) {
    return 400;
  }
  if (unauthorizedMarkers.some((marker) => normalized.includes(marker))) {
    return 401;
  }
  return 502;
};
// Load .env BEFORE reading the FFMPEG_PATH/FFPROBE_PATH overrides below.
dotenv.config();
// Allow deployments to point fluent-ffmpeg at explicit binaries (e.g. when
// ffmpeg/ffprobe are not on PATH); unset/blank values keep the defaults.
const ffmpegPath = process.env.FFMPEG_PATH?.trim();
if (ffmpegPath) {
  ffmpeg.setFfmpegPath(ffmpegPath);
}
const ffprobePath = process.env.FFPROBE_PATH?.trim();
if (ffprobePath) {
  ffmpeg.setFfprobePath(ffprobePath);
}
// Boots the Express app, registers all API routes, and (dev) mounts Vite
// middleware or (prod) serves the built SPA. Listens on port 3000.
async function startServer() {
  const app = express();
  const PORT = 3000;
  // Shared multer middleware for the subtitle endpoint (multipart field "video").
  const subtitleUpload = upload.single('video');
  app.use(cors());
  // Generous body limits: clients may POST base64-encoded audio/video payloads.
  app.use(express.json({ limit: '500mb' }));
  app.use(express.urlencoded({ limit: '500mb', extended: true }));
// MiniMax TTS Endpoint
// POST /api/tts — synthesizes speech for `text` via the MiniMax HTTP API.
// Body: { text: string, voiceId?: string }. Responds { audio: <base64 mp3> }.
app.post('/api/tts', async (req, res) => {
  try {
    const { text, voiceId } = req.body;
    if (!text) return res.status(400).json({ error: 'No text provided' });
    // Throws when MINIMAX_API_KEY (or host) is missing — handled in catch below.
    const { apiHost, apiKey } = resolveMiniMaxTtsConfig(process.env);
    const response = await axios.post(
      createMiniMaxTtsUrl(apiHost),
      {
        model: "speech-2.8-hd",
        text: text,
        stream: false,
        // "hex" makes MiniMax return the audio bytes hex-encoded in JSON.
        output_format: "hex",
        voice_setting: {
          voice_id: voiceId || 'male-qn-qingse',
          speed: 1.0,
          vol: 1.0,
          pitch: 0
        },
        audio_setting: {
          sample_rate: 32000,
          bitrate: 128000,
          format: "mp3",
          channel: 1,
        }
      },
      {
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'Content-Type': 'application/json'
        }
      }
    );
    // MiniMax reports failures in-band: a non-zero base_resp.status_code means
    // the TTS failed even though the HTTP call itself succeeded.
    if (response.data?.base_resp?.status_code !== 0) {
      console.error('MiniMax API Error:', response.data?.base_resp);
      return res
        .status(getMiniMaxTtsHttpStatus(response.data?.base_resp))
        .json({ error: response.data?.base_resp?.status_msg || 'MiniMax TTS failed' });
    }
    // Re-encode hex -> base64 so the client can build a data URL directly.
    const hexAudio = response.data.data.audio;
    const audioBuffer = Buffer.from(hexAudio, 'hex');
    const audioBase64 = audioBuffer.toString('base64');
    res.json({ audio: audioBase64 });
  } catch (error: any) {
    // Missing/invalid server-side TTS configuration is surfaced as a 400.
    if (error instanceof Error && error.message.includes('MINIMAX_API_KEY')) {
      console.error('TTS Config Error:', error.message);
      return res.status(400).json({ error: error.message });
    }
    console.error('TTS Error:', error.response?.data || error.message);
    res
      .status(getMiniMaxTtsHttpStatus(error.response?.data?.base_resp))
      .json({ error: error.response?.data?.base_resp?.status_msg || error.message || 'Failed to generate TTS' });
  }
});
// Vocal Separation Endpoint
// POST /api/separate-vocal — accepts an uploaded video (multipart field "video")
// and returns { instrumental: <base64 mp3> } with vocals attenuated.
app.post('/api/separate-vocal', upload.single('video'), async (req, res) => {
  const videoPath = req.file?.path;
  const timestamp = Date.now();
  const instrumentalPath = path.join(process.cwd(), `temp_instrumental_${timestamp}.mp3`);
  try {
    if (!videoPath) return res.status(400).json({ error: 'No video file provided' });
    // Simple vocal reduction using FFmpeg (center-panned vocal removal trick).
    // This is a basic fallback as true AI separation requires specialized models.
    // NOTE(review): the pan filter subtracts the two channels, so it presumably
    // requires a stereo source — behaviour on mono audio should be confirmed.
    await new Promise((resolve, reject) => {
      ffmpeg(videoPath)
        .noVideo()
        .audioFilters('pan=stereo|c0=c0-c1|c1=c1-c0') // Basic vocal reduction
        .format('mp3')
        .on('end', resolve)
        .on('error', reject)
        .save(instrumentalPath);
    });
    const instrumentalBuffer = fs.readFileSync(instrumentalPath);
    res.json({ instrumental: instrumentalBuffer.toString('base64') });
  } catch (error: any) {
    console.error('Vocal Separation Error:', error);
    res.status(500).json({ error: error.message || 'Failed to separate vocals' });
  } finally {
    // Single cleanup site for both temp files. (The original also unlinked them
    // inside the try block, which was redundant with this finally.)
    if (instrumentalPath && fs.existsSync(instrumentalPath)) fs.unlinkSync(instrumentalPath);
    if (videoPath && fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
  }
});
// POST /api/process-audio-pipeline — extracts the audio track of an uploaded
// video as 16kHz mono WAV (the format ASR backends typically expect) and
// returns it as { audioBase64 }.
app.post('/api/process-audio-pipeline', upload.single('video'), async (req, res) => {
  const videoPath = req.file?.path;
  const timestamp = Date.now();
  const audioPath = path.join(process.cwd(), `temp_audio_${timestamp}.wav`);
  try {
    if (!videoPath) return res.status(400).json({ error: 'No video file provided' });
    // 1. Extract Audio (16kHz, Mono, WAV)
    await new Promise((resolve, reject) => {
      ffmpeg(videoPath)
        .noVideo()
        .audioFrequency(16000)
        .audioChannels(1)
        .format('wav')
        .on('end', resolve)
        .on('error', reject)
        .save(audioPath);
    });
    const audioFile = fs.readFileSync(audioPath);
    res.json({ audioBase64: audioFile.toString('base64') });
  } catch (error: any) {
    console.error('Audio Extraction Error:', error);
    res.status(500).json({ error: error.message || 'Failed to extract audio' });
  } finally {
    // Single cleanup site for both temp files. (The original also unlinked them
    // inside the try block, which was redundant with this finally.)
    if (audioPath && fs.existsSync(audioPath)) fs.unlinkSync(audioPath);
    if (videoPath && fs.existsSync(videoPath)) fs.unlinkSync(videoPath);
  }
});
// POST /api/generate-subtitles — starts an async subtitle-generation job.
// Source is either a multipart upload (field "video") or a JSON body carrying a
// `fileId` reference. Responds 202 with a job descriptor; clients poll
// GET /api/generate-subtitles/:jobId for progress and the final result.
app.post('/api/generate-subtitles', (req, res, next) => {
  // Run multer only for multipart bodies; JSON bodies (fileId reuse) skip it so
  // the express.json() parsed body is left intact.
  if ((req.headers['content-type'] || '').includes('multipart/form-data')) {
    return subtitleUpload(req, res, next);
  }
  next();
}, async (req, res) => {
  const videoPath = req.file?.path;
  // Correlation id threaded through every log event for this request.
  const requestId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  const startedAt = Date.now();
  // Base logging context reused by all subsequent log events.
  const requestContext = {
    requestId,
    method: req.method,
    path: req.originalUrl,
    fileName: req.file?.originalname,
    fileSize: req.file?.size,
    contentType: req.headers['content-type'],
    contentLength: req.headers['content-length'],
    host: req.headers.host,
  };
  try {
    logEvent({
      level: 'info',
      message: '[subtitle] request received',
      context: requestContext,
      details: {
        headers: summarizeRequestHeaders(req.headers),
        // Only key names (capped at 20) — never log body values.
        bodyKeys: Object.keys(req.body || {}).slice(0, 20),
        hasUploadFile: Boolean(req.file),
      },
    });
    const { provider, targetLanguage, ttsLanguage, fileId } = parseSubtitleRequest(req.body);
    // A video source is mandatory: either a fresh upload or a fileId reference.
    if (!videoPath && !fileId) {
      logEvent({
        level: 'warn',
        message: '[subtitle] request rejected: missing video source',
        context: {
          ...requestContext,
          durationMs: Date.now() - startedAt,
        },
        details: {
          fileId,
          parsedProvider: provider,
          targetLanguage,
          ttsLanguage,
        },
      });
      return res.status(400).json({ error: 'No video file provided' });
    }
    // Evict expired jobs before registering a new one.
    pruneExpiredSubtitleJobs(subtitleJobStore, SUBTITLE_JOB_TTL_MS);
    const subtitleJob = createSubtitleJob(subtitleJobStore, {
      requestId,
      provider,
      targetLanguage,
      ttsLanguage,
      fileId,
      filePath: videoPath,
    });
    // Seed initial progress: uploads start further along than fileId reuse.
    updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
      status: 'running',
      stage: videoPath ? 'upload_received' : 'queued',
      progress: videoPath ? 15 : 5,
      message: videoPath ? 'Upload received' : 'Queued',
    });
    logEvent({
      level: 'info',
      message: `[subtitle] job accepted ${formatLogContext({
        jobId: subtitleJob.id,
        requestId,
        provider,
        targetLanguage,
        ttsLanguage,
        fileName: req.file?.originalname,
        fileSize: req.file?.size,
        fileId,
      })}`,
      context: {
        jobId: subtitleJob.id,
        requestId,
        provider,
        targetLanguage,
        ttsLanguage,
        fileName: req.file?.originalname,
        fileSize: req.file?.size,
        fileId,
      },
    });
    // Respond immediately (202 Accepted); the heavy pipeline runs below in the
    // background and clients poll the job endpoint for completion.
    res.status(202).json(toSubtitleJobResponse(getSubtitleJob(subtitleJobStore, subtitleJob.id)!));
    // Fire-and-forget background worker; `void` marks the promise as
    // intentionally unawaited. All errors are captured into the job record.
    void (async () => {
      try {
        const result = await generateSubtitlePipeline({
          videoPath,
          fileId,
          provider,
          targetLanguage,
          ttsLanguage,
          env: process.env,
          requestId,
          // Mirror pipeline progress into the job store for polling clients.
          onProgress: (progress) => {
            updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
              status: progress.status,
              stage: progress.stage,
              progress: progress.progress,
              message: progress.message,
            });
          },
        });
        updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
          status: 'succeeded',
          stage: 'succeeded',
          progress: 100,
          message: 'Subtitle generation completed',
          result: {
            ...result,
            provider,
            requestId,
          },
        });
        logEvent({
          level: 'info',
          message: `[subtitle] background job succeeded ${formatLogContext({
            jobId: subtitleJob.id,
            requestId,
            provider,
            targetLanguage,
            durationMs: Date.now() - startedAt,
            subtitleCount: result.subtitles.length,
          })}`,
          context: {
            jobId: subtitleJob.id,
            requestId,
            provider,
            targetLanguage,
            durationMs: Date.now() - startedAt,
            subtitleCount: result.subtitles.length,
            quality: result.quality,
            alignmentEngine: result.alignmentEngine,
          },
        });
      } catch (error: any) {
        const message = error instanceof Error ? error.message : 'Failed to generate subtitles';
        // Status is recorded for logging only — the 202 response already went out.
        const status = deriveSubtitleErrorStatus(message);
        updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
          status: 'failed',
          stage: 'failed',
          message,
          error: message,
        });
        logEvent({
          level: 'error',
          message: `[subtitle] background job failed ${formatLogContext({
            jobId: subtitleJob.id,
            requestId,
            durationMs: Date.now() - startedAt,
            fileName: req.file?.originalname,
            fileSize: req.file?.size,
            status,
          })}`,
          context: {
            ...requestContext,
            jobId: subtitleJob.id,
            durationMs: Date.now() - startedAt,
            status,
          },
          details: {
            error: serializeError(error),
            headers: summarizeRequestHeaders(req.headers),
            bodyKeys: Object.keys(req.body || {}).slice(0, 20),
          },
        });
      } finally {
        // Remove the uploaded temp file (success or failure) and clear the
        // job's filePath so the store never references a deleted file.
        if (videoPath && fs.existsSync(videoPath)) {
          fs.unlinkSync(videoPath);
          updateSubtitleJob(subtitleJobStore, subtitleJob.id, {
            filePath: undefined,
          });
          logEvent({
            level: 'info',
            message: '[subtitle] uploaded temp file cleaned',
            context: {
              jobId: subtitleJob.id,
              requestId,
              videoPath,
              durationMs: Date.now() - startedAt,
            },
          });
        }
      }
    })();
  } catch (error: any) {
    // Synchronous failures (e.g. parseSubtitleRequest) — no 202 was sent yet,
    // so respond with a mapped error status.
    const message = error instanceof Error ? error.message : 'Failed to generate subtitles';
    const status = deriveSubtitleErrorStatus(message);
    logEvent({
      level: 'error',
      message: `[subtitle] request failed ${formatLogContext({
        requestId,
        durationMs: Date.now() - startedAt,
        fileName: req.file?.originalname,
        fileSize: req.file?.size,
        status,
      })}`,
      context: {
        ...requestContext,
        durationMs: Date.now() - startedAt,
        status,
      },
      details: {
        error: serializeError(error),
        headers: summarizeRequestHeaders(req.headers),
        bodyKeys: Object.keys(req.body || {}).slice(0, 20),
      },
    });
    res.status(status).json({ error: message, requestId });
  }
});
app.get('/api/generate-subtitles/:jobId', (req, res) => {
pruneExpiredSubtitleJobs(subtitleJobStore, SUBTITLE_JOB_TTL_MS);
const subtitleJob = getSubtitleJob(subtitleJobStore, req.params.jobId);
if (!subtitleJob) {
return res.status(404).json({ error: 'Subtitle job not found.' });
}
return res.json(toSubtitleJobResponse(subtitleJob));
});
// POST /api/export-video — burns subtitles into an uploaded video and mixes the
// audio tracks (source audio, optional BGM, per-subtitle TTS clips) via a
// single ffmpeg filter graph. Multipart field "video" plus JSON-string fields
// `subtitles`, `bgmBase64`, `trimRange`, `textStyles`. Responds with the whole
// MP4 as a base64 data URL.
app.post('/api/export-video', upload.single('video'), async (req, res) => {
  // Every temp path created below is pushed here and removed in the finally.
  const tempFiles: string[] = [];
  try {
    const { subtitles: subtitlesStr, bgmBase64, trimRange: trimRangeStr, textStyles: textStylesStr } = req.body;
    const videoFile = req.file;
    if (!videoFile) return res.status(400).json({ error: 'No video file provided' });
    // Form fields arrive as JSON strings; parse with safe fallbacks.
    const subtitles = subtitlesStr ? JSON.parse(subtitlesStr) : [];
    const trimRange = trimRangeStr ? JSON.parse(trimRangeStr) : null;
    const textStyles: TextStyles = textStylesStr
      ? { ...DEFAULT_EXPORT_TEXT_STYLES, ...JSON.parse(textStylesStr) }
      : DEFAULT_EXPORT_TEXT_STYLES;
    const timestamp = Date.now();
    const inputPath = videoFile.path;
    const outputPath = path.join(process.cwd(), `output_${timestamp}.mp4`);
    const subtitlePath = path.join(process.cwd(), `subs_${timestamp}.ass`);
    tempFiles.push(subtitlePath, outputPath, inputPath);
    // Probe the input so we know whether it has an audio stream and what the
    // video dimensions are (needed for subtitle layout).
    const probeData: any = await new Promise((resolve, reject) => {
      ffmpeg.ffprobe(inputPath, (err, metadata) => {
        if (err) reject(err);
        else resolve(metadata);
      });
    });
    const hasAudio = probeData.streams.some((s: any) => s.codec_type === 'audio');
    const videoStream = probeData.streams.find((s: any) => s.codec_type === 'video');
    // Fall back to a 1080x1920 portrait canvas when probing yields no dimensions.
    const videoWidth = videoStream?.width || 1080;
    const videoHeight = videoStream?.height || 1920;
    // Re-base subtitle timestamps onto the (possibly trimmed) export timeline.
    const exportSubtitles = shiftSubtitlesToExportTimeline(subtitles || [], trimRange);
    const hasSubtitles = exportSubtitles.length > 0;
    if (hasSubtitles) {
      // Write the styled subtitles to an .ass file for ffmpeg's subtitles filter.
      const assContent = buildAssSubtitleContent({
        subtitles: exportSubtitles,
        textStyles,
        videoWidth,
        videoHeight,
      });
      fs.writeFileSync(subtitlePath, assContent);
    }
    let command = ffmpeg(inputPath);
    const filterComplexParts: string[] = [];
    const audioMixInputs: string[] = [];
    // ffmpeg input index 0 is the video; extra audio inputs start at 1.
    let inputIndex = 1;
    // Decide which audio sources participate in the mix and at what volumes.
    const audioPlan = buildExportAudioPlan({
      hasSourceAudio: hasAudio,
      hasBgm: Boolean(bgmBase64),
      subtitles: exportSubtitles,
    });
    if (bgmBase64) {
      // Materialize BGM to disk and add it as an ffmpeg input with its own volume.
      const bgmPath = path.join(process.cwd(), `bgm_${timestamp}.mp3`);
      fs.writeFileSync(bgmPath, Buffer.from(bgmBase64, 'base64'));
      command = command.input(bgmPath);
      tempFiles.push(bgmPath);
      filterComplexParts.push(`[${inputIndex}:a]volume=${audioPlan.bgmVolume ?? 0.5}[bgm]`);
      audioMixInputs.push('[bgm]');
      inputIndex++;
    }
    if (audioPlan.includeSourceAudio) {
      // Duck the original audio under BGM/TTS tracks.
      filterComplexParts.push(`[0:a]volume=${audioPlan.sourceAudioVolume ?? 0.3}[sourcea]`);
      audioMixInputs.push('[sourcea]');
    }
    // One delayed, volume-adjusted input per TTS clip (data-URL payloads).
    for (let i = 0; i < audioPlan.ttsTracks.length; i++) {
      const track = audioPlan.ttsTracks[i];
      if (track.audioUrl) {
        const base64Data = track.audioUrl.split(',')[1];
        const isWav = track.audioUrl.includes('audio/wav');
        const ext = isWav ? 'wav' : 'mp3';
        const ttsPath = path.join(process.cwd(), `tts_${timestamp}_${i}.${ext}`);
        fs.writeFileSync(ttsPath, Buffer.from(base64Data, 'base64'));
        command = command.input(ttsPath);
        tempFiles.push(ttsPath);
        // adelay takes per-channel delays; repeat for both stereo channels.
        filterComplexParts.push(
          `[${inputIndex}:a]volume=${track.volume},adelay=${track.delayMs}|${track.delayMs}[tts${i}]`,
        );
        audioMixInputs.push(`[tts${i}]`);
        inputIndex++;
      }
    }
    // Escape the path for ffmpeg filter syntax (forward slashes; ':' escaped).
    // NOTE(review): presumably this targets Windows drive-letter paths — confirm
    // the quoting holds for paths containing quotes.
    const escapedSubtitlePath = subtitlePath.replace(/\\/g, '/').replace(/:/g, '\\:');
    if (hasSubtitles) {
      filterComplexParts.push(`[0:v]subtitles='${escapedSubtitlePath}'[vout]`);
    }
    let audioMap: string | null = null;
    if (audioMixInputs.length > 1) {
      // Mix all audio branches; output length follows the first (video-derived) input.
      filterComplexParts.push(
        `${audioMixInputs.join('')}amix=inputs=${audioMixInputs.length}:duration=first:dropout_transition=2[aout]`,
      );
      audioMap = '[aout]';
    } else if (audioMixInputs.length === 1) {
      // Single branch: map its label directly, no amix needed.
      audioMap = audioMixInputs[0];
    }
    if (filterComplexParts.length > 0) {
      command = command.complexFilter(filterComplexParts);
    }
    // Map the filtered video (or raw stream 0 video) plus the chosen audio label.
    const outputMaps = [`-map ${hasSubtitles ? '[vout]' : '0:v'}`];
    if (audioMap) {
      outputMaps.push(`-map ${audioMap}`);
    }
    command = command.outputOptions(outputMaps);
    if (trimRange) {
      // Output-side seek/duration: applied after filtering, keeps sync with the
      // already-shifted subtitle timeline.
      command = command.outputOptions([
        `-ss ${trimRange.start}`,
        `-t ${trimRange.end - trimRange.start}`
      ]);
    }
    await new Promise((resolve, reject) => {
      command
        .output(outputPath)
        .on('end', resolve)
        .on('error', (err, stdout, stderr) => {
          // Include stderr in the rejection — ffmpeg's real diagnostics live there.
          console.error('FFmpeg export error:', err);
          console.error('FFmpeg stderr:', stderr);
          reject(new Error(`FFmpeg error: ${err.message}. Stderr: ${stderr}`));
        })
        .run();
    });
    if (!fs.existsSync(outputPath)) {
      throw new Error('FFmpeg finished but output file was not created');
    }
    // The full MP4 is buffered and base64-encoded into the JSON response.
    const outputBuffer = fs.readFileSync(outputPath);
    console.log(`Exported video size: ${outputBuffer.length} bytes`);
    const outputBase64 = outputBuffer.toString('base64');
    const dataUrl = `data:video/mp4;base64,${outputBase64}`;
    res.json({ videoUrl: dataUrl });
  } catch (error: any) {
    console.error('Export Error:', error);
    res.status(500).json({ error: error.message || 'Failed to export video' });
  } finally {
    // Cleanup: best-effort removal of every temp file created above.
    for (const file of tempFiles) {
      if (fs.existsSync(file)) {
        try {
          fs.unlinkSync(file);
        } catch (e) {
          console.error(`Failed to delete temp file ${file}:`, e);
        }
      }
    }
  }
});
// Dev: mount Vite in middleware mode (HMR, on-the-fly transforms).
// Prod: serve the built SPA from dist/ with an index.html fallback so
// client-side routes resolve.
if (process.env.NODE_ENV !== 'production') {
  const vite = await createViteServer({
    server: { middlewareMode: true },
    appType: 'spa',
  });
  app.use(vite.middlewares);
} else {
  const distPath = path.join(process.cwd(), 'dist');
  app.use(express.static(distPath));
  app.get('*', (req, res) => {
    res.sendFile(path.join(distPath, 'index.html'));
  });
}
// Bind on all interfaces so the server is reachable inside containers.
app.listen(PORT, '0.0.0.0', () => {
  console.log(`Server running on http://localhost:${PORT}`);
});
}
startServer();