Subtitle pipeline with Trigger.dev and Ittybit

View Markdown

When a video lands in your system, you often need subtitles — for accessibility, SEO, or because most social feeds autoplay on mute. This guide builds a four-step pipeline with Trigger.dev and Ittybit: extract the audio track, transcribe it, generate a WebVTT file, and store the subtitles alongside the video. Each step is independently retryable, so a transient Whisper timeout won’t re-extract audio that already succeeded.

Install dependencies

npm install @trigger.dev/sdk openai

Set your environment variables:

TRIGGER_SECRET_KEY=tr_dev_...
ITTYBIT_API_KEY=your_ittybit_api_key
OPENAI_API_KEY=sk-...

Ittybit helper

A thin wrapper around the Ittybit Task API. Used by the pipeline steps to create tasks and check status.

// Base URL of the Ittybit API; the helpers below build their `/tasks` endpoints from it.
const ITTYBIT_API = 'https://api.ittybit.com';

/**
 * Creates a new Ittybit task by POSTing `body` as JSON to `${ITTYBIT_API}/tasks`.
 *
 * The request is authenticated with the `ITTYBIT_API_KEY` environment variable
 * sent as a bearer token.
 *
 * @param body - Task description: the input URL, the task kind, and optional
 *   kind-specific options.
 * @returns The parsed JSON response, typed as `{ id, status, output_url? }`.
 * @throws Error when the API answers with a non-2xx status.
 */
async function createIttybitTask(body: {
  input: string;
  kind: string;
  options?: Record<string, unknown>;
}) {
  const requestHeaders = {
    Authorization: `Bearer ${process.env.ITTYBIT_API_KEY}`,
    'Content-Type': 'application/json',
  };

  const response = await fetch(`${ITTYBIT_API}/tasks`, {
    method: 'POST',
    headers: requestHeaders,
    body: JSON.stringify(body),
  });

  if (response.ok) {
    return response.json() as Promise<{
      id: string;
      status: string;
      output_url?: string;
    }>;
  }

  throw new Error(`Ittybit API error: ${response.status}`);
}

/**
 * Polls `${ITTYBIT_API}/tasks/{taskId}` until the task reaches a terminal state.
 *
 * Makes up to 120 requests, waiting 3 seconds after each unfinished attempt
 * (roughly six minutes in total).
 *
 * @param taskId - Identifier returned when the task was created.
 * @returns The task object once its `status` is `'succeeded'`.
 * @throws Error if the task reports `'failed'`, if the API rejects the request
 *   with a non-retryable status (4xx other than 429), or if the attempt budget
 *   is exhausted.
 */
async function pollForCompletion(taskId: string) {
  const maxAttempts = 120;
  const intervalMs = 3000;

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const res = await fetch(`${ITTYBIT_API}/tasks/${taskId}`, {
      headers: {
        Authorization: `Bearer ${process.env.ITTYBIT_API_KEY}`,
      },
    });

    if (res.ok) {
      const current = (await res.json()) as {
        id: string;
        status: string;
        output_url?: string;
      };

      if (current.status === 'succeeded') return current;
      if (current.status === 'failed') throw new Error(`Task ${taskId} failed`);
    } else if (res.status !== 429 && res.status < 500) {
      // A bad key or unknown task id will never start succeeding, so fail fast
      // instead of polling for minutes. Rate limits and 5xx responses fall
      // through and are retried on the next tick; their bodies are not parsed
      // because they may not be JSON.
      throw new Error(`Ittybit API error: ${res.status}`);
    }

    await new Promise((r) => setTimeout(r, intervalMs));
  }

  throw new Error(`Task ${taskId} timed out`);
}

Define the subtitle pipeline

Each ctx.run call is a durable checkpoint. If step 2 fails, Trigger.dev retries it without re-running step 1. The pipeline flows: extract audio, transcribe, build WebVTT, store the result.

import { task } from '@trigger.dev/sdk/v3';
import OpenAI from 'openai';

// OpenAI client used by the transcription step. No options are passed here;
// the setup section of this guide sets OPENAI_API_KEY in the environment,
// which the SDK presumably picks up by default.
const openai = new OpenAI();

/**
 * Trigger.dev task that generates WebVTT subtitles for a video in four named
 * steps: extract the audio track with Ittybit, transcribe it with Whisper,
 * render the segments as WebVTT, and store the result on the video record.
 *
 * Payload: `videoUrl` is the source video handed to Ittybit; `videoId` is the
 * key of the row updated in step 4.
 *
 * Returns `{ videoId, segmentCount }`.
 *
 * Throws if the Ittybit task fails or times out, if the extracted audio cannot
 * be downloaded, or if the transcription response carries no segments.
 *
 * NOTE(review): `ctx.run` is used as a per-step checkpoint helper here; confirm
 * it exists in the installed @trigger.dev/sdk version. `db` is assumed to be the
 * application's database client, brought into scope elsewhere.
 */
export const generateSubtitles = task({
  id: 'generate-subtitles',
  retry: { maxAttempts: 3 },
  run: async (
    payload: {
      videoUrl: string;
      videoId: string;
    },
    { ctx },
  ) => {
    const { videoUrl, videoId } = payload;

    // Step 1: Extract audio from video
    const audio = await ctx.run('extract-audio', async () => {
      const task = await createIttybitTask({
        input: videoUrl,
        kind: 'audio',
        options: { format: 'mp3' },
      });
      return pollForCompletion(task.id);
    });

    // Step 2: Transcribe the audio
    const segments = await ctx.run('transcribe', async () => {
      // `output_url` is optional on the task type, so check it explicitly
      // rather than asserting it away.
      const audioUrl = audio.output_url;
      if (!audioUrl) {
        throw new Error(`Audio task ${audio.id} succeeded without an output_url`);
      }

      const audioFile = await fetch(audioUrl);
      if (!audioFile.ok) {
        // Without this check an HTML/JSON error body would be uploaded to
        // Whisper as if it were the MP3.
        throw new Error(`Audio download failed: ${audioFile.status}`);
      }

      const blob = new Blob([await audioFile.arrayBuffer()], {
        type: 'audio/mp3',
      });
      const file = new File([blob], 'audio.mp3', { type: 'audio/mp3' });

      const transcription = await openai.audio.transcriptions.create({
        model: 'whisper-1',
        file,
        response_format: 'verbose_json',
        timestamp_granularities: ['segment'],
      });

      const rawSegments = transcription.segments;
      if (!rawSegments) {
        throw new Error('Transcription response did not include segments');
      }

      // Keep only the fields the WebVTT step needs.
      return rawSegments.map((seg) => ({
        start: seg.start,
        end: seg.end,
        text: seg.text.trim(),
      }));
    });

    // Step 3: Generate WebVTT
    const webvtt = await ctx.run('generate-webvtt', async () => {
      let vtt = 'WEBVTT\n\n';

      for (let i = 0; i < segments.length; i++) {
        const s = segments[i];
        // Cue layout: 1-based identifier, timing line, text, blank separator.
        vtt += `${i + 1}\n`;
        vtt += `${formatTimestamp(s.start)} --> ${formatTimestamp(s.end)}\n`;
        vtt += `${s.text}\n\n`;
      }

      return vtt;
    });

    // Step 4: Store subtitles
    const result = await ctx.run('store-subtitles', async () => {
      await db.video.update({
        where: { id: videoId },
        data: {
          subtitles_vtt: webvtt,
          subtitle_status: 'ready',
        },
      });

      return { videoId, segmentCount: segments.length };
    });

    return result;
  },
});

/**
 * Formats a duration in seconds as a WebVTT timestamp, `HH:MM:SS.mmm`.
 *
 * The value is rounded to whole milliseconds before being split into fields,
 * so rounding carries correctly (59.9996 becomes `00:01:00.000`, never
 * `00:00:60.000`). Negative inputs are clamped to zero.
 *
 * @param seconds - Offset from the start of the media, in seconds.
 * @returns The zero-padded timestamp string.
 */
function formatTimestamp(seconds: number): string {
  const totalMs = Math.max(0, Math.round(seconds * 1000));

  const h = Math.floor(totalMs / 3600000);
  const m = Math.floor((totalMs % 3600000) / 60000);
  const s = Math.floor((totalMs % 60000) / 1000);
  const ms = totalMs % 1000;

  return (
    String(h).padStart(2, '0') +
    ':' +
    String(m).padStart(2, '0') +
    ':' +
    String(s).padStart(2, '0') +
    '.' +
    String(ms).padStart(3, '0')
  );
}

Trigger from an Ittybit webhook

When Ittybit finishes ingesting a video, it can POST a webhook to your server. Use that event to kick off the subtitle pipeline automatically.

import { tasks } from "@trigger.dev/sdk/v3";
import type { generateSubtitles } from "./trigger/generate-subtitles";

// app/api/webhooks/ittybit/route.ts
/**
 * Webhook route handler: starts the `generate-subtitles` task when the posted
 * event describes a video whose status is `succeeded`.
 *
 * Any other event is acknowledged with a 200 "ignored" response. On a match,
 * the event's `output_url` and `id` become the task payload and the response
 * body is `{ runId }` taken from the trigger handle.
 */
export async function POST(req: Request) {
  const event = await req.json();

  // Only process newly completed video uploads
  const isCompletedVideo =
    event.kind === "video" && event.status === "succeeded";
  if (!isCompletedVideo) {
    return new Response("ignored", { status: 200 });
  }

  const payload = {
    videoUrl: event.output_url,
    videoId: event.id,
  };
  const handle = await tasks.trigger<typeof generateSubtitles>(
    "generate-subtitles",
    payload,
  );

  return Response.json({ runId: handle.id });
}

import { tasks } from "@trigger.dev/sdk/v3";
import type { generateSubtitles } from "./trigger/generate-subtitles";

// Express-style variant of the webhook handler: triggers `generate-subtitles`
// for completed video events and replies with the run id; every other event
// is acknowledged with a bare 200.
app.post("/webhooks/ittybit", async (req, res) => {
  const event = req.body;

  const isCompletedVideo =
    event.kind === "video" && event.status === "succeeded";
  if (!isCompletedVideo) {
    return res.sendStatus(200);
  }

  const payload = {
    videoUrl: event.output_url,
    videoId: event.id,
  };
  const handle = await tasks.trigger<typeof generateSubtitles>(
    "generate-subtitles",
    payload,
  );

  res.json({ runId: handle.id });
});

Use wait tokens instead of polling

Polling works but burns compute while the audio extraction runs. For production, use Trigger.dev’s wait.for to pause the run until Ittybit calls back.

import { task, wait } from '@trigger.dev/sdk/v3';
import OpenAI from 'openai';

// OpenAI client used for the transcription call below. No options are passed
// here; the setup section of this guide sets OPENAI_API_KEY in the environment,
// which the SDK presumably picks up by default.
const openai = new OpenAI();

/**
 * Variant of the subtitle pipeline that pauses on a wait instead of polling
 * Ittybit for the audio-extraction result.
 *
 * Flow: create the Ittybit audio task, wait (15 minute timeout) for a result
 * carrying `output_url`, download and transcribe the audio with Whisper, render
 * the segments as WebVTT, and store them on the video record.
 *
 * Returns `{ videoId, segmentCount }`.
 *
 * Throws if the Ittybit task cannot be created, if the extracted audio cannot
 * be downloaded, or if the transcription response carries no segments.
 *
 * NOTE(review): confirm the `wait.for({ id, timeout })` call shape against the
 * installed @trigger.dev/sdk; the wait-token API may use different function
 * names. `db` is assumed to be the application's database client, brought into
 * scope elsewhere.
 */
export const generateSubtitlesWithWait = task({
  id: 'generate-subtitles-wait',
  retry: { maxAttempts: 3 },
  run: async (payload: { videoUrl: string; videoId: string }) => {
    const { videoUrl, videoId } = payload;

    // Step 1: Create audio extraction task
    const audioTask = await createIttybitTask({
      input: videoUrl,
      kind: 'audio',
      options: { format: 'mp3' },
    });

    // Pause until Ittybit webhook resolves the wait token
    const audioResult = await wait.for<{ output_url: string }>({
      id: `audio-${audioTask.id}`,
      timeout: '15m',
    });

    // Step 2: Transcribe
    const audioFile = await fetch(audioResult.output_url);
    if (!audioFile.ok) {
      // Without this check an HTML/JSON error body would be uploaded to
      // Whisper as if it were the MP3.
      throw new Error(`Audio download failed: ${audioFile.status}`);
    }

    const blob = new Blob([await audioFile.arrayBuffer()], {
      type: 'audio/mp3',
    });
    const file = new File([blob], 'audio.mp3', { type: 'audio/mp3' });

    const transcription = await openai.audio.transcriptions.create({
      model: 'whisper-1',
      file,
      response_format: 'verbose_json',
      timestamp_granularities: ['segment'],
    });

    // Narrow once instead of repeating a non-null assertion at every use.
    const segments = transcription.segments;
    if (!segments) {
      throw new Error('Transcription response did not include segments');
    }

    // Step 3: Build WebVTT
    let vtt = 'WEBVTT\n\n';
    for (let i = 0; i < segments.length; i++) {
      const seg = segments[i];
      // Cue layout: 1-based identifier, timing line, text, blank separator.
      vtt += `${i + 1}\n`;
      vtt += `${formatTimestamp(seg.start)} --> ${formatTimestamp(seg.end)}\n`;
      vtt += `${seg.text.trim()}\n\n`;
    }

    // Step 4: Store
    await db.video.update({
      where: { id: videoId },
      data: { subtitles_vtt: vtt, subtitle_status: 'ready' },
    });

    return { videoId, segmentCount: segments.length };
  },
});

Then resolve the wait token when the Ittybit webhook fires: