Content moderation with Claude Vision and Ittybit

View Markdown

User-uploaded video needs to be checked before it goes live. Ittybit extracts frames at regular intervals across the video, Claude Vision classifies each frame against your content policy, and the pipeline returns a single approve/flag decision with structured reasoning.

Extract frames with Ittybit

Create an image task for each sample point in the video. The start option tells Ittybit which second to extract. Kick off all the tasks in parallel.

// Ittybit API key, read once from the environment when the module loads.
// The value is `undefined` when the ITTYBIT_API_KEY variable is not set.
const ITTYBIT_API_KEY = process.env.ITTYBIT_API_KEY;

/**
 * Create one Ittybit image job per sample point of a video.
 *
 * Sample points start at second 0 and advance by `intervalSeconds` for as long
 * as they stay below `durationSeconds`. All job-creation requests are started
 * together and awaited with `Promise.all`.
 *
 * @param videoUrl URL of the source video, sent as the job `input`.
 * @param intervalSeconds Spacing between sampled frames; must be a positive, finite number.
 * @param durationSeconds Exclusive upper bound for sample points; must be finite.
 * @returns The parsed JSON body of every job-creation response, in timestamp order.
 * @throws RangeError If `intervalSeconds` or `durationSeconds` would make the sampling loop endless.
 * @throws Error If Ittybit answers any request with a non-2xx status.
 */
async function extractFrames(videoUrl: string, intervalSeconds = 5, durationSeconds = 60) {
  // A zero/negative step never reaches the bound and an infinite bound is never
  // reached, so either would spin forever while growing the timestamp list.
  if (!Number.isFinite(intervalSeconds) || intervalSeconds <= 0) {
    throw new RangeError(`intervalSeconds must be a positive number, got ${intervalSeconds}`);
  }
  if (durationSeconds === Infinity) {
    throw new RangeError("durationSeconds must be finite");
  }

  const timestamps: number[] = [];
  for (let t = 0; t < durationSeconds; t += intervalSeconds) {
    timestamps.push(t);
  }

  return Promise.all(
    timestamps.map(async (start) => {
      const res = await fetch("https://api.ittybit.com/jobs", {
        method: "POST",
        headers: {
          Authorization: `Bearer ${ITTYBIT_API_KEY}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          input: videoUrl,
          kind: "image",
          options: { start, width: 512, format: "jpeg" },
        }),
      });

      // Without this check an error body (which carries no job id) would be
      // returned as if it were a job and only fail later, during polling.
      if (!res.ok) {
        throw new Error(`Ittybit job creation failed for t=${start}s: HTTP ${res.status}`);
      }
      return res.json();
    })
  );
}

import os
import requests
from concurrent.futures import ThreadPoolExecutor

# Ittybit API key, read from the environment at import time.
# The subscript lookup raises KeyError when ITTYBIT_API_KEY is not set.
ITTYBIT_API_KEY = os.environ["ITTYBIT_API_KEY"]

def extract_frames(video_url: str, interval: int = 5, duration: int = 60) -> list[dict]:
    """Create one Ittybit image job per sample point of a video.

    Sample points are ``range(0, duration, interval)`` seconds. The job-creation
    requests are issued from a thread pool, and results keep timestamp order.

    Args:
        video_url: URL of the source video, sent as the job ``input``.
        interval: Spacing between sampled frames, in seconds.
        duration: Exclusive upper bound for sample points, in seconds.

    Returns:
        The parsed JSON body of every job-creation response.

    Raises:
        ValueError: If ``interval`` is 0 (raised by ``range``).
        requests.HTTPError: If Ittybit answers a request with a 4xx/5xx status.
    """
    timestamps = list(range(0, duration, interval))

    def create_task(start: int) -> dict:
        res = requests.post(
            "https://api.ittybit.com/jobs",
            headers={"Authorization": f"Bearer {ITTYBIT_API_KEY}"},
            json={
                "input": video_url,
                "kind": "image",
                "options": {"start": start, "width": 512, "format": "jpeg"},
            },
            # requests waits indefinitely by default; bound every call so a
            # stalled connection cannot hang the whole pool.
            timeout=30,
        )
        # Fail here rather than returning an error body that has no job id.
        res.raise_for_status()
        return res.json()

    with ThreadPoolExecutor() as pool:
        tasks = list(pool.map(create_task, timestamps))

    return tasks

Each job runs asynchronously. Poll GET /jobs/:id or use a webhook to know when the frame is ready. The completed job includes a URL to the extracted JPEG.

Classify frames with Claude Vision

Send each frame to Claude as an image URL. The system prompt defines your moderation policy and the expected JSON output format.

import Anthropic from "@anthropic-ai/sdk";

// Anthropic SDK client, constructed with no explicit options.
// NOTE(review): presumably the SDK picks up the API key from the environment
// (ANTHROPIC_API_KEY) — confirm against the SDK documentation.
const anthropic = new Anthropic();

// Moderation prompt: lists the five policy categories and pins the JSON shape
// (flagged / categories / severity / reasoning) the model is asked to reply with.
const MODERATION_POLICY = `You are a content moderator. Classify the image against this policy:

- violence: graphic violence, gore, or weapons used threateningly
- nudity: explicit nudity or sexual content
- hate: hate symbols, slurs, or extremist imagery
- self_harm: depictions of self-harm or suicide
- drugs: illegal drug use or paraphernalia

Respond with JSON only:
{
"flagged": boolean,
"categories": string[],
"severity": "none" | "low" | "medium" | "high",
"reasoning": string
}`;

/**
 * Classify one extracted frame against the moderation policy with Claude Vision.
 *
 * The frame is passed by URL together with `MODERATION_POLICY` as the system
 * prompt. The reply is parsed and checked against the shape the prompt asks
 * for, so a malformed reply raises instead of flowing on as an unflagged frame.
 *
 * @param imageUrl URL of the frame image to classify.
 * @returns The verdict: `flagged`, `categories`, `severity` and `reasoning`.
 * @throws Error If the reply contains no JSON object, the JSON is malformed,
 *   or a field is missing or has the wrong type.
 */
async function classifyFrame(imageUrl: string) {
  const response = await anthropic.messages.create({
    model: "claude-sonnet-4-20250514",
    max_tokens: 256,
    messages: [
      {
        role: "user",
        content: [
          {
            type: "image",
            source: { type: "url", url: imageUrl },
          },
          {
            type: "text",
            text: "Classify this video frame against the content policy.",
          },
        ],
      },
    ],
    system: MODERATION_POLICY,
  });

  // Gather every text block instead of assuming the first block exists and is text.
  let text = "";
  for (const block of response.content) {
    if (block.type === "text") text += block.text;
  }

  // Keep only the outermost {...}: models sometimes wrap the JSON in a
  // Markdown fence or add a sentence around it despite "JSON only".
  const start = text.indexOf("{");
  const end = text.lastIndexOf("}");
  if (start === -1 || end < start) {
    throw new Error(`Claude returned no JSON object for ${imageUrl}: ${JSON.stringify(text)}`);
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(text.slice(start, end + 1));
  } catch {
    throw new Error(`Claude returned malformed JSON for ${imageUrl}: ${JSON.stringify(text)}`);
  }

  const candidate = parsed as {
    flagged?: unknown;
    categories?: unknown;
    severity?: unknown;
    reasoning?: unknown;
  };
  const { flagged, categories, reasoning } = candidate;
  const severity = (["none", "low", "medium", "high"] as const).find(
    (level) => level === candidate.severity
  );

  if (
    typeof flagged !== "boolean" ||
    severity === undefined ||
    !Array.isArray(categories) ||
    !categories.every((category): category is string => typeof category === "string") ||
    typeof reasoning !== "string"
  ) {
    throw new Error(`Claude returned an unexpected verdict shape for ${imageUrl}: ${JSON.stringify(parsed)}`);
  }

  return { flagged, categories, severity, reasoning };
}

import json
import anthropic

# Anthropic SDK client, constructed with no explicit arguments.
# NOTE(review): presumably the SDK picks up the API key from the environment
# (ANTHROPIC_API_KEY) — confirm against the SDK documentation.
client = anthropic.Anthropic()

# Moderation prompt: lists the five policy categories and pins the JSON shape
# (flagged / categories / severity / reasoning) the model is asked to reply with.
MODERATION_POLICY = """You are a content moderator. Classify the image against this policy:
- violence: graphic violence, gore, or weapons used threateningly
- nudity: explicit nudity or sexual content
- hate: hate symbols, slurs, or extremist imagery
- self_harm: depictions of self-harm or suicide
- drugs: illegal drug use or paraphernalia

Respond with JSON only:
{
  "flagged": boolean,
  "categories": string[],
  "severity": "none" | "low" | "medium" | "high",
  "reasoning": string
}"""

def classify_frame(image_url: str) -> dict:
    """Classify one extracted frame against the moderation policy with Claude Vision.

    The frame is passed by URL together with ``MODERATION_POLICY`` as the system
    prompt. The reply is parsed and checked against the shape the prompt asks
    for, so a malformed reply raises instead of flowing on into aggregation.

    Args:
        image_url: URL of the frame image to classify.

    Returns:
        The verdict dict with ``flagged``, ``categories``, ``severity`` and
        ``reasoning`` keys.

    Raises:
        ValueError: If the reply contains no JSON object, the JSON is malformed
            (``json.JSONDecodeError`` is a ``ValueError``), or a field is
            missing or has the wrong type.
    """
    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=256,
        system=MODERATION_POLICY,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {"type": "url", "url": image_url},
                    },
                    {
                        "type": "text",
                        "text": "Classify this video frame against the content policy.",
                    },
                ],
            },
        ],
    )

    # Gather every text block instead of assuming the first block exists and is text.
    text = "".join(
        block.text for block in response.content if getattr(block, "type", None) == "text"
    )

    # Keep only the outermost {...}: models sometimes wrap the JSON in a
    # Markdown fence or add a sentence around it despite "JSON only".
    start, end = text.find("{"), text.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"Claude returned no JSON object for {image_url}: {text!r}")
    verdict = json.loads(text[start : end + 1])

    categories = verdict.get("categories")
    well_formed = (
        isinstance(verdict.get("flagged"), bool)
        and verdict.get("severity") in ("none", "low", "medium", "high")
        and isinstance(categories, list)
        and all(isinstance(category, str) for category in categories)
        and isinstance(verdict.get("reasoning"), str)
    )
    if not well_formed:
        raise ValueError(f"Claude returned an unexpected verdict shape for {image_url}: {verdict!r}")

    return verdict

Claude returns structured JSON for each frame:

{
  "flagged": true,
  "categories": ["violence"],
  "severity": "medium",
  "reasoning": "Frame shows a character holding a weapon in a threatening posture"
}

Aggregate verdicts

Collect all frame classifications and produce a single moderation decision. Flag the video if any flagged frame meets or exceeds your severity threshold.

/** Verdict for a single frame, mirroring the JSON shape requested in the moderation prompt. */
interface FrameVerdict {
  /** True when the frame was judged to violate the policy. */
  flagged: boolean;
  /** Policy category labels attached to the frame. */
  categories: Array<string>;
  /** Severity level assigned to the frame. */
  severity: "high" | "medium" | "low" | "none";
  /** Free-text explanation of the verdict. */
  reasoning: string;
}

/** Video-level moderation decision produced by aggregating the per-frame verdicts. */
interface ModerationResult {
  /** True when no frame was flagged at or above the threshold. */
  approved: boolean;
  /** Number of frame verdicts that were examined. */
  frames_checked: number;
  /** Number of frames that were flagged at or above the threshold. */
  frames_flagged: number;
  /** De-duplicated categories across the flagged frames. */
  categories: string[];
  /** One entry per flagged frame, tagged with its timestamp. */
  verdicts: {
    timestamp: number;
    severity: string;
    categories: string[];
    reasoning: string;
  }[];
}

/**
 * Fold per-frame verdicts into a single approve/flag decision for the video.
 *
 * A frame counts against the video when it is `flagged` and its severity ranks
 * at or above `threshold`. A severity outside none/low/medium/high is ranked
 * as the highest level, so an unexpected label cannot slip a flagged frame
 * past the threshold.
 *
 * @param verdicts Per-frame verdicts, in frame order.
 * @param timestamps Timestamp of each frame; `timestamps[i]` belongs to `verdicts[i]`.
 * @param threshold Lowest severity that causes a flagged frame to count.
 * @returns The decision, counters, de-duplicated categories (first-seen order)
 *   and the flagged frames with their timestamps.
 * @throws RangeError If a verdict has no matching timestamp.
 */
function aggregateVerdicts(
  verdicts: FrameVerdict[],
  timestamps: number[],
  threshold: "low" | "medium" | "high" = "medium"
): ModerationResult {
  const severityRank = new Map<string, number>([
    ["none", 0],
    ["low", 1],
    ["medium", 2],
    ["high", 3],
  ]);
  const highestRank = 3;
  // Fail closed: an unknown label ranks as the highest severity.
  const rankOf = (severity: string): number => severityRank.get(severity) ?? highestRank;
  const thresholdRank = rankOf(threshold);

  const flagged: ModerationResult["verdicts"] = [];
  verdicts.forEach((verdict, index) => {
    const timestamp = timestamps[index];
    if (timestamp === undefined) {
      throw new RangeError(
        `verdict ${index} has no matching timestamp (${timestamps.length} timestamps for ${verdicts.length} verdicts)`
      );
    }
    if (verdict.flagged && rankOf(verdict.severity) >= thresholdRank) {
      flagged.push({
        timestamp,
        severity: verdict.severity,
        categories: verdict.categories,
        reasoning: verdict.reasoning,
      });
    }
  });

  return {
    approved: flagged.length === 0,
    frames_checked: verdicts.length,
    frames_flagged: flagged.length,
    // A Set keeps insertion order, so categories appear in first-seen order.
    categories: [...new Set(flagged.flatMap((v) => v.categories))],
    verdicts: flagged,
  };
}

def aggregate_verdicts(
    verdicts: list[dict],
    timestamps: list[int],
    threshold: str = "medium",
) -> dict:
    """Fold per-frame verdicts into a single approve/flag decision for the video.

    A frame counts against the video when it is ``flagged`` and its severity
    ranks at or above ``threshold``. A severity outside none/low/medium/high is
    ranked as the highest level, so an unexpected label cannot slip a flagged
    frame past the threshold.

    Args:
        verdicts: Per-frame verdict dicts, in frame order.
        timestamps: Timestamp of each frame; ``timestamps[i]`` belongs to ``verdicts[i]``.
        threshold: Lowest severity that causes a flagged frame to count.

    Returns:
        A dict with ``approved``, ``frames_checked``, ``frames_flagged``,
        ``categories`` (de-duplicated, first-seen order) and ``verdicts``
        (the flagged frames with their timestamps).

    Raises:
        KeyError: If ``threshold`` is not one of the known severities.
        ValueError: If there are fewer timestamps than verdicts.
    """
    # zip() would silently drop the verdicts that have no timestamp.
    if len(timestamps) < len(verdicts):
        raise ValueError(
            f"got {len(timestamps)} timestamps for {len(verdicts)} verdicts"
        )

    severity_rank = {"none": 0, "low": 1, "medium": 2, "high": 3}
    threshold_rank = severity_rank[threshold]
    # Fail closed: an unknown label ranks as the highest severity.
    highest_rank = max(severity_rank.values())

    flagged = [
        {**v, "timestamp": t}
        for v, t in zip(verdicts, timestamps)
        if v["flagged"] and severity_rank.get(v["severity"], highest_rank) >= threshold_rank
    ]

    # dict.fromkeys de-duplicates while keeping first-seen order; a set would
    # make the category order vary between runs.
    all_categories = list(dict.fromkeys(cat for v in flagged for cat in v["categories"]))

    return {
        "approved": len(flagged) == 0,
        "frames_checked": len(verdicts),
        "frames_flagged": len(flagged),
        "categories": all_categories,
        "verdicts": [
            {
                "timestamp": v["timestamp"],
                "severity": v["severity"],
                "categories": v["categories"],
                "reasoning": v["reasoning"],
            }
            for v in flagged
        ],
    }

The final output looks like this:

{
  "approved": false,
  "frames_checked": 12,
  "frames_flagged": 2,
  "categories": ["violence"],
  "verdicts": [
    {
      "timestamp": 15,
      "severity": "medium",
      "categories": ["violence"],
      "reasoning": "Frame shows a character holding a weapon in a threatening posture"
    },
    {
      "timestamp": 20,
      "severity": "high",
      "categories": ["violence"],
      "reasoning": "Frame depicts graphic impact with visible injury"
    }
  ]
}

Full pipeline

Wire it all together: extract frames, wait for completion, classify, decide.

/**
 * Poll an Ittybit job until it finishes and return the URL of its output.
 *
 * @param taskId Id of the job to poll.
 * @param timeoutMs Give up once this many milliseconds have passed without a terminal status.
 * @param intervalMs Pause between two polls, in milliseconds.
 * @returns `output.url` of the completed job.
 * @throws Error If a poll returns a non-2xx status, the job reports status
 *   "error", or the job does not complete within `timeoutMs`.
 */
async function pollForResult(
  taskId: string,
  timeoutMs = 5 * 60 * 1000,
  intervalMs = 2000
): Promise<string> {
  const deadline = Date.now() + timeoutMs;

  while (true) {
    const res = await fetch(`https://api.ittybit.com/jobs/${taskId}`, {
      headers: { Authorization: `Bearer ${ITTYBIT_API_KEY}` },
    });
    // An error body has no `status` field, so without this check a bad key or
    // an unknown id would keep the loop running.
    if (!res.ok) {
      throw new Error(`Polling task ${taskId} failed: HTTP ${res.status}`);
    }

    const task = await res.json();
    if (task.status === "completed") return task.output.url;
    if (task.status === "error") throw new Error(`Task ${taskId} failed`);

    // Bound the wait so a job that never reaches a terminal status cannot hang the caller.
    if (Date.now() >= deadline) {
      throw new Error(`Task ${taskId} did not complete within ${timeoutMs} ms`);
    }
    await new Promise((r) => setTimeout(r, intervalMs));
  }
}

/**
 * Run the whole moderation pipeline for one video: extract frames, wait for
 * them, classify each one, and aggregate the verdicts into a single decision.
 *
 * @param videoUrl URL of the video to moderate.
 * @param durationSeconds Exclusive upper bound for sampled timestamps, in seconds.
 * @param intervalSeconds Spacing between sampled frames; must be a positive, finite number.
 * @param threshold Lowest severity that causes a flagged frame to reject the video.
 * @returns The aggregated moderation result.
 * @throws RangeError If `intervalSeconds` or `durationSeconds` would make the sampling loop endless.
 * @throws Error If Ittybit returns a job without an id.
 */
async function moderateVideo(
  videoUrl: string,
  durationSeconds = 60,
  intervalSeconds = 5,
  threshold: "low" | "medium" | "high" = "medium"
) {
  // A zero/negative step or an infinite bound would make the loop below endless.
  if (!Number.isFinite(intervalSeconds) || intervalSeconds <= 0) {
    throw new RangeError(`intervalSeconds must be a positive number, got ${intervalSeconds}`);
  }
  if (durationSeconds === Infinity) {
    throw new RangeError("durationSeconds must be finite");
  }

  // Same sampling rule as extractFrames, so timestamps[i] belongs to tasks[i].
  const timestamps: number[] = [];
  for (let t = 0; t < durationSeconds; t += intervalSeconds) {
    timestamps.push(t);
  }

  // 1. Extract frames via Ittybit
  const tasks = await extractFrames(videoUrl, intervalSeconds, durationSeconds);

  // 2. Wait for all frames to be ready
  const frameUrls = await Promise.all(
    tasks.map(async (task: { id?: string } | null | undefined, index: number) => {
      const id = task?.id;
      // Polling `/jobs/undefined` would never succeed; stop with a clear message.
      if (id == null) {
        throw new Error(`Ittybit returned no job id for the frame at ${timestamps[index]}s`);
      }
      return pollForResult(id);
    })
  );

  // 3. Classify each frame with Claude Vision
  // The arrow keeps map's index/array arguments away from classifyFrame.
  const verdicts = await Promise.all(frameUrls.map((url) => classifyFrame(url)));

  // 4. Aggregate verdicts into a single decision
  return aggregateVerdicts(verdicts, timestamps, threshold);
}

// Example run: moderate one uploaded video and print the outcome.
const result = await moderateVideo("https://example.com/uploads/user-video.mp4", 60);

if (!result.approved) {
  // Rejected: report which categories were hit and how many frames were flagged.
  console.log(`Video flagged: ${result.categories.join(", ")}`);
  console.log(`${result.frames_flagged} of ${result.frames_checked} frames violated policy`);
} else {
  console.log("Video approved for publishing");
}

import time

def poll_for_result(task_id: str, timeout: float = 300.0, poll_interval: float = 2.0) -> str:
    """Poll an Ittybit job until it finishes and return the URL of its output.

    Args:
        task_id: Id of the job to poll.
        timeout: Give up once this many seconds have passed without a terminal status.
        poll_interval: Pause between two polls, in seconds.

    Returns:
        ``output.url`` of the completed job.

    Raises:
        requests.HTTPError: If a poll returns a 4xx/5xx status.
        RuntimeError: If the job reports status "error".
        TimeoutError: If the job does not complete within ``timeout`` seconds.
    """
    deadline = time.monotonic() + timeout

    while True:
        res = requests.get(
            f"https://api.ittybit.com/jobs/{task_id}",
            headers={"Authorization": f"Bearer {ITTYBIT_API_KEY}"},
            # requests waits indefinitely by default; bound every single poll.
            timeout=30,
        )
        # An error body has no "status" key; fail with the HTTP error instead.
        res.raise_for_status()

        task = res.json()
        status = task.get("status")
        if status == "completed":
            return task["output"]["url"]
        if status == "error":
            raise RuntimeError(f"Task {task_id} failed")

        # Bound the wait so a job that never reaches a terminal status cannot hang the caller.
        if time.monotonic() >= deadline:
            raise TimeoutError(f"Task {task_id} did not complete within {timeout} seconds")
        time.sleep(poll_interval)

def moderate_video(
    video_url: str,
    duration: int = 60,
    interval: int = 5,
    threshold: str = "medium",
) -> dict:
    """Run the whole moderation pipeline for one video.

    Extracts frames, waits for them, classifies each one and aggregates the
    verdicts into a single decision.

    Args:
        video_url: URL of the video to moderate.
        duration: Exclusive upper bound for sampled timestamps, in seconds.
        interval: Spacing between sampled frames, in seconds.
        threshold: Lowest severity that causes a flagged frame to reject the video.

    Returns:
        The aggregated moderation result dict.

    Raises:
        ValueError: If ``interval`` is 0 (raised by ``range``).
    """
    # Same sampling rule as extract_frames, so timestamps[i] belongs to tasks[i].
    timestamps = list(range(0, duration, interval))

    # 1. Extract frames via Ittybit
    tasks = extract_frames(video_url, interval, duration)

    # 2. Wait for all frames to be ready
    frame_urls = [poll_for_result(task["id"]) for task in tasks]

    # 3. Classify each frame with Claude Vision
    # The calls are independent and network-bound, so run them from a pool;
    # pool.map keeps the results in frame order.
    with ThreadPoolExecutor() as pool:
        verdicts = list(pool.map(classify_frame, frame_urls))

    # 4. Aggregate verdicts into a single decision
    return aggregate_verdicts(verdicts, timestamps, threshold)

# Example run: moderate one uploaded video and print the outcome.
result = moderate_video("https://example.com/uploads/user-video.mp4", duration=60)

if not result["approved"]:
    # Rejected: report which categories were hit and how many frames were flagged.
    print(f"Video flagged: {', '.join(result['categories'])}")
    print(f"{result['frames_flagged']} of {result['frames_checked']} frames violated policy")
else:
    print("Video approved for publishing")

Tuning the pipeline

Parameter	Default	Effect
Frame interval	5s	A lower interval samples more frames and catches more, but costs more Claude calls
Severity threshold	`medium`	Set to `low` for stricter moderation
Image width	512px	Higher resolution improves accuracy, increases latency
Model	`claude-sonnet-4-20250514`	Use Haiku for faster/cheaper screening at scale