AI-powered video clipping with OpenAI
“Cliphound” (a repurposing tool for content creators) takes a long-form video, sends the transcript to GPT-4, and lets the model decide which segments make the best social clips. GPT-4 calls Ittybit’s task API via function calling to trim and transcode each clip automatically.
Define the tool
Give GPT-4 a create_video_clip function that maps directly to Ittybit’s POST /jobs endpoint:
const tools = [
{
type: "function" as const,
function: {
name: "create_video_clip",
description:
"Trim a segment from a video and transcode it for social media",
parameters: {
type: "object",
properties: {
start: {
type: "number",
description: "Start time in seconds",
},
end: {
type: "number",
description: "End time in seconds",
},
format: {
type: "string",
enum: ["mp4", "webm"],
description: "Output format",
},
width: {
type: "number",
description: "Output width in pixels",
},
height: {
type: "number",
description: "Output height in pixels",
},
},
required: ["start", "end"],
},
},
},
];
tools = [
{
"type": "function",
"function": {
"name": "create_video_clip",
"description": "Trim a segment from a video and transcode it for social media",
"parameters": {
"type": "object",
"properties": {
"start": {
"type": "number",
"description": "Start time in seconds",
},
"end": {
"type": "number",
"description": "End time in seconds",
},
"format": {
"type": "string",
"enum": ["mp4", "webm"],
"description": "Output format",
},
"width": {
"type": "number",
"description": "Output width in pixels",
},
"height": {
"type": "number",
"description": "Output height in pixels",
},
},
"required": ["start", "end"],
},
},
}
]
# The tool definition is part of the OpenAI chat completion request body.
# See the TypeScript or Python tabs for the full structure.
# The next sections show how to wire it up end-to-end.
Ask GPT-4 to pick the clips
Send the video transcript (or a description) along with the tool definition. GPT-4 analyzes the content and decides where to cut.
import OpenAI from "openai";
const openai = new OpenAI();
const VIDEO_URL = "https://cliphound-app.com/uploads/podcast-ep42.mp4";

// Timestamped outline of the episode; GPT-4 picks clip boundaries from it.
const transcript = [
  "[0:00] Intro and sponsor read",
  "[2:15] Guest arrives, small talk",
  '[4:30] "The moment I knew the startup was going to fail was when..."',
  "[7:45] Detailed breakdown of what went wrong with fundraising",
  "[12:00] Lessons learned about co-founder dynamics",
  "[15:30] Lightning round questions",
  "[18:00] Outro",
].join("\n");

// Editing brief for the model: how many clips, how long, and the target format.
const systemPrompt = [
  "You are a video editor. Given a transcript, identify the 2-3 most",
  "engaging segments for short-form social clips (15-60 seconds each).",
  "Call create_video_clip for each one. Target 1080x1920 vertical mp4.",
].join("\n");

// One chat completion with the tool attached; the reply carries the tool calls.
const response = await openai.chat.completions.create({
  model: "gpt-4o",
  tools,
  messages: [
    { role: "system", content: systemPrompt },
    {
      role: "user",
      content: `Here's the transcript for ${VIDEO_URL}:\n${transcript}`,
    },
  ],
});
from openai import OpenAI
client = OpenAI()
VIDEO_URL = "https://cliphound-app.com/uploads/podcast-ep42.mp4"
transcript = """
[0:00] Intro and sponsor read
[2:15] Guest arrives, small talk
[4:30] "The moment I knew the startup was going to fail was when..."
[7:45] Detailed breakdown of what went wrong with fundraising
[12:00] Lessons learned about co-founder dynamics
[15:30] Lightning round questions
[18:00] Outro
"""
response = client.chat.completions.create(
model="gpt-4o",
tools=tools,
messages=[
{
"role": "system",
"content": (
"You are a video editor. Given a transcript, identify the 2-3 most "
"engaging segments for short-form social clips (15-60 seconds each). "
"Call create_video_clip for each one. Target 1080x1920 vertical mp4."
),
},
{
"role": "user",
"content": f"Here's the transcript for {VIDEO_URL}:\n{transcript}",
},
],
)
curl https://api.openai.com/v1/chat/completions \
-H "Authorization: Bearer $OPENAI_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o",
"tools": [{
"type": "function",
"function": {
"name": "create_video_clip",
"description": "Trim a segment from a video and transcode it for social media",
"parameters": {
"type": "object",
"properties": {
"start": { "type": "number", "description": "Start time in seconds" },
"end": { "type": "number", "description": "End time in seconds" },
"format": { "type": "string", "enum": ["mp4", "webm"] },
"width": { "type": "number" },
"height": { "type": "number" }
},
"required": ["start", "end"]
}
}
}],
"messages": [
{
"role": "system",
"content": "You are a video editor. Identify the 2-3 most engaging segments for short-form social clips (15-60s each). Call create_video_clip for each. Target 1080x1920 vertical mp4."
},
{
"role": "user",
"content": "Here is the transcript:\n[0:00] Intro and sponsor read\n[2:15] Guest arrives\n[4:30] The moment I knew the startup was going to fail...\n[7:45] Fundraising breakdown\n[12:00] Co-founder dynamics\n[15:30] Lightning round\n[18:00] Outro"
}
]
}'
GPT-4 will typically return multiple tool calls — one per clip it wants to create.
Handle the function calls
Loop through the tool calls and send each one to Ittybit as a task:
/**
 * Create one Ittybit job that trims a clip out of `VIDEO_URL` and transcodes it.
 *
 * @param args - Arguments GPT-4 supplied for a `create_video_clip` tool call.
 *   `start` and `end` are offsets in seconds; `format`, `width` and `height`
 *   fall back to a 1080x1920 mp4 when omitted.
 * @returns The parsed JSON body of Ittybit's response.
 * @throws RangeError if `start`/`end` do not describe a non-empty segment.
 * @throws Error if Ittybit answers with a non-2xx status.
 */
async function createIttybitTask(args: {
  start: number;
  end: number;
  format?: string;
  width?: number;
  height?: number;
}) {
  // The arguments are model output, so reject nonsense ranges before
  // submitting a job for them.
  if (
    !Number.isFinite(args.start) ||
    !Number.isFinite(args.end) ||
    args.start < 0 ||
    args.end <= args.start
  ) {
    throw new RangeError(
      `Invalid clip range: start=${args.start}, end=${args.end}`
    );
  }

  const format = args.format ?? "mp4";
  const task = {
    kind: "video",
    input: VIDEO_URL,
    options: {
      start: args.start,
      end: args.end,
      format,
      // H.264 is not a valid WebM codec, so only pin it for mp4 output.
      // NOTE(review): other formats are left to Ittybit's default codec —
      // confirm against the API reference.
      ...(format === "mp4" ? { codec: "h264" } : {}),
      width: args.width ?? 1080,
      height: args.height ?? 1920,
      quality: "high",
    },
  };

  const res = await fetch("https://api.ittybit.com/jobs", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${process.env.ITTYBIT_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(task),
  });

  // fetch() resolves on 4xx/5xx as well; without this check an error body
  // would be returned to the caller as if it were a job.
  if (!res.ok) {
    throw new Error(
      `Ittybit job creation failed: HTTP ${res.status} ${res.statusText}`
    );
  }
  return res.json();
}
/** Parse one tool call's JSON arguments and submit them to Ittybit. */
const submitClip = async (call: { function: { arguments: string } }) =>
  createIttybitTask(JSON.parse(call.function.arguments));

// One job per tool call GPT-4 returned; an absent tool_calls list means no jobs.
const toolCalls = response.choices[0].message.tool_calls ?? [];
const tasks = await Promise.all(toolCalls.map((call) => submitClip(call)));

// tasks holds Ittybit's responses, e.g.
// [{ id: "task_abc123", status: "queued", ... }, ...]
console.log(`Created ${tasks.length} clip tasks`);
import json
import os
import requests
ITTYBIT_API_KEY = os.environ["ITTYBIT_API_KEY"]
def create_ittybit_task(args: dict) -> dict:
    """Create one Ittybit job that trims a clip from VIDEO_URL and transcodes it.

    Args:
        args: Arguments GPT-4 supplied for a ``create_video_clip`` tool call.
            ``start`` and ``end`` (seconds) are required; ``format``,
            ``width`` and ``height`` fall back to a 1080x1920 mp4 when they
            are missing or null.

    Returns:
        The decoded JSON body of Ittybit's response.

    Raises:
        KeyError: If ``start`` or ``end`` is missing.
        ValueError: If the range is negative or empty.
        requests.HTTPError: If Ittybit responds with a 4xx/5xx status.
    """
    start, end = args["start"], args["end"]
    # The arguments are model output, so reject nonsense ranges before
    # submitting a job for them.
    if start < 0 or end <= start:
        raise ValueError(f"Invalid clip range: start={start}, end={end}")

    # `or` (rather than a .get default) also covers an explicit JSON null.
    fmt = args.get("format") or "mp4"
    options = {
        "start": start,
        "end": end,
        "format": fmt,
        "width": args.get("width") or 1080,
        "height": args.get("height") or 1920,
        "quality": "high",
    }
    # H.264 is not a valid WebM codec, so only pin it for mp4 output.
    # NOTE(review): other formats are left to Ittybit's default codec —
    # confirm against the API reference.
    if fmt == "mp4":
        options["codec"] = "h264"

    task = {"kind": "video", "input": VIDEO_URL, "options": options}
    res = requests.post(
        "https://api.ittybit.com/jobs",
        headers={"Authorization": f"Bearer {ITTYBIT_API_KEY}"},
        json=task,
        timeout=30,  # requests waits indefinitely when no timeout is given
    )
    # Surface 4xx/5xx here instead of returning the error body as a "task".
    res.raise_for_status()
    return res.json()
# Submit one Ittybit job per tool call GPT-4 returned (none if it made no calls)
tool_calls = response.choices[0].message.tool_calls or []
tasks = []
for call in tool_calls:
    clip_args = json.loads(call.function.arguments)
    tasks.append(create_ittybit_task(clip_args))
print(f"Created {len(tasks)} clip tasks")
# For each clip GPT-4 suggests, POST to Ittybit.
# Example: GPT-4 picked the segment from 4:30 to 5:15 (270s-315s),
# a 45-second clip that fits the 15-60 second limit given in the prompt.
curl -X POST https://api.ittybit.com/jobs \
  -H "Authorization: Bearer $ITTYBIT_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "kind": "video",
    "input": "https://cliphound-app.com/uploads/podcast-ep42.mp4",
    "options": {
      "start": 270,
      "end": 315,
      "format": "mp4",
      "codec": "h264",
      "width": 1080,
      "height": 1920,
      "quality": "high"
    }
  }'
# Response:
# {
#   "id": "task_abc123",
#   "object": "task",
#   "kind": "video",
#   "status": "queued",
#   "input": "https://cliphound-app.com/uploads/podcast-ep42.mp4",
#   "options": { "start": 270, "end": 315, ... },
#   "created_at": 1712000000000
# }
Poll for completion
Each task moves through queued -> processing -> succeeded (or failed if something goes wrong). Poll until every task reaches one of those two final states:
/**
 * Poll an Ittybit job until it reports a final status.
 *
 * @param taskId - The `id` returned when the job was created.
 * @param options - Optional polling controls: `intervalMs` is the pause
 *   between polls (default 2000) and `timeoutMs` is the total time to keep
 *   polling (default 10 minutes).
 * @returns The job JSON once its status is "succeeded" or "failed".
 * @throws Error if Ittybit answers with a non-2xx status, or if the job is
 *   still unfinished when the timeout runs out.
 */
async function waitForTask(
  taskId: string,
  options: { intervalMs?: number; timeoutMs?: number } = {}
): Promise<any> {
  const { intervalMs = 2000, timeoutMs = 10 * 60 * 1000 } = options;
  const deadline = Date.now() + timeoutMs;

  while (true) {
    const res = await fetch(`https://api.ittybit.com/jobs/${taskId}`, {
      headers: {
        Authorization: `Bearer ${process.env.ITTYBIT_API_KEY}`,
      },
    });

    // fetch() resolves on 4xx/5xx too. Without this check a bad API key or an
    // unknown id never yields a final status and the loop would poll forever.
    if (!res.ok) {
      throw new Error(
        `Ittybit returned HTTP ${res.status} while polling ${taskId}`
      );
    }

    const task = await res.json();
    if (task.status === "succeeded" || task.status === "failed") {
      return task;
    }

    // Give up rather than start a sleep that would end past the deadline.
    if (Date.now() + intervalMs > deadline) {
      throw new Error(
        `Timed out after ${timeoutMs}ms waiting for ${taskId} (last status: ${task.status})`
      );
    }
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
}
// Wait on every clip in parallel, then report each job's final status.
const completed = await Promise.all(tasks.map(({ id }) => waitForTask(id)));
completed.forEach((task) => {
  console.log(`${task.id}: ${task.status}`);
});
import time
def wait_for_task(
    task_id: str, poll_interval: float = 2.0, timeout: float = 600.0
) -> dict:
    """Poll an Ittybit job until it reports a final status.

    Args:
        task_id: The ``id`` returned when the job was created.
        poll_interval: Seconds to sleep between polls.
        timeout: Total seconds to keep polling before giving up.

    Returns:
        The job JSON once its status is "succeeded" or "failed".

    Raises:
        requests.HTTPError: If Ittybit responds with a 4xx/5xx status.
        TimeoutError: If the job is still unfinished when ``timeout`` runs out.
    """
    deadline = time.monotonic() + timeout
    while True:
        res = requests.get(
            f"https://api.ittybit.com/jobs/{task_id}",
            headers={"Authorization": f"Bearer {ITTYBIT_API_KEY}"},
            timeout=30,  # requests waits indefinitely when no timeout is given
        )
        # An error response never carries a final status; fail now instead of
        # polling forever on a bad API key or unknown id.
        res.raise_for_status()
        task = res.json()
        status = task.get("status")
        if status in ("succeeded", "failed"):
            return task
        # Give up rather than start a sleep that would end past the deadline.
        if time.monotonic() + poll_interval > deadline:
            raise TimeoutError(
                f"Timed out after {timeout}s waiting for {task_id} "
                f"(last status: {status})"
            )
        time.sleep(poll_interval)
completed = [wait_for_task(task["id"]) for task in tasks]
for task in completed:
print(f"{task['id']}: {task['status']}")
# Poll a task until it completes
curl https://api.ittybit.com/jobs/task_abc123 \
-H "Authorization: Bearer $ITTYBIT_API_KEY"
# Repeat until status is "succeeded" or "failed"
See also
- API: POST /jobs with kind: "video" — create video processing jobs
- Trim video clips — manual trimming without AI
- Resize video for social platforms — platform-specific dimensions