Video editing with Gemini and Ittybit

View Markdown

Users say “cut the intro, keep only the demo, export as 720p.” Gemini analyzes the video and figures out the trim points. Ittybit executes the edit. No timeline UI, no manual scrubbing — just natural language in, finished clip out.

Define the editing tool

Give Gemini a function declaration that describes what edits are possible. It will call this function with the right parameters based on the user’s instructions.

import { GoogleGenerativeAI, FunctionDeclarationSchemaType } from "@google/generative-ai";

// Fail fast with a clear message rather than asserting the key exists with `!`,
// which would let an unset variable reach the SDK as `undefined`.
const geminiApiKey = process.env.GEMINI_API_KEY;
if (!geminiApiKey) {
  throw new Error("GEMINI_API_KEY environment variable is not set");
}

// Gemini client; `getGenerativeModel` is called on this instance.
const genai = new GoogleGenerativeAI(geminiApiKey);

/** Builds the schema entry for a numeric `edit_video` argument. */
const numberParam = (description: string) => ({
  type: FunctionDeclarationSchemaType.NUMBER,
  description,
});

/** Builds the schema entry for a string `edit_video` argument. */
const stringParam = (description: string) => ({
  type: FunctionDeclarationSchemaType.STRING,
  description,
});

/**
 * Function declaration for the `edit_video` tool.
 *
 * Only `start` and `end` are required; the remaining arguments are optional
 * output settings the model may fill in from the user's instruction.
 */
const editVideoTool = {
  name: "edit_video",
  description:
    "Edit a video by trimming to a specific segment and optionally changing resolution or format.",
  parameters: {
    type: FunctionDeclarationSchemaType.OBJECT,
    properties: {
      start: numberParam("Start time in seconds"),
      end: numberParam("End time in seconds"),
      width: numberParam("Output width in pixels (e.g. 1280 for 720p)"),
      height: numberParam("Output height in pixels"),
      format: stringParam("Output format (mp4, webm)"),
      codec: stringParam("Video codec (h264, h265, vp9, av1)"),
      quality: stringParam("Quality preset (low, medium, high)"),
    },
    required: ["start", "end"],
  },
};
import os

import google.generativeai as genai

# Authenticate the SDK once, up front. `os` must be imported here: this is the
# first statement that reads the environment.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# (argument name, schema type, description) for every argument of edit_video,
# listed in declaration order.
_EDIT_VIDEO_PARAMS = (
    ("start", genai.protos.Type.NUMBER, "Start time in seconds"),
    ("end", genai.protos.Type.NUMBER, "End time in seconds"),
    ("width", genai.protos.Type.NUMBER, "Output width in pixels (e.g. 1280 for 720p)"),
    ("height", genai.protos.Type.NUMBER, "Output height in pixels"),
    ("format", genai.protos.Type.STRING, "Output format (mp4, webm)"),
    ("codec", genai.protos.Type.STRING, "Video codec (h264, h265, vp9, av1)"),
    ("quality", genai.protos.Type.STRING, "Quality preset (low, medium, high)"),
)

# Tool exposing a single edit_video function. Only the trim points are
# required; the output settings are optional.
edit_video_tool = genai.protos.Tool(
    function_declarations=[
        genai.protos.FunctionDeclaration(
            name="edit_video",
            description="Edit a video by trimming to a specific segment and optionally changing resolution or format.",
            parameters=genai.protos.Schema(
                type=genai.protos.Type.OBJECT,
                properties={
                    param: genai.protos.Schema(type=kind, description=text)
                    for param, kind, text in _EDIT_VIDEO_PARAMS
                },
                required=["start", "end"],
            ),
        )
    ]
)

Send the video and instruction to Gemini

Pass the video URL and the user’s plain-English editing request. Gemini watches the video and returns a function call with precise timestamps.

/** Arguments Gemini passes to `edit_video`; mirrors the tool's parameter schema. */
interface EditVideoArgs {
  start: number;
  end: number;
  width?: number;
  height?: number;
  format?: string;
  codec?: string;
  quality?: string;
}

// Bind the edit_video declaration to the model so it can answer with a function call.
const model = genai.getGenerativeModel({
  model: "gemini-2.0-flash",
  tools: [{ functionDeclarations: [editVideoTool] }],
});

const videoUrl = "https://example.com/videos/product-demo.mp4";
const instruction = "Cut the intro, keep only the demo section, export as 720p";

const result = await model.generateContent([
  {
    fileData: { mimeType: "video/mp4", fileUri: videoUrl },
  },
  {
    text: `You are a video editor. Watch this video and determine the exact trim points for the following edit: "${instruction}". Call the edit_video function with the appropriate parameters.`,
  },
]);

const call = result.response.functionCalls()?.[0];
if (!call || call.name !== "edit_video") {
  throw new Error("Gemini did not return an edit_video function call");
}

// The SDK types `args` as a bare `object`, so give it the shape of the tool
// schema and verify the required fields at runtime: model output is untrusted.
const editParams = call.args as EditVideoArgs;
if (
  typeof editParams.start !== "number" ||
  typeof editParams.end !== "number" ||
  editParams.end <= editParams.start
) {
  throw new Error(
    `Gemini returned an invalid trim range: ${JSON.stringify(call.args)}`,
  );
}

console.log("Gemini says:", editParams);
// e.g. { start: 34, end: 187, width: 1280, format: "mp4" }
import os

# Bind the edit_video tool to the model so it can answer with a function call.
model = genai.GenerativeModel(
    model_name="gemini-2.0-flash",
    tools=[edit_video_tool],
)

video_url = "https://example.com/videos/product-demo.mp4"
instruction = "Cut the intro, keep only the demo section, export as 720p"

response = model.generate_content([
    genai.protos.Part(
        file_data=genai.protos.FileData(
            mime_type="video/mp4", file_uri=video_url
        )
    ),
    f'You are a video editor. Watch this video and determine the exact trim points for the following edit: "{instruction}". Call the edit_video function with the appropriate parameters.',
])

# The function call is not guaranteed to be the first part (it may be preceded
# by a text part), and a response may carry no candidates at all, so scan every
# part instead of indexing parts[0] directly.
candidates = response.candidates
parts = candidates[0].content.parts if candidates else []
call = next(
    (part.function_call for part in parts if part.function_call.name == "edit_video"),
    None,
)
if call is None:
    raise ValueError("Gemini did not return an edit_video function call")

edit_params = dict(call.args)
print("Gemini says:", edit_params)
# e.g. {"start": 34, "end": 187, "width": 1280, "format": "mp4"}

Execute the edit with Ittybit

Take Gemini’s parameters and dispatch them as an Ittybit task. The video processing runs asynchronously — poll the task or use a webhook to get notified when it’s done.

// Job payload: the trim points are always sent; each optional output setting
// is included only when Gemini supplied a value for it.
const task = {
  input: videoUrl,
  kind: "video",
  options: {
    start: editParams.start,
    end: editParams.end,
    ...(editParams.width && { width: editParams.width }),
    ...(editParams.height && { height: editParams.height }),
    ...(editParams.format && { format: editParams.format }),
    ...(editParams.codec && { codec: editParams.codec }),
    ...(editParams.quality && { quality: editParams.quality }),
  },
};

const res = await fetch("https://api.ittybit.com/jobs", {
  method: "POST",
  headers: {
    Authorization: `Bearer ${process.env.ITTYBIT_API_KEY}`,
    "Content-Type": "application/json",
  },
  body: JSON.stringify(task),
});

// Surface rejected requests (bad key, invalid options, ...) here; otherwise the
// error body is parsed as if it were a job and `data.id` is undefined.
if (!res.ok) {
  throw new Error(
    `Ittybit rejected the job (HTTP ${res.status}): ${await res.text()}`,
  );
}
const data = await res.json();

console.log("Task ID:", data.id);
console.log("Status:", data.status); // "queued" -> "processing" -> "succeeded"
import requests

# Job payload: the trim points are always sent.
task = {
    "input": video_url,
    "kind": "video",
    "options": {
        "start": edit_params["start"],
        "end": edit_params["end"],
    },
}

# Forward an optional output setting only when Gemini supplied it.
for key in ("width", "height", "format", "codec", "quality"):
    if key in edit_params:
        task["options"][key] = edit_params[key]

res = requests.post(
    "https://api.ittybit.com/jobs",
    headers={"Authorization": f"Bearer {os.environ['ITTYBIT_API_KEY']}"},
    json=task,
    timeout=30,  # requests waits indefinitely unless a timeout is given
)
# Surface rejected requests (bad key, invalid options, ...) here; otherwise the
# error body is parsed as if it were a job and data["id"] raises KeyError.
res.raise_for_status()
data = res.json()

print("Task ID:", data["id"])
print("Status:", data["status"])  # "queued" -> "processing" -> "succeeded"

Poll for completion

/**
 * Polls the Ittybit job endpoint until the task succeeds or fails.
 *
 * @param taskId - ID returned when the job was created.
 * @param options - Optional polling controls: `intervalMs` is the delay between
 *   polls (default 2000) and `timeoutMs` is the overall budget (default 10
 *   minutes; pass `Infinity` to wait indefinitely).
 * @returns The task object from the first response whose status is "succeeded".
 * @throws Error if a poll returns a non-OK HTTP status, the task reports
 *   "failed", or the timeout elapses before the task finishes.
 */
async function waitForTask(
  taskId: string,
  options: { intervalMs?: number; timeoutMs?: number } = {},
): Promise<any> {
  const { intervalMs = 2000, timeoutMs = 10 * 60 * 1000 } = options;
  const deadline = Date.now() + timeoutMs;

  while (true) {
    const res = await fetch(`https://api.ittybit.com/jobs/${taskId}`, {
      headers: {
        Authorization: `Bearer ${process.env.ITTYBIT_API_KEY}`,
      },
    });
    // An error response carries no "succeeded"/"failed" status, so without
    // this check the loop would keep polling forever.
    if (!res.ok) {
      throw new Error(`Polling task ${taskId} failed with HTTP ${res.status}`);
    }
    const task = await res.json();

    if (task.status === "succeeded") return task;
    if (task.status === "failed") throw new Error(`Task failed: ${task.error}`);

    // Give up before sleeping if the next poll would land past the deadline.
    if (Date.now() + intervalMs > deadline) {
      throw new Error(
        `Timed out after ${timeoutMs}ms waiting for task ${taskId}`,
      );
    }
    await new Promise((r) => setTimeout(r, intervalMs));
  }
}

// Wait for the job to finish, then print the output URL from the finished task.
const completed = await waitForTask(data.id);
const { output_url: outputUrl } = completed;
console.log("Output URL:", outputUrl);
import time

def wait_for_task(task_id: str, poll_interval: float = 2.0, timeout: float = 600.0) -> dict:
    """Poll the Ittybit job endpoint until the task succeeds or fails.

    Args:
        task_id: ID returned when the job was created.
        poll_interval: Seconds to sleep between polls.
        timeout: Overall budget in seconds; pass ``float("inf")`` to wait
            indefinitely.

    Returns:
        The task payload from the first response whose status is "succeeded".

    Raises:
        Exception: The task reported status "failed".
        TimeoutError: The task did not finish within ``timeout`` seconds.

    A poll that returns an HTTP error status raises whatever
    ``Response.raise_for_status()`` raises.
    """
    deadline = time.monotonic() + timeout
    while True:
        res = requests.get(
            f"https://api.ittybit.com/jobs/{task_id}",
            headers={"Authorization": f"Bearer {os.environ['ITTYBIT_API_KEY']}"},
            timeout=30,  # requests waits indefinitely unless a timeout is given
        )
        # An error response carries no "succeeded"/"failed" status, so without
        # this check the loop would keep polling forever.
        res.raise_for_status()
        task = res.json()

        status = task.get("status")
        if status == "succeeded":
            return task
        if status == "failed":
            raise Exception(f"Task failed: {task.get('error')}")

        # Give up before sleeping if the next poll would land past the deadline.
        if time.monotonic() + poll_interval > deadline:
            raise TimeoutError(
                f"Timed out after {timeout}s waiting for task {task_id}"
            )
        time.sleep(poll_interval)

# Wait for the job to finish, then print the output URL from the finished task.
completed = wait_for_task(task_id=data["id"])
output_url = completed["output_url"]
print("Output URL:", output_url)

See also