# AI-picked thumbnails with Gemini and Ittybit

Use Gemini scene analysis to find the best moments, then use Ittybit to extract optimized thumbnails.

A good thumbnail is the difference between a click and a scroll. Instead of grabbing the first frame or a random midpoint, use Gemini to watch the video and identify the most visually compelling moments -- then extract and optimize frames at those exact timestamps with Ittybit.

## Define the scene analysis tool

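Give Gemini a function declaration whose arguments are an array of candidate moments. Each entry pairs a timestamp (in seconds) with the reason that moment makes a strong thumbnail. The snippets below assume the Gemini SDK is already imported: `GoogleGenerativeAI` and `FunctionDeclarationSchemaType` from `@google/generative-ai` in TypeScript, and `google.generativeai as genai` plus `os` in Python.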

<CodeGroup labels={["TypeScript", "Python"]}>
```typescript

// Gemini client, authenticated with the API key from the environment.
const genai = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);

// Schema for a single thumbnail candidate: where it is and why it was chosen.
const thumbnailMomentSchema = {
  type: FunctionDeclarationSchemaType.OBJECT,
  properties: {
    timestamp: {
      type: FunctionDeclarationSchemaType.NUMBER,
      description: "Time in seconds",
    },
    reason: {
      type: FunctionDeclarationSchemaType.STRING,
      description: "Why this moment works as a thumbnail",
    },
  },
  required: ["timestamp", "reason"],
};

/**
 * Function declaration handed to Gemini. The model "calls" it with a
 * `moments` array, which gives us structured output instead of free text.
 */
const pickThumbnailsTool = {
  name: "pick_thumbnails",
  description:
    "Select the best moments from a video for use as thumbnails. Return timestamps where the visual content is most compelling -- clear subjects, good lighting, expressive faces, or dramatic action.",
  parameters: {
    type: FunctionDeclarationSchemaType.OBJECT,
    properties: {
      moments: {
        type: FunctionDeclarationSchemaType.ARRAY,
        description: "Array of thumbnail candidates, ordered by visual quality",
        items: thumbnailMomentSchema,
      },
    },
    required: ["moments"],
  },
};

````

```python

# Authenticate the Gemini SDK with the API key from the environment.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Schema for a single thumbnail candidate: where it is and why it was chosen.
moment_schema = genai.protos.Schema(
    type=genai.protos.Type.OBJECT,
    properties={
        "timestamp": genai.protos.Schema(
            type=genai.protos.Type.NUMBER,
            description="Time in seconds",
        ),
        "reason": genai.protos.Schema(
            type=genai.protos.Type.STRING,
            description="Why this moment works as a thumbnail",
        ),
    },
    required=["timestamp", "reason"],
)

# Function declaration the model "calls" with a `moments` array, which gives
# us structured output instead of free text.
pick_thumbnails_declaration = genai.protos.FunctionDeclaration(
    name="pick_thumbnails",
    description=(
        "Select the best moments from a video for use as thumbnails. "
        "Return timestamps where the visual content is most compelling "
        "-- clear subjects, good lighting, expressive faces, or dramatic action."
    ),
    parameters=genai.protos.Schema(
        type=genai.protos.Type.OBJECT,
        properties={
            "moments": genai.protos.Schema(
                type=genai.protos.Type.ARRAY,
                description="Array of thumbnail candidates, ordered by visual quality",
                items=moment_schema,
            ),
        },
        required=["moments"],
    ),
)

# Tool wrapper passed to the model; it exposes the single declaration above.
pick_thumbnails_tool = genai.protos.Tool(
    function_declarations=[pick_thumbnails_declaration]
)
````

</CodeGroup>

## Send the video to Gemini

Pass the video and ask Gemini to pick the best thumbnail moments. It watches the entire video and returns structured timestamps via the function call.

<CodeGroup labels={["TypeScript", "Python"]}>
```typescript
// Model handle with the thumbnail-picking function registered as a tool.
const model = genai.getGenerativeModel({
  model: "gemini-2.0-flash",
  tools: [{ functionDeclarations: [pickThumbnailsTool] }],
});

const videoUrl = "https://example.com/videos/product-launch.mp4";

// Instructions sent alongside the video, one line per entry.
const thumbnailPrompt = [
  "Watch this video and pick the 3-5 best moments for thumbnails.",
  "Look for: clear subjects in focus, good lighting, expressive faces,",
  "dramatic action, or visually striking compositions.",
  "Avoid: blurry frames, transitions, dark scenes, or text-heavy slides.",
  "Call the pick_thumbnails function with your selections.",
].join("\n");

const result = await model.generateContent([
  { fileData: { mimeType: "video/mp4", fileUri: videoUrl } },
  { text: thumbnailPrompt },
]);

// Only the first function call is considered; anything else is an error.
const call = result.response.functionCalls()?.[0];
if (call?.name !== "pick_thumbnails") {
  throw new Error("Gemini did not return a pick_thumbnails function call");
}

type ThumbnailMoment = { timestamp: number; reason: string };
const moments = (call.args as { moments: ThumbnailMoment[] }).moments;

console.log(`Gemini picked ${moments.length} moments:`);
moments.forEach(({ timestamp, reason }) => {
  console.log(`  ${timestamp}s -- ${reason}`);
});
// e.g.
// 14.2s -- Speaker mid-gesture with product visible, well-lit
// 38.7s -- Close-up of product with clear detail
// 52.1s -- Audience reaction shot, high energy

````

```python
# Model handle with the thumbnail-picking tool registered.
model = genai.GenerativeModel(
    model_name="gemini-2.0-flash",
    tools=[pick_thumbnails_tool],
)

video_url = "https://example.com/videos/product-launch.mp4"

response = model.generate_content([
    genai.protos.Part(
        file_data=genai.protos.FileData(
            mime_type="video/mp4", file_uri=video_url
        )
    ),
    (
        "Watch this video and pick the 3-5 best moments for thumbnails. "
        "Look for: clear subjects in focus, good lighting, expressive faces, "
        "dramatic action, or visually striking compositions. "
        "Avoid: blurry frames, transitions, dark scenes, or text-heavy slides. "
        "Call the pick_thumbnails function with your selections."
    ),
])

# The function call is not guaranteed to be the first part of the reply, so
# scan every part instead of reading parts[0] only. An empty candidate list
# is treated the same as "no function call" rather than raising IndexError.
parts = response.candidates[0].content.parts if response.candidates else []
call = next(
    (
        part.function_call
        for part in parts
        if part.function_call.name == "pick_thumbnails"
    ),
    None,
)
if call is None:
    raise ValueError("Gemini did not return a pick_thumbnails function call")

# Convert each entry of the call arguments into a plain dict.
moments = [dict(m) for m in call.args["moments"]]

print(f"Gemini picked {len(moments)} moments:")
for m in moments:
    print(f"  {m['timestamp']}s -- {m['reason']}")
# e.g.
#   14.2s -- Speaker mid-gesture with product visible, well-lit
#   38.7s -- Close-up of product with clear detail
#   52.1s -- Audience reaction shot, high energy
````

</CodeGroup>

## Extract thumbnails with Ittybit

For each timestamp Gemini picked, create an Ittybit image task. The `kind: "image"` task with a video input extracts a frame at the given `start` time and outputs an optimized image.

<CodeGroup labels={["TypeScript", "Python"]}>
```typescript
/**
 * Creates an Ittybit image task that extracts a single frame from a video.
 *
 * @param videoUrl - URL of the source video, sent as the task `input`.
 * @param timestamp - Frame position, sent as the `start` option.
 * @param options - Output overrides. Defaults: 1280 wide, 720 high, `"webp"`.
 * @returns The parsed JSON body of the API response.
 * @throws RangeError if `timestamp` is not a finite number.
 * @throws Error if the API responds with a non-2xx status.
 */
async function extractThumbnail(
  videoUrl: string,
  timestamp: number,
  options: { width?: number; height?: number; format?: string } = {}
) {
  // JSON.stringify serialises NaN/Infinity as null, which would silently drop
  // the requested frame position, so reject bad model output up front.
  if (!Number.isFinite(timestamp)) {
    throw new RangeError(`Invalid thumbnail timestamp: ${timestamp}`);
  }

  const task = {
    kind: "image",
    input: videoUrl,
    options: {
      start: timestamp,
      width: options.width ?? 1280,
      height: options.height ?? 720,
      format: options.format ?? "webp",
    },
  };

  const res = await fetch("https://api.ittybit.com/jobs", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${process.env.ITTYBIT_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(task),
  });

  // fetch only rejects on network failure; HTTP errors must be checked by
  // hand, otherwise an error body is returned as if it were a task.
  if (!res.ok) {
    throw new Error(`Ittybit request failed: ${res.status} ${res.statusText}`);
  }
  return res.json();
}

// Kick off one extraction task per moment Gemini selected, all in parallel.
const tasks = await Promise.all(
  moments.map(({ timestamp }) => extractThumbnail(videoUrl, timestamp))
);

console.log(`Created ${tasks.length} thumbnail tasks`);
tasks.forEach((created) => {
  console.log(`  ${created.id}: ${created.status}`);
});

````

```python

ITTYBIT_API_KEY = os.environ["ITTYBIT_API_KEY"]


def extract_thumbnail(
    video_url: str,
    timestamp: float,
    width: int = 1280,
    height: int = 720,
    format: str = "webp",
) -> dict:
    """Create an Ittybit image task that extracts a single frame from a video.

    Args:
        video_url: URL of the source video, sent as the task ``input``.
        timestamp: Frame position, sent as the ``start`` option.
        width: Output width option.
        height: Output height option.
        format: Output format option.

    Returns:
        The parsed JSON body of the API response.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.Timeout: If the API does not respond within 30 seconds.
    """
    task = {
        "kind": "image",
        "input": video_url,
        "options": {
            "start": timestamp,
            "width": width,
            "height": height,
            "format": format,
        },
    }

    res = requests.post(
        "https://api.ittybit.com/jobs",
        headers={"Authorization": f"Bearer {ITTYBIT_API_KEY}"},
        json=task,
        # requests waits indefinitely unless a timeout is given.
        timeout=30,
    )
    # Surface HTTP errors here instead of returning an error body as a task.
    res.raise_for_status()
    return res.json()

# Create one extraction task per moment Gemini selected.
tasks = []
for moment in moments:
    tasks.append(extract_thumbnail(video_url, moment["timestamp"]))

print(f"Created {len(tasks)} thumbnail tasks")
for created in tasks:
    print(f"  {created['id']}: {created['status']}")
````

</CodeGroup>

## Poll for completion

Each task moves through `queued` -> `processing` -> `succeeded` (or `failed` if something goes wrong). Poll until all thumbnails are ready.

<CodeGroup labels={["TypeScript", "Python"]}>
```typescript
/**
 * Polls an Ittybit task until it reaches a terminal status.
 *
 * @param taskId - ID of the task to poll.
 * @param options - `intervalMs` is the delay between polls (default 2000);
 *   `timeoutMs` is the total time to keep polling (default 5 minutes).
 * @returns The task JSON once its status is `"succeeded"`.
 * @throws Error if the request gets a non-2xx response, the task status is
 *   `"failed"`, or the task is still unfinished when the timeout elapses.
 */
async function waitForTask(
  taskId: string,
  options: { intervalMs?: number; timeoutMs?: number } = {}
): Promise<any> {
  const intervalMs = options.intervalMs ?? 2000;
  const timeoutMs = options.timeoutMs ?? 5 * 60 * 1000;
  const deadline = Date.now() + timeoutMs;

  while (true) {
    const res = await fetch(`https://api.ittybit.com/jobs/${taskId}`, {
      headers: {
        Authorization: `Bearer ${process.env.ITTYBIT_API_KEY}`,
      },
    });
    // Without this check an error body (which has no terminal status) would
    // keep the loop polling forever.
    if (!res.ok) {
      throw new Error(`Ittybit request failed: ${res.status} ${res.statusText}`);
    }
    const task = await res.json();

    if (task.status === "succeeded") return task;
    if (task.status === "failed") {
      // Keep string errors as-is; serialise anything else so the message is
      // never "[object Object]".
      const reason =
        typeof task.error === "string" ? task.error : JSON.stringify(task.error);
      throw new Error(`Task failed: ${reason}`);
    }

    // Bound the wait so an unexpected or stuck status cannot poll forever.
    if (Date.now() >= deadline) {
      throw new Error(
        `Timed out after ${timeoutMs}ms waiting for task ${taskId} (last status: ${task.status})`
      );
    }

    await new Promise((r) => setTimeout(r, intervalMs));
  }
}

// Wait on every task concurrently; results keep the same order as `tasks`,
// so each one lines up with the moment at the same index.
const completed = await Promise.all(tasks.map((pending) => waitForTask(pending.id)));

completed.forEach((finished, index) => {
  console.log(`Thumbnail ${index + 1}: ${finished.output_url}`);
  console.log(`  ${moments[index].timestamp}s -- ${moments[index].reason}`);
});

````

```python

def wait_for_task(
    task_id: str,
    poll_interval: float = 2.0,
    timeout: float = 300.0,
) -> dict:
    """Poll an Ittybit task until it reaches a terminal status.

    Args:
        task_id: ID of the task to poll.
        poll_interval: Seconds to sleep between polls.
        timeout: Total seconds to keep polling before giving up.

    Returns:
        The task JSON once its status is ``"succeeded"``.

    Raises:
        requests.HTTPError: If a poll gets a 4xx/5xx response.
        RuntimeError: If the task status is ``"failed"``.
        TimeoutError: If the task is still unfinished after ``timeout`` seconds.
    """
    deadline = time.monotonic() + timeout
    while True:
        res = requests.get(
            f"https://api.ittybit.com/jobs/{task_id}",
            headers={"Authorization": f"Bearer {ITTYBIT_API_KEY}"},
            # requests waits indefinitely unless a timeout is given.
            timeout=30,
        )
        # Without this an error body (which has no terminal status) would
        # either raise KeyError or keep the loop polling forever.
        res.raise_for_status()
        task = res.json()

        status = task.get("status")
        if status == "succeeded":
            return task
        if status == "failed":
            raise RuntimeError(f"Task failed: {task.get('error')}")

        # Bound the wait so an unexpected or stuck status cannot poll forever.
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Timed out after {timeout}s waiting for task {task_id} "
                f"(last status: {status})"
            )

        time.sleep(poll_interval)

# Wait on each task in submission order so results line up with `moments`.
completed = []
for pending in tasks:
    completed.append(wait_for_task(pending["id"]))

for position, finished in enumerate(completed):
    print(f"Thumbnail {position + 1}: {finished['output_url']}")
    moment = moments[position]
    print(f"  {moment['timestamp']}s -- {moment['reason']}")
````

</CodeGroup>

## See also

- [Extract thumbnails from video](/guides/extract-thumbnails-from-video) -- manual thumbnail extraction without AI
- [Responsive images](/guides/generate-responsive-image-sizes) -- generate multiple sizes from each thumbnail
- [Video editing with Gemini](/guides/video-editing-with-gemini) -- use Gemini to trim and edit video