# Media processing agent with Mistral

Use Mistral function calling to orchestrate video, image, and audio tasks via Ittybit

Large language models are good at understanding intent but can't process media on their own. By wiring Mistral's function calling to the Ittybit API, you can build an agent that accepts plain English requests like "transcode this to 720p" or "create an HLS stream" and dispatches the right media tasks automatically.

## Install dependencies

<CodeGroup labels={["TypeScript", "Python"]}>
```bash
npm install @mistralai/mistralai
```

```bash
pip install mistralai requests
```

</CodeGroup>

## Define the tool

The agent needs one tool: `process_media`. It accepts an input URL, a task kind, and processing options. Mistral will extract these from the user's natural language request.

<CodeGroup labels={["TypeScript", "Python"]}>
```typescript
// JSON Schema describing the arguments of the `process_media` function.
const processMediaParameters = {
  type: "object",
  properties: {
    input: {
      type: "string",
      description: "URL of the source media file",
    },
    kind: {
      type: "string",
      enum: ["video", "audio", "image", "adaptive_video"],
      description: "Type of processing task",
    },
    options: {
      type: "object",
      description:
        "Processing options like format, width, height, quality, start, end",
    },
  },
  // `options` is optional; the model must always supply a source and a kind.
  required: ["input", "kind"],
};

// Tool list passed to Mistral: a single function-type tool.
const tools = [
  {
    type: "function" as const,
    function: {
      name: "process_media",
      description:
        "Create a media processing task via Ittybit. Supports transcoding video, extracting audio, generating thumbnails, and creating adaptive streams.",
      parameters: processMediaParameters,
    },
  },
];
```

```python
# JSON Schema describing the arguments of the `process_media` function.
_process_media_parameters = {
    "type": "object",
    "properties": {
        "input": {
            "type": "string",
            "description": "URL of the source media file",
        },
        "kind": {
            "type": "string",
            "enum": ["video", "audio", "image", "adaptive_video"],
            "description": "Type of processing task",
        },
        "options": {
            "type": "object",
            "description": (
                "Processing options like format, width, height, quality, start, end"
            ),
        },
    },
    # `options` is optional; the model must always supply a source and a kind.
    "required": ["input", "kind"],
}

# Tool list passed to Mistral: a single function-type tool.
tools = [
    {
        "type": "function",
        "function": {
            "name": "process_media",
            "description": (
                "Create a media processing task via Ittybit. "
                "Supports transcoding video, extracting audio, "
                "generating thumbnails, and creating adaptive streams."
            ),
            "parameters": _process_media_parameters,
        },
    }
]
```

</CodeGroup>

## Handle tool calls

Send the user message to Mistral with the tool definition. When the model decides to call `process_media`, parse the arguments and POST to the Ittybit Tasks API.

<CodeGroup labels={["TypeScript", "Python"]}>
```typescript

import { Mistral } from "@mistralai/mistralai";

// Mistral client; the API key is read from the MISTRAL_API_KEY environment variable.
const mistral = new Mistral({
  apiKey: process.env.MISTRAL_API_KEY,
});

/**
 * POSTs the arguments of a `process_media` tool call to the Ittybit API.
 *
 * @param args - Source URL, task kind, and optional processing options.
 * @returns The parsed JSON body of the API response.
 */
async function processMedia(args: {
  input: string;
  kind: string;
  options?: Record<string, unknown>;
}) {
  const { input, kind, options } = args;

  // Missing options are sent as an empty object rather than omitted.
  const payload = JSON.stringify({ input, kind, options: options ?? {} });

  const response = await fetch("https://api.ittybit.com/jobs", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${process.env.ITTYBIT_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: payload,
  });

  return await response.json();
}

/**
 * Sends a user request to Mistral, executes every tool call the model makes,
 * and then asks the model for a follow-up reply describing the outcome.
 *
 * @param userMessage - Plain-English description of the media work to do.
 * @returns The model's reply plus the result of each `processMedia` call, or
 *   `undefined` when the first completion contains no choices.
 */
async function runAgent(userMessage: string) {
  const messages: any[] = [
    {
      role: "system",
      content:
        "You are a media processing assistant. Use the process_media tool to handle user requests for transcoding, thumbnails, format conversion, and streaming.",
    },
    { role: "user", content: userMessage },
  ];

  const response = await mistral.chat.complete({
    model: "mistral-large-latest",
    messages,
    tools,
  });

  const choice = response.choices?.[0];
  if (!choice) return;

  const toolCalls = choice.message.toolCalls;

  // No tool call requested: the model answered directly.
  if (choice.finishReason !== "tool_calls" || !toolCalls) {
    return { reply: choice.message.content, tasks: [] };
  }

  // Record the assistant turn exactly once, ahead of the tool results.
  // Pushing it per tool call would duplicate it whenever the model requests
  // several tasks in one response.
  messages.push(choice.message);

  const results = [];

  for (const toolCall of toolCalls) {
    const { name, arguments: rawArgs } = toolCall.function;

    if (name !== "process_media") {
      // Answer unrecognised calls too, so no tool call is left without a result.
      messages.push({
        role: "tool",
        name,
        content: JSON.stringify({ error: `Unknown tool: ${name}` }),
        toolCallId: toolCall.id,
      });
      continue;
    }

    // Arguments may arrive as a JSON string or as an already-parsed object.
    const args = typeof rawArgs === "string" ? JSON.parse(rawArgs) : rawArgs;
    const result = await processMedia(args);
    results.push(result);

    messages.push({
      role: "tool",
      name: "process_media",
      content: JSON.stringify(result),
      toolCallId: toolCall.id,
    });
  }

  // Get the model's summary of what happened
  const followUp = await mistral.chat.complete({
    model: "mistral-large-latest",
    messages,
    tools,
  });

  return {
    reply: followUp.choices?.[0]?.message.content,
    tasks: results,
  };
}

```

```python

import json
import os

import requests
from mistralai import Mistral

# Mistral client; the API key is read from the MISTRAL_API_KEY environment variable.
client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])

def process_media(args: dict) -> dict:
    """POST the arguments of a ``process_media`` tool call to the Ittybit API.

    Args:
        args: Tool-call arguments. ``input`` and ``kind`` are required;
            ``options`` falls back to an empty dict when absent.

    Returns:
        The decoded JSON body of the API response.
    """
    endpoint = "https://api.ittybit.com/jobs"
    auth_headers = {"Authorization": f"Bearer {os.environ['ITTYBIT_API_KEY']}"}
    payload = {
        "input": args["input"],
        "kind": args["kind"],
        "options": args.get("options", {}),
    }

    response = requests.post(endpoint, headers=auth_headers, json=payload)
    return response.json()

def run_agent(user_message: str) -> dict:
    """Send a request to Mistral, run its tool calls, and return the outcome.

    Args:
        user_message: Plain-English description of the media work to do.

    Returns:
        A dict with ``reply`` (the model's final message content) and
        ``tasks`` (the result of each ``process_media`` call, in call order).
    """
    messages = [
        {
            "role": "system",
            "content": (
                "You are a media processing assistant. "
                "Use the process_media tool to handle user requests "
                "for transcoding, thumbnails, format conversion, "
                "and streaming."
            ),
        },
        {"role": "user", "content": user_message},
    ]

    response = client.chat.complete(
        model="mistral-large-latest",
        messages=messages,
        tools=tools,
    )

    choice = response.choices[0]
    tool_calls = choice.message.tool_calls

    # No tool call requested: the model answered directly.
    if choice.finish_reason != "tool_calls" or not tool_calls:
        return {"reply": choice.message.content, "tasks": []}

    # Record the assistant turn exactly once, ahead of the tool results.
    # Appending it per tool call would duplicate it whenever the model
    # requests several tasks in one response.
    messages.append(choice.message)

    results = []

    for tool_call in tool_calls:
        name = tool_call.function.name

        if name != "process_media":
            # Answer unrecognised calls too, so no tool call is left
            # without a result.
            messages.append(
                {
                    "role": "tool",
                    "name": name,
                    "content": json.dumps({"error": f"Unknown tool: {name}"}),
                    "tool_call_id": tool_call.id,
                }
            )
            continue

        # Arguments may arrive as a JSON string or as an already-parsed dict.
        raw_args = tool_call.function.arguments
        args = json.loads(raw_args) if isinstance(raw_args, str) else raw_args

        result = process_media(args)
        results.append(result)

        messages.append(
            {
                "role": "tool",
                "name": "process_media",
                "content": json.dumps(result),
                "tool_call_id": tool_call.id,
            }
        )

    # Get the model's summary of what happened.
    follow_up = client.chat.complete(
        model="mistral-large-latest",
        messages=messages,
        tools=tools,
    )

    return {
        "reply": follow_up.choices[0].message.content,
        "tasks": results,
    }
```

</CodeGroup>

## Try it

<CodeGroup labels={["TypeScript", "Python"]}>
```typescript
// Transcode to 720p
const transcodePrompt = "Transcode https://example.com/raw.mov to 720p MP4";
await runAgent(transcodePrompt);

// Generate a thumbnail
const thumbnailPrompt =
  "Grab a thumbnail at the 5 second mark from https://example.com/raw.mov";
await runAgent(thumbnailPrompt);

// Create an HLS stream
const hlsPrompt = "Create an HLS stream from https://example.com/raw.mov";
await runAgent(hlsPrompt);

// Multiple tasks from one prompt
const multiTaskPrompt =
  "Take https://example.com/raw.mov and make a 720p MP4, a WebP thumbnail, and an HLS stream";
await runAgent(multiTaskPrompt);

```

```python
# Transcode to 720p
transcode_prompt = "Transcode https://example.com/raw.mov to 720p MP4"
run_agent(transcode_prompt)

# Generate a thumbnail
thumbnail_prompt = (
    "Grab a thumbnail at the 5 second mark from https://example.com/raw.mov"
)
run_agent(thumbnail_prompt)

# Create an HLS stream
hls_prompt = "Create an HLS stream from https://example.com/raw.mov"
run_agent(hls_prompt)

# Multiple tasks from one prompt
multi_task_prompt = (
    "Take https://example.com/raw.mov and make a 720p MP4, "
    "a WebP thumbnail, and an HLS stream"
)
run_agent(multi_task_prompt)
```

</CodeGroup>

For the last example, Mistral can issue multiple `process_media` calls in a single response — one for each task the user asked for.

## Polling for results

Tasks run asynchronously. Poll the task endpoint to check status, or use [webhooks](/docs/webhooks) for push notifications.

<CodeGroup labels={["TypeScript", "Python"]}>
```typescript
/**
 * Polls the Ittybit API until the task reports a terminal status.
 *
 * @param taskId - Identifier of the task to watch.
 * @param intervalMs - Delay between polls in milliseconds (defaults to 2000).
 * @returns The last response body, whose `status` is "completed" or "error".
 */
async function waitForTask(taskId: string, intervalMs = 2000): Promise<any> {
  // Neither the URL nor the headers change between polls, so build them once.
  const url = `https://api.ittybit.com/jobs/${taskId}`;
  const headers = {
    Authorization: `Bearer ${process.env.ITTYBIT_API_KEY}`,
  };

  while (true) {
    const res = await fetch(url, { headers });
    const data = await res.json();

    if (data.status === "completed" || data.status === "error") {
      return data;
    }

    // Not finished yet: wait before asking again.
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
}

```

```python

import os
import time

import requests


def wait_for_task(task_id: str, poll_interval: float = 2.0) -> dict:
    """Poll the Ittybit API until the task reports a terminal status.

    Args:
        task_id: Identifier of the task to watch.
        poll_interval: Seconds to sleep between polls (defaults to 2).

    Returns:
        The last response body, whose ``status`` is ``"completed"`` or
        ``"error"``.
    """
    # Neither the URL nor the headers change between polls, so build them once.
    url = f"https://api.ittybit.com/jobs/{task_id}"
    headers = {"Authorization": f"Bearer {os.environ['ITTYBIT_API_KEY']}"}

    while True:
        data = requests.get(url, headers=headers).json()

        if data["status"] in ("completed", "error"):
            return data

        # Not finished yet: wait before asking again.
        time.sleep(poll_interval)
```

</CodeGroup>

## See also

- [Create HLS streams](/guides/create-hls-streams)
- [Build a user upload pipeline](/guides/build-a-user-upload-pipeline)
- [Generate thumbnails](/guides/extract-thumbnails-from-video)
- [Mistral function calling docs](https://docs.mistral.ai/capabilities/function_calling/)