Podcast to blog post with OpenAI and Ittybit

View Markdown

“Podscribe” (a podcast publishing tool) lets creators upload an episode and get a polished blog post back. Ittybit normalizes the audio to a clean MP3, Whisper transcribes it, and GPT-4o turns the transcript into a blog post with show notes.

Extract and normalize the audio

Podcast uploads come in all shapes — WAV from a DAW, M4A from a phone, video files from Riverside. Normalize everything to a consistent MP3 before sending to Whisper.

// Raw episode upload that will be converted to MP3.
const EPISODE_URL = "https://podscribe-app.com/uploads/ep-47-raw.wav";

// Create the audio job. The request body asks for MP3 output at "high" quality.
const res = await fetch("https://api.ittybit.com/jobs", {
  method: "POST",
  headers: {
    Authorization: `Bearer ${process.env.ITTYBIT_API_KEY}`,
    "Content-Type": "application/json",
  },
  body: JSON.stringify({
    kind: "audio",
    input: EPISODE_URL,
    options: {
      format: "mp3",
      quality: "high",
    },
  }),
});

// fetch() resolves on 4xx/5xx responses too, so fail loudly here instead of
// treating an error body as if it were a task.
if (!res.ok) {
  throw new Error(`Ittybit job creation failed: ${res.status} ${await res.text()}`);
}
const task = await res.json();

console.log(task.id); // "task_abc123"
console.log(task.status); // "queued"

import os
import requests

# Raw episode upload that will be converted to MP3.
EPISODE_URL = "https://podscribe-app.com/uploads/ep-47-raw.wav"
# Raises KeyError immediately if the API key is not configured.
ITTYBIT_API_KEY = os.environ["ITTYBIT_API_KEY"]

# Create the audio job. The request body asks for MP3 output at "high" quality.
res = requests.post(
    "https://api.ittybit.com/jobs",
    headers={"Authorization": f"Bearer {ITTYBIT_API_KEY}"},
    json={
        "kind": "audio",
        "input": EPISODE_URL,
        "options": {
            "format": "mp3",
            "quality": "high",
        },
    },
    # requests has no default timeout; without one a stalled connection hangs forever.
    timeout=30,
)
# requests does not raise on 4xx/5xx by itself, so surface API errors here
# instead of treating an error body as if it were a task.
res.raise_for_status()
task = res.json()

print(task["id"])     # "task_abc123"
print(task["status"]) # "queued"

Poll for the processed audio

The task moves through queued → processing → succeeded (or failed if something goes wrong). Once it succeeds, the result contains the URL of the normalized MP3.

/**
 * Polls an Ittybit job until its status is terminal.
 *
 * @param taskId - ID of the job to poll.
 * @param intervalMs - Delay between polls in milliseconds (default 2000).
 * @param timeoutMs - Maximum total time to keep polling, in milliseconds
 *   (default 30 minutes). Prevents a stuck job from blocking the caller forever.
 * @returns The task payload once its status is "succeeded" or "failed".
 *   Callers must check `status` themselves; a failed task is returned, not thrown.
 * @throws If a poll responds with a non-2xx status, or the deadline passes
 *   before the task reaches a terminal status.
 */
async function waitForTask(
  taskId: string,
  intervalMs = 2000,
  timeoutMs = 30 * 60 * 1000,
): Promise<any> {
  const deadline = Date.now() + timeoutMs;

  while (true) {
    const res = await fetch(`https://api.ittybit.com/jobs/${taskId}`, {
      headers: {
        Authorization: `Bearer ${process.env.ITTYBIT_API_KEY}`,
      },
    });

    // fetch() does not reject on HTTP errors; without this check an error body
    // (which has no terminal `status`) would keep the loop spinning.
    if (!res.ok) {
      throw new Error(`Ittybit status check for ${taskId} failed: HTTP ${res.status}`);
    }

    const task = await res.json();

    if (task.status === "succeeded" || task.status === "failed") {
      return task;
    }

    if (Date.now() >= deadline) {
      throw new Error(
        `Timed out after ${timeoutMs} ms waiting for task ${taskId} (last status: ${task.status})`,
      );
    }

    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
}

const completed = await waitForTask(task.id);

// waitForTask also returns when the job ends in "failed", so confirm success
// before reading the output URL.
if (completed.status !== "succeeded") {
  throw new Error(`Ittybit task ${task.id} ended with status "${completed.status}"`);
}
const audioUrl = completed.output.url;

console.log(audioUrl);
// "https://cdn.ittybit.com/.../ep-47.mp3"

import time


def wait_for_task(
    task_id: str,
    poll_interval: float = 2.0,
    timeout: float = 1800.0,
) -> dict:
    """Poll an Ittybit job until its status is terminal.

    Args:
        task_id: ID of the job to poll.
        poll_interval: Seconds to sleep between polls (default 2).
        timeout: Maximum total seconds to keep polling (default 30 minutes),
            so a stuck job cannot block the caller forever.

    Returns:
        The task payload once its status is "succeeded" or "failed". A failed
        task is returned, not raised; callers must check ``status``.

    Raises:
        requests.HTTPError: If a poll responds with a 4xx/5xx status.
        TimeoutError: If the deadline passes before a terminal status is seen.
    """
    deadline = time.monotonic() + timeout

    while True:
        res = requests.get(
            f"https://api.ittybit.com/jobs/{task_id}",
            headers={"Authorization": f"Bearer {ITTYBIT_API_KEY}"},
            # requests has no default timeout; bound each individual poll.
            timeout=30,
        )
        # Without this, an error body lacking "status" would surface as a KeyError.
        res.raise_for_status()
        task = res.json()

        if task["status"] in ("succeeded", "failed"):
            return task

        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Timed out after {timeout}s waiting for task {task_id} "
                f"(last status: {task['status']})"
            )

        time.sleep(poll_interval)


completed = wait_for_task(task["id"])

# wait_for_task also returns when the job ends in "failed", so confirm success
# before reading the output URL.
if completed["status"] != "succeeded":
    raise RuntimeError(
        f'Ittybit task {task["id"]} ended with status {completed["status"]!r}'
    )
audio_url = completed["output"]["url"]

print(audio_url)
# "https://cdn.ittybit.com/.../ep-47.mp3"

Transcribe with Whisper

Download the normalized MP3 and send it to OpenAI’s Whisper API. The clean, consistent format means fewer transcription errors.

import OpenAI from "openai";
import fs from "node:fs";

// With no arguments the client takes its configuration from the environment.
const openai = new OpenAI();

// Download the processed audio
const audio = await fetch(audioUrl);
// fetch() resolves on 4xx/5xx too; without this check an error page would be
// written to disk and uploaded as if it were the MP3.
if (!audio.ok) {
  throw new Error(`Failed to download processed audio: HTTP ${audio.status}`);
}
const buffer = Buffer.from(await audio.arrayBuffer());
fs.writeFileSync("/tmp/episode.mp3", buffer);

// NOTE(review): OpenAI documents an upload size limit for transcription
// (25 MB at the time of writing); long episodes may need to be split or
// encoded at a lower bitrate. Confirm against the current API docs.
const transcription = await openai.audio.transcriptions.create({
  model: "whisper-1",
  file: fs.createReadStream("/tmp/episode.mp3"),
  // verbose_json includes per-segment data in addition to the full text.
  response_format: "verbose_json",
});

// `segments` is optional on the response type; report 0 rather than "undefined".
console.log(`Transcribed ${transcription.segments?.length ?? 0} segments`);
console.log(transcription.text.slice(0, 200));

from openai import OpenAI

# With no arguments the client takes its configuration from the environment.
client = OpenAI()

# Download the processed audio
audio = requests.get(audio_url, timeout=60)
# Without this check an error page would be written to disk and uploaded
# as if it were the MP3.
audio.raise_for_status()
with open("/tmp/episode.mp3", "wb") as f:
    f.write(audio.content)

# NOTE(review): OpenAI documents an upload size limit for transcription
# (25 MB at the time of writing); long episodes may need to be split or
# encoded at a lower bitrate. Confirm against the current API docs.
with open("/tmp/episode.mp3", "rb") as f:
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=f,
        # verbose_json includes per-segment data in addition to the full text.
        response_format="verbose_json",
    )

# `segments` may be None on the response object; treat that as zero segments.
print(f"Transcribed {len(transcription.segments or [])} segments")
print(transcription.text[:200])

Generate the blog post with GPT-4

Pass the full transcript to GPT-4o with instructions to produce a blog post and structured show notes.

/** Formats a second offset as `m:ss`, or `h:mm:ss` once it reaches an hour. */
function formatTimestamp(totalSeconds: number): string {
  const whole = Math.floor(totalSeconds);
  const hours = Math.floor(whole / 3600);
  const minutes = Math.floor((whole % 3600) / 60);
  const seconds = String(whole % 60).padStart(2, "0");
  return hours > 0
    ? `${hours}:${String(minutes).padStart(2, "0")}:${seconds}`
    : `${minutes}:${seconds}`;
}

// The prompt asks for highlights with a `time`, but `transcription.text` carries
// no timing information, so the model could only invent timestamps. Prefix each
// segment with its start time; fall back to the plain text if there are no segments.
const timedTranscript = transcription.segments?.length
  ? transcription.segments
      .map((segment) => `[${formatTimestamp(segment.start)}] ${segment.text.trim()}`)
      .join("\n")
  : transcription.text;

const response = await openai.chat.completions.create({
  model: "gpt-4o",
  messages: [
    {
      role: "system",
      content: `You are a podcast editor. Given a transcript, produce:
1. A blog post (800-1200 words) that captures the key ideas in a readable narrative.
   Use markdown formatting with headings.
2. Show notes as a JSON object with fields: title, summary (2-3 sentences),
   topics (array of strings), and highlights (array of {time, description}).

Return your response as JSON with two keys: "blog_post" (markdown string)
and "show_notes" (object).`,
    },
    {
      role: "user",
      content: timedTranscript,
    },
  ],
  // JSON mode: the reply is a single JSON object, so JSON.parse below is safe
  // as long as content is present.
  response_format: { type: "json_object" },
});

// `content` is nullable on the response type; guard instead of asserting with `!`.
const content = response.choices[0]?.message.content;
if (!content) {
  throw new Error("Chat completion returned no content");
}
const result = JSON.parse(content);

console.log("Blog post length:", result.blog_post.length);
console.log("Topics:", result.show_notes.topics);
console.log("Highlights:", result.show_notes.highlights.length);

import json


def _format_timestamp(total_seconds: float) -> str:
    """Format a second offset as ``m:ss``, or ``h:mm:ss`` once it reaches an hour."""
    whole = int(total_seconds)
    hours, remainder = divmod(whole, 3600)
    minutes, seconds = divmod(remainder, 60)
    if hours:
        return f"{hours}:{minutes:02d}:{seconds:02d}"
    return f"{minutes}:{seconds:02d}"


# The prompt asks for highlights with a `time`, but `transcription.text` carries
# no timing information, so the model could only invent timestamps. Prefix each
# segment with its start time; fall back to the plain text if there are no segments.
_segments = transcription.segments or []
timed_transcript = (
    "\n".join(
        f"[{_format_timestamp(segment.start)}] {segment.text.strip()}"
        for segment in _segments
    )
    if _segments
    else transcription.text
)

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "system",
            "content": (
                "You are a podcast editor. Given a transcript, produce:\n"
                "1. A blog post (800-1200 words) that captures the key ideas in a readable narrative.\n"
                "   Use markdown formatting with headings.\n"
                "2. Show notes as a JSON object with fields: title, summary (2-3 sentences),\n"
                "   topics (array of strings), and highlights (array of {time, description}).\n\n"
                "Return your response as JSON with two keys: \"blog_post\" (markdown string) "
                "and \"show_notes\" (object)."
            ),
        },
        {
            "role": "user",
            "content": timed_transcript,
        },
    ],
    # JSON mode: the reply is a single JSON object, so json.loads below is safe
    # as long as content is present.
    response_format={"type": "json_object"},
)

# `content` may be None; fail with a clear message instead of a TypeError
# from json.loads.
content = response.choices[0].message.content
if not content:
    raise RuntimeError("Chat completion returned no content")
result = json.loads(content)

print("Blog post length:", len(result["blog_post"]))
print("Topics:", result["show_notes"]["topics"])
print("Highlights:", len(result["show_notes"]["highlights"]))

The output looks like this:

{
  "blog_post": "# Why We Rebuilt Our Auth System From Scratch\n\nAfter three years of...",
  "show_notes": {
    "title": "Rebuilding Auth: Lessons from the Trenches",
    "summary": "Sarah and Mike discuss why they scrapped their authentication system and rebuilt it. They cover the security incidents that triggered the rewrite and the architectural decisions that made it work.",
    "topics": ["authentication", "security", "system design", "technical debt"],
    "highlights": [
      { "time": "4:30", "description": "The security incident that started it all" },
      { "time": "12:15", "description": "Why OAuth alone wasn't enough" },
      { "time": "23:00", "description": "The migration strategy that avoided downtime" }
    ]
  }
}

Podcast to blog post with OpenAI and Ittybit

Extract and normalize the audio

Poll for the processed audio

Transcribe with Whisper

Generate the blog post with GPT-4

See also