The AI Agent Content Pipeline: From Raw Footage to Posted — Fully Automated
Back to Blog
Content Creation· 9 min min read

The AI Agent Content Pipeline: From Raw Footage to Posted — Fully Automated

The full content workflow — film, ingest, transcribe, extract clips, caption, thumbnail, and post — can run automatically with AI agents. Here's the complete pipeline architecture and code.

NA
By NEPA AI
NEPA AI · Building autonomous systems for creators and businesses
#content pipeline#AI agents#video editing#automation#social media#ffmpeg#whisper#content creator

The bottleneck for most content creators is editing, captioning, thumbnails, and posting to multiple accounts — that takes 3-4 hours per video. An AI pipeline cuts this down to about 20 minutes of active work.

Here’s the full setup:

Pipeline Overview

  1. Ingest: Transfer footage + extract metadata
  2. Transcribe: Use Whisper for transcripts & timestamps
  3. Scene Detect: Shot detection to segment videos
  4. Clip Extract: Auto-find viral moments
  5. Audio Mix: Denoise, normalize audio, add background music if needed
  6. Caption Burn: Add styled captions to video
  7. Thumbnail Gen: Generate keyframes & AI text overlays for thumbnails
  8. Metadata Gen: Write platform-specific titles & hashtags with GPT
  9. Export: Optimize videos for 9:16, 16:9, and 1:1 formats
  10. Queue Post: Schedule or post to all accounts

Ingest & Metadata Extraction

import subprocess
from datetime import datetime

def ingest_footage(source_path):
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_id = f"job_{timestamp}"
    
    job_dir = Path(f"./jobs/{job_id}/")
    for subdir in ["input", "transcripts", "clips", "audio", "captions", 
                   "thumbnails", "export"]:
        (job_dir / subdir).mkdir(parents=True, exist_ok=True)
    
    input_path = job_dir / "input" / Path(source_path).name
    shutil.copy2(source_path, input_path)
    
    probe = subprocess.run(
        ["ffprobe", "-v", "quiet", "-print_format", "json",
         "-show_streams", "-show_format", str(input_path)],
        capture_output=True, text=True
    )
    
    job_data = {
        "job_id": job_id,
        "input_path": str(input_path),
        "status": "ingested"
    }
    
    with open(job_dir / "meta.json", 'w') as f:
        json.dump(job_data, f)
    print(f"✓ Ingested: {job_id} ({job_data['input_path']})")

Transcription

from faster_whisper import WhisperModel

def transcribe_footage(input_path):
    model = WhisperModel("medium", device="cuda", compute_type="float16")
    
    segments, info = model.transcribe(input_path)
    
    transcript_data = {
        "language": info.language,
        "duration": info.duration,
        "text": " ".join(s.text for s in segments),
        "segments": [
            {"start": s.start, "end": s.end, 
             "text": s.text.strip(), 
             "words": [{"word": w.word, "start": w.start, "end": w.end} 
                       for w in (s.words or [])]}
            for s in segments
        ]
    }
    
    with open(f"./jobs/job_*/transcripts/transcript.json", 'w') as f:
        json.dump(transcript_data, f)

Clip Extraction

import openai
from datetime import timedelta

client = openai.OpenAI()

def extract_viral_clips(job_id):
    transcript_path = Path(f"./jobs/{job_id}/transcripts/transcript.json")
    
    prompt = """
    Identify the best clips from this transcript for social media.
    Return JSON array under key "clips".
    """
    
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        response_format={"type": "json_object"},
        temperature=0.3
    )
    
    clips_data = json.loads(resp.choices[0].message.content).get("clips", [])
    for i, clip in enumerate(clips_data):
        duration = clip["end_time"] - clip["start_time"]
        output_path = f"./jobs/{job_id}/clips/clip_{i+1:03d}.mp4"
        
        subprocess.run([
            "ffmpeg", "-y",
            "-ss", str(clip["start_time"]),
            "-i", job_dir / "input" / Path(source_path).name,
            "-t", str(duration),
            "-c:v", "libx264", "-crf", "18",
            "-c:a", "aac", "-b:a", "128k",
            output_path
        ])
    
    with open(f"./jobs/{job_id}/clips/scores.json", 'w') as f:
        json.dump(clips_data, f)

Metadata Generation

def generate_post_metadata(job_id):
    transcript_path = Path(f"./jobs/{job_id}/transcripts/transcript.json")
    
    prompt = """
    Generate metadata for YouTube, Instagram, TikTok, and Twitter.
    Return JSON with platform-specific data.
    """
    
    resp = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        response_format={"type": "json_object"},
        temperature=0.6
    )
    
    metadata = json.loads(resp.choices[0].message.content)
    with open(f"./jobs/{job_id}/post_metadata.json", 'w') as f:
        json.dump(metadata, f)

Export & Queue

def export_for_platforms(job_id):
    job_dir = Path(f"./jobs/{job_id}")
    clip_files = sorted(job_dir.glob("clips/clip_*.mp4"))
    
    for filename, vf_args in exports.items():
        output_path = job_dir / "export" / filename
        subprocess.run([
            "ffmpeg", "-y",
            "-i", str(clip_files[0]),
            *vf_args,
            "-c:v", "libx264", "-crf", "20", "-preset", "fast",
            "-c:a", "aac", "-b:a", "128k",
            str(output_path)
        ], capture_output=True)

Full Pipeline

def run_full_pipeline(source_path, brand="nepa_ai"):
    job = ingest_footage(source_path)
    job = transcribe_footage(job["input_path"])
    job = extract_viral_clips(job["job_id"], max_clips=3)
    job = generate_post_metadata(job["job_id"], brand=brand)
    job = export_for_platforms(job["job_id"], clip_index=0)
    
    print(f"\n✅ Pipeline complete: {job['job_id']}")
    return job

# Run it
job = run_full_pipeline("./footage/todays_recording.mp4", brand="nepa_ai")

This pipeline runs in 8-12 minutes for a 30-minute recording on modern hardware. Review and approve the outputs before posting.


The NEPA AI Content Creator Stack packages everything — ingest, transcribe, clip extraction, captioning, thumbnail generation, metadata writing, and multi-platform posting — into one agent-driven workflow.

→ Get the Content Creator AI Stack at axon.nepa-ai.com

Samsung 990 PRO NVMe SSD 2TB →