AI Frame Interpolation with RIFE: Smooth Any Video to 60fps
Back to Blog
Animation · 8 min read

AI Frame Interpolation with RIFE: Smooth Any Video to 60fps

RIFE (Real-time Intermediate Flow Estimation) can double or quadruple the frame rate of any video using AI-generated intermediate frames. Full Python pipeline for 24fps → 60fps conversion.

NA
By NEPA AI
NEPA AI · Building autonomous systems for creators and businesses
#RIFE #frame interpolation #60fps #video AI #animation #python #ffmpeg

Running RIFE on your videos is a cheat code for making them look twice as smooth. Take a crusty 24fps source, run this, and suddenly it glides at 60fps. Here’s a real walkthrough—Python scripts, exact commands—no filler.

What RIFE Does

RIFE stands for Real-time Intermediate Flow Estimation. It’s a neural net that takes two frames (F₀, F₁), predicts how stuff’s moving between them, and generates made-up but photorealistic in-betweens.

Basic process:

  1. Estimate how everything moves in both directions.
  2. Warp each frame toward the middle point (t=0.5).
  3. Blend, fix occlusions, patch weirdness.
  4. Spit out a new frame somewhere between the originals.

Double the framerate, it inserts one new frame in every gap. Quadruple it? Same thing, just recurses and generates more fakes.

Where RIFE Actually Works

Forget the marketing, here’s how it lands in reality:

| Source | Quality |
|-----------------------|---------|
| Live action, smooth pan/tilt | ⭐⭐⭐⭐⭐ |
| Animation (30fps sources) | ⭐⭐⭐⭐ |
| Video games, screencast | ⭐⭐⭐⭐ |
| Fast sports (lots of motion) | ⭐⭐⭐ |
| Old film (judder/grain) | ⭐⭐ |
| Anime with static holds | ⭐⭐ |

You’ll see halo artifacts on dirty film and anime, but for clean 24–30fps video it’s a magic trick.

Install & Run

Clone the actual repo, grab the weights, and plug in the basics. No mystery here.

# Fetch the Practical-RIFE reference implementation and enter the checkout.
git clone https://github.com/hzwer/Practical-RIFE.git
cd Practical-RIFE
# Python dependencies for the interpolation pipeline.
pip install torch torchvision numpy opencv-python tqdm
# Default weights directory used by the loader (model_dir="./train_log").
mkdir -p train_log
# Put the RIFE model files into ./train_log/

Core: Frame Interpolation Function

Keep it direct:

import sys
from pathlib import Path

import cv2
import numpy as np
import torch

sys.path.append("./Practical-RIFE")
from model.RIFE import Model

def load_rife_model(model_dir="./train_log", device="auto"):
    """Load RIFE weights from ``model_dir`` and put the model in eval mode.

    Args:
        model_dir: Directory handed to ``Model.load_model``.
        device: ``"auto"`` resolves to ``"cuda"`` when
            ``torch.cuda.is_available()`` and to ``"cpu"`` otherwise. Any
            other value is treated as an explicit request and kept as given.

    Returns:
        The loaded ``Model`` instance after ``eval()`` has been called.

    Note:
        Within this function the resolved device is only reported in the log
        message; no explicit device transfer is performed here.
    """
    # Auto-detect only when asked to. The previous one-line expression
    # collapsed every explicit non-"auto" value (e.g. "cuda") to "cpu".
    if device == "auto":
        device = "cuda" if torch.cuda.is_available() else "cpu"

    model = Model()
    # The second argument (-1) is forwarded unchanged from the original call;
    # presumably the loader's rank selector -- confirm against Practical-RIFE.
    model.load_model(model_dir, -1)
    model.eval()
    print(f"Loaded RIFE on {device}")
    return model

def interpolate_frame_pair(model, frame0, frame1, timestep=0.5, scale=1.0):
    """Synthesize one intermediate frame between ``frame0`` and ``frame1``.

    Args:
        model: Object exposing ``inference(img0, img1, scale=..., timestep=...)``.
        frame0: First frame as a BGR image array (converted with
            ``cv2.COLOR_BGR2RGB``); assumed to be H x W x 3 with values in
            0-255 -- confirm against the caller.
        frame1: Second frame, same shape as ``frame0``.
        timestep: Position of the generated frame between the two inputs,
            forwarded to ``model.inference``.
        scale: Forwarded to ``model.inference`` unchanged.

    Returns:
        The interpolated frame as a ``uint8`` BGR array with the same height
        and width as the inputs.

    Raises:
        ValueError: If the two frames do not have the same shape.
    """
    if frame0.shape != frame1.shape:
        raise ValueError(
            f"Frame shapes differ: {frame0.shape} vs {frame1.shape}"
        )

    device = "cuda" if torch.cuda.is_available() else "cpu"

    def to_tensor(bgr):
        # BGR HxWxC image -> RGB 1xCxHxW float tensor scaled by 1/255.
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        chw = rgb.transpose(2, 0, 1).astype("float32") / 255.0
        return torch.from_numpy(chw).unsqueeze(0).to(device)

    I0 = to_tensor(frame0)
    I1 = to_tensor(frame1)
    _, _, h, w = I0.shape

    # Round height and width up to the next multiple of 32 and pad on the
    # right/bottom (presumably required by the network's downsampling --
    # confirm); the padding is cropped off again after inference.
    ph = ((h - 1) // 32 + 1) * 32
    pw = ((w - 1) // 32 + 1) * 32
    pad = (0, pw - w, 0, ph - h)
    I0_pad = torch.nn.functional.pad(I0, pad)
    I1_pad = torch.nn.functional.pad(I1, pad)

    with torch.no_grad():
        mid = model.inference(I0_pad, I1_pad, scale=scale, timestep=timestep)

    # Clamp before the uint8 cast: values slightly outside [0, 1] would
    # otherwise wrap around and show up as speckle artifacts.
    mid = mid[:, :, :h, :w].clamp(0.0, 1.0)
    mid_np = (mid[0].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
    return cv2.cvtColor(mid_np, cv2.COLOR_RGB2BGR)

End-to-End Video Smoothing

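Here’s the pipeline. It reads one frame at a time, inserts the interpolated frames between each pair, and writes the result at source fps × multiplier (so 30fps ×2 gives 60fps, while 24fps ×2 gives 48fps):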

from tqdm import tqdm

def interpolate_video(
    input_path, output_path,
    multiplier=2, model_dir="./train_log",
    scale=1.0, crf=18, preset="slow"
):
    """Interpolate a video to ``multiplier`` times its frame rate.

    Frames are read one at a time; between each consecutive pair,
    ``multiplier - 1`` interpolated frames are written, followed by the
    original frame. The silent intermediate file is then re-encoded with
    ffmpeg (libx264) and the first audio stream of the input, if any, is
    muxed in as AAC.

    Args:
        input_path: Source video path.
        output_path: Destination path for the final encoded video.
        multiplier: Integer frame-rate factor (>= 1).
        model_dir: Directory passed to ``load_rife_model``.
        scale: Forwarded to ``interpolate_frame_pair``.
        crf: x264 constant rate factor passed to ffmpeg.
        preset: x264 preset passed to ffmpeg.

    Returns:
        Dict with ``source_fps``, ``target_fps``, ``multiplier``,
        ``frames_written`` and ``output``.

    Raises:
        ValueError: If ``multiplier`` is below 1 or the source reports a
            non-positive frame rate.
        OSError: If the input cannot be opened or the writer cannot be created.
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    import subprocess

    if multiplier < 1:
        raise ValueError(f"multiplier must be >= 1, got {multiplier}")

    # Accept pathlib.Path as well as str; the caller's output_path object is
    # still returned untouched in the result dict.
    src = str(input_path)
    dst = str(output_path)
    temp_output = dst + ".noaudio.mp4"

    cap = cv2.VideoCapture(src)
    writer = None
    frames_written = 0
    try:
        try:
            if not cap.isOpened():
                raise OSError(f"Could not open input video: {src}")
            src_fps = cap.get(cv2.CAP_PROP_FPS)
            if src_fps <= 0:
                raise ValueError(f"Source reports invalid frame rate: {src_fps}")
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            target_fps = src_fps * multiplier
            print(f"Source: {src_fps:.2f}fps ×{multiplier} → {target_fps:.2f}fps")

            model = load_rife_model(model_dir)
            writer = cv2.VideoWriter(
                temp_output, cv2.VideoWriter_fourcc(*"mp4v"), target_fps, (w, h)
            )
            if not writer.isOpened():
                raise OSError(f"Could not open video writer for: {temp_output}")

            # Interior sample points only: the endpoints 0 and 1 are the
            # original frames themselves. Invariant, so computed once.
            timesteps = np.linspace(0, 1, multiplier + 1)[1:-1]
            prev_frame = None
            with tqdm(total=total_frames, desc="Interpolating") as pbar:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    if prev_frame is not None:
                        for t in timesteps:
                            interp = interpolate_frame_pair(
                                model, prev_frame, frame, t, scale
                            )
                            writer.write(interp)
                            frames_written += 1
                    writer.write(frame)
                    frames_written += 1
                    prev_frame = frame
                    pbar.update(1)
        finally:
            # Release even on failure; the writer must also be closed before
            # ffmpeg reads the intermediate file.
            cap.release()
            if writer is not None:
                writer.release()

        subprocess.run([
            "ffmpeg", "-y",
            "-i", temp_output,
            "-i", src,
            "-map", "0:v:0",
            "-map", "1:a:0?", # don’t die on missing audio
            "-c:v", "libx264",
            "-crf", str(crf),
            "-preset", preset,
            "-c:a", "aac",
            "-b:a", "192k",
            dst
        ], check=True)
    finally:
        # Never leave the silent intermediate behind, including when
        # interpolation or ffmpeg fails.
        temp_file = Path(temp_output)
        if temp_file.exists():
            temp_file.unlink()

    return {
        "source_fps": src_fps,
        "target_fps": target_fps,
        "multiplier": multiplier,
        "frames_written": frames_written,
        "output": output_path
    }

Optional: Scene Cut Detection

Interpolate across a jump cut? You’ll get a mangled frame. Here’s a minimalist scene cut detector, just pixel mean diff:

def detect_scene_cuts(video_path, threshold=30.0):
    """Return indices of frames that differ sharply from their predecessor.

    Each frame is converted to grayscale and compared with the previous one;
    when the mean absolute pixel difference exceeds ``threshold`` the index
    of the later frame is recorded.

    Args:
        video_path: Path of the video to scan.
        threshold: Mean absolute grayscale difference above which a cut is
            reported.

    Returns:
        List of zero-based frame indices at which a cut was detected.
    """
    capture = cv2.VideoCapture(video_path)
    cut_indices = []
    last_gray = None
    frame_no = 0

    ok, bgr = capture.read()
    while ok:
        current_gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        # The very first frame has nothing to be compared against.
        if last_gray is not None:
            mean_delta = cv2.absdiff(current_gray, last_gray).mean()
            if mean_delta > threshold:
                cut_indices.append(frame_no)
        last_gray = current_gray
        frame_no += 1
        ok, bgr = capture.read()

    capture.release()
    print(f"Detected {len(cut_indices)} scene cuts")
    return cut_indices

def interpolate_video_with_cut_detection(
    input_path, output_path, multiplier=2, cut_threshold=30.0,
    model_dir="./train_log", scale=1.0
):
    """Interpolate a video, skipping interpolation across detected scene cuts.

    A first pass runs ``detect_scene_cuts``. In the second pass, frame pairs
    that straddle a cut get ``multiplier - 1`` copies of the previous frame
    instead of interpolated frames, so the output frame count stays the same
    as for an uncut pair. The result is written directly with the ``mp4v``
    codec; no audio is copied by this function.

    Args:
        input_path: Source video path.
        output_path: Destination video path.
        multiplier: Integer frame-rate factor (>= 1).
        cut_threshold: Threshold forwarded to ``detect_scene_cuts``.
        model_dir: Directory passed to ``load_rife_model``.
        scale: Forwarded to ``interpolate_frame_pair``.

    Raises:
        ValueError: If ``multiplier`` is below 1.
        OSError: If the input cannot be opened or the writer cannot be created.
    """
    if multiplier < 1:
        raise ValueError(f"multiplier must be >= 1, got {multiplier}")

    cuts = set(detect_scene_cuts(input_path, cut_threshold))

    cap = cv2.VideoCapture(input_path)
    writer = None
    try:
        if not cap.isOpened():
            raise OSError(f"Could not open input video: {input_path}")
        src_fps = cap.get(cv2.CAP_PROP_FPS)
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        model = load_rife_model(model_dir)
        writer = cv2.VideoWriter(
            output_path, cv2.VideoWriter_fourcc(*"mp4v"),
            src_fps * multiplier, (w, h)
        )
        if not writer.isOpened():
            raise OSError(f"Could not open video writer for: {output_path}")

        # Interior sample points only; loop-invariant, so computed once.
        timesteps = np.linspace(0, 1, multiplier + 1)[1:-1]
        prev_frame, idx = None, 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if prev_frame is not None:
                if idx not in cuts:
                    for t in timesteps:
                        interp = interpolate_frame_pair(
                            model, prev_frame, frame, t, scale
                        )
                        writer.write(interp)
                else:
                    # Hold the last frame before the cut rather than blend
                    # two unrelated shots.
                    for _ in range(multiplier - 1):
                        writer.write(prev_frame)
            writer.write(frame)
            prev_frame = frame
            idx += 1
    finally:
        # Release handles even when interpolation fails part-way through.
        cap.release()
        if writer is not None:
            writer.release()
    print(f"Done. Skipped interpolation at {len(cuts)} cuts.")

Benchmark: How Fast?

On an RTX 3090:

  • 2x (1 frame per pair): ~45ms per base frame, ~3GB VRAM
  • 4x (3 fakes per pair): ~130ms/frame, still ~3GB
  • 8x: ~290ms/frame, up to 4GB

A two-minute 1080p24 clip (2880 frames) will finish 2x interpolation in ~4 mins.


If you’re building with AI (not just watching), check my tools at axon.nepa-ai.com.

ASUS ROG Strix GeForce RTX 4090 →

MSI Gaming GeForce RTX 4070 →