AI Frame Interpolation with RIFE: Smooth Any Video to 60fps
RIFE (Real-time Intermediate Flow Estimation) can double or quadruple the frame rate of any video using AI-generated intermediate frames. Full Python pipeline for 24fps → 60fps conversion.
Frame interpolation is one of the most immediately satisfying AI video effects: you feed in a choppy 24fps source and get smooth 60fps output with motion-blended intermediate frames that the original never contained. This is how TV manufacturers advertise "motion smoothing" — and RIFE does it better, locally, for free.
Here's how RIFE works internally, when it produces great results vs. artifacts, and a complete Python pipeline for video conversion.
How RIFE Works
RIFE (Real-time Intermediate Flow Estimation) is a convolutional neural network that estimates optical flow between two frames and synthesizes a realistic intermediate frame.
Given frame F₀ and frame F₁, RIFE:
- Estimates bidirectional optical flow fields (F₀→F₁ and F₁→F₀)
- Warps both frames toward the intermediate timestep
t = 0.5 - Uses a refinement network to fix occlusions and blending artifacts
- Outputs
F_t— a photorealistic interpolated frame
For 2x interpolation (24fps → 48fps), it inserts one frame between every pair. For 4x (24fps → 96fps), it recurses — generating midpoints between midpoints.
When RIFE Works Best
| Source Material | Quality | Notes | |---|---|---| | Live action, smooth motion | ⭐⭐⭐⭐⭐ | Ideal — clean optical flow | | Animation (30fps) | ⭐⭐⭐⭐ | Minor ghosting on fast cuts | | Video games / screen capture | ⭐⭐⭐⭐ | Great for gameplay footage | | Fast-moving sports | ⭐⭐⭐ | Artifacts on quick direction changes | | Old film (judder + grain) | ⭐⭐ | Grain confuses flow estimation | | Anime with 2s/frame holds | ⭐⭐ | "Soap opera effect" on stills |
Installation
# Clone RIFE implementation
git clone https://github.com/hzwer/Practical-RIFE.git
cd Practical-RIFE
pip install torch torchvision numpy opencv-python tqdm
# Download model weights (RIFE v4.6 recommended)
mkdir -p train_log
# Download from: https://github.com/hzwer/Practical-RIFE/releases
# Place model files in ./train_log/
Basic Frame Interpolation
import torch
import cv2
import numpy as np
from pathlib import Path
import sys
sys.path.append("./Practical-RIFE")
from model.RIFE import Model
def load_rife_model(model_dir: str = "./train_log", device: str = "auto") -> Model:
"""Load RIFE model."""
if device == "auto":
device = "cuda" if torch.cuda.is_available() else "cpu"
model = Model()
model.load_model(model_dir, -1)
model.eval()
model.device()
print(f"RIFE loaded on {device}")
return model
def interpolate_frame_pair(
model: Model,
frame0: np.ndarray,
frame1: np.ndarray,
timestep: float = 0.5, # 0.5 = midpoint between frames
scale: float = 1.0 # Spatial scale (reduce for speed on small GPUs)
) -> np.ndarray:
"""
Generate one interpolated frame between frame0 and frame1.
"""
device = "cuda" if torch.cuda.is_available() else "cpu"
def to_tensor(frame):
return torch.from_numpy(
frame.transpose(2, 0, 1).astype("float32") / 255.0
).unsqueeze(0).to(device)
I0 = to_tensor(cv2.cvtColor(frame0, cv2.COLOR_BGR2RGB))
I1 = to_tensor(cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB))
# Pad to multiples of 32 (required by RIFE architecture)
n, c, h, w = I0.shape
ph = ((h - 1) // 32 + 1) * 32
pw = ((w - 1) // 32 + 1) * 32
pad = (0, pw - w, 0, ph - h)
I0_pad = torch.nn.functional.pad(I0, pad)
I1_pad = torch.nn.functional.pad(I1, pad)
with torch.no_grad():
mid = model.inference(I0_pad, I1_pad, scale=scale, timestep=timestep)
# Crop back to original size
mid = mid[:, :, :h, :w]
# Convert back to numpy
mid_np = (mid[0].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
return cv2.cvtColor(mid_np, cv2.COLOR_RGB2BGR)
Full Video Interpolation Pipeline
from tqdm import tqdm
def interpolate_video(
input_path: str,
output_path: str,
multiplier: int = 2, # 2x, 4x, or 8x frame rate
model_dir: str = "./train_log",
scale: float = 1.0,
crf: int = 18, # ffmpeg CRF (18 = near-lossless, 23 = default)
preset: str = "slow" # ffmpeg encoding preset
) -> dict:
"""
Interpolate a video to N times its original frame rate.
2x: 24fps → 48fps
4x: 24fps → 96fps (applies 2x twice recursively)
"""
cap = cv2.VideoCapture(input_path)
src_fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
target_fps = src_fps * multiplier
print(f"Source: {src_fps:.2f}fps × {multiplier} = {target_fps:.2f}fps target")
print(f"Resolution: {w}x{h} | Total source frames: {total_frames}")
model = load_rife_model(model_dir)
# Write to temp file first, then mux with original audio
temp_output = output_path + ".noaudio.mp4"
writer = cv2.VideoWriter(
temp_output,
cv2.VideoWriter_fourcc(*"mp4v"),
target_fps,
(w, h)
)
prev_frame = None
frames_written = 0
with tqdm(total=total_frames, desc="Interpolating") as pbar:
while True:
ret, frame = cap.read()
if not ret:
break
if prev_frame is not None:
# For 2x: insert 1 frame between each pair
# For 4x: insert 3 frames (at t=0.25, 0.5, 0.75)
# For 8x: insert 7 frames
timesteps = np.linspace(0, 1, multiplier + 1)[1:-1]
for t in timesteps:
interp = interpolate_frame_pair(model, prev_frame, frame, timestep=t, scale=scale)
writer.write(interp)
frames_written += 1
writer.write(frame)
frames_written += 1
prev_frame = frame
pbar.update(1)
cap.release()
writer.release()
# Mux with original audio using ffmpeg
import subprocess
subprocess.run([
"ffmpeg", "-y",
"-i", temp_output,
"-i", input_path,
"-map", "0:v:0",
"-map", "1:a:0?", # ? = don't fail if no audio
"-c:v", "libx264",
"-crf", str(crf),
"-preset", preset,
"-c:a", "aac",
"-b:a", "192k",
output_path
], check=True)
Path(temp_output).unlink()
return {
"source_fps": src_fps,
"target_fps": target_fps,
"multiplier": multiplier,
"frames_written": frames_written,
"output": output_path
}
# 2x interpolation (24fps → 48fps)
result = interpolate_video(
"input_24fps.mp4",
"output_48fps.mp4",
multiplier=2,
scale=1.0,
crf=18
)
print(result)
# 4x interpolation (24fps → 96fps, near cinema-smooth)
result = interpolate_video(
"input_24fps.mp4",
"output_96fps.mp4",
multiplier=4,
scale=0.5, # Reduce scale for faster processing on lower-end GPUs
crf=20
)
Scene Cut Detection (Avoid Interpolating Across Cuts)
Interpolating across a hard cut produces garbage frames. Detect cuts first:
def detect_scene_cuts(video_path: str, threshold: float = 30.0) -> list:
"""
Detect scene cuts using frame difference.
Returns list of frame indices where cuts occur.
"""
cap = cv2.VideoCapture(video_path)
cuts = []
prev_frame = None
frame_idx = 0
while True:
ret, frame = cap.read()
if not ret:
break
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
if prev_frame is not None:
diff = cv2.absdiff(gray, prev_frame).mean()
if diff > threshold:
cuts.append(frame_idx)
prev_frame = gray
frame_idx += 1
cap.release()
print(f"Detected {len(cuts)} scene cuts")
return cuts
def interpolate_video_with_cut_detection(
input_path: str,
output_path: str,
multiplier: int = 2,
cut_threshold: float = 30.0,
**kwargs
):
"""
Interpolate video, skipping interpolation at scene cuts.
"""
cuts = detect_scene_cuts(input_path, cut_threshold)
cut_set = set(cuts)
cap = cv2.VideoCapture(input_path)
src_fps = cap.get(cv2.CAP_PROP_FPS)
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
model = load_rife_model(kwargs.get("model_dir", "./train_log"))
writer = cv2.VideoWriter(
output_path, cv2.VideoWriter_fourcc(*"mp4v"),
src_fps * multiplier, (w, h)
)
prev_frame = None
frame_idx = 0
while True:
ret, frame = cap.read()
if not ret:
break
if prev_frame is not None:
if frame_idx not in cut_set:
# Normal interpolation
timesteps = np.linspace(0, 1, multiplier + 1)[1:-1]
for t in timesteps:
interp = interpolate_frame_pair(model, prev_frame, frame, t)
writer.write(interp)
else:
# At a cut — duplicate the previous frame instead of interpolating
for _ in range(multiplier - 1):
writer.write(prev_frame)
writer.write(frame)
prev_frame = frame
frame_idx += 1
cap.release()
writer.release()
print(f"Done. Skipped interpolation at {len(cuts)} scene cuts.")
Performance Benchmarks
On RTX 3090, processing a 1080p video:
| Multiplier | Frames Added | Time per Source Frame | GPU VRAM | |---|---|---|---| | 2x | +1 per pair | ~45ms | ~3GB | | 4x | +3 per pair | ~130ms | ~3GB | | 8x | +7 per pair | ~290ms | ~4GB |
For a 2-minute 1080p24 clip (2,880 source frames), 2x interpolation takes ~4 minutes on a 3090.
The Animation Workspace packages RIFE interpolation with 1,188 lines of animation tooling: text-to-animation, video stylization, motion graphics, character animation, and frame rate conversion — everything you need for AI-assisted animation production.