Running RIFE on your videos is a cheat code for making them look twice as smooth. Take a crusty 24fps source, run this at 2x, and suddenly it glides at 48fps (a 30fps source lands at 60fps). Here’s a real walkthrough—Python scripts, exact commands—no filler.
What RIFE Does
RIFE stands for Real-time Intermediate Flow Estimation. It’s a neural net that takes two frames (F₀, F₁), predicts how stuff’s moving between them, and generates made-up but photorealistic in-betweens.
Basic process:
- Estimate how everything moves in both directions.
- Warp each frame toward the middle point (t=0.5).
- Blend, fix occlusions, patch weirdness.
- Spit out a new frame somewhere between the originals.
Double the framerate, it inserts one new frame in every gap. Quadruple it? Same thing, just recurses and generates more fakes.
Where RIFE Actually Works
Forget the marketing, here’s how it lands in reality:
| Source | Quality |
|-----------------------|---------|
| Live action, smooth pan/tilt | ⭐⭐⭐⭐⭐ |
| Animation (30fps sources) | ⭐⭐⭐⭐ |
| Video games, screencast | ⭐⭐⭐⭐ |
| Fast sports (lots of motion) | ⭐⭐⭐ |
| Old film (judder/grain) | ⭐⭐ |
| Anime with static holds | ⭐⭐ |
You’ll see halo artifacts on dirty film and anime, but for clean 24–30fps video it’s a magic trick.
Install & Run
Clone the actual repo, grab the weights, and plug in the basics. No mystery here.
git clone https://github.com/hzwer/Practical-RIFE.git
cd Practical-RIFE
pip install torch torchvision numpy opencv-python tqdm
mkdir -p train_log
# Put the RIFE model files into ./train_log/
Core: Frame Interpolation Function
Keep it direct:
import sys
from pathlib import Path

import cv2
import numpy as np
import torch

sys.path.append("./Practical-RIFE")
from model.RIFE import Model
def load_rife_model(model_dir="./train_log", device="auto"):
    """Load RIFE weights from *model_dir* and put the model in eval mode.

    Args:
        model_dir: Directory handed to ``Model.load_model``.
        device: ``"auto"`` resolves to ``"cuda"`` when CUDA is available and
            ``"cpu"`` otherwise. Any other value is kept as given, so an
            explicit ``"cuda"`` is no longer silently turned into ``"cpu"``.

    Returns:
        The loaded ``Model`` instance.
    """
    # Only resolve the placeholder; an explicit choice must be honoured.
    if device == "auto":
        device = "cuda" if torch.cuda.is_available() else "cpu"

    model = Model()
    model.load_model(model_dir, -1)
    model.eval()
    # NOTE(review): this function only reports the device; it does not move
    # the model itself. Confirm that Model handles its own device placement.
    print(f"Loaded RIFE on {device}")
    return model
def interpolate_frame_pair(model, frame0, frame1, timestep=0.5, scale=1.0):
    """Synthesize one intermediate frame between two BGR images.

    Args:
        model: Object exposing ``inference(img0, img1, scale=..., timestep=...)``.
        frame0: First frame, an H x W x 3 BGR array (presumably uint8, as
            produced by ``cv2.VideoCapture.read``).
        frame1: Second frame, same size and layout as *frame0*.
        timestep: Position of the generated frame between the two inputs,
            forwarded to ``model.inference``.
        scale: Processing scale forwarded to ``model.inference``; must be
            positive. It also determines the padding alignment.

    Returns:
        The interpolated frame as an H x W x 3 uint8 BGR array.

    Raises:
        ValueError: If *scale* is not positive.
    """
    if scale <= 0:
        raise ValueError(f"scale must be positive, got {scale}")

    device = "cuda" if torch.cuda.is_available() else "cpu"

    def to_tensor(bgr):
        # BGR HxWx3 -> RGB 1x3xHxW float32 in [0, 1] on the target device.
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        chw = rgb.transpose(2, 0, 1).astype("float32") / 255.0
        return torch.from_numpy(chw).unsqueeze(0).to(device)

    I0 = to_tensor(frame0)
    I1 = to_tensor(frame1)
    _, _, h, w = I0.shape

    # Pad height/width up to a multiple of the alignment. At scale 1.0 this is
    # 32 (the original behaviour); for scale < 1 the alignment grows so the
    # padded size stays divisible after the model downsizes its input.
    # NOTE(review): mirrors the padding rule used by the upstream inference
    # script; confirm against the model version in use.
    align = max(32, int(32 / scale))
    ph = ((h - 1) // align + 1) * align
    pw = ((w - 1) // align + 1) * align
    pad = (0, pw - w, 0, ph - h)
    I0_pad = torch.nn.functional.pad(I0, pad)
    I1_pad = torch.nn.functional.pad(I1, pad)

    with torch.no_grad():
        mid = model.inference(I0_pad, I1_pad, scale=scale, timestep=timestep)

    # Crop the padding away and clamp to [0, 1]: without the clamp, values
    # slightly outside the range wrap around when cast to uint8.
    mid = mid[:, :, :h, :w].clamp(0.0, 1.0)
    mid_np = (mid[0].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
    return cv2.cvtColor(mid_np, cv2.COLOR_RGB2BGR)
End-to-End Video Smoothing
Here’s the pipeline. It reads one frame at a time, injects interpolated ones, writes the result at `multiplier` × the source frame rate, then muxes the original audio back in with ffmpeg:
from tqdm import tqdm
def interpolate_video(
    input_path, output_path,
    multiplier=2, model_dir="./train_log",
    scale=1.0, crf=18, preset="slow"
):
    """Interpolate a video to ``multiplier`` x its frame rate and keep its audio.

    Frames are read one at a time; between each consecutive pair,
    ``multiplier - 1`` interpolated frames are written to a temporary
    video-only file. ffmpeg then re-encodes that file with libx264 and copies
    in the first audio stream of the input, if there is one.

    Args:
        input_path: Path of the source video.
        output_path: Path of the final encoded video.
        multiplier: Integer frame-rate multiplier (>= 1).
        model_dir: Directory holding the RIFE weights.
        scale: Processing scale forwarded to ``interpolate_frame_pair``.
        crf: x264 constant rate factor passed to ffmpeg.
        preset: x264 preset passed to ffmpeg.

    Returns:
        A dict with keys ``source_fps``, ``target_fps``, ``multiplier``,
        ``frames_written`` and ``output``.

    Raises:
        ValueError: If *multiplier* is below 1 or the source reports no fps.
        IOError: If the input cannot be opened or the temp writer cannot be
            created.
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    import subprocess

    if multiplier < 1:
        raise ValueError(f"multiplier must be >= 1, got {multiplier}")

    # Accept path-like objects; string concatenation below needs str.
    input_path = str(input_path)
    output_path = str(output_path)
    temp_output = output_path + ".noaudio.mp4"

    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Cannot open video: {input_path}")

    writer = None
    frames_written = 0
    try:
        try:
            src_fps = cap.get(cv2.CAP_PROP_FPS)
            if src_fps <= 0:
                raise ValueError(f"Source reports invalid fps: {src_fps}")
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            target_fps = src_fps * multiplier
            print(f"Source: {src_fps:.2f}fps ×{multiplier} → {target_fps:.2f}fps")

            model = load_rife_model(model_dir)
            writer = cv2.VideoWriter(
                temp_output, cv2.VideoWriter_fourcc(*"mp4v"), target_fps, (w, h)
            )
            if not writer.isOpened():
                raise IOError(f"Cannot open video writer: {temp_output}")

            # Fractional positions strictly between 0 and 1, loop-invariant.
            timesteps = np.linspace(0, 1, multiplier + 1)[1:-1]
            prev_frame = None
            # Some containers report no frame count; let tqdm run open-ended.
            bar_total = total_frames if total_frames > 0 else None
            with tqdm(total=bar_total, desc="Interpolating") as pbar:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    if prev_frame is not None:
                        for t in timesteps:
                            interp = interpolate_frame_pair(
                                model, prev_frame, frame, t, scale
                            )
                            writer.write(interp)
                            frames_written += 1
                    writer.write(frame)
                    frames_written += 1
                    prev_frame = frame
                    pbar.update(1)
        finally:
            # Release before ffmpeg reads the temp file, and on any error.
            cap.release()
            if writer is not None:
                writer.release()

        subprocess.run([
            "ffmpeg", "-y",
            "-i", temp_output,
            "-i", input_path,
            "-map", "0:v:0",
            "-map", "1:a:0?",  # optional mapping: do not fail on missing audio
            "-c:v", "libx264",
            "-crf", str(crf),
            "-preset", preset,
            "-c:a", "aac",
            "-b:a", "192k",
            output_path
        ], check=True)
    finally:
        # Never leave the intermediate file behind, even when ffmpeg fails.
        temp_file = Path(temp_output)
        if temp_file.exists():
            temp_file.unlink()

    return {
        "source_fps": src_fps,
        "target_fps": target_fps,
        "multiplier": multiplier,
        "frames_written": frames_written,
        "output": output_path
    }
Optional: Scene Cut Detection
Interpolate across a jump cut? You’ll get a mangled frame. Here’s a minimalist scene cut detector, just pixel mean diff:
def detect_scene_cuts(video_path, threshold=30.0):
    """Return the indices of frames that start a new scene.

    Each frame is converted to grayscale and compared with the previous one;
    when the mean absolute pixel difference exceeds *threshold*, the index of
    the current frame (0-based) is recorded as a cut.

    Args:
        video_path: Path of the video to scan.
        threshold: Mean absolute grayscale difference above which a cut is
            reported.

    Returns:
        List of frame indices, in ascending order.
    """
    capture = cv2.VideoCapture(video_path)
    cut_indices = []
    last_gray = None
    frame_no = 0

    ok, image = capture.read()
    while ok:
        current_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # The first frame has nothing to be compared with.
        if last_gray is not None:
            mean_delta = cv2.absdiff(current_gray, last_gray).mean()
            if mean_delta > threshold:
                cut_indices.append(frame_no)
        last_gray = current_gray
        frame_no += 1
        ok, image = capture.read()

    capture.release()
    print(f"Detected {len(cut_indices)} scene cuts")
    return cut_indices
def interpolate_video_with_cut_detection(
    input_path, output_path, multiplier=2, cut_threshold=30.0,
    model_dir="./train_log", scale=1.0
):
    """Interpolate a video, duplicating frames instead of blending across cuts.

    Scene cuts are detected first with ``detect_scene_cuts``. For every pair
    of consecutive frames, ``multiplier - 1`` frames are inserted: interpolated
    ones normally, or copies of the previous frame when the current frame
    index is a detected cut. The result is written straight to *output_path*
    as an mp4v stream at ``multiplier`` x the source frame rate.

    Args:
        input_path: Path of the source video.
        output_path: Path of the video to write.
        multiplier: Integer frame-rate multiplier (>= 1).
        cut_threshold: Threshold forwarded to ``detect_scene_cuts``.
        model_dir: Directory holding the RIFE weights.
        scale: Processing scale forwarded to ``interpolate_frame_pair``.

    Raises:
        ValueError: If *multiplier* is below 1.
        IOError: If the input cannot be opened or the writer cannot be created.
    """
    if multiplier < 1:
        raise ValueError(f"multiplier must be >= 1, got {multiplier}")

    cuts = set(detect_scene_cuts(input_path, cut_threshold))

    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Cannot open video: {input_path}")

    writer = None
    try:
        src_fps = cap.get(cv2.CAP_PROP_FPS)
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        model = load_rife_model(model_dir)
        writer = cv2.VideoWriter(
            output_path, cv2.VideoWriter_fourcc(*"mp4v"), src_fps * multiplier, (w, h)
        )
        if not writer.isOpened():
            raise IOError(f"Cannot open video writer: {output_path}")

        # Fractional positions strictly between 0 and 1, loop-invariant.
        timesteps = np.linspace(0, 1, multiplier + 1)[1:-1]
        prev_frame, idx = None, 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if prev_frame is not None:
                if idx not in cuts:
                    for t in timesteps:
                        interp = interpolate_frame_pair(
                            model, prev_frame, frame, t, scale
                        )
                        writer.write(interp)
                else:
                    # Hold the last frame of the old scene so the output keeps
                    # a constant frame count per source frame.
                    for _ in range(multiplier - 1):
                        writer.write(prev_frame)
            writer.write(frame)
            prev_frame = frame
            idx += 1
    finally:
        # Release handles even when interpolation or writing fails.
        cap.release()
        if writer is not None:
            writer.release()

    print(f"Done. Skipped interpolation at {len(cuts)} cuts.")
Benchmark: How Fast?
On an RTX 3090:
- 2x (1 frame per pair): ~45ms per base frame, ~3GB VRAM
- 4x (3 fakes per pair): ~130ms/frame, still ~3GB
- 8x: ~290ms/frame, up to 4GB
A two-minute 1080p24 clip (2880 frames) will finish 2x interpolation in ~4 mins.
If you’re building with AI (not just watching), check my tools at axon.nepa-ai.com.



