The Affiliate Content Pipeline
Stage 1: Product Research Automation
import json
import time

import httpx
from bs4 import BeautifulSoup
def scrape_amazon_bestsellers(
    category_url: str,
    pages: int = 3,
    affiliate_tag: str = "YOUR_TAG-20",
) -> list[dict]:
    """Scrape product listings from an Amazon best-sellers category.

    Args:
        category_url: Base URL of the best-sellers category; ``?pg=N`` is
            appended for each page.
        pages: Number of result pages to fetch, starting at page 1.
        affiliate_tag: Associates tag placed in the ``tag`` query parameter
            of every generated affiliate URL.

    Returns:
        One dict per product with the string keys ``title``, ``asin``,
        ``price``, ``rating`` and ``affiliate_url``. Fields that could not
        be located on the page are empty strings. Pages that fail to
        download are skipped, so the list may be empty.
    """
    products = []
    for page in range(1, pages + 1):
        if page > 1:
            # Be respectful: pause between requests (not after the last one).
            time.sleep(1)

        url = f"{category_url}?pg={page}"
        try:
            resp = httpx.get(url)
            # Without this an error/captcha page would be parsed as if it
            # were a listing and silently yield zero products.
            resp.raise_for_status()
        except httpx.HTTPError as exc:
            print(f" Failed to fetch {url}: {exc}")
            continue

        soup = BeautifulSoup(resp.text, "html.parser")
        for item in soup.select(".zg-grid-general-faceout"):
            title_el = item.select_one(
                ".p13n-sc-truncate-desktop-type2, "
                "._cDEzb_p13n-sc-css-line-clamp-3_g3dy1"
            )
            if not title_el:
                continue

            # The original selector was ".p1n-sc-price" (typo for "p13n"),
            # which never matched. The substring form also covers the
            # hash-suffixed class variants used alongside the plain one.
            price_el = item.select_one(
                ".p13n-sc-price, [class*='p13n-sc-price']"
            )
            rating_el = item.select_one("i.a-icon-star")
            asin_el = item.select_one("a[href*='/dp/']")

            asin = ""
            if asin_el:
                href = asin_el.get("href", "")
                _, found, tail = href.partition("/dp/")
                if found:
                    # Drop any trailing path segment or query string.
                    asin = tail.split("/", 1)[0].split("?", 1)[0]

            products.append({
                "title": title_el.text.strip(),
                "asin": asin,
                "price": price_el.text.strip() if price_el else "",
                "rating": rating_el.text.strip() if rating_el else "",
                "affiliate_url": (
                    f"https://amazon.com/dp/{asin}?tag={affiliate_tag}"
                    if asin
                    else ""
                ),
            })
    return products
Stage 2: Keyword Research with GPT
import openai
def generate_article_keywords(product: dict) -> list[dict]:
    """Ask the chat model for SEO article ideas targeting *product*.

    Args:
        product: Product dict; the ``title`` and ``price`` keys are read.

    Returns:
        A list of idea dicts as produced by the model (the prompt requests
        the keys ``slug``, ``title``, ``keyword``, ``intent`` and
        ``estimated_difficulty``). Returns an empty list when the reply is
        not valid JSON or does not contain a list of ideas.
    """
    # JSON-object mode cannot return a bare array, so the prompt names the
    # wrapper key that is read back below.
    prompt = f"""
Product: {product['title']}
Price: {product['price']}
Generate 5 SEO blog article ideas targeting this product. For each, provide:
- slug: URL-friendly slug
- title: SEO-optimized H1 title
- keyword: primary keyword to target
- intent: informational | commercial | transactional
- estimated_difficulty: easy | medium | hard
Focus on long-tail, low-competition keywords.
Return a JSON object with a single key "articles" whose value is the array of 5 ideas.
"""
    # openai>=1.0 removed openai.ChatCompletion; use the module-level client.
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        response_format={"type": "json_object"},
        temperature=0.5,
    )
    content = response.choices[0].message.content or ""
    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        return []

    # Tolerate a bare array as well as the requested {"articles": [...]}.
    articles = data.get("articles", []) if isinstance(data, dict) else data
    if not isinstance(articles, list):
        return []
    return [idea for idea in articles if isinstance(idea, dict)]
Stage 3: Article Generation
def generate_affiliate_article(product: dict, keyword_data: dict) -> str:
    """Generate a Markdown affiliate article for *product* with the chat model.

    Args:
        product: Product dict; the ``title``, ``price``, ``rating``, ``asin``
            and ``affiliate_url`` keys are read.
        keyword_data: Article idea dict; the ``keyword``, ``title`` and
            ``intent`` keys are read.

    Returns:
        The article body as returned by the model (no frontmatter), or an
        empty string if the model returned no content.
    """
    product_context = f"""
Product: {product['title']}
Price: {product['price']}
Rating: {product['rating']}
ASIN: {product['asin']}
Affiliate URL: {product['affiliate_url']}
"""
    article_prompt = f"""
Write a complete, high-quality SEO blog article with these specifications:
{product_context}
Target keyword: {keyword_data['keyword']}
Article title: {keyword_data['title']}
Search intent: {keyword_data['intent']}
Requirements:
- Length: 1000-1500 words
- Include an H1 title (the provided title)
- Use H2 and H3 subheadings throughout
- Write an engaging introduction that mentions the main problem the product solves
- Include a section on key features (use a bullet list or table)
- Include a section on pros and cons
- Include a "Who Is This For?" section
- Include a "Bottom Line" conclusion with a clear recommendation
- Place the affiliate link naturally in: intro, features section, and conclusion
- Use this for affiliate links: [Check Price on Amazon]({product['affiliate_url']})
- Write in a helpful, honest tone — mention limitations, not just positives
- Include secondary keywords naturally: don't keyword-stuff
- Format in clean Markdown
Do not add frontmatter — that will be added separately.
"""
    # openai>=1.0 removed openai.ChatCompletion; use the module-level client.
    # NOTE(review): max_tokens=2000 may truncate articles near the upper end
    # of the requested 1000-1500 word range — confirm against real output.
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": article_prompt}],
        max_tokens=2000,
        temperature=0.7,
    )
    # ``content`` may be None; honour the declared ``str`` return type.
    return response.choices[0].message.content or ""
def build_mdx_file(product: dict, keyword_data: dict, article_body: str) -> str:
    """Prepend YAML frontmatter to *article_body* and return the MDX text.

    Args:
        product: Product dict; the first 30 characters of ``title`` become
            one of the tags.
        keyword_data: Article idea dict; ``title`` and ``keyword`` are read.
        article_body: Markdown body to place after the frontmatter.

    Returns:
        The frontmatter block (delimited by ``---`` lines and stamped with
        today's date) followed immediately by *article_body*.
    """
    import datetime

    def _quoted(value) -> str:
        # A JSON string/array is also a valid YAML double-quoted scalar /
        # flow sequence, so this escapes embedded quotes and backslashes
        # (e.g. a title containing 27") instead of breaking the frontmatter.
        return json.dumps(value, ensure_ascii=False)

    today = datetime.date.today().isoformat()
    keyword = keyword_data["keyword"]
    tags = [keyword, product["title"][0:30], "review", "affiliate"]
    excerpt = (
        f"Looking for the best {keyword}? We break down features, "
        "pricing, and who it's actually for."
    )

    frontmatter_lines = [
        "---",
        f"title: {_quoted(keyword_data['title'])}",
        f"date: {_quoted(today)}",
        'category: "Product Reviews"',
        f"tags: {_quoted(tags)}",
        f"excerpt: {_quoted(excerpt)}",
        'readTime: "6 min"',
        "---",
        "",  # yields the newline after the closing delimiter
    ]
    return "\n".join(frontmatter_lines) + article_body
Stage 4: SEO Validation
import re
def validate_seo(article: str, keyword: str) -> dict:
    """Run basic SEO heuristics over a Markdown article.

    Checks word count (minimum 800), case-insensitive keyword occurrences
    (at least 3) and density (at most 3%), number of ``## `` headings
    (at least 3) and the presence of an ``amazon.com`` link.

    Args:
        article: Markdown text of the article.
        keyword: Primary keyword phrase the article targets.

    Returns:
        A dict with ``valid`` (True when no issues were found),
        ``word_count``, ``keyword_density`` (percent, rounded to 2 places),
        ``h2_count`` and ``issues`` (list of human-readable messages).
    """
    issues = []

    # Word count
    word_count = len(article.split())
    if word_count < 800:
        issues.append(f"Too short: {word_count} words (minimum 800)")

    # Keyword density
    keyword_lower = keyword.lower()
    # str.count("") returns len + 1, so an empty keyword counts as absent.
    keyword_count = article.lower().count(keyword_lower) if keyword_lower else 0
    # Guard the empty-article case, which previously raised ZeroDivisionError.
    density = (keyword_count / word_count) * 100 if word_count else 0.0
    if keyword_count < 3:
        issues.append(f"Keyword appears only {keyword_count} times — add more")
    if density > 3.0:
        issues.append(f"Keyword density {density:.1f}% — too high, risks penalty")

    # Heading structure
    h2_count = len(re.findall(r'^## ', article, re.MULTILINE))
    if h2_count < 3:
        issues.append(f"Only {h2_count} H2 headings — add more structure")

    # Affiliate link present
    if "amazon.com" not in article:
        issues.append("No affiliate link found")

    return {
        "valid": not issues,
        "word_count": word_count,
        "keyword_density": round(density, 2),
        "h2_count": h2_count,
        "issues": issues,
    }
Stage 5: Auto-Publish to CMS
import subprocess
from pathlib import Path
def publish_article(slug: str, mdx_content: str, blog_dir: str) -> bool:
    """Write *mdx_content* to ``<blog_dir>/<slug>.mdx`` unless it already exists.

    Args:
        slug: File name stem for the article.
        mdx_content: Full MDX text to write (UTF-8).
        blog_dir: Directory that holds the blog's ``.mdx`` files; created
            (with parents) if it does not exist.

    Returns:
        True if the file was written, False if a file with that slug was
        already present (it is left untouched).
    """
    blog_path = Path(blog_dir)
    # A fresh checkout may not have the content directory yet.
    blog_path.mkdir(parents=True, exist_ok=True)
    article_file = blog_path / f"{slug}.mdx"

    try:
        # Mode "x" creates the file atomically and fails if it exists,
        # avoiding the check-then-write race of exists() + write_text().
        with article_file.open("x", encoding="utf-8") as handle:
            handle.write(mdx_content)
    except FileExistsError:
        print(f" Skipping {slug} — already exists")
        return False

    print(f" ✓ Written: {article_file.name}")
    return True
def _slugify(text: str) -> str:
    """Reduce *text* to a lowercase slug of ``a-z``, ``0-9``, ``_`` and ``-``."""
    return re.sub(r"[^a-z0-9_-]+", "-", text.lower()).strip("-")


def run_pipeline(
    category_url: str,
    blog_dir: str,
    articles_per_run: int = 5
) -> int:
    """Scrape products, generate one article per product and publish them.

    For each scraped product an article idea is chosen (the first one rated
    ``easy``, otherwise the first idea), the article is generated, checked
    with ``validate_seo`` (issues are printed but do not block publishing)
    and written to *blog_dir*.

    Args:
        category_url: Amazon best-sellers category URL to scrape.
        blog_dir: Directory that receives the generated ``.mdx`` files.
        articles_per_run: Maximum number of articles to publish in this run.

    Returns:
        The number of articles actually published.
    """
    products = scrape_amazon_bestsellers(category_url, pages=2)
    published = 0
    for product in products:
        if published >= articles_per_run:
            break

        keyword_options = generate_article_keywords(product)
        if not keyword_options:
            continue
        keyword_data = next(
            (k for k in keyword_options if k.get("estimated_difficulty") == "easy"),
            keyword_options[0]
        )

        # The slug comes from model output and becomes a file name, so it is
        # normalised first; an empty/missing slug falls back to the keyword.
        slug = _slugify(keyword_data.get("slug") or keyword_data["keyword"])
        if not slug:
            continue

        # Check before generating: the article call is the expensive step
        # and its output would be discarded for an existing slug.
        if (Path(blog_dir) / f"{slug}.mdx").exists():
            print(f" Skipping {slug} — already exists")
            continue

        print(f" Generating article for: {keyword_data['keyword']}")
        article_body = generate_affiliate_article(product, keyword_data)

        seo_check = validate_seo(article_body, keyword_data['keyword'])
        if not seo_check['valid']:
            print(f" SEO issues: {seo_check['issues']}")

        mdx_content = build_mdx_file(product, keyword_data, article_body)
        if publish_article(slug, mdx_content, blog_dir):
            published += 1
            print(f" ✓ Published ({published}/{articles_per_run})")

    print(f"\nPipeline complete. Published {published} articles.")
    return published
Running the Pipeline
# Install dependencies
pip install openai beautifulsoup4 httpx
# Set API key
export OPENAI_API_KEY="sk-..."
# Run for a specific category
python affiliate_pipeline.py \
--category "https://www.amazon.com/best-sellers-electronics" \
--blog-dir "./content/blog/" \
--articles 5
Running this 5 days/week at 5 articles per run = 25 articles/week ≈ 1,300 articles/year. At 0.5% conversion and $15 average commission, even conservative traffic numbers produce meaningful recurring revenue.
→ Get the Affiliate Engine at axon.nepa-ai.com/shop/affiliate-engine
Build the blog once. Let it earn while you sleep.



