The average SDR spends 5 hours/day on prospecting. Most of that is manual work: searching LinkedIn, copying info into spreadsheets, and writing personalized openers. AI can automate most of this, letting you focus on actual conversations.
Here’s a streamlined lead gen pipeline that runs daily, enriches leads with company context, and exports them to your CRM—targeting 300 qualified leads/month.
The Pipeline Architecture
Target Definition → Source Scraping → Dedup → Enrichment → Scoring → Export
- Target Definition: ICP parameters — industry, size, job title, tech stack, growth signals.
- Source Scraping: LinkedIn Sales Nav, Apollo.io, company job boards, G2, Crunchbase.
- Dedup: cross-reference against existing CRM records and the previous outreach log.
- Enrichment: company financials, tech stack, news/signals, decision-maker contacts.
- Scoring: rank leads by fit and buying intent signals.
- Export: push to CRM (HubSpot, Salesforce) or generate outreach queue.
Defining Your ICP Programmatically
Your ICP is a set of filters:
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class ICPCriteria:
    """Ideal Customer Profile expressed as a bundle of filter values.

    Every field has a default, so ``ICPCriteria()`` is usable as-is. List
    fields are created through ``default_factory`` so each instance gets its
    own list object.
    """

    # --- Company filters -------------------------------------------------
    # ``[...].copy`` is a zero-argument callable returning a fresh list,
    # which is exactly what ``default_factory`` expects.
    industries: list[str] = field(
        default_factory=["SaaS", "E-commerce", "Digital Marketing Agency"].copy
    )
    employee_range: tuple[int, int] = (10, 500)
    revenue_range_usd: tuple[int, int] = (1_000_000, 50_000_000)
    locations: list[str] = field(
        default_factory=["United States", "Canada"].copy
    )

    # --- Contact filters -------------------------------------------------
    job_titles: list[str] = field(
        default_factory=[
            "CEO",
            "Founder",
            "Head of Marketing",
            "VP of Marketing",
            "Director of Content",
        ].copy
    )

    # --- Tech stack signals (a company using these tools is a warmer lead)
    tech_stack_signals: list[str] = field(
        default_factory=["HubSpot", "Webflow", "Shopify", "Notion"].copy
    )

    # --- Growth signals (recent hiring, funding, job postings) -----------
    require_growth_signal: bool = True

    # --- Exclusions ------------------------------------------------------
    exclude_domains: list[str] = field(default_factory=list)
    exclude_companies: list[str] = field(default_factory=list)
# Your ICP: the profile used by the rest of the pipeline. Any field not
# passed here keeps the default declared on ICPCriteria.
ICP = ICPCriteria(
    job_titles=["Founder", "CEO", "Creative Director", "Head of Content"],
    industries=["Digital Agency", "Content Studio", "Media Company"],
    tech_stack_signals=["Adobe Premiere", "Final Cut", "CapCut"],
    employee_range=(5, 200),
)
Scraping Apollo.io via API
Apollo has a generous API tier (free: 50 exports/month, paid: unlimited). This is the fastest source for contact data:
import httpx
import time
from typing import Generator
# Placeholder credential: replace with a real key (preferably loaded from the
# environment or a secrets manager rather than committed to source).
APOLLO_API_KEY = "your_apollo_api_key"


def search_apollo_people(icp: ICPCriteria, pages: int = 10) -> Generator[dict, None, None]:
    """
    Search Apollo for contacts matching ICP. Yields contact records.
    Free tier: 50 exports/month. Paid: unlimited.

    Args:
        icp: Criteria supplying the job titles, employee range and locations
            sent to Apollo. ``icp.industries`` and ``icp.tech_stack_signals``
            are not sent (``organization_industry_tag_ids`` is left empty).
        pages: Maximum number of result pages (25 contacts each) to request.
            Paging stops early on the first page with no ``people``.

    Yields:
        One flat dict per person with the keys ``first_name``, ``last_name``,
        ``email``, ``email_status``, ``title``, ``company``, ``linkedin_url``,
        ``company_domain``, ``employee_count`` and ``source`` (always
        ``"apollo"``). Missing values are ``None``.

    Raises:
        httpx.HTTPStatusError: If Apollo answers with a 4xx/5xx status.
    """
    base_url = "https://api.apollo.io/v1/mixed_people/search"
    employee_min, employee_max = icp.employee_range

    for page in range(1, pages + 1):
        payload = {
            "api_key": APOLLO_API_KEY,
            "page": page,
            "per_page": 25,
            "person_titles": icp.job_titles,
            "organization_industry_tag_ids": [],
            "organization_num_employees_ranges": [
                f"{employee_min},{employee_max}"
            ],
            "person_locations": icp.locations,
            "contact_email_status": ["verified", "likely to engage"],
        }
        resp = httpx.post(base_url, json=payload, timeout=30)
        # Fail loudly on auth / rate-limit errors; otherwise an error body
        # without "people" would be mistaken for "no more results".
        resp.raise_for_status()
        data = resp.json()

        people = data.get("people")
        if not people:
            break

        for person in people:
            # "organization" may be present but null; `.get(key, {})` would
            # then return None and the chained `.get` would crash.
            org = person.get("organization") or {}
            yield {
                "first_name": person.get("first_name"),
                "last_name": person.get("last_name"),
                "email": person.get("email"),
                # Forwarded so score_lead() can award its "email verified"
                # points. Assumes Apollo exposes this as "email_status";
                # the value is None when the field is absent.
                "email_status": person.get("email_status"),
                "title": person.get("title"),
                "company": org.get("name"),
                "linkedin_url": person.get("linkedin_url"),
                "company_domain": org.get("primary_domain"),
                "employee_count": org.get("num_employees"),
                "source": "apollo",
            }

        time.sleep(0.5)  # respect rate limits
LinkedIn Scraping with Playwright
For leads not in Apollo, or for enriching with LinkedIn-specific signals:
from playwright.async_api import async_playwright
import asyncio
async def scrape_linkedin_search(query: str, session_file: str, max_results: int = 50) -> list[dict]:
    """
    Search LinkedIn and extract profile data.
    Requires a saved LinkedIn session (log in manually once).

    Args:
        query: Free-text people-search keywords; URL-encoded before use.
        session_file: Path to a Playwright storage-state file holding the
            logged-in session.
        max_results: Upper bound on the number of leads returned.

    Returns:
        At most ``max_results`` dicts with the keys ``name``, ``title``,
        ``company``, ``linkedin_url`` and ``source`` (always ``"linkedin"``).
        Result cards without a name element are skipped.
    """
    from urllib.parse import quote, urlencode

    # Encode the query so spaces, '&', '#', etc. cannot corrupt the URL.
    query_string = urlencode(
        {"keywords": query, "origin": "GLOBAL_SEARCH_HEADER"}, quote_via=quote
    )
    search_url = f"https://www.linkedin.com/search/results/people/?{query_string}"

    leads = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(storage_state=session_file)
            page = await context.new_page()

            # LinkedIn people search
            await page.goto(search_url)
            await page.wait_for_load_state("networkidle")

            while len(leads) < max_results:
                # Extract current page results
                profiles = await page.query_selector_all('.entity-result__item')
                for profile in profiles:
                    # Stop mid-page so the cap is never overshot.
                    if len(leads) >= max_results:
                        break
                    name_el = await profile.query_selector('.entity-result__title-text')
                    if not name_el:
                        continue
                    title_el = await profile.query_selector('.entity-result__primary-subtitle')
                    company_el = await profile.query_selector('.entity-result__secondary-subtitle')
                    link_el = await profile.query_selector('a.app-aware-link')
                    leads.append({
                        "name": await name_el.inner_text(),
                        "title": await title_el.inner_text() if title_el else "",
                        "company": await company_el.inner_text() if company_el else "",
                        "linkedin_url": await link_el.get_attribute('href') if link_el else "",
                        "source": "linkedin"
                    })

                if len(leads) >= max_results:
                    break

                # Next page
                next_btn = page.locator('button[aria-label="Next"]')
                if not await next_btn.is_visible():
                    break
                # Presumably the last page shows a disabled "Next" button;
                # clicking it would wait until the click timeout and raise.
                if not await next_btn.is_enabled():
                    break
                await next_btn.click()
                await asyncio.sleep(2)  # avoid rate limiting
        finally:
            # Close the browser even if navigation or extraction raised.
            await browser.close()
    return leads
AI Enrichment with Company Context
Raw lead data isn't enough. You need context to write a relevant opener. Use an LLM to generate company summaries from web searches:
import openai
import httpx
client = openai.OpenAI()


async def enrich_lead(lead: dict) -> dict:
    """Add AI-generated context to a lead record.

    Looks up the company via ``web_search_company`` and asks the chat model
    for a short summary based on those search results.

    Args:
        lead: Lead dict. Reads ``company``, ``company_domain``, ``title`` and
            either ``first_name``/``last_name`` or a single ``name`` key.

    Returns:
        The same dict object, mutated in place with ``ai_context`` (the model
        output) and ``enriched`` set to ``True``.
    """
    import asyncio  # local import keeps this snippet self-contained

    # `or ""` also covers keys that are present but None, which would
    # otherwise be rendered as the literal text "None" in the prompt.
    company = lead.get("company") or ""
    domain = lead.get("company_domain") or ""
    title = lead.get("title") or ""
    # Leads may carry first_name/last_name or only a combined "name"
    # (the LinkedIn scraper emits the latter); accept both shapes.
    contact_name = " ".join(
        part for part in (lead.get("first_name"), lead.get("last_name")) if part
    ) or (lead.get("name") or "")

    # 1. Fetch recent news / company description
    search_results = await web_search_company(company, domain)

    # 2. Generate context summary
    prompt = f"""
Company: {company}
Website: {domain}
Contact: {contact_name}, {title}
Search results:
{search_results[0:2000]}
Write 2-3 sentences summarizing:
1. What this company does
2. What pain points our AI automation service could solve for them
3. One specific hook for an outreach email
Be specific and factual. No fluff.
"""
    # `client` is the synchronous OpenAI client; run the blocking HTTP call in
    # a worker thread so the event loop stays responsive.
    response = await asyncio.to_thread(
        client.chat.completions.create,
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=200,
        temperature=0.3,
    )
    lead["ai_context"] = response.choices[0].message.content
    lead["enriched"] = True
    return lead
async def web_search_company(company: str, domain: str) -> str:
    """Quick web search for company context.

    Args:
        company: Company name to search for.
        domain: Company website domain, appended to the query.

    Returns:
        The ``description`` fields of the first three web results joined by
        newlines, or an empty string when the response has no results.
    """
    # Use a search API (Brave, SerpAPI, etc.)
    # The context manager closes the client's connection pool on exit, so no
    # connections are leaked per call.
    async with httpx.AsyncClient() as http:
        resp = await http.get(
            "https://api.search.brave.com/res/v1/web/search",
            params={"q": f"{company} {domain} about services"},
            headers={"Accept": "application/json", "X-Subscription-Token": "YOUR_KEY"},
            timeout=10
        )
    results = resp.json().get("web", {}).get("results", [])
    # `or ""` guards against a description that is present but null, which
    # would make str.join raise TypeError.
    return "\n".join((r.get("description") or "") for r in results[0:3])
Lead Scoring
Rank leads by fit and intent signals:
def score_lead(lead: dict, icp: ICPCriteria) -> int:
    """Return a fit score for ``lead``: the sum of fixed points per signal.

    Points awarded (maximum 100):
        30 - any ``icp.job_titles`` entry is a case-insensitive substring of
             the lead's title
        20 - ``employee_count`` lies within ``icp.employee_range`` (inclusive)
        25 - an email is present and ``email_status`` equals ``"verified"``
        10 - a LinkedIn URL is present
        15 - ``ai_context`` is present (enrichment succeeded)

    Missing or ``None`` title / employee_count are treated as ``""`` / ``0``.
    """
    job_title = (lead.get("title") or "").lower()
    headcount = lead.get("employee_count", 0) or 0
    smallest, largest = icp.employee_range[0], icp.employee_range[1]

    # Each row pairs a signal's point value with whether the lead shows it.
    signals = (
        (30, any(wanted.lower() in job_title for wanted in icp.job_titles)),
        (20, smallest <= headcount <= largest),
        (25, bool(lead.get("email")) and lead.get("email_status") == "verified"),
        (10, bool(lead.get("linkedin_url"))),
        (15, bool(lead.get("ai_context"))),
    )
    return sum(points for points, present in signals if present)
def rank_leads(leads: list[dict], icp: "ICPCriteria | None" = None) -> list[dict]:
    """Score every lead and return the leads sorted best-first.

    Args:
        leads: Lead dicts. Each one gets a ``"score"`` key written in place.
        icp: Criteria to score against. Defaults to the module-level ``ICP``,
            so existing single-argument calls behave as before.

    Returns:
        A new list holding the same dict objects, highest score first. Leads
        with equal scores keep their input order (``sorted`` is stable).
    """
    criteria = ICP if icp is None else icp
    for lead in leads:
        lead["score"] = score_lead(lead, criteria)
    return sorted(leads, key=lambda x: x["score"], reverse=True)
Full Pipeline Run
import csv
from datetime import datetime
def _lead_identifiers(lead: dict) -> set[str]:
    """Return the normalized email and/or LinkedIn URL identifying a lead."""
    identifiers = set()

    email = (lead.get("email") or "").strip().lower()
    if email:
        identifiers.add(email)

    url = (lead.get("linkedin_url") or "").strip().lower()
    if url:
        # Compare profile URLs without query string, scheme, "www." prefix or
        # trailing slash, so the same profile reported in slightly different
        # forms by different sources still matches.
        url = url.split("?", 1)[0].split("://", 1)[-1].removeprefix("www.").rstrip("/")
        if url:
            identifiers.add(url)

    return identifiers


def _dedup_leads(leads: list[dict]) -> list[dict]:
    """Keep the first lead seen for each email / LinkedIn URL."""
    seen = set()
    unique = []
    for lead in leads:
        identifiers = _lead_identifiers(lead)
        # Leads with neither identifier cannot be matched and are dropped;
        # a lead is a duplicate if ANY of its identifiers was already seen.
        if not identifiers or identifiers & seen:
            continue
        seen |= identifiers
        unique.append(lead)
    return unique


async def run_lead_pipeline():
    """Run the full pipeline: scrape, dedup, enrich, score, export to CSV.

    Scrapes Apollo and LinkedIn, removes duplicates, enriches the 100
    best-scoring leads, ranks them and writes ``leads_<YYYY-MM-DD>.csv`` to
    the current working directory.

    Returns:
        The enriched leads, sorted by score (highest first).
    """
    all_leads = []

    # Source 1: Apollo
    print("Scraping Apollo...")
    all_leads.extend(search_apollo_people(ICP, pages=12))

    # Source 2: LinkedIn
    print("Scraping LinkedIn...")
    li_leads = await scrape_linkedin_search("content marketing agency founder",
                                            "sessions/linkedin_main.json", max_results=100)
    all_leads.extend(li_leads)

    # Dedup by email + LinkedIn URL
    deduped = _dedup_leads(all_leads)
    print(f"After dedup: {len(deduped)} leads")

    # Enrich (batch with rate limiting)
    print("Enriching leads...")
    # Pre-rank on fit so the enrichment budget goes to the top 100 leads
    # rather than to whichever 100 happened to be scraped first.
    candidates = rank_leads(deduped)[:100]
    total = len(candidates)
    enriched = []
    for done, lead in enumerate(candidates, start=1):
        enriched.append(await enrich_lead(lead))
        if done % 10 == 0 or done == total:
            print(f" Enriched {done}/{total}...")
        await asyncio.sleep(0.3)

    # Score and rank (again: enrichment adds the ai_context signal)
    ranked = rank_leads(enriched)

    # Export to CSV
    filename = f"leads_{datetime.now().strftime('%Y-%m-%d')}.csv"
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        # Leads carry more keys than are exported (source, company_domain,
        # enriched, ...). Without extrasaction="ignore" DictWriter raises
        # ValueError on the first such row.
        writer = csv.DictWriter(
            f,
            fieldnames=[
                "first_name", "last_name", "email", "title", "company",
                "linkedin_url", "score", "ai_context"
            ],
            extrasaction="ignore",
        )
        writer.writeheader()
        writer.writerows(ranked)
    print(f"✓ Exported {len(ranked)} ranked leads to {filename}")
    return ranked


# Runs the pipeline whenever this module is executed. There is no __main__
# guard, so importing the module triggers it as well.
asyncio.run(run_lead_pipeline())
Running this daily gives you a fresh batch of scored, enriched leads in your outreach queue—ready for personalized emails or LinkedIn connection requests.
The NEPA AI Lead Gen Machine packages this complete pipeline with Apollo and LinkedIn connectors, automatic CRM export, dedup against your Obsidian vault leads database, and AI-generated personalized openers for the top 20 leads.
→ Get the Lead Gen Machine at /shop/lead-gen-machine
300 leads/month. Minimal manual work.



