# goonpedo crawl script (NOTE: this header must stay a comment — bare prose here is a Python syntax error; ideally move the shebang below up to line 1)
#!/usr/bin/env python3
"""
YouTube Channel Crawler and Keyword/Regex Matcher
Usage:
------
python yt_pedo_finder_v1.0.py <channel_url_or_id> --api-key <YOUR_API_KEY> [options]
Positional Arguments:
channel Starting YouTube channel URL or ID
Required Arguments:
--api-key Your YouTube Data API v3 key
Optional Arguments:
--max-depth Maximum recursion depth for crawling channels (default: 2)
--max-videos Maximum number of videos to fetch per channel (default: 5)
Features:
---------
- Recursively crawl a starting channel and its commenters’ channels
- Match keywords/regexes in:
* Channel name
* Channel description
* Top-level comments on videos
- Records matched channels and matched text
- Generates optional reports:
* CSV
* HTML
- Includes channels without matches in reports
- Matches are case-insensitive
Examples:
---------
# Crawl a channel and get all matches (with default depth and videos)
python yt_pedo_finder_v1.0.py https://www.youtube.com/@example --api-key YOUR_KEY
# Crawl a channel with depth 3 and 10 videos per channel
python yt_pedo_finder_v1.0.py UCabcd1234 --api-key YOUR_KEY --max-depth 3 --max-videos 10
"""
import re
import sys
import csv
import argparse
from collections import deque, defaultdict
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# --- CONFIG ---
# Regex patterns checked against channel names, descriptions and top-level
# comments.  All matching is case-insensitive: the inline (?i) flags are
# redundant with the re.I flag passed to re.compile below, but harmless.
KEYWORDS = [
    r"(?i)\bsession\b",  # session
    r"\b[a-fA-F0-9]{66}\b",  # Matches session IDs (66 hex chars)
    r"(?i)\bteleguard\b",  # teleguard
    r"(?i)\bmaps?\b",  # map / MAP(s)
    r"(?i)\bp[ox]do\w*",  # pedo, pxdos, pedophilia, pxdocon, etc.
    r"(?i)\bl[ox]li\w*",  # loli, lxli, lolicon, lxlicon, etc.
    r"(?i)\bminors?\b",  # minor(s)
    r"(?i)\bch[ix]ld(ren)?\b",  # (child / chxld) + (ren)?
    r"(?i)\bunderage\b",  # underage
    r"(?i)\bageplay\b",  # ageplay
    r"(?i)\bgr[ox][ox]m\w*",  # groom, groomer, grxxm, grxxming, etc.
    r"(?i)\bg[ox][ox]n\w*",  # goon, gooner, gxxn, gxxning, etc.
]
# Compiled once at import time; matches() iterates this list per text checked.
REGEXES = [re.compile(pat, re.I) for pat in KEYWORDS]
# ---------------
# Module-level crawl state shared by crawl() — channel ids already processed.
visited = set()
# matched[channel_id] = {"name":..., "link":..., "matches":[{"type":..., "text":..., "link":...}]}
# defaultdict: indexing an unseen channel id auto-creates an empty entry.
matched = defaultdict(lambda: {"name": "", "link": "", "matches": []})
def matches(text: str):
    """Return the patterns of every regex in REGEXES that matches *text*.

    A falsy *text* (None, "") is treated as the empty string, so the
    result is always a list (empty when nothing matches).
    """
    hits = []
    for rx in REGEXES:
        if rx.search(text or ""):
            hits.append(rx.pattern)
    return hits
def get_channel_id(youtube, url_or_id):
    """Resolve a channel URL, @handle URL or bare "UC..." id to a channel id.

    Args:
        youtube: an authorized YouTube Data API client (only used for
            handle lookups via search().list).
        url_or_id: bare channel id, https://.../@handle URL, or
            https://.../channel/UC... URL.

    Returns:
        The "UC..."-style channel id.

    Raises:
        ValueError: if the input cannot be resolved to a channel.
    """
    if url_or_id.startswith("UC"):
        return url_or_id
    if "/@" in url_or_id:
        # Keep only the handle itself: drop any trailing path ("/videos")
        # or query string so the search query is just "@handle".
        handle = url_or_id.split("/@")[1].split("/")[0].split("?")[0]
        resp = youtube.search().list(
            part="snippet",
            q=f"@{handle}",
            type="channel",
            maxResults=1
        ).execute()
        if resp["items"]:
            return resp["items"][0]["snippet"]["channelId"]
    if "channel/" in url_or_id:
        # Bug fix: ".../channel/UCxyz/videos" previously returned
        # "UCxyz/videos"; strip trailing path segments and query strings.
        return url_or_id.split("channel/")[1].split("/")[0].split("?")[0]
    raise ValueError(f"Could not resolve channel: {url_or_id}")
def get_channel_info(youtube, channel_id):
    """Fetch snippet/contentDetails/statistics for one channel.

    Args:
        youtube: authorized YouTube Data API client.
        channel_id: "UC..." channel id.

    Returns:
        dict with keys name, description, uploads (uploads playlist id or
        None), video_count, subs and link — or None when the request fails
        or the channel does not exist.
    """
    try:
        resp = youtube.channels().list(
            part="snippet,contentDetails,statistics",
            id=channel_id
        ).execute()
    except HttpError as e:
        print(f"[!] Failed to fetch info for {channel_id}: {e}")
        return None
    items = resp.get("items", [])
    if not items:
        return None
    item = items[0]
    snippet = item.get("snippet", {})
    stats = item.get("statistics", {})
    # Robustness fix: some channels omit contentDetails / relatedPlaylists
    # entirely; chained .get() avoids the KeyError the old code raised.
    related = item.get("contentDetails", {}).get("relatedPlaylists", {})
    return {
        "name": snippet.get("title", ""),
        "description": snippet.get("description", ""),
        "uploads": related.get("uploads"),
        # Counts arrive as strings; hidden stats default to 0.
        "video_count": int(stats.get("videoCount", 0)),
        "subs": int(stats.get("subscriberCount", 0)),
        "link": f"https://www.youtube.com/channel/{channel_id}"
    }
def get_videos(youtube, playlist_id, max_videos=5):
    """Return up to *max_videos* video ids from a playlist.

    Fixes two limits of the old implementation: maxResults above the API
    page cap (50) caused a request error, and only the first page was ever
    read.  Now pages are fetched (capped at 50 each) until max_videos ids
    are collected or the playlist is exhausted.

    Returns [] when playlist_id is falsy; returns whatever was collected
    so far if a request fails mid-way.
    """
    videos = []
    if not playlist_id:
        return videos
    page_token = None
    while len(videos) < max_videos:
        try:
            resp = youtube.playlistItems().list(
                part="contentDetails",
                playlistId=playlist_id,
                maxResults=min(max_videos - len(videos), 50),  # API caps pages at 50
                pageToken=page_token
            ).execute()
        except HttpError as e:
            print(f"[!] Failed to get videos for playlist {playlist_id}: {e}")
            break
        for item in resp.get("items", []):
            videos.append(item["contentDetails"]["videoId"])
        page_token = resp.get("nextPageToken")
        if not page_token:
            break
    return videos[:max_videos]
def get_commenters(youtube, video_id, max_comments=50):
    """Collect top-level comment authors for one video.

    Returns a list of (author_channel_id, comment_text, comment_link)
    tuples; comments whose author channel id is unavailable are skipped.
    Returns [] when the comment threads cannot be fetched (e.g. comments
    disabled).
    """
    results = []
    try:
        resp = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=max_comments,
            textFormat="plainText"
        ).execute()
    except HttpError as e:
        print(f"[!] Cannot fetch comments for {video_id}: {e}")
        return results
    for thread in resp.get("items", []):
        top = thread["snippet"]["topLevelComment"]["snippet"]
        author = top.get("authorChannelId", {}).get("value")
        if not author:
            continue
        comment_link = f"https://www.youtube.com/watch?v={video_id}&lc={thread['id']}"
        results.append((author, top.get("textDisplay", ""), comment_link))
    return results
def crawl(youtube, start_channel, max_depth=2, max_videos=5):
    """Breadth-first crawl of channels starting at *start_channel*.

    For each dequeued channel: fetch its info, record name/link in the
    module-level `matched` dict, test the channel name and description
    against REGEXES, then scan top-level comments on up to *max_videos*
    of its videos.  Matching comments are recorded under the comment
    author's channel id, and commenters are enqueued at depth + 1
    (entries beyond *max_depth* are dropped when dequeued).

    Mutates the module-level `visited` set and `matched` dict; returns
    the `matched` mapping (channel_id -> {name, link, matches}).
    """
    queue = deque([(start_channel, 0)])
    while queue:
        channel_id, depth = queue.popleft()
        if channel_id in visited or depth > max_depth:
            continue
        visited.add(channel_id)
        info = get_channel_info(youtube, channel_id)
        if not info:
            # If we can't fetch info, still add placeholder name
            # (the channel id doubles as display name so reports stay complete).
            if channel_id not in matched:
                matched[channel_id]["name"] = channel_id
                matched[channel_id]["link"] = f"https://www.youtube.com/channel/{channel_id}"
            continue
        print(f"[+] Visiting {channel_id} ({info['name']}) at depth {depth}")
        print(f" Subscribers: {info['subs']}, Videos: {info['video_count']}")
        # Ensure matched entry has name and link (even if no matches are found).
        matched[channel_id]["name"] = info["name"]
        matched[channel_id]["link"] = info["link"]
        # Check channel name
        if matches(info["name"]):
            print(f" -> MATCHED name: {info['name']}")
            matched[channel_id]["matches"].append({
                "type": "name",
                "text": info["name"],
                "link": info["link"]
            })
        # --- Check description once per channel ---
        if matches(info["description"]):
            print(f" -> MATCHED description: {info['description'][:100]}...")
            matched[channel_id]["matches"].append({
                "type": "description",
                "text": info["description"],
                "link": info["link"]
            })
        uploads = info.get("uploads")
        # Nothing to scan when the channel has no uploads playlist or no videos.
        if not uploads or info["video_count"] == 0:
            continue
        videos = get_videos(youtube, uploads, max_videos=max_videos)
        for vid in videos:
            commenters = get_commenters(youtube, vid)
            for author_id, text, link in commenters:
                if matches(text):
                    # Attempt to get name if not already present
                    if not matched[author_id]["name"]:
                        author_info = get_channel_info(youtube, author_id)
                        matched[author_id]["name"] = author_info["name"] if author_info else author_id
                        matched[author_id]["link"] = f"https://www.youtube.com/channel/{author_id}"
                    print(f" -> MATCHED comment in {author_id}: {text[:100]}...")
                    matched[author_id]["matches"].append({
                        "type": "comment",
                        "text": text,
                        "link": link
                    })
                # NOTE(review): every commenter is enqueued, not only matching
                # ones — confirm this breadth is intended (API quota grows fast).
                queue.append((author_id, depth + 1))
    return matched
def save_csv(matched, filename="matched_channels.csv"):
    """Write the crawl results as CSV.

    Emits one row per recorded match; a channel without matches still gets
    a single row with the match columns left empty, so every crawled
    channel appears in the report.
    """
    header = ["Channel ID", "Channel Name", "Channel Link",
              "Match Type", "Matched Text", "Link"]
    with open(filename, "w", newline="", encoding="utf-8") as fh:
        out = csv.writer(fh)
        out.writerow(header)
        for channel_id, entry in matched.items():
            rows = [
                [channel_id, entry["name"], entry["link"], m["type"], m["text"], m["link"]]
                for m in entry["matches"]
            ] or [[channel_id, entry["name"], entry["link"], "", "", ""]]
            out.writerows(rows)
def save_html(matched, filename="matched_channels.html"):
    """Write the crawl results as an HTML report.

    Lists matched channels (each with its matches as links) first, then
    channels with no matches.

    Security fix: channel names, links and matched text come from
    untrusted YouTube data.  The original `.replace("<", "<")` /
    `.replace(">", ">")` calls were no-op self-replacements (mangled
    escaping), letting crafted names/comments inject markup into the
    report; everything interpolated into HTML is now escaped properly.
    """
    import html  # local import: only needed for report generation

    with open(filename, "w", encoding="utf-8") as f:
        f.write("<html><body><h1>YouTube Channel Crawl Results</h1>\n")
        # Channels with matches
        f.write("<h2>Matched Channels</h2>\n")
        for ch_id, info in matched.items():
            if not info["matches"]:
                continue
            name = html.escape(info["name"])
            link = html.escape(info["link"], quote=True)
            f.write(f"<h3><a href='{link}'>{name}</a></h3>\n<ul>\n")
            for m in info["matches"]:
                # Truncate long matched text before escaping for display.
                text = html.escape(m["text"][:200])
                m_link = html.escape(m["link"], quote=True)
                f.write(f"<li>{m['type']}: <a href='{m_link}'>{text}</a></li>\n")
            f.write("</ul>\n")
        # Channels with no matches
        no_match_channels = [info for info in matched.values() if not info["matches"]]
        if no_match_channels:
            f.write("<h2>Channels with no matches</h2>\n<ul>\n")
            for info in no_match_channels:
                link = html.escape(info["link"], quote=True)
                f.write(f"<li><a href='{link}'>{html.escape(info['name'])}</a></li>\n")
            f.write("</ul>\n")
        f.write("</body></html>")
if __name__ == "__main__":
    # CLI: positional channel URL/id, required --api-key, optional crawl limits.
    parser = argparse.ArgumentParser(description="YouTube crawling bot using API")
    parser.add_argument("channel", help="Starting channel URL or ID")
    parser.add_argument("--api-key", required=True, help="YouTube Data API key")
    parser.add_argument("--max-depth", type=int, default=2)
    parser.add_argument("--max-videos", type=int, default=5)
    args = parser.parse_args()
    # Build the Data API v3 client once and reuse it for the whole crawl.
    youtube = build("youtube", "v3", developerKey=args.api_key)
    start_id = get_channel_id(youtube, args.channel)
    results = crawl(youtube, start_id, max_depth=args.max_depth, max_videos=args.max_videos)
    # Sort channels by number of matches descending
    sorted_results = dict(sorted(results.items(), key=lambda kv: len(kv[1]["matches"]), reverse=True))
    print("\n=== Matched Channels ===")
    for ch_id, info in sorted_results.items():
        print(f"{info['link']} ({len(info['matches'])} matches)")
    # Interactive prompts for reports (anything other than "y" skips the report).
    csv_save = input("\nDo you want to save a CSV report? [y/N]: ").strip().lower()
    if csv_save == "y":
        csv_filename = input("Enter CSV filename (default: matched_channels.csv): ").strip()
        if not csv_filename:
            csv_filename = "matched_channels.csv"
        save_csv(sorted_results, csv_filename)
        print(f"CSV saved to {csv_filename}")
    html_save = input("\nDo you want to save an HTML report? [y/N]: ").strip().lower()
    if html_save == "y":
        html_filename = input("Enter HTML filename (default: matched_channels.html): ").strip()
        if not html_filename:
            html_filename = "matched_channels.html"
        save_html(sorted_results, html_filename)
        print(f"HTML saved to {html_filename}")
    print("\n=== Done ===")