import requests
from bs4 import BeautifulSoup
import re
import json
import logging
from urllib.parse import urljoin

# Configuration
CONFIG = {
    'source_url': 'https://www.2ix2.com/',
    'output_file': 'channels_config.py',
    'container_selector': 'div.moviefilm',  # Matches the page structure provided
    'link_selector': 'a',                   # Matches the page structure provided
    'name_clean_regex': r'(Live|Stream|HD)$',  # Strips trailing noise from channel names
    'base_url': 'https://www.2ix2.com',
}

# Set up logging to both a file and the console
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('channel_scraper.log'),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)


def fetch_page(url):
    """Download the page and return its HTML, or None on failure."""
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        logger.error(f"Failed to load page: {e}")
        return None


def extract_channels(html):
    """Extract channel links from the HTML structure."""
    soup = BeautifulSoup(html, 'html.parser')
    containers = soup.select(CONFIG['container_selector'])
    if not containers:
        logger.error("Channel container not found!")
        return []

    channels = []
    for container in containers:
        try:
            link = container.select_one(CONFIG['link_selector'])
            if not link:
                continue

            channel_name = link.text.strip()
            relative_url = link['href']

            # Clean up the channel name and derive a short name
            clean_name = re.sub(CONFIG['name_clean_regex'], '', channel_name).strip()
            short_name = clean_name.split()[0]  # First word of the name

            # Build the absolute URL from the relative href
            full_url = urljoin(CONFIG['base_url'], relative_url)

            channels.append({
                'name': clean_name,
                'url': full_url,
                'output_name': short_name,
            })
        except Exception as e:
            logger.warning(f"Error while processing link: {e}")
            continue

    return channels


def save_config(channels):
    """Write the channel list as an importable Python module."""
    with open(CONFIG['output_file'], 'w', encoding='utf-8') as f:
        f.write("STREAM_SOURCES = [\n")
        for channel in channels:
            f.write("    {\n")
            f.write(f"        'name': {json.dumps(channel['name'])},\n")
            f.write(f"        'url': {json.dumps(channel['url'])},\n")
            f.write(f"        'output_name': {json.dumps(channel['output_name'])},\n")
            f.write("    },\n")
        f.write("]\n")


def main():
    logger.info(f"Starting scrape of {CONFIG['source_url']}")
    html = fetch_page(CONFIG['source_url'])
    if not html:
        logger.error("Scraping aborted")
        return

    channels = extract_channels(html)
    if not channels:
        logger.error("No channels found")
        return

    logger.info(f"Channels found: {len(channels)}")
    save_config(channels)
    logger.info(f"Configuration saved to {CONFIG['output_file']}")


if __name__ == "__main__":
    main()
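
# For illustration, a run against a page containing one matching
# 'div.moviefilm' entry would produce a channels_config.py roughly like
# the sketch below. The channel name and URL here are hypothetical
# placeholders, not values taken from the real site:
#
#     STREAM_SOURCES = [
#         {
#             'name': "Sky Sport",
#             'url': "https://www.2ix2.com/sky-sport-live/",
#             'output_name': "Sky",
#         },
#     ]
#
# Because save_config() emits plain Python, the result can be consumed
# elsewhere with a simple `from channels_config import STREAM_SOURCES`.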