# filter_news.py (extended)
import logging
import re

from config import CATEGORY_FILTERS

logger = logging.getLogger(__name__)


class NewsFilter:
    """Filter and rank news articles using word lists and category keywords.

    Articles are expected to be dict-like objects; the code reads the
    ``title``, ``description`` and ``category`` keys from them.

    Attributes are populated in ``__init__``:
      * ``keyword_filters`` - the ``CATEGORY_FILTERS`` object from ``config``;
        it is used as a mapping from category to an iterable of keywords.
      * ``blacklist`` / ``whitelist`` - lower-cased entries read from
        ``config/blacklist.txt`` and ``config/whitelist.txt``.
    """

    def __init__(self):
        self.keyword_filters = CATEGORY_FILTERS
        self.blacklist = self.load_wordlist("blacklist.txt")
        self.whitelist = self.load_wordlist("whitelist.txt")

    def load_wordlist(self, filename):
        """Read ``config/<filename>`` and return its non-blank lines.

        Each line is stripped and lower-cased. A missing file is not an
        error: it is logged and an empty list is returned.
        """
        try:
            # Explicit encoding so non-ASCII entries do not depend on the
            # platform's locale default.
            with open(f"config/{filename}", encoding="utf-8") as f:
                return [line.strip().lower() for line in f if line.strip()]
        except FileNotFoundError:
            logger.info("Word list %s not found; using an empty list", filename)
            return []

    @staticmethod
    def _article_text(article):
        """Return the lower-cased ``"<title> <description>"`` of *article*.

        Missing or ``None`` fields count as empty text, so they neither raise
        nor leak the literal string "none" into the searchable text.
        """
        title = article.get("title") or ""
        description = article.get("description") or ""
        return f"{title} {description}".lower()

    @staticmethod
    def _contains_word(text, word):
        """Return True if *word* occurs in *text* as a whole term.

        *word* is matched literally (regex metacharacters are escaped).
        Lookarounds are used instead of ``\\b`` so that terms starting or
        ending with a non-word character (e.g. "c++") can still match.
        """
        if not word:
            return False
        pattern = rf"(?<!\w){re.escape(word)}(?!\w)"
        return re.search(pattern, text) is not None

    def _matches_any(self, article, words):
        """Return True if any entry of *words* occurs in the article text."""
        text = self._article_text(article)
        return any(self._contains_word(text, word) for word in words)

    def filter_articles(self, articles):
        """Drop blacklisted articles and return the rest, ranked.

        An article is dropped only when it matches the blacklist and does
        not match the whitelist. The survivors are passed through
        ``prioritize_articles``.
        """
        kept = [
            article
            for article in articles
            if not self.is_blacklisted(article) or self.is_whitelisted(article)
        ]
        return self.prioritize_articles(kept)

    def is_blacklisted(self, article):
        """Return True if the article text contains any blacklist entry."""
        return self._matches_any(article, self.blacklist)

    def is_whitelisted(self, article):
        """Return True if the article text contains any whitelist entry."""
        return self._matches_any(article, self.whitelist)

    def prioritize_articles(self, articles):
        """Return *articles* sorted by number of matching category keywords.

        Articles with more keyword hits for their own category come first;
        ``sorted`` is stable, so ties keep their input order.
        """

        def sort_key(article):
            category_keywords = self.keyword_filters.get(
                article.get("category"), []
            )
            text = self._article_text(article)
            score = sum(
                1
                for keyword in category_keywords
                if self._contains_word(text, keyword.lower())
            )
            return -score  # negated so higher scores sort first

        return sorted(articles, key=sort_key)