File size: 1,792 Bytes
f256f51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# filter_news.py (extended)
import re
from config import CATEGORY_FILTERS
import logging

logger = logging.getLogger(__name__)

class NewsFilter:
    """Filter and rank news articles using word lists and category keywords.

    Articles are mappings; the ``title``, ``description`` and ``category``
    entries are the only ones read here. Matching is case-insensitive and
    whole-word: a term only matches when it is not directly surrounded by
    other word characters.
    """

    def __init__(self):
        """Load the category keywords and the black-/whitelist files."""
        self.keyword_filters = CATEGORY_FILTERS
        self.blacklist = self.load_wordlist("blacklist.txt")
        self.whitelist = self.load_wordlist("whitelist.txt")

    def load_wordlist(self, filename):
        """Read ``config/<filename>`` and return its entries.

        Each non-blank line becomes one entry, stripped of surrounding
        whitespace and lower-cased.

        Args:
            filename: Name of the file inside the ``config`` directory
                (resolved relative to the current working directory).

        Returns:
            A list of lower-cased entries, or an empty list when the file
            does not exist.
        """
        try:
            # Explicit encoding so non-ASCII entries load the same way on
            # every platform instead of depending on the locale default.
            with open(f"config/{filename}", encoding="utf-8") as f:
                return [line.strip().lower() for line in f if line.strip()]
        except FileNotFoundError:
            # A missing list is treated as "no entries" (best effort).
            return []

    def filter_articles(self, articles):
        """Drop blacklisted articles and return the rest, ranked.

        An article is dropped only when it matches the blacklist and does
        not match the whitelist; a whitelist hit overrides the blacklist.

        Args:
            articles: Iterable of article mappings.

        Returns:
            A new list of the remaining articles, ordered by
            ``prioritize_articles``.
        """
        kept = [
            article
            for article in articles
            if not self.is_blacklisted(article) or self.is_whitelisted(article)
        ]
        return self.prioritize_articles(kept)

    def is_blacklisted(self, article):
        """Return True if the article text contains any blacklist entry."""
        text = self._article_text(article)
        return any(self._contains_term(text, word) for word in self.blacklist)

    def is_whitelisted(self, article):
        """Return True if the article text contains any whitelist entry."""
        text = self._article_text(article)
        return any(self._contains_term(text, word) for word in self.whitelist)

    def prioritize_articles(self, articles):
        """Sort articles by how many of their category's keywords they contain.

        The keywords for an article are looked up in ``self.keyword_filters``
        under the article's ``category``; an unknown or missing category
        scores zero. Articles with equal scores keep their input order.

        Args:
            articles: Iterable of article mappings.

        Returns:
            A new list, highest keyword count first.
        """
        def keyword_hits(article):
            keywords = self.keyword_filters.get(article.get("category"), [])
            text = self._article_text(article)
            return sum(
                1 for keyword in keywords if self._contains_term(text, keyword)
            )

        # reverse=True keeps the sort stable, so ties stay in input order.
        return sorted(articles, key=keyword_hits, reverse=True)

    @staticmethod
    def _article_text(article):
        """Return the lower-cased "title description" text of an article.

        Missing or ``None`` fields are treated as empty strings so they are
        not rendered as the literal text "none" and matched by accident.
        """
        title = article.get("title") or ""
        description = article.get("description") or ""
        return f"{title} {description}".lower()

    @staticmethod
    def _contains_term(text, term):
        """Return True if ``term`` occurs in ``text`` as a whole term.

        The term is matched literally (regex metacharacters are escaped).
        Lookarounds are used instead of ``\\b`` so terms that begin or end
        with punctuation, such as "c++" or "u.s.", can still match.
        ``text`` is expected to be lower-cased already.
        """
        term = str(term).strip().lower()
        if not term:
            # An empty pattern would match everywhere; ignore it.
            return False
        pattern = rf"(?<!\w){re.escape(term)}(?!\w)"
        return re.search(pattern, text) is not None