import os

import requests
import streamlit as st
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline


class URLValidator:
    """
    A production-ready URL validation class that evaluates the credibility of a webpage
    using multiple factors: domain trust, content relevance, fact-checking, bias detection,
    and citations.
    """

    def __init__(self):
        # SerpAPI key is read from the environment; may be None if unset.
        self.serpapi_key = os.getenv("SERPAPI_API_KEY")

        # Load models once at construction to avoid redundant (and slow) reloads
        # on every validation call.
        self.similarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
        self.fake_news_classifier = pipeline(
            "text-classification",
            model="mrm8488/bert-tiny-finetuned-fake-news-detection",
        )
        self.sentiment_analyzer = pipeline(
            "text-classification",
            model="cardiffnlp/twitter-roberta-base-sentiment",
        )

    def fetch_page_content(self, url: str) -> str:
        """
        Fetches and extracts text content from the given URL.

        Returns the concatenated text of all <p> elements, or an empty string
        if the request fails for any reason (network error, non-2xx status).
        """
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            # Extract paragraph text only; headings/nav/boilerplate are ignored.
            return " ".join([p.text for p in soup.find_all("p")])
        except requests.RequestException:
            return ""  # Fail gracefully by returning an empty string

    def get_domain_trust(self, url: str, content: str) -> int:
        """
        Computes the domain trust score based on available data sources.

        Currently only the Hugging Face fake-news signal contributes; the list
        structure allows additional scorers to be averaged in later. Returns a
        neutral 50 when no scorer produced a value.
        """
        trust_scores = []

        # Hugging Face fake-news detector (only meaningful with page content).
        if content:
            try:
                trust_scores.append(self.get_domain_trust_huggingface(content))
            except Exception:
                # Deliberately best-effort: a scorer failure must not abort
                # validation. Narrowed from a bare `except:` so that
                # KeyboardInterrupt/SystemExit still propagate.
                pass

        # Final score is the average of whatever scorers succeeded.
        return int(sum(trust_scores) / len(trust_scores)) if trust_scores else 50

    def get_domain_trust_huggingface(self, content: str) -> int:
        """
        Uses a Hugging Face fake news detection model to assess credibility.

        Returns an int score in [0, 100]; 50 is the neutral default.
        """
        if not content:
            return 50  # Default score if no content is available
        # NOTE(review): the remainder of this method was truncated in the
        # reviewed chunk. The lines below are a minimal reconstruction of the
        # obvious intent — classify a bounded prefix (models have ~512-token
        # limits) and map the predicted label to a trust score. Confirm
        # against the original implementation before relying on exact values.
        result = self.fake_news_classifier(content[:512])[0]
        return 100 if result["label"] == "REAL" else 30