mh21492p committed on
Commit
b54514c
·
verified ·
1 Parent(s): 8398c9f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+ from sentence_transformers import SentenceTransformer, util
7
+ from transformers import pipeline
8
+
9
+ class URLValidator:
10
+ """
11
+ A production-ready URL validation class that evaluates the credibility of a webpage
12
+ using multiple factors: domain trust, content relevance, fact-checking, bias detection, and citations.
13
+ """
14
+
15
def __init__(self):
    """Read the SerpAPI key from the environment and instantiate the models.

    The three models are created here, once per instance, and stored on
    ``self`` so they are not re-created on every call.
    """
    similarity_model_id = 'sentence-transformers/all-mpnet-base-v2'
    fake_news_model_id = "mrm8488/bert-tiny-finetuned-fake-news-detection"
    sentiment_model_id = "cardiffnlp/twitter-roberta-base-sentiment"
    classification_task = "text-classification"

    # SerpAPI key; None when SERPAPI_API_KEY is not set in the environment.
    self.serpapi_key = os.environ.get("SERPAPI_API_KEY")

    # Sentence-embedding model.
    self.similarity_model = SentenceTransformer(similarity_model_id)

    # Text-classification pipelines: fake-news detection and sentiment.
    self.fake_news_classifier = pipeline(
        classification_task, model=fake_news_model_id
    )
    self.sentiment_analyzer = pipeline(
        classification_task, model=sentiment_model_id
    )
23
+
24
def fetch_page_content(self, url: str) -> str:
    """Fetch a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to download.

    Returns:
        The text of every ``<p>`` element joined by single spaces, or an
        empty string when ``url`` is missing/blank or the request fails
        (connection error, timeout after 10 seconds, or a non-success
        HTTP status reported by ``raise_for_status``).
    """
    # A missing or blank URL can never be fetched; answer immediately
    # instead of relying on requests to raise and be caught below.
    if not url or (isinstance(url, str) and not url.strip()):
        return ""

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        return ""  # Fail gracefully by returning an empty string

    soup = BeautifulSoup(response.text, "html.parser")
    # Only paragraph text is kept; other markup is ignored.
    return " ".join(p.text for p in soup.find_all("p"))
33
+
34
def get_domain_trust(self, url: str, content: str) -> int:
    """Compute a domain trust score by averaging the available sub-scores.

    The only score source consulted here is
    ``get_domain_trust_huggingface``, and only when ``content`` is
    non-empty.

    Args:
        url: Address of the page being rated. Not read by this method's
            current body.
        content: Text extracted from the page.

    Returns:
        The mean of the collected scores truncated to an ``int``, or 50
        when no score could be obtained.
    """
    import logging  # function-scope import keeps this block self-contained

    trust_scores = []

    # Hugging Face Fake News Detector
    if content:
        try:
            trust_scores.append(self.get_domain_trust_huggingface(content))
        except Exception:
            # Scoring is best-effort: a failing detector must not abort the
            # evaluation, but the failure is logged rather than hidden.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            logging.getLogger(__name__).warning(
                "Fake-news trust scoring failed; using fallback score",
                exc_info=True,
            )

    # Compute final score (average of available scores)
    if not trust_scores:
        return 50
    return int(sum(trust_scores) / len(trust_scores))
47
+
48
+ def get_domain_trust_huggingface(self, content: str) -> int:
49
+ """ Uses a Hugging Face fake news detection model to assess credibility. """
50
+ if not content:
51
+ return 50 # Default score if no cont