Inara132000 committed on
Commit
2b28117
·
verified ·
1 Parent(s): cdf8cfc

Update deliverable2.py

Browse files
Files changed (1) hide show
  1. deliverable2.py +49 -3
deliverable2.py CHANGED
@@ -26,7 +26,8 @@ class URLValidator:
26
  response.raise_for_status()
27
  soup = BeautifulSoup(response.text, "html.parser")
28
  return " ".join([p.text for p in soup.find_all("p")]) # Extract paragraph text
29
- except requests.RequestException:
 
30
  return "" # Fail gracefully by returning an empty string
31
 
32
  def get_domain_trust(self, url: str, content: str) -> int:
@@ -36,7 +37,8 @@ class URLValidator:
36
  if content:
37
  try:
38
  trust_scores.append(self.get_domain_trust_huggingface(content))
39
- except:
 
40
  pass
41
 
42
  return int(sum(trust_scores) / len(trust_scores)) if trust_scores else 50
@@ -45,4 +47,48 @@ class URLValidator:
45
  """ Uses a Hugging Face fake news detection model to assess credibility. """
46
  if not content:
47
  return 50
48
- result = s
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  response.raise_for_status()
27
  soup = BeautifulSoup(response.text, "html.parser")
28
  return " ".join([p.text for p in soup.find_all("p")]) # Extract paragraph text
29
+ except requests.RequestException as e:
30
+ print(f"Error fetching content from {url}: {e}")
31
  return "" # Fail gracefully by returning an empty string
32
 
33
  def get_domain_trust(self, url: str, content: str) -> int:
 
37
  if content:
38
  try:
39
  trust_scores.append(self.get_domain_trust_huggingface(content))
40
+ except Exception as e:
41
+ print(f"Error in domain trust computation: {e}")
42
  pass
43
 
44
  return int(sum(trust_scores) / len(trust_scores)) if trust_scores else 50
 
47
  """ Uses a Hugging Face fake news detection model to assess credibility. """
48
  if not content:
49
  return 50
50
+ try:
51
+ result = self.fake_news_classifier(content)[0]
52
+ if result['label'] == 'FAKE':
53
+ return 20 # Fake content detected
54
+ elif result['label'] == 'REAL':
55
+ return 80 # Real content detected
56
+ else:
57
+ return 50 # Neutral if unsure
58
+ except Exception as e:
59
+ print(f"Error in fake news detection: {e}")
60
+ return 50 # Return neutral if an error occurs
61
+
62
def get_content_relevance(self, query: str, content: str) -> float:
    """Measure how relevant page content is to a query.

    Both texts are embedded with ``self.similarity_model`` and the two
    embeddings are compared with cosine similarity.

    Args:
        query: The search query the page is being judged against.
        content: Text extracted from the page.

    Returns:
        The cosine similarity between the query and content embeddings as
        a float, or 0.0 when either text is empty or whitespace-only.
    """
    # Text that is only whitespace is truthy, so a plain ``not content``
    # check would let it through and score an embedding of nothing.
    if not content or not content.strip():
        return 0.0
    # A blank query carries no meaning to compare against either.
    if not query or not query.strip():
        return 0.0

    query_embedding = self.similarity_model.encode(query, convert_to_tensor=True)
    content_embedding = self.similarity_model.encode(content, convert_to_tensor=True)

    # One query against one content yields a single similarity score.
    similarity = util.pytorch_cos_sim(query_embedding, content_embedding)
    return float(similarity)
70
+
71
def evaluate_url(self, url: str, query: str) -> dict:
    """Evaluate the overall credibility of a URL for a given query.

    Fetches the page text, then combines the trust score and the
    query-relevance score into a single verdict.

    Args:
        url: The address of the page to evaluate.
        query: The query the page should be relevant to.

    Returns:
        A dict with the keys ``"URL"``, ``"Validity"`` (``"Valid"`` or
        ``"Invalid"``), ``"Trust"`` and ``"Relevance"``. When no content
        could be fetched the verdict is ``"Invalid"`` with a trust of 50
        and a relevance of 0.0.
    """
    page_text = self.fetch_page_content(url)

    # Start from the verdict used when the page yields no content, and
    # only overwrite it once real scores are available.
    report = {"URL": url, "Validity": "Invalid", "Trust": 50, "Relevance": 0.0}
    if page_text:
        trust_score = self.get_domain_trust(url, page_text)
        relevance_score = self.get_content_relevance(query, page_text)
        report["Trust"] = trust_score
        report["Relevance"] = relevance_score

        # Both thresholds must be exceeded for the URL to count as credible.
        if trust_score > 60 and relevance_score > 0.5:
            report["Validity"] = "Valid"

    return report
84
+
85
# Example usage: evaluate one URL against one query and print the result.
# NOTE(review): this appears to sit at module level, so it would also run
# whenever the file is imported -- confirm that is intended.

# The query the page is judged against, and the page to evaluate.
query = "How blockchain works"
url = "https://www.ibm.com/topics/what-is-blockchain"

# The API key is read from the environment; it is None when the variable is unset.
serpapi_key = os.environ.get("SERPAPI_API_KEY")
url_validator = URLValidator(serpapi_key)

evaluation = url_validator.evaluate_url(url, query)
print(evaluation)