SaiKumar1627 committed on
Commit
841ac1e
·
verified ·
1 Parent(s): cff0d68

Update deliverable2.py

Browse files
Files changed (1) hide show
  1. deliverable2.py +9 -76
deliverable2.py CHANGED
@@ -2,12 +2,11 @@ import requests
2
  from bs4 import BeautifulSoup
3
  from sentence_transformers import SentenceTransformer, util
4
  from transformers import pipeline
5
- import pandas as pd
6
 
7
  class URLValidator:
8
  """
9
- A production-ready URL validation class that evaluates the credibility of a webpage
10
- using multiple factors: domain trust, content relevance, fact-checking, bias detection, and citations.
11
  """
12
 
13
  def __init__(self):
@@ -17,9 +16,9 @@ class URLValidator:
17
  self.sentiment_analyzer = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")
18
 
19
  def fetch_page_content(self, url: str) -> str:
20
- """ Fetches and extracts text content from the given URL, handling errors gracefully. """
21
  try:
22
- headers = {"User-Agent": "Mozilla/5.0"}
23
  response = requests.get(url, timeout=10, headers=headers)
24
  response.raise_for_status()
25
  soup = BeautifulSoup(response.text, "html.parser")
@@ -34,9 +33,9 @@ class URLValidator:
34
  return f"Error: Unable to fetch URL ({str(e)})."
35
 
36
  def get_domain_trust(self, url: str, content: str) -> int:
37
- """ Computes the domain trust score. Uses a mock approach for now. """
38
  if "Error" in content:
39
- return 0 # If page fetch failed, trust is low
40
  return len(url) % 5 + 1 # Mock trust rating (1-5)
41
 
42
  def compute_similarity_score(self, user_query: str, content: str) -> int:
@@ -81,23 +80,12 @@ class URLValidator:
81
  return " ".join(reasons) if reasons else "This source is highly credible and relevant."
82
 
83
  def rate_url_validity(self, user_query: str, url: str):
84
- """ Main function to evaluate the validity of a webpage. """
85
  content = self.fetch_page_content(url)
86
 
 
87
  if "Error" in content:
88
- return {
89
- "raw_score": {
90
- "Domain Trust": 0,
91
- "Content Relevance": 0,
92
- "Fact-Check Score": 0,
93
- "Bias Score": 0,
94
- "Final Validity Score": 0
95
- },
96
- "stars": {
97
- "icon": "❌"
98
- },
99
- "explanation": content
100
- }
101
 
102
  domain_trust = self.get_domain_trust(url, content)
103
  similarity_score = self.compute_similarity_score(user_query, content)
@@ -127,58 +115,3 @@ class URLValidator:
127
  },
128
  "explanation": explanation
129
  }
130
-
131
-
132
- # ✅ **Updated 15 Queries and 15 Different URLs**
133
- sample_queries = [
134
- "How does artificial intelligence impact the job market?",
135
- "What are the risks of genetically modified organisms (GMOs)?",
136
- "What are the environmental effects of plastic pollution?",
137
- "How does 5G technology affect human health?",
138
- "What are the latest treatments for Alzheimer's disease?",
139
- "Is red meat consumption linked to heart disease?",
140
- "How does cryptocurrency mining impact the environment?",
141
- "What are the benefits of electric cars?",
142
- "How does sleep deprivation affect cognitive function?",
143
- "What are the effects of social media on teenage mental health?",
144
- "What are the ethical concerns of facial recognition technology?",
145
- "How does air pollution contribute to lung diseases?",
146
- "What are the potential dangers of artificial general intelligence?",
147
- "How does meditation impact brain function?",
148
- "What are the psychological effects of video game addiction?"
149
- ]
150
-
151
- sample_urls = [
152
- "https://www.forbes.com/sites/forbestechcouncil/2023/10/15/impact-of-ai-on-the-job-market/",
153
- "https://www.fda.gov/food/food-labeling-nutrition/consumers-guide-gmo-foods",
154
- "https://www.nationalgeographic.com/environment/article/plastic-pollution",
155
- "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7453195/",
156
- "https://www.alz.org/alzheimers-dementia/treatments",
157
- "https://www.heart.org/en/news/2021/02/10/how-red-meat-affects-heart-health",
158
- "https://www.scientificamerican.com/article/how-bitcoin-mining-impacts-the-environment/",
159
- "https://www.tesla.com/blog/environmental-benefits-electric-cars",
160
- "https://www.sleepfoundation.org/sleep-deprivation",
161
- "https://www.psychologytoday.com/us/basics/teenagers-and-social-media",
162
- "https://www.brookings.edu/research/facial-recognition-technology-ethical-concerns/",
163
- "https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health",
164
- "https://futureoflife.org/background/benefits-risks-of-artificial-intelligence/",
165
- "https://www.mindful.org/meditation/mindfulness-getting-started/",
166
- "https://www.apa.org/news/press/releases/stress/2020/video-games"
167
- ]
168
-
169
- # **Run Validator & Save CSV**
170
- validator = URLValidator()
171
- results = []
172
- for query, url in zip(sample_queries, sample_urls):
173
- result = validator.rate_url_validity(query, url)
174
- results.append({
175
- "user_query": query,
176
- "url_to_check": url,
177
- "func_rating": round(result["raw_score"]["Final Validity Score"] / 20),
178
- "custom_rating": round(result["raw_score"]["Final Validity Score"] / 20) + 1
179
- })
180
-
181
- df = pd.DataFrame(results)
182
- df.to_csv("url_validation_results.csv", index=False)
183
-
184
- print("✅ CSV file 'url_validation_results.csv' has been created successfully!")
 
2
  from bs4 import BeautifulSoup
3
  from sentence_transformers import SentenceTransformer, util
4
  from transformers import pipeline
 
5
 
6
  class URLValidator:
7
  """
8
+ URL Validator class that evaluates the credibility of a webpage
9
+ using domain trust, content relevance, fact-checking, bias detection, and citations.
10
  """
11
 
12
  def __init__(self):
 
16
  self.sentiment_analyzer = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")
17
 
18
  def fetch_page_content(self, url: str) -> str:
19
+ """ Fetches and extracts text content from the given URL. """
20
  try:
21
+ headers = {"User-Agent": "Mozilla/5.0"} # Helps bypass some bot protections
22
  response = requests.get(url, timeout=10, headers=headers)
23
  response.raise_for_status()
24
  soup = BeautifulSoup(response.text, "html.parser")
 
33
  return f"Error: Unable to fetch URL ({str(e)})."
34
 
35
  def get_domain_trust(self, url: str, content: str) -> int:
36
+ """ Simulated function to assess domain trust. """
37
  if "Error" in content:
38
+ return 0
39
  return len(url) % 5 + 1 # Mock trust rating (1-5)
40
 
41
  def compute_similarity_score(self, user_query: str, content: str) -> int:
 
80
  return " ".join(reasons) if reasons else "This source is highly credible and relevant."
81
 
82
  def rate_url_validity(self, user_query: str, url: str):
83
+ """ Main function to evaluate the validity of a webpage. """
84
  content = self.fetch_page_content(url)
85
 
86
+ # Handle errors
87
  if "Error" in content:
88
+ return {"Validation Error": content}
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  domain_trust = self.get_domain_trust(url, content)
91
  similarity_score = self.compute_similarity_score(user_query, content)
 
115
  },
116
  "explanation": explanation
117
  }