SaiKumar1627 committed on
Commit
25e89f4
·
verified ·
1 Parent(s): babc141

Update deliverable2.py

Browse files
Files changed (1) hide show
  1. deliverable2.py +58 -3
deliverable2.py CHANGED
@@ -2,6 +2,7 @@ import requests
2
  from bs4 import BeautifulSoup
3
  from sentence_transformers import SentenceTransformer, util
4
  from transformers import pipeline
 
5
 
6
  class URLValidator:
7
  """
@@ -18,7 +19,7 @@ class URLValidator:
18
  def fetch_page_content(self, url: str) -> str:
19
  """ Fetches and extracts text content from the given URL, handling errors gracefully. """
20
  try:
21
- headers = {"User-Agent": "Mozilla/5.0"} # Helps bypass some bot protections
22
  response = requests.get(url, timeout=10, headers=headers)
23
  response.raise_for_status()
24
  soup = BeautifulSoup(response.text, "html.parser")
@@ -83,7 +84,6 @@ class URLValidator:
83
  """ Main function to evaluate the validity of a webpage. """
84
  content = self.fetch_page_content(url)
85
 
86
- # If content fetching failed, return a properly structured response
87
  if "Error" in content:
88
  return {
89
  "raw_score": {
@@ -96,7 +96,7 @@ class URLValidator:
96
  "stars": {
97
  "icon": "❌"
98
  },
99
- "explanation": content # Display the error message
100
  }
101
 
102
  domain_trust = self.get_domain_trust(url, content)
@@ -127,3 +127,58 @@ class URLValidator:
127
  },
128
  "explanation": explanation
129
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from bs4 import BeautifulSoup
3
  from sentence_transformers import SentenceTransformer, util
4
  from transformers import pipeline
5
+ import pandas as pd
6
 
7
  class URLValidator:
8
  """
 
19
  def fetch_page_content(self, url: str) -> str:
20
  """ Fetches and extracts text content from the given URL, handling errors gracefully. """
21
  try:
22
+ headers = {"User-Agent": "Mozilla/5.0"}
23
  response = requests.get(url, timeout=10, headers=headers)
24
  response.raise_for_status()
25
  soup = BeautifulSoup(response.text, "html.parser")
 
84
  """ Main function to evaluate the validity of a webpage. """
85
  content = self.fetch_page_content(url)
86
 
 
87
  if "Error" in content:
88
  return {
89
  "raw_score": {
 
96
  "stars": {
97
  "icon": "❌"
98
  },
99
+ "explanation": content
100
  }
101
 
102
  domain_trust = self.get_domain_trust(url, content)
 
127
  },
128
  "explanation": explanation
129
  }
130
+
131
+
# ✅ **Updated 15 Queries and 15 Different URLs**
# The two lists are consumed in lockstep by the loop further down:
# sample_queries[i] is evaluated against sample_urls[i].
sample_queries = [
    "How does artificial intelligence impact the job market?",
    "What are the risks of genetically modified organisms (GMOs)?",
    "What are the environmental effects of plastic pollution?",
    "How does 5G technology affect human health?",
    "What are the latest treatments for Alzheimer's disease?",
    "Is red meat consumption linked to heart disease?",
    "How does cryptocurrency mining impact the environment?",
    "What are the benefits of electric cars?",
    "How does sleep deprivation affect cognitive function?",
    "What are the effects of social media on teenage mental health?",
    "What are the ethical concerns of facial recognition technology?",
    "How does air pollution contribute to lung diseases?",
    "What are the potential dangers of artificial general intelligence?",
    "How does meditation impact brain function?",
    "What are the psychological effects of video game addiction?",
]

sample_urls = [
    "https://www.forbes.com/sites/forbestechcouncil/2023/10/15/impact-of-ai-on-the-job-market/",
    "https://www.fda.gov/food/food-labeling-nutrition/consumers-guide-gmo-foods",
    "https://www.nationalgeographic.com/environment/article/plastic-pollution",
    "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7453195/",
    "https://www.alz.org/alzheimers-dementia/treatments",
    "https://www.heart.org/en/news/2021/02/10/how-red-meat-affects-heart-health",
    "https://www.scientificamerican.com/article/how-bitcoin-mining-impacts-the-environment/",
    "https://www.tesla.com/blog/environmental-benefits-electric-cars",
    "https://www.sleepfoundation.org/sleep-deprivation",
    "https://www.psychologytoday.com/us/basics/teenagers-and-social-media",
    "https://www.brookings.edu/research/facial-recognition-technology-ethical-concerns/",
    "https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health",
    "https://futureoflife.org/background/benefits-risks-of-artificial-intelligence/",
    "https://www.mindful.org/meditation/mindfulness-getting-started/",
    "https://www.apa.org/news/press/releases/stress/2020/video-games",
]

# **Run Validator & Save CSV**
# Every (query, URL) pair is scored with URLValidator.rate_url_validity and
# reduced to one CSV row holding two integer ratings.
validator = URLValidator()
results = []
for query, url in zip(sample_queries, sample_urls):
    result = validator.rate_url_validity(query, url)

    # "Final Validity Score" divided by 20 and rounded to the nearest int.
    # NOTE(review): presumably this maps a 0-100 score onto 0-5 — confirm
    # against rate_url_validity, whose scoring code is not visible here.
    func_rating = round(result["raw_score"]["Final Validity Score"] / 20)

    row = {
        "user_query": query,
        "url_to_check": url,
        "func_rating": func_rating,
        # The custom rating is always exactly one above the function rating.
        "custom_rating": func_rating + 1,
    }
    results.append(row)

# One row per evaluated pair; index=False keeps the DataFrame index out of
# the written file.
df = pd.DataFrame(results)
df.to_csv("url_validation_results.csv", index=False)

print("✅ CSV file 'url_validation_results.csv' has been created successfully!")