Spaces:
Sleeping
Sleeping
Update deliverable2.py
Browse files- deliverable2.py +58 -3
deliverable2.py
CHANGED
@@ -2,6 +2,7 @@ import requests
|
|
2 |
from bs4 import BeautifulSoup
|
3 |
from sentence_transformers import SentenceTransformer, util
|
4 |
from transformers import pipeline
|
|
|
5 |
|
6 |
class URLValidator:
|
7 |
"""
|
@@ -18,7 +19,7 @@ class URLValidator:
|
|
18 |
def fetch_page_content(self, url: str) -> str:
|
19 |
""" Fetches and extracts text content from the given URL, handling errors gracefully. """
|
20 |
try:
|
21 |
-
headers = {"User-Agent": "Mozilla/5.0"}
|
22 |
response = requests.get(url, timeout=10, headers=headers)
|
23 |
response.raise_for_status()
|
24 |
soup = BeautifulSoup(response.text, "html.parser")
|
@@ -83,7 +84,6 @@ class URLValidator:
|
|
83 |
""" Main function to evaluate the validity of a webpage. """
|
84 |
content = self.fetch_page_content(url)
|
85 |
|
86 |
-
# If content fetching failed, return a properly structured response
|
87 |
if "Error" in content:
|
88 |
return {
|
89 |
"raw_score": {
|
@@ -96,7 +96,7 @@ class URLValidator:
|
|
96 |
"stars": {
|
97 |
"icon": "❌"
|
98 |
},
|
99 |
-
"explanation": content
|
100 |
}
|
101 |
|
102 |
domain_trust = self.get_domain_trust(url, content)
|
@@ -127,3 +127,58 @@ class URLValidator:
|
|
127 |
},
|
128 |
"explanation": explanation
|
129 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
from bs4 import BeautifulSoup
|
3 |
from sentence_transformers import SentenceTransformer, util
|
4 |
from transformers import pipeline
|
5 |
+
import pandas as pd
|
6 |
|
7 |
class URLValidator:
|
8 |
"""
|
|
|
19 |
def fetch_page_content(self, url: str) -> str:
|
20 |
""" Fetches and extracts text content from the given URL, handling errors gracefully. """
|
21 |
try:
|
22 |
+
headers = {"User-Agent": "Mozilla/5.0"}
|
23 |
response = requests.get(url, timeout=10, headers=headers)
|
24 |
response.raise_for_status()
|
25 |
soup = BeautifulSoup(response.text, "html.parser")
|
|
|
84 |
""" Main function to evaluate the validity of a webpage. """
|
85 |
content = self.fetch_page_content(url)
|
86 |
|
|
|
87 |
if "Error" in content:
|
88 |
return {
|
89 |
"raw_score": {
|
|
|
96 |
"stars": {
|
97 |
"icon": "❌"
|
98 |
},
|
99 |
+
"explanation": content
|
100 |
}
|
101 |
|
102 |
domain_trust = self.get_domain_trust(url, content)
|
|
|
127 |
},
|
128 |
"explanation": explanation
|
129 |
}
|
130 |
+
|
131 |
+
|
132 |
+
# ✅ **Updated 15 Queries and 15 Different URLs**
# The two lists are parallel: the query at index i is evaluated against the
# URL at index i.
sample_queries = [
    "How does artificial intelligence impact the job market?",
    "What are the risks of genetically modified organisms (GMOs)?",
    "What are the environmental effects of plastic pollution?",
    "How does 5G technology affect human health?",
    "What are the latest treatments for Alzheimer's disease?",
    "Is red meat consumption linked to heart disease?",
    "How does cryptocurrency mining impact the environment?",
    "What are the benefits of electric cars?",
    "How does sleep deprivation affect cognitive function?",
    "What are the effects of social media on teenage mental health?",
    "What are the ethical concerns of facial recognition technology?",
    "How does air pollution contribute to lung diseases?",
    "What are the potential dangers of artificial general intelligence?",
    "How does meditation impact brain function?",
    "What are the psychological effects of video game addiction?",
]

sample_urls = [
    "https://www.forbes.com/sites/forbestechcouncil/2023/10/15/impact-of-ai-on-the-job-market/",
    "https://www.fda.gov/food/food-labeling-nutrition/consumers-guide-gmo-foods",
    "https://www.nationalgeographic.com/environment/article/plastic-pollution",
    "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7453195/",
    "https://www.alz.org/alzheimers-dementia/treatments",
    "https://www.heart.org/en/news/2021/02/10/how-red-meat-affects-heart-health",
    "https://www.scientificamerican.com/article/how-bitcoin-mining-impacts-the-environment/",
    "https://www.tesla.com/blog/environmental-benefits-electric-cars",
    "https://www.sleepfoundation.org/sleep-deprivation",
    "https://www.psychologytoday.com/us/basics/teenagers-and-social-media",
    "https://www.brookings.edu/research/facial-recognition-technology-ethical-concerns/",
    "https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health",
    "https://futureoflife.org/background/benefits-risks-of-artificial-intelligence/",
    "https://www.mindful.org/meditation/mindfulness-getting-started/",
    "https://www.apa.org/news/press/releases/stress/2020/video-games",
]

# **Run Validator & Save CSV**
# zip() stops at the shorter input without warning, so verify the lists are
# still aligned rather than silently dropping trailing queries or URLs.
if len(sample_queries) != len(sample_urls):
    raise ValueError(
        f"sample_queries has {len(sample_queries)} entries but "
        f"sample_urls has {len(sample_urls)}; they must be the same length"
    )

validator = URLValidator()
results = []
for query, url in zip(sample_queries, sample_urls):
    result = validator.rate_url_validity(query, url)
    # Derive the rating once (final validity score divided by 20, rounded)
    # and reuse it for both columns instead of repeating the lookup.
    func_rating = round(result["raw_score"]["Final Validity Score"] / 20)
    results.append({
        "user_query": query,
        "url_to_check": url,
        "func_rating": func_rating,
        # NOTE(review): custom_rating is always func_rating + 1 with no upper
        # bound applied here -- confirm this is the intended rating scale.
        "custom_rating": func_rating + 1,
    })

# One CSV row per (query, URL) pair; index=False omits the DataFrame index.
df = pd.DataFrame(results)
df.to_csv("url_validation_results.csv", index=False)

print("✅ CSV file 'url_validation_results.csv' has been created successfully!")
|