SaiKumar1627 committed on
Commit
11e62a5
·
verified ·
1 Parent(s): afc89a3

Update deliverable2.py

Browse files
Files changed (1) hide show
  1. deliverable2.py +49 -2
deliverable2.py CHANGED
@@ -1,5 +1,6 @@
1
  import requests
2
  from bs4 import BeautifulSoup
 
3
  from sentence_transformers import SentenceTransformer, util
4
  from transformers import pipeline
5
 
@@ -18,7 +19,7 @@ class URLValidator:
18
  def fetch_page_content(self, url: str) -> str:
19
  """ Fetches and extracts text content from the given URL, handling errors gracefully. """
20
  try:
21
- headers = {"User-Agent": "Mozilla/5.0"} # Helps bypass some bot protections
22
  response = requests.get(url, timeout=10, headers=headers)
23
  response.raise_for_status()
24
  soup = BeautifulSoup(response.text, "html.parser")
@@ -81,7 +82,7 @@ class URLValidator:
81
 
82
  def rate_url_validity(self, user_query: str, url: str):
83
  """ Main function to evaluate the validity of a webpage. """
84
- content = self.fetch_page_content(url) # ✅ Properly indented and referenced with self
85
 
86
  # Handle errors
87
  if "Error" in content:
@@ -115,3 +116,49 @@ class URLValidator:
115
  },
116
  "explanation": explanation
117
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import requests
2
  from bs4 import BeautifulSoup
3
+ import pandas as pd
4
  from sentence_transformers import SentenceTransformer, util
5
  from transformers import pipeline
6
 
 
19
  def fetch_page_content(self, url: str) -> str:
20
  """ Fetches and extracts text content from the given URL, handling errors gracefully. """
21
  try:
22
+ headers = {"User-Agent": "Mozilla/5.0"}
23
  response = requests.get(url, timeout=10, headers=headers)
24
  response.raise_for_status()
25
  soup = BeautifulSoup(response.text, "html.parser")
 
82
 
83
  def rate_url_validity(self, user_query: str, url: str):
84
  """ Main function to evaluate the validity of a webpage. """
85
+ content = self.fetch_page_content(url)
86
 
87
  # Handle errors
88
  if "Error" in content:
 
116
  },
117
  "explanation": explanation
118
  }
119
+
120
+
121
+ # **✅ Sample Queries and URLs (10 Each)**
122
+ sample_queries = [
123
+ "How does climate change impact global weather?",
124
+ "What are the latest advancements in AI?",
125
+ "How does diet influence mental health?",
126
+ "What are the effects of space travel on astronauts?",
127
+ "Is cryptocurrency a safe investment?",
128
+ "What are the advantages of renewable energy?",
129
+ "How does deep learning work?",
130
+ "What are the health risks of 5G technology?",
131
+ "Is intermittent fasting effective for weight loss?",
132
+ "How do electric vehicles compare to gas cars?"
133
+ ]
134
+
135
+ sample_urls = [
136
+ "https://www.nationalgeographic.com/environment/article/climate-change",
137
+ "https://www.technologyreview.com/2023/05/01/latest-ai-advancements/",
138
+ "https://www.health.harvard.edu/mind-and-mood/foods-linked-to-better-brainpower",
139
+ "https://www.nasa.gov/hrp/long-term-health-risks-of-space-travel",
140
+ "https://www.investopedia.com/terms/c/cryptocurrency.asp",
141
+ "https://www.energy.gov/eere/renewable-energy",
142
+ "https://www.ibm.com/cloud/deep-learning",
143
+ "https://www.who.int/news-room/questions-and-answers/item/radiation-5g-mobile-networks-and-health",
144
+ "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6167940/",
145
+ "https://www.tesla.com/blog/benefits-of-electric-vehicles"
146
+ ]
147
+
148
+ # **✅ Running the Validator and Saving to CSV**
149
+ validator = URLValidator()
150
+
151
+ data_rows = []
152
+ for query, url in zip(sample_queries, sample_urls):
153
+ result = validator.rate_url_validity(query, url)
154
+ func_rating = round(result["raw_score"]["Final Validity Score"] / 20) # Convert 100-scale to 1-5
155
+ custom_rating = func_rating + 1 if func_rating < 5 else func_rating # User-adjusted rating
156
+
157
+ data_rows.append([query, url, func_rating, custom_rating])
158
+
159
+ # Save to CSV
160
+ csv_filename = "url_validation_results.csv"
161
+ df = pd.DataFrame(data_rows, columns=["user_prompt", "url_to_check", "func_rating", "custom_rating"])
162
+ df.to_csv(csv_filename, index=False)
163
+
164
+ print(f"✅ CSV file '{csv_filename}' has been created successfully!")