Update deliverable2.py
deliverable2.py  CHANGED  (+9 -76)
@@ -2,12 +2,11 @@ import requests
 from bs4 import BeautifulSoup
 from sentence_transformers import SentenceTransformer, util
 from transformers import pipeline
-import pandas as pd
 
 class URLValidator:
     """
-
-    using
+    URL Validator class that evaluates the credibility of a webpage
+    using domain trust, content relevance, fact-checking, bias detection, and citations.
     """
 
     def __init__(self):
@@ -17,9 +16,9 @@ class URLValidator:
         self.sentiment_analyzer = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")
 
     def fetch_page_content(self, url: str) -> str:
-        """ Fetches and extracts text content from the given URL
+        """ Fetches and extracts text content from the given URL. """
         try:
-            headers = {"User-Agent": "Mozilla/5.0"}
+            headers = {"User-Agent": "Mozilla/5.0"}  # Helps bypass some bot protections
             response = requests.get(url, timeout=10, headers=headers)
             response.raise_for_status()
             soup = BeautifulSoup(response.text, "html.parser")
@@ -34,9 +33,9 @@ class URLValidator:
             return f"Error: Unable to fetch URL ({str(e)})."
 
     def get_domain_trust(self, url: str, content: str) -> int:
-        """
+        """ Simulated function to assess domain trust. """
         if "Error" in content:
-            return 0
+            return 0
         return len(url) % 5 + 1  # Mock trust rating (1-5)
 
     def compute_similarity_score(self, user_query: str, content: str) -> int:
@@ -81,23 +80,12 @@ class URLValidator:
         return " ".join(reasons) if reasons else "This source is highly credible and relevant."
 
     def rate_url_validity(self, user_query: str, url: str):
-        """ Main function to evaluate the validity of a webpage. """
+        """ Main function to evaluate the validity of a webpage. """
         content = self.fetch_page_content(url)
 
+        # Handle errors
         if "Error" in content:
-            return {
-                "raw_score": {
-                    "Domain Trust": 0,
-                    "Content Relevance": 0,
-                    "Fact-Check Score": 0,
-                    "Bias Score": 0,
-                    "Final Validity Score": 0
-                },
-                "stars": {
-                    "icon": "❌"
-                },
-                "explanation": content
-            }
+            return {"Validation Error": content}
 
         domain_trust = self.get_domain_trust(url, content)
         similarity_score = self.compute_similarity_score(user_query, content)
@@ -127,58 +115,3 @@ class URLValidator:
             },
             "explanation": explanation
         }
-
-
-# ✅ **Updated 15 Queries and 15 Different URLs**
-sample_queries = [
-    "How does artificial intelligence impact the job market?",
-    "What are the risks of genetically modified organisms (GMOs)?",
-    "What are the environmental effects of plastic pollution?",
-    "How does 5G technology affect human health?",
-    "What are the latest treatments for Alzheimer's disease?",
-    "Is red meat consumption linked to heart disease?",
-    "How does cryptocurrency mining impact the environment?",
-    "What are the benefits of electric cars?",
-    "How does sleep deprivation affect cognitive function?",
-    "What are the effects of social media on teenage mental health?",
-    "What are the ethical concerns of facial recognition technology?",
-    "How does air pollution contribute to lung diseases?",
-    "What are the potential dangers of artificial general intelligence?",
-    "How does meditation impact brain function?",
-    "What are the psychological effects of video game addiction?"
-]
-
-sample_urls = [
-    "https://www.forbes.com/sites/forbestechcouncil/2023/10/15/impact-of-ai-on-the-job-market/",
-    "https://www.fda.gov/food/food-labeling-nutrition/consumers-guide-gmo-foods",
-    "https://www.nationalgeographic.com/environment/article/plastic-pollution",
-    "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7453195/",
-    "https://www.alz.org/alzheimers-dementia/treatments",
-    "https://www.heart.org/en/news/2021/02/10/how-red-meat-affects-heart-health",
-    "https://www.scientificamerican.com/article/how-bitcoin-mining-impacts-the-environment/",
-    "https://www.tesla.com/blog/environmental-benefits-electric-cars",
-    "https://www.sleepfoundation.org/sleep-deprivation",
-    "https://www.psychologytoday.com/us/basics/teenagers-and-social-media",
-    "https://www.brookings.edu/research/facial-recognition-technology-ethical-concerns/",
-    "https://www.who.int/news-room/fact-sheets/detail/ambient-(outdoor)-air-quality-and-health",
-    "https://futureoflife.org/background/benefits-risks-of-artificial-intelligence/",
-    "https://www.mindful.org/meditation/mindfulness-getting-started/",
-    "https://www.apa.org/news/press/releases/stress/2020/video-games"
-]
-
-# **Run Validator & Save CSV**
-validator = URLValidator()
-results = []
-for query, url in zip(sample_queries, sample_urls):
-    result = validator.rate_url_validity(query, url)
-    results.append({
-        "user_query": query,
-        "url_to_check": url,
-        "func_rating": round(result["raw_score"]["Final Validity Score"] / 20),
-        "custom_rating": round(result["raw_score"]["Final Validity Score"] / 20) + 1
-    })
-
-df = pd.DataFrame(results)
-df.to_csv("url_validation_results.csv", index=False)
-
-print("✅ CSV file 'url_validation_results.csv' has been created successfully!")
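With the bundled driver gone, callers now invoke URLValidator directly. Below is a minimal usage sketch, not part of this commit: the import path is assumed from the filename, the success-path keys (raw_score, stars, explanation, Final Validity Score) are assumed from the unchanged return block above, and the error branch handles the {"Validation Error": ...} shape introduced by this change.

# Hypothetical usage sketch (not part of this commit); import path assumed from the filename.
from deliverable2 import URLValidator

validator = URLValidator()
result = validator.rate_url_validity(
    "What are the benefits of electric cars?",
    "https://www.tesla.com/blog/environmental-benefits-electric-cars",
)

if "Validation Error" in result:
    # New error shape from this commit: {"Validation Error": "Error: Unable to fetch URL (...)."}
    print(f"Could not rate URL: {result['Validation Error']}")
else:
    # Success shape assumed from the unchanged return block above.
    print(result["raw_score"]["Final Validity Score"], result["explanation"])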
|
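The commit also removes the in-module driver: the pandas import, the 15 sample queries and URLs, and the CSV export. If that workflow is still needed, a standalone driver adapted from the removed code might look roughly like the sketch below; because the new error return has no raw_score key, failed fetches are skipped rather than indexed.

# Hypothetical standalone driver, adapted from the code removed in this commit.
import pandas as pd

from deliverable2 import URLValidator  # import path assumed from the filename

sample_queries = [
    "How does artificial intelligence impact the job market?",
    "What are the benefits of electric cars?",
    # ... remaining queries from the removed driver
]
sample_urls = [
    "https://www.forbes.com/sites/forbestechcouncil/2023/10/15/impact-of-ai-on-the-job-market/",
    "https://www.tesla.com/blog/environmental-benefits-electric-cars",
    # ... remaining URLs from the removed driver
]

validator = URLValidator()
results = []
for query, url in zip(sample_queries, sample_urls):
    result = validator.rate_url_validity(query, url)
    if "Validation Error" in result:
        # New error shape: no "raw_score" key, so skip the row instead of indexing into it.
        continue
    func_rating = round(result["raw_score"]["Final Validity Score"] / 20)
    results.append({
        "user_query": query,
        "url_to_check": url,
        "func_rating": func_rating,
        "custom_rating": func_rating + 1,
    })

pd.DataFrame(results).to_csv("url_validation_results.csv", index=False)
print("CSV file 'url_validation_results.csv' has been created.")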