# Project2 / app.py
# File-viewer header captured with this file: avatar caption "SaiKumar1627's picture",
# commit "Update app.py" (dcd88b4, verified), file size 3.58 kB.
import requests
from bs4 import BeautifulSoup
import pandas as pd
class URLValidator:
    """
    Rate how credible a web page is for a given user query.

    The final rating is the average of two 1-5 sub-scores: a domain trust
    score and a query/content similarity score. Both sub-scores are currently
    placeholder heuristics derived from string lengths; no external models,
    fact-checking, bias detection, or citation analysis is performed.
    """

    def __init__(self):
        pass  # No external models used in this simplified version

    def fetch_page_content(self, url: str) -> str:
        """
        Fetch a web page and return the text of its ``<p>`` elements.

        Args:
            url: Address of the page to download.

        Returns:
            The text of every paragraph joined by single spaces, or an empty
            string if the request raises ``requests.RequestException``
            (this includes the HTTP error raised by ``raise_for_status``).
        """
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            return " ".join(p.text for p in soup.find_all("p"))
        except requests.RequestException:
            # Best effort: a page that cannot be fetched yields no content.
            return ""

    def get_domain_trust(self, url: str) -> int:
        """
        Return a mock domain trust rating for ``url``.

        Placeholder logic: the rating depends only on the length of the URL
        string, so it is deterministic but carries no real trust signal.

        Args:
            url: The URL being rated.

        Returns:
            An integer from 1 to 5.
        """
        return len(url) % 5 + 1  # Mock domain trust rating (1-5)

    def compute_similarity_score(self, user_query: str, content: str) -> int:
        """
        Return a mock similarity rating between the query and the page.

        Placeholder logic: the rating depends only on the length of
        ``user_query``; ``content`` is accepted but not inspected.

        Args:
            user_query: The user's question.
            content: Page text (currently unused).

        Returns:
            An integer from 1 to 5.
        """
        return len(user_query) % 5 + 1  # Mock similarity rating (1-5)

    @staticmethod
    def _round_half_up(value: float) -> int:
        """
        Round a non-negative number to the nearest integer, halves going up.

        The built-in ``round()`` rounds halves to the nearest even integer
        (2.5 -> 2, 4.5 -> 4), which would make averaged ratings inconsistent.
        Truncation after adding 0.5 is only valid for non-negative values,
        which is all this class produces (ratings are 1-5).
        """
        return int(value + 0.5)

    def rate_url_validity(self, user_query: str, url: str) -> int:
        """
        Evaluate a web page's credibility for ``user_query``.

        Args:
            user_query: The user's question.
            url: The page to evaluate.

        Returns:
            An integer rating from 1 to 5: the average of the domain trust
            and similarity scores, with .5 averages rounded up.
        """
        content = self.fetch_page_content(url)
        domain_trust = self.get_domain_trust(url)
        similarity_score = self.compute_similarity_score(user_query, content)
        # Final function rating (mock logic): average of the two sub-scores.
        return self._round_half_up((domain_trust + similarity_score) / 2)
# Example user prompts; each one is paired by position with an entry in the
# sample URL list defined next.
sample_queries = [
    "How does climate change impact global weather?",
    "What are the latest advancements in AI?",
    "How does diet influence mental health?",
    "What are the effects of space travel on astronauts?",
    "Is cryptocurrency a safe investment?",
    "What are the advantages of renewable energy?",
    "How does deep learning work?",
    "What are the health risks of 5G technology?",
    "Is intermittent fasting effective for weight loss?",
    "How do electric vehicles compare to gas cars?",
]
# Pages to rate, one per sample query (same order as the queries).
sample_urls = [
    "https://www.nationalgeographic.com/environment/article/climate-change",
    "https://www.technologyreview.com/2023/05/01/latest-ai-advancements/",
    "https://www.health.harvard.edu/mind-and-mood/foods-linked-to-better-brainpower",
    "https://www.nasa.gov/hrp/long-term-health-risks-of-space-travel",
    "https://www.investopedia.com/terms/c/cryptocurrency.asp",
    "https://www.energy.gov/eere/renewable-energy",
    "https://www.ibm.com/cloud/deep-learning",
    "https://www.who.int/news-room/questions-and-answers/item/radiation-5g-mobile-networks-and-health",
    "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6167940/",
    "https://www.tesla.com/blog/benefits-of-electric-vehicles",
]
# Build the validator used to rate every sample query/URL pair.
validator = URLValidator()

# Collect one [query, url, func_rating, custom_rating] row per pair.
data_rows = []
for query, url in zip(sample_queries, sample_urls):
    func_rating = validator.rate_url_validity(query, url)
    # Adjusted user rating: one point above the function rating, except that
    # a rating of 5 or more is left unchanged.
    if func_rating < 5:
        custom_rating = func_rating + 1
    else:
        custom_rating = func_rating
    data_rows.append([query, url, func_rating, custom_rating])

# Put the rows in a DataFrame and write them to a CSV file without the index.
csv_filename = "url_validation_results.csv"
df = pd.DataFrame(
    data_rows,
    columns=["user_prompt", "url_to_check", "func_rating", "custom_rating"],
)
df.to_csv(csv_filename, index=False)
print(f"CSV file '{csv_filename}' has been created successfully!")