Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import requests
|
2 |
from bs4 import BeautifulSoup
|
3 |
import pandas as pd
|
|
|
4 |
|
5 |
class URLValidator:
|
6 |
"""
|
@@ -18,16 +19,19 @@ class URLValidator:
|
|
18 |
response.raise_for_status()
|
19 |
soup = BeautifulSoup(response.text, "html.parser")
|
20 |
return " ".join([p.text for p in soup.find_all("p")])
|
21 |
-
except requests.RequestException:
|
|
|
22 |
return ""
|
23 |
|
24 |
def get_domain_trust(self, url: str) -> int:
|
25 |
""" Simulated function to assess domain trust. """
|
26 |
-
return len(url) % 5 + 1 # Mock domain trust rating (1-5)
|
27 |
|
28 |
def compute_similarity_score(self, user_query: str, content: str) -> int:
|
29 |
""" Simulated function to compute similarity between user query and content. """
|
30 |
-
|
|
|
|
|
31 |
|
32 |
def rate_url_validity(self, user_query: str, url: str) -> int:
|
33 |
""" Evaluates webpage credibility based on multiple scores. """
|
@@ -73,7 +77,7 @@ validator = URLValidator()
|
|
73 |
data_rows = []
|
74 |
for query, url in zip(sample_queries, sample_urls):
|
75 |
func_rating = validator.rate_url_validity(query, url)
|
76 |
-
custom_rating = func_rating + 1
|
77 |
data_rows.append([query, url, func_rating, custom_rating])
|
78 |
|
79 |
# Create DataFrame and Save to CSV
|
@@ -81,4 +85,8 @@ csv_filename = "url_validation_results.csv"
|
|
81 |
df = pd.DataFrame(data_rows, columns=["user_prompt", "url_to_check", "func_rating", "custom_rating"])
|
82 |
df.to_csv(csv_filename, index=False)
|
83 |
|
84 |
-
print(f"CSV file '{csv_filename}' has been created successfully!")
|
|
|
|
|
|
|
|
|
|
1 |
import requests
|
2 |
from bs4 import BeautifulSoup
|
3 |
import pandas as pd
|
4 |
+
import time
|
5 |
|
6 |
class URLValidator:
|
7 |
"""
|
|
|
19 |
response.raise_for_status()
|
20 |
soup = BeautifulSoup(response.text, "html.parser")
|
21 |
return " ".join([p.text for p in soup.find_all("p")])
|
22 |
+
except requests.RequestException as e:
|
23 |
+
print(f"Error fetching URL: {url} - {e}")
|
24 |
return ""
|
25 |
|
26 |
def get_domain_trust(self, url: str) -> int:
|
27 |
""" Simulated function to assess domain trust. """
|
28 |
+
return (len(url) % 5) + 1 # Mock domain trust rating (1-5)
|
29 |
|
30 |
def compute_similarity_score(self, user_query: str, content: str) -> int:
|
31 |
""" Simulated function to compute similarity between user query and content. """
|
32 |
+
if not content:
|
33 |
+
return 1 # If no content, assign lowest rating
|
34 |
+
return (len(user_query) % 5) + 1 # Mock similarity rating (1-5)
|
35 |
|
36 |
def rate_url_validity(self, user_query: str, url: str) -> int:
|
37 |
""" Evaluates webpage credibility based on multiple scores. """
|
|
|
77 |
data_rows = []
|
78 |
for query, url in zip(sample_queries, sample_urls):
|
79 |
func_rating = validator.rate_url_validity(query, url)
|
80 |
+
custom_rating = min(func_rating + 1, 5) # Adjusted user rating, max 5
|
81 |
data_rows.append([query, url, func_rating, custom_rating])
|
82 |
|
83 |
# Create DataFrame and Save to CSV
|
|
|
85 |
df = pd.DataFrame(data_rows, columns=["user_prompt", "url_to_check", "func_rating", "custom_rating"])
|
86 |
df.to_csv(csv_filename, index=False)
|
87 |
|
88 |
+
print(f"✅ CSV file '{csv_filename}' has been created successfully!")
|
89 |
+
|
90 |
+
# Keep the app running so Hugging Face Space does not stop
|
91 |
+
while True:
|
92 |
+
time.sleep(60) # Keep running indefinitely
|