DronA23 committed on
Commit
d670adf
·
verified ·
1 Parent(s): 9312f01

Upload Deliverable2.py

Browse files
Files changed (1) hide show
  1. Deliverable2.py +101 -0
Deliverable2.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Untitled2.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1UPM7vEPoqKmrXRZqw6b0A2nri9S6mawa
8
+ """
9
+
10
+ import requests
11
+ from bs4 import BeautifulSoup
12
+ from sentence_transformers import SentenceTransformer, util
13
+ from transformers import pipeline
14
+
15
class URLValidator:
    """Rate a URL against a user query by content relevance and sentiment.

    Constructing an instance loads three pretrained models: a sentence
    embedding model (relevance), a fake-news classifier, and a sentiment
    classifier whose label is mapped to a coarse "bias" score.
    """

    # Sentiment label (lower-cased) -> bias score. The
    # "cardiffnlp/twitter-roberta-base-sentiment" checkpoint reports generic
    # labels (LABEL_0 = negative, LABEL_1 = neutral, LABEL_2 = positive), so
    # both the generic and the human-readable spellings are accepted.
    _BIAS_SCORES = {
        "positive": 100,
        "label_2": 100,
        "neutral": 50,
        "label_1": 50,
    }
    # Score used for negative (or unrecognized) sentiment labels.
    _NEGATIVE_BIAS_SCORE = 30
    # Score used when there is no page content to classify.
    _NO_CONTENT_BIAS_SCORE = 50
    # Number of leading characters of the page handed to the sentiment model.
    _SENTIMENT_MAX_CHARS = 512

    def __init__(self):
        self.similarity_model = SentenceTransformer(
            'sentence-transformers/all-mpnet-base-v2'
        )
        # NOTE(review): loaded but not used by any method of this class;
        # kept so existing callers that read the attribute keep working.
        self.fake_news_classifier = pipeline(
            "text-classification",
            model="mrm8488/bert-tiny-finetuned-fake-news-detection",
        )
        self.sentiment_analyzer = pipeline(
            "text-classification",
            model="cardiffnlp/twitter-roberta-base-sentiment",
        )

    def fetch_page_content(self, url: str) -> str:
        """Return the text of all ``<p>`` elements at *url*, space-joined.

        Any ``requests.RequestException`` (connection error, timeout, HTTP
        error status) is treated as "no content" and yields ``""``.
        """
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException:
            return ""
        soup = BeautifulSoup(response.text, "html.parser")
        return " ".join(p.text for p in soup.find_all("p"))

    def compute_similarity_score(self, user_query: str, content: str) -> int:
        """Return the query/content cosine similarity scaled by 100.

        Returns 0 when *content* is empty. The value is truncated with
        ``int()``; it is not clamped, so a negative cosine similarity gives
        a negative score.
        """
        if not content:
            return 0
        query_embedding = self.similarity_model.encode(user_query)
        content_embedding = self.similarity_model.encode(content)
        similarity = util.pytorch_cos_sim(query_embedding, content_embedding)
        return int(similarity.item() * 100)

    def detect_bias(self, content: str) -> int:
        """Map the sentiment of the start of *content* to a bias score.

        Returns 100 for positive, 50 for neutral, and 30 for negative or
        unrecognized labels. Empty content returns 50.
        """
        if not content:
            return self._NO_CONTENT_BIAS_SCORE
        # The character slice keeps the input short; truncation=True also
        # guards the model's token limit, which a character count alone does
        # not guarantee.
        prediction = self.sentiment_analyzer(
            content[:self._SENTIMENT_MAX_CHARS], truncation=True
        )[0]
        label = str(prediction["label"]).lower()
        return self._BIAS_SCORES.get(label, self._NEGATIVE_BIAS_SCORE)

    def rate_url_validity(self, user_query: str, url: str) -> dict:
        """Fetch *url* and return its relevance and bias scores as a dict.

        The result has the keys ``"Query"``, ``"URL"``,
        ``"Content Relevance"`` and ``"Bias Score"``.
        """
        content = self.fetch_page_content(url)
        similarity_score = self.compute_similarity_score(user_query, content)
        bias_score = self.detect_bias(content)
        return {
            "Query": user_query,
            "URL": url,
            "Content Relevance": similarity_score,
            "Bias Score": bias_score,
        }
51
+
52
# (query, URL) pairs that are scored by the validator below.
queries_urls = [
    (
        "Climate change effects",
        "https://www.nationalgeographic.com/environment/article/climate-change-overview",
    ),
    (
        "COVID-19 vaccine effectiveness",
        "https://www.cdc.gov/coronavirus/2019-ncov/vaccines/effectiveness.html",
    ),
    (
        "Latest AI advancements",
        "https://www.technologyreview.com/topic/artificial-intelligence",
    ),
    ("Stock market trends", "https://www.bloomberg.com/markets"),
    (
        "Healthy diet tips",
        "https://www.healthline.com/nutrition/healthy-eating-tips",
    ),
    ("Space exploration missions", "https://www.nasa.gov/missions"),
    ("Electric vehicle benefits", "https://www.tesla.com/benefits"),
    (
        "History of the internet",
        "https://www.history.com/topics/inventions/history-of-the-internet",
    ),
    ("Python programming tutorials", "https://realpython.com"),
    (
        "Mental health awareness",
        "https://www.who.int/news-room/fact-sheets/detail/mental-health-strengthening-our-response",
    ),
]

# Score every pair with a single validator instance, then print one result
# dict per line.
validator = URLValidator()
results = [validator.rate_url_validity(*pair) for pair in queries_urls]

for result in results:
    print(result)
70
+
71
# Generate formatted output for the 10 predefined queries and URLs
# NOTE(review): this repeats the query/URL list defined earlier in the file;
# it is kept here so this section runs on its own.
queries_urls = [
    ("Climate change effects", "https://www.nationalgeographic.com/environment/article/climate-change-overview"),
    ("COVID-19 vaccine effectiveness", "https://www.cdc.gov/coronavirus/2019-ncov/vaccines/effectiveness.html"),
    ("Latest AI advancements", "https://www.technologyreview.com/topic/artificial-intelligence"),
    ("Stock market trends", "https://www.bloomberg.com/markets"),
    ("Healthy diet tips", "https://www.healthline.com/nutrition/healthy-eating-tips"),
    ("Space exploration missions", "https://www.nasa.gov/missions"),
    ("Electric vehicle benefits", "https://www.tesla.com/benefits"),
    ("History of the internet", "https://www.history.com/topics/inventions/history-of-the-internet"),
    ("Python programming tutorials", "https://realpython.com"),
    ("Mental health awareness", "https://www.who.int/news-room/fact-sheets/detail/mental-health-strengthening-our-response"),
]

# Placeholder function ratings for demonstration
import random

# One entry per (query, URL) pair. Both ratings are random integers in 1..5
# and are NOT produced by any model.
formatted_output = [
    {
        "Query": query,
        "URL": url,
        "Function Rating": random.randint(1, 5),  # Simulated rating
        "Custom Rating": random.randint(1, 5),  # Simulated rating
    }
    for query, url in queries_urls
]

# Display the formatted output. A bare `formatted_output` expression only
# renders in a notebook; as a script it is a no-op, so print each entry.
for output_entry in formatted_output:
    print(output_entry)
101
+