Upload 8 files
- .DS_Store +0 -0
- .devcontainer/devcontainer.json +33 -0
- README.md +85 -12
- fetch_data.py +278 -0
- main_page.py +243 -0
- notebooks/openai_data.ipynb +0 -0
- placeholder.jpeg +0 -0
- requirements.txt +72 -0
.DS_Store
ADDED
Binary file (6.15 kB)
.devcontainer/devcontainer.json
ADDED
@@ -0,0 +1,33 @@
{
    "name": "Python 3",
    // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
    "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
    "customizations": {
        "codespaces": {
            "openFiles": [
                "README.md",
                "main_page.py"
            ]
        },
        "vscode": {
            "settings": {},
            "extensions": [
                "ms-python.python",
                "ms-python.vscode-pylance"
            ]
        }
    },
    "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
    "postAttachCommand": {
        "server": "streamlit run main_page.py --server.enableCORS false --server.enableXsrfProtection false"
    },
    "portsAttributes": {
        "8501": {
            "label": "Application",
            "onAutoForward": "openPreview"
        }
    },
    "forwardPorts": [
        8501
    ]
}
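The updateContentCommand and postAttachCommand above install the pinned dependencies and then launch the Streamlit app on port 8501 inside the container. A minimal sketch of the same install-and-launch steps run locally, assuming Python and the repository files are already on your machine (this helper is illustrative only, not part of the repository):

import subprocess
import sys

# Install the pinned dependencies, as updateContentCommand does
subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)

# Launch the dashboard the same way postAttachCommand does; Streamlit serves on port 8501 by default
subprocess.run([
    sys.executable, "-m", "streamlit", "run", "main_page.py",
    "--server.enableCORS", "false",
    "--server.enableXsrfProtection", "false",
])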
README.md
CHANGED
@@ -1,12 +1,85 @@
# Latest AI News

This repository serves as a curated collection of the most recent and significant developments in artificial intelligence. The goal is to provide AI enthusiasts, researchers, students, and professionals with a centralized resource to stay updated on breakthroughs, research papers, product launches, and industry trends.

## 📚 Contents

This repository includes:
- News about major AI product releases
- Updates on AI industry trends
- Insights from AI conferences and events
- Resources for learning about new AI technologies

## 🔗 App Link

Check out the live application: https://latest-ai-news.streamlit.app/

## 🎬 Demo

Watch our video demonstration to see the app in action:

https://github.com/user-attachments/assets/1db2065e-766a-47f5-b23a-56aad8938841

## ✨ Features

- **Modern Card-Based Interface**: News displayed in an elegant three-column grid layout
- **Responsive Design**: Works well on different screen sizes
- **RSS Feed Integration**: Automatically pulls content from top AI news sources
- **Smart Filtering**: Filter news by date range and source
- **Customizable View**: Select specific news sources or view all content
- **Real-time Updates**: Refresh data with a single click
- **Image Support**: Displays source images when available

## 🔄 Updates

This repository is updated regularly to ensure the content remains current and relevant. Check back frequently or watch the repository to be notified of new updates.

## 💻 Technical Details

- **Framework**: Built with Streamlit for rapid development and easy deployment
- **Data Sources**: Aggregates news from 12+ leading AI publications including:
  - Google DeepMind Blog
  - OpenAI News
  - MIT Technology Review
  - NVIDIA Blog
  - Microsoft Research
  - The Berkeley AI Research Blog
  - And more...
- **Automatic Content Processing**: Handles different date formats and content structures
- **Parallel Processing**: Uses concurrent fetching for faster data retrieval

## 🤝 Contributing

Contributions are welcome! If you'd like to contribute to this repository:

1. Fork the repository
2. Create a new branch (`git checkout -b feature/your-feature`)
3. Commit your changes (`git commit -m 'Add some feature'`)
4. Push to the branch (`git push origin feature/your-feature`)
5. Open a Pull Request

### Contribution Guidelines
- Ensure all information is accurate and from reliable sources
- Include references or links to original sources
- Follow the existing formatting structure

## 📝 License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

## 📞 Contact

If you have any questions or suggestions, feel free to:
- Open an issue in this repository
- Contact the repository owner (Buse Koseoglu) through GitHub

## ⭐ Star This Repository

If you find this repository useful, please consider giving it a star to help others discover it!

---

Created and maintained by [Buse Koseoglu](https://github.com/busekoseoglu)
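Besides feeding the live app, fetch_data.py (added below) writes the aggregated feed to ai_news.xlsx when run as a script. A minimal sketch of consuming that export with pandas, mirroring the date/source filtering the dashboard performs (column names come from the script below; the seven-day window and the two sources are only examples):

import pandas as pd

# Load the Excel export produced by `python fetch_data.py`
news = pd.read_excel("ai_news.xlsx")

# Keep the last 7 days of items from two sources, newest first
recent = news[news["date"] >= news["date"].max() - pd.Timedelta(days=7)]
recent = recent[recent["Source"].isin(["OpenAI News", "Google DeepMind Blog"])]
print(recent.sort_values("date", ascending=False)[["date", "Source", "Title", "Link"]].head())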
fetch_data.py
ADDED
@@ -0,0 +1,278 @@
import feedparser
import pandas as pd
from datetime import datetime, timedelta
import ssl
from bs4 import BeautifulSoup
import warnings
import concurrent.futures
import re
import requests

warnings.filterwarnings("ignore")

URL = "https://www.deeplearning.ai/the-batch/"

# Configure SSL once at the module level
if hasattr(ssl, '_create_unverified_context'):
    ssl._create_default_https_context = ssl._create_unverified_context

def extract_date(date_str):
    """Extract date from various formats using regex patterns"""
    try:
        # Try different patterns to match various date formats

        # Pattern 1: Standard RFC format like "Mon, 14 Apr 2025 10:00:00 GMT"
        pattern1 = r'(?:\w+,\s+)?(\d{1,2}\s+\w{3}\s+\d{4})'
        match = re.search(pattern1, date_str)
        if match:
            date_str = match.group(1)
            return pd.to_datetime(date_str, format='%d %b %Y')

        # Pattern 2: Simple format like "14 Apr 2025"
        pattern2 = r'(\d{1,2}\s+\w{3}\s+\d{4})'
        match = re.search(pattern2, date_str)
        if match:
            return pd.to_datetime(match.group(1), format='%d %b %Y')

        # Pattern 3: ISO format like "2025-04-14"
        pattern3 = r'(\d{4}-\d{2}-\d{2})'
        match = re.search(pattern3, date_str)
        if match:
            return pd.to_datetime(match.group(1))

        # Pattern 4: Format like "Mar 12, 2025"
        pattern4 = r'(\w{3}\s+\d{1,2},\s+\d{4})'
        match = re.search(pattern4, date_str)
        if match:
            return pd.to_datetime(match.group(1), format='%b %d, %Y')

        # If none of the patterns match, return original parsed date
        return pd.to_datetime(date_str)
    except:
        # If all else fails, return NaT
        return pd.NaT

def clean_html(text):
    """Clean HTML tags from text"""
    try:
        soup = BeautifulSoup(text, "html.parser")
        return soup.get_text()
    except Exception as e:
        print(f"Error cleaning HTML: {e}")
        return text

def extract_image_url(entry, description):
    """Extract image URL from RSS entry if available"""
    try:
        # Check for media:content
        if hasattr(entry, 'media_content') and entry.media_content:
            for media in entry.media_content:
                if isinstance(media, dict) and 'url' in media:
                    return media['url']

        # Check for media:thumbnail
        if hasattr(entry, 'media_thumbnail') and entry.media_thumbnail:
            for media in entry.media_thumbnail:
                if isinstance(media, dict) and 'url' in media:
                    return media['url']

        # Check for enclosures
        if hasattr(entry, 'enclosures') and entry.enclosures:
            for enclosure in entry.enclosures:
                if isinstance(enclosure, dict) and 'url' in enclosure and enclosure.get('type', '').startswith('image/'):
                    return enclosure['url']

        # Try to extract from description using BeautifulSoup
        if description:
            soup = BeautifulSoup(description, "html.parser")

            # First, check meta tags for twitter:image
            meta_img = soup.find('meta', attrs={'name': 'twitter:image'})
            if meta_img and meta_img.has_attr('content'):
                return meta_img['content']

            # Then check for regular img tags
            img_tag = soup.find('img')
            if img_tag and img_tag.has_attr('src'):
                return img_tag['src']

            # Try to extract image URL from HTML
            img_match = re.search(r'<img[^>]+src=[\'"]([^\'"]+)[\'"]', description)
            if img_match:
                return img_match.group(1)

        # No image found
        return None
    except Exception as e:
        print(f"Error extracting image URL: {e}")
        return None

def fetch_single_feed(link_source_tuple):
    """Fetch a single RSS feed and return its entries"""
    link, source = link_source_tuple
    entries = {"Title": [], "Link": [], "Published": [], "Description": [], "Source": [], "Image": []}

    try:
        feed = feedparser.parse(link)

        for entry in feed.entries:
            title = entry.get("title", "No Title")
            link = entry.get("link", "No Link")
            published = entry.get("published", "No Date")
            description = entry.get("description", "No Description")

            # Extract image URL
            image_url = extract_image_url(entry, description)

            entries["Title"].append(title)
            entries["Link"].append(link)
            entries["Published"].append(published)
            entries["Description"].append(description)
            entries["Source"].append(source)
            entries["Image"].append(image_url)  # Add image URL

    except Exception as e:
        print(f"Error fetching {link}: {e}")

    return entries

def fetch_feed(links):
    """Fetch multiple RSS feeds in parallel"""
    all_entries = {"Title": [], "Link": [], "Published": [], "Description": [], "Source": [], "Image": []}

    # Use ThreadPoolExecutor to fetch feeds in parallel
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        future_to_link = {executor.submit(fetch_single_feed, (link, source)): (link, source)
                          for link, source in links.items()}

        for future in concurrent.futures.as_completed(future_to_link):
            link, source = future_to_link[future]
            try:
                result = future.result()
                # Merge results into all_entries
                for key in all_entries:
                    all_entries[key].extend(result[key])
            except Exception as e:
                print(f"Exception for {link}: {e}")

    # Create a DataFrame from all entries
    df = pd.DataFrame(all_entries)
    return df

def scrape_the_batch_articles():
    all_entries = {"Title": [], "Link": [], "Published": [], "Description": [], "Source": [], "Image": []}

    try:
        res = requests.get(URL)
        soup = BeautifulSoup(res.text, "html.parser")

        articles = soup.find_all("article")

        for article in articles:
            # Link
            link_tag = article.find("a", href=True)
            link = "https://www.deeplearning.ai" + link_tag["href"] if link_tag else "#"

            # Title
            title_tag = article.find("h2")
            title = title_tag.get_text(strip=True) if title_tag else "No title"

            # Summary
            summary_tag = article.find("div", class_="text-sm")
            summary = summary_tag.get_text(strip=True) if summary_tag else ""

            # Date (based on div with specific class)
            date_tag = article.find("div", class_="text-slate-500")
            date_str = date_tag.get_text(strip=True) if date_tag else ""

            # Image
            img_tag = article.find("img")
            image_url = img_tag["src"] if img_tag and img_tag.has_attr("src") else None

            try:
                parsed_date = datetime.strptime(date_str, "%b %d, %Y")
            except Exception as e:
                parsed_date = None

            if parsed_date:
                all_entries["Title"].append(title)
                all_entries["Description"].append(summary)
                all_entries["Link"].append(link)
                all_entries["Published"].append(date_str)
                all_entries["Source"].append("deeplearning.ai")
                all_entries["Image"].append(image_url)

        return pd.DataFrame(all_entries)
    except Exception as e:
        print(f"Error scraping The Batch: {e}")
        return pd.DataFrame()

def extract_and_clean_data(df):
    """Process and clean the feed data"""
    if df.empty:
        return df

    try:
        # Apply the custom date extraction function
        df['date'] = df['Published'].apply(extract_date)

        # Drop rows with invalid dates
        df = df.dropna(subset=['date'])

        # Drop the original 'Published' column
        df.drop(columns=['Published'], inplace=True)

        # Filter for the last 30 days (increased from 7 for more content)
        today = datetime.now()
        thirty_days_ago = today - timedelta(days=30)
        df_filtered = df[(df['date'] >= thirty_days_ago) & (df['date'] <= today)]

        # Sort by date in descending order
        df_filtered = df_filtered.sort_values(by='date', ascending=False)

        # Clean HTML and limit description length in one step
        df_filtered['Description'] = df_filtered['Description'].apply(
            lambda x: clean_html(x)[:500].replace("\n", "")
        )

        return df_filtered

    except Exception as e:
        print(f"An error occurred while processing the data: {e}")
        return pd.DataFrame()

def main():
    # RSS links
    links = {
        "https://bair.berkeley.edu/blog/feed.xml": "The Berkeley Artificial Intelligence Research Blog",
        "https://feeds.feedburner.com/nvidiablog": "NVIDIA Blog",
        "https://www.microsoft.com/en-us/research/feed/": "Microsoft Research",
        "https://www.sciencedaily.com/rss/computers_math/artificial_intelligence.xml": "Science Daily",
        "https://research.facebook.com/feed/": "META Research",
        "https://openai.com/news/rss.xml": "OpenAI News",
        "https://deepmind.google/blog/feed/basic/": "Google DeepMind Blog",
        "https://news.mit.edu/rss/topic/artificial-intelligence2": "MIT News - Artificial intelligence",
        "https://www.technologyreview.com/topic/artificial-intelligence/feed": "MIT Technology Review - Artificial intelligence",
        "https://www.wired.com/feed/tag/ai/latest/rss": "Wired: Artificial Intelligence Latest",
        "https://raw.githubusercontent.com/Olshansk/rss-feeds/refs/heads/main/feeds/feed_ollama.xml": "Ollama Blog",
        "https://newsroom.ibm.com/press-releases-artificial-intelligence?pagetemplate=rss": "IBM - Announcements (Artificial intelligence)"
    }

    # Fetch data from The Batch
    batch_df = scrape_the_batch_articles()

    # Fetch data from RSS feeds
    rss_df = fetch_feed(links)

    # Combine both dataframes
    combined_df = pd.concat([batch_df, rss_df], ignore_index=True)

    # Process and clean data
    final_df = extract_and_clean_data(combined_df)

    return final_df

if __name__ == "__main__":
    df = main()
    print(df.head())
    df.to_excel("ai_news.xlsx")
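A quick sketch of how the date handling above behaves across the feed formats it targets (assuming fetch_data.py is importable from the project root); every sample resolves to the same date:

from fetch_data import extract_date

# The four regex patterns cover RFC-style, plain, ISO and "Mon DD, YYYY" dates
samples = [
    "Mon, 14 Apr 2025 10:00:00 GMT",  # Pattern 1: RFC-style RSS pubDate
    "14 Apr 2025",                    # Pattern 2: simple day-month-year
    "2025-04-14",                     # Pattern 3: ISO date
    "Apr 14, 2025",                   # Pattern 4: "Mon DD, YYYY" blog style
]
for s in samples:
    print(s, "->", extract_date(s))   # each resolves to 2025-04-14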
main_page.py
ADDED
@@ -0,0 +1,243 @@
import streamlit as st
import pandas as pd
from datetime import datetime, timedelta
from fetch_data import main
from PIL import Image
import base64
import os

# Function to load the local placeholder image
def get_base64_encoded_image(image_path):
    with open(image_path, "rb") as img_file:
        return base64.b64encode(img_file.read()).decode('utf-8')

# Placeholder image path - set your own file path here
PLACEHOLDER_IMAGE_PATH = "placeholder.jpeg"  # Assumed to be in the same directory as this script

# Convert the image to base64 (if the file exists)
if os.path.exists(PLACEHOLDER_IMAGE_PATH):
    img_base64 = get_base64_encoded_image(PLACEHOLDER_IMAGE_PATH)
    PLACEHOLDER_IMAGE = f"data:image/jpeg;base64,{img_base64}"
else:
    # If the file is not found, fall back to an online image
    PLACEHOLDER_IMAGE = "https://img.freepik.com/free-vector/artificial-intelligence-ai-robot-server-room-digital-technology-banner_39422-794.jpg"
    st.warning(f"Placeholder image not found at {PLACEHOLDER_IMAGE_PATH}. Using fallback image.")

all_sources = ["The Berkeley Artificial Intelligence Research Blog", "NVIDIA Blog", "Microsoft Research", "Science Daily", "META Research", "OpenAI News",
               "Google DeepMind Blog", "MIT News - Artificial intelligence", "MIT Technology Review - Artificial intelligence", "Wired: Artificial Intelligence Latest",
               "Ollama Blog", "IBM - Announcements (Artificial intelligence)", "deeplearning.ai"]


# Use Streamlit's built-in caching
@st.cache_data(ttl=60)  # Cache for 1 minute
def get_data():
    with st.spinner('Fetching latest AI news...'):
        return main()

def run_dashboard():
    st.title("Latest AI News")

    # Add sidebar for all filters
    with st.sidebar:
        st.header("Filter Options")

        # Add a refresh button at the top of sidebar
        if st.button("Refresh News"):
            # Clear the cache and get fresh data
            st.cache_data.clear()
            st.rerun()

    # Load data with caching
    try:
        df = get_data()

        # Check if df is empty
        if df.empty:
            st.error("No news data available. Please try refreshing later.")
            return

        # Get min and max dates
        min_date = df['date'].min()
        max_date = df['date'].max()

        # Calculate default date range (last 7 days)
        default_end_date = max_date
        default_start_date = default_end_date - timedelta(days=7)
        if default_start_date < min_date:
            default_start_date = min_date

        # Continue with sidebar for filters
        with st.sidebar:
            # Date filter
            selected_dates = st.date_input(
                "Choose dates",
                value=(default_start_date, default_end_date),
                min_value=min_date,
                max_value=max_date
            )

            # Handle single date selection
            if len(selected_dates) == 1:
                start_date = selected_dates[0]
                end_date = selected_dates[0]
            else:
                start_date, end_date = selected_dates

            # Get unique sources from predefined list
            sources = sorted(all_sources)

            # Use multiselect without "All" option
            selected_sources = st.multiselect(
                "Choose sources",
                options=sources
            )

        # Main content area for displaying news
        # Convert dates to datetime
        start_date = pd.to_datetime(start_date)
        end_date = pd.to_datetime(end_date)

        # Filter by date range
        df_filtered = df[(df['date'] >= start_date) & (df['date'] <= end_date)]

        # Handle source filtering
        if selected_sources:  # If sources are selected
            # Filter by selected sources
            df_filtered = df_filtered[df_filtered['Source'].isin(selected_sources)]
        # If no sources selected, show all (no additional filtering needed)

        # Display results
        if len(df_filtered) > 0:

            # Apply CSS styling for cards
            st.markdown("""
            <style>
            .news-card {
                border-radius: 20px;
                padding: 0;
                margin-bottom: 20px;
                background-color: #E8E8E8;
                height: 480px;
                overflow: hidden;
                position: relative;
                box-shadow: 0 3px 6px rgba(0, 0, 0, 0.1);
            }
            .news-image-container {
                width: 100%;
                height: 220px;
                overflow: hidden;
                display: flex;
                align-items: center;
                justify-content: center;
                padding: 10px 10px 0 10px;
            }
            .news-image {
                width: 100%;
                height: 100%;
                object-fit: cover;
                border-radius: 12px;
            }
            .news-content {
                padding: 12px 15px;
            }
            .news-title {
                font-weight: bold;
                font-size: 16px;
                margin-bottom: 10px;
                color: #000;
                line-height: 1.3;
                max-height: 105px;
                display: -webkit-box;
                -webkit-line-clamp: 4;
                -webkit-box-orient: vertical;
                overflow: hidden;
            }
            .news-meta {
                display: flex;
                justify-content: space-between;
                margin-bottom: 12px;
                align-items: center;
                border-bottom: 1px solid #ddd;
                padding-bottom: 8px;
            }
            .news-source {
                color: #555;
                font-size: 12px;
                font-style: italic;
            }
            .news-date {
                color: #555;
                font-size: 12px;
                text-align: right;
                font-style: italic;
            }
            .news-description {
                color: #333;
                font-size: 13px;
                padding-bottom: 10px;
                line-height: 1.4;
                display: -webkit-box;
                -webkit-line-clamp: 5;
                -webkit-box-orient: vertical;
                overflow: hidden;
                height: 90px;
            }
            </style>
            """, unsafe_allow_html=True)

            # Create 3-column layout
            num_cols = 3

            # Process news items in groups of 3 for the grid
            for i in range(0, len(df_filtered), num_cols):
                cols = st.columns(num_cols)

                # Get the current batch of news items
                current_batch = df_filtered.iloc[i:i+num_cols]

                # Display each news item in its column
                for j, (_, row) in enumerate(current_batch.iterrows()):
                    if j < len(cols):  # Ensure we have a column for this item
                        with cols[j]:
                            # If the source is deeplearning.ai or there is no valid image, use the placeholder
                            if row['Source'] == "deeplearning.ai" or not pd.notna(row.get('Image')) or row.get('Image') is None:
                                image_url = PLACEHOLDER_IMAGE
                            else:
                                image_url = row['Image']

                            # Format the date
                            date_str = row['date'].strftime('%d %b %Y')

                            # Truncate description if it's too long
                            description = row['Description'][:150] + "..." if len(row['Description']) > 150 else row['Description']

                            # Display card with HTML
                            html_content = f"""
                            <a href="{row['Link']}" target="_blank" style="text-decoration: none; color: inherit;">
                                <div class="news-card">
                                    <div class="news-image-container">
                                        <img src="{image_url}" class="news-image" onerror="this.onerror=null;this.src='{PLACEHOLDER_IMAGE}';">
                                    </div>
                                    <div class="news-content">
                                        <div class="news-title">{row['Title']}</div>
                                        <div class="news-meta">
                                            <div class="news-source">{row['Source']}</div>
                                            <div class="news-date">{date_str}</div>
                                        </div>
                                        <div class="news-description">{description}</div>
                                    </div>
                                </div>
                            </a>
                            """
                            st.markdown(html_content, unsafe_allow_html=True)
        else:
            st.warning("No news found with the selected filters. Please adjust your date range or source selection.")

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
        st.info("Try refreshing the data using the button above.")

if __name__ == '__main__':
    run_dashboard()
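The refresh flow in main_page.py hinges on Streamlit's data cache: get_data() is memoized for 60 seconds, and the sidebar button clears the cache and reruns the script so the next call hits the feeds again. A stripped-down sketch of that pattern, with a timestamp standing in for the real fetch (illustrative only):

import streamlit as st
import time

@st.cache_data(ttl=60)              # result is reused for up to one minute
def get_data():
    return f"fetched at {time.strftime('%H:%M:%S')}"

if st.button("Refresh"):
    st.cache_data.clear()            # forget the cached result...
    st.rerun()                       # ...and rerun the script so get_data() executes again

st.write(get_data())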
notebooks/openai_data.ipynb
ADDED
The diff for this file is too large to render.
placeholder.jpeg
ADDED
requirements.txt
ADDED
@@ -0,0 +1,72 @@
altair==5.5.0
appnope==0.1.4
asttokens==3.0.0
attrs==25.3.0
beautifulsoup4==4.13.3
blinker==1.9.0
bs4==0.0.2
cachetools==5.5.2
certifi==2025.1.31
charset-normalizer==3.4.1
click==8.1.8
comm==0.2.2
debugpy==1.8.14
decorator==5.2.1
et_xmlfile==2.0.0
executing==2.2.0
feedparser==6.0.11
gitdb==4.0.12
GitPython==3.1.44
idna==3.10
ipykernel==6.29.5
ipython==9.1.0
ipython_pygments_lexers==1.1.1
jedi==0.19.2
Jinja2==3.1.6
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
jupyter_client==8.6.3
jupyter_core==5.7.2
MarkupSafe==3.0.2
matplotlib-inline==0.1.7
narwhals==1.34.1
nest-asyncio==1.6.0
numpy==2.2.4
oauthlib==3.2.2
openpyxl==3.1.5
packaging==24.2
pandas==2.2.3
parso==0.8.4
pexpect==4.9.0
pillow==11.1.0
platformdirs==4.3.7
prompt_toolkit==3.0.50
protobuf==5.29.4
psutil==7.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
pyarrow==19.0.1
pydeck==0.9.1
Pygments==2.19.1
python-dateutil==2.9.0.post0
pytz==2025.2
pyzmq==26.4.0
referencing==0.36.2
requests==2.32.3
requests-oauthlib==2.0.0
rpds-py==0.24.0
sgmllib3k==1.0.0
six==1.17.0
smmap==5.0.2
soupsieve==2.6
stack-data==0.6.3
streamlit==1.44.1
tenacity==9.1.2
toml==0.10.2
tornado==6.4.2
traitlets==5.14.3
tweepy==4.15.0
typing_extensions==4.13.2
tzdata==2025.2
urllib3==2.4.0
wcwidth==0.2.13