Update app.py
app.py CHANGED
@@ -19,6 +19,7 @@ logger = logging.getLogger(__name__)
 # Global flag to track background loading
 loading_complete = True  # Start as True to allow initial rendering
 last_update_time = time.time()
+last_data_hash = None  # Track the hash of the last data to detect changes
 
 def load_feeds_in_background():
     global loading_complete, last_update_time
@@ -62,9 +63,20 @@ def get_all_docs_from_dbs():
 
     return all_docs
 
+def compute_data_hash(categorized_articles):
+    """Compute a hash of the current articles to detect changes."""
+    if not categorized_articles:
+        return ""
+    # Create a sorted string representation of the articles for consistent hashing
+    data_str = ""
+    for cat, articles in sorted(categorized_articles.items()):
+        for article in sorted(articles, key=lambda x: x["published"]):
+            data_str += f"{cat}|{article['title']}|{article['link']}|{article['published']}|"
+    return hashlib.sha256(data_str.encode('utf-8')).hexdigest()
+
 @app.route('/')
 def index():
-    global loading_complete, last_update_time
+    global loading_complete, last_update_time, last_data_hash
 
     # Check if any DB exists; if not, download from Hugging Face
     db_exists = any(os.path.exists(db_path) for db_path in glob.glob("chroma_db*"))
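Since compute_data_hash() sorts categories and sorts each category's articles by published, the digest should not depend on insertion order (note that sorted() is stable, so articles that tie on published keep their incoming order, which can still change the hash for the same set delivered in a different order). A minimal sanity check, assuming app.py imports cleanly as a module:

# Order-insensitivity check for the digest (hypothetical sample data;
# assumes `from app import compute_data_hash` works without side effects).
from app import compute_data_hash

sample = {
    "Tech": [{"title": "A", "link": "a", "published": "2024-01-01"}],
    "Science": [{"title": "B", "link": "b", "published": "2024-01-02"}],
}
reordered = {k: sample[k] for k in reversed(list(sample))}

assert compute_data_hash(sample) == compute_data_hash(reordered)
assert compute_data_hash({}) == ""  # empty input is defined to hash to ""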
@@ -133,11 +145,14 @@ def index():
         if len(categorized_articles[cat]) >= 2:
             logger.debug(f"Category {cat} top 2: {categorized_articles[cat][0]['title']} | {categorized_articles[cat][1]['title']}")
 
+        # Compute initial data hash
+        last_data_hash = compute_data_hash(categorized_articles)
+
         logger.info(f"Displaying articles at startup: {sum(len(articles) for articles in categorized_articles.values())} total")
         return render_template("index.html",
                                categorized_articles=categorized_articles,
                                has_articles=True,
-                               loading=True)
+                               loading=True)
     except Exception as e:
         logger.error(f"Error retrieving articles at startup: {e}")
         return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
@@ -155,7 +170,6 @@ def search():
     if not all_docs.get('metadatas'):
         return jsonify({"categorized_articles": {}, "has_articles": False, "loading": False})
 
-    # Simple keyword search for now (can be improved with similarity_search later)
     enriched_articles = []
     seen_keys = set()
     for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
@@ -170,7 +184,6 @@ def search():
         link = clean_text(link)
         description = clean_text(description)
 
-        # Basic keyword match
        if query.lower() in title or query.lower() in description:
            description_hash = hashlib.sha256(description.encode('utf-8')).hexdigest()
            key = f"{title}|{link}|{published}|{description_hash}"
@@ -209,11 +222,11 @@ def check_loading():
 
 @app.route('/get_updates')
 def get_updates():
-    global last_update_time
+    global last_update_time, last_data_hash
     try:
         all_docs = get_all_docs_from_dbs()
         if not all_docs.get('metadatas'):
-            return jsonify({"articles": [], "last_update": last_update_time})
+            return jsonify({"articles": [], "last_update": last_update_time, "has_updates": False})
 
         enriched_articles = []
         seen_keys = set()
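Every /get_updates response now carries a has_updates flag next to articles and last_update. A quick way to inspect the new shape in-process, sketched with Flask's test client (assumes importing app.py has no unwanted side effects and that it exposes the Flask instance as app):

# Hit /get_updates without a running server via the test client.
from app import app

with app.test_client() as client:
    payload = client.get("/get_updates").get_json()
    # "articles" stays empty ([] or {}) unless "has_updates" is True.
    print(payload["has_updates"], payload["last_update"])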
@@ -266,10 +279,29 @@ def get_updates():
             unique_articles.append(article)
         categorized_articles[cat] = unique_articles[:10]
 
-        return jsonify({"articles": categorized_articles, "last_update": last_update_time})
+        # Compute hash of new data
+        current_data_hash = compute_data_hash(categorized_articles)
+
+        # Compare with last data hash to determine if there are updates
+        has_updates = last_data_hash != current_data_hash
+        if has_updates:
+            logger.info("New RSS data detected, sending updates to frontend")
+            last_data_hash = current_data_hash
+            return jsonify({
+                "articles": categorized_articles,
+                "last_update": last_update_time,
+                "has_updates": True
+            })
+        else:
+            logger.info("No new RSS data, skipping update")
+            return jsonify({
+                "articles": {},
+                "last_update": last_update_time,
+                "has_updates": False
+            })
     except Exception as e:
         logger.error(f"Error fetching updates: {e}")
-        return jsonify({"articles": {}, "last_update": last_update_time}), 500
+        return jsonify({"articles": {}, "last_update": last_update_time, "has_updates": False}), 500
 
 @app.route('/get_all_articles/<category>')
 def get_all_articles(category):
|