broadfield-dev committed
Commit 1c9f24c · verified · Parent: 1176acb

Update app.py

Files changed (1)
  1. app.py +40 -8
app.py CHANGED
@@ -19,6 +19,7 @@ logger = logging.getLogger(__name__)
 # Global flag to track background loading
 loading_complete = True  # Start as True to allow initial rendering
 last_update_time = time.time()
+last_data_hash = None  # Track the hash of the last data to detect changes
 
 def load_feeds_in_background():
     global loading_complete, last_update_time
@@ -62,9 +63,20 @@ def get_all_docs_from_dbs():
 
     return all_docs
 
+def compute_data_hash(categorized_articles):
+    """Compute a hash of the current articles to detect changes."""
+    if not categorized_articles:
+        return ""
+    # Create a sorted string representation of the articles for consistent hashing
+    data_str = ""
+    for cat, articles in sorted(categorized_articles.items()):
+        for article in sorted(articles, key=lambda x: x["published"]):
+            data_str += f"{cat}|{article['title']}|{article['link']}|{article['published']}|"
+    return hashlib.sha256(data_str.encode('utf-8')).hexdigest()
+
 @app.route('/')
 def index():
-    global loading_complete, last_update_time
+    global loading_complete, last_update_time, last_data_hash
 
     # Check if any DB exists; if not, download from Hugging Face
     db_exists = any(os.path.exists(db_path) for db_path in glob.glob("chroma_db*"))
@@ -133,11 +145,14 @@ def index():
             if len(categorized_articles[cat]) >= 2:
                 logger.debug(f"Category {cat} top 2: {categorized_articles[cat][0]['title']} | {categorized_articles[cat][1]['title']}")
 
+        # Compute initial data hash
+        last_data_hash = compute_data_hash(categorized_articles)
+
         logger.info(f"Displaying articles at startup: {sum(len(articles) for articles in categorized_articles.values())} total")
         return render_template("index.html",
                                categorized_articles=categorized_articles,
                                has_articles=True,
-                               loading=True)  # Show spinner while background task runs
+                               loading=True)
     except Exception as e:
         logger.error(f"Error retrieving articles at startup: {e}")
         return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
@@ -155,7 +170,6 @@ def search():
     if not all_docs.get('metadatas'):
         return jsonify({"categorized_articles": {}, "has_articles": False, "loading": False})
 
-    # Simple keyword search for now (can be improved with similarity_search later)
     enriched_articles = []
     seen_keys = set()
     for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
@@ -170,7 +184,6 @@ def search():
         link = clean_text(link)
         description = clean_text(description)
 
-        # Basic keyword match
         if query.lower() in title or query.lower() in description:
             description_hash = hashlib.sha256(description.encode('utf-8')).hexdigest()
             key = f"{title}|{link}|{published}|{description_hash}"
@@ -209,11 +222,11 @@ def check_loading():
 
 @app.route('/get_updates')
 def get_updates():
-    global last_update_time
+    global last_update_time, last_data_hash
     try:
         all_docs = get_all_docs_from_dbs()
         if not all_docs.get('metadatas'):
-            return jsonify({"articles": [], "last_update": last_update_time})
+            return jsonify({"articles": [], "last_update": last_update_time, "has_updates": False})
 
         enriched_articles = []
         seen_keys = set()
@@ -266,10 +279,29 @@ def get_updates():
                 unique_articles.append(article)
             categorized_articles[cat] = unique_articles[:10]
 
-        return jsonify({"articles": categorized_articles, "last_update": last_update_time})
+        # Compute hash of new data
+        current_data_hash = compute_data_hash(categorized_articles)
+
+        # Compare with last data hash to determine if there are updates
+        has_updates = last_data_hash != current_data_hash
+        if has_updates:
+            logger.info("New RSS data detected, sending updates to frontend")
+            last_data_hash = current_data_hash
+            return jsonify({
+                "articles": categorized_articles,
+                "last_update": last_update_time,
+                "has_updates": True
+            })
+        else:
+            logger.info("No new RSS data, skipping update")
+            return jsonify({
+                "articles": {},
+                "last_update": last_update_time,
+                "has_updates": False
+            })
     except Exception as e:
         logger.error(f"Error fetching updates: {e}")
-        return jsonify({"articles": {}, "last_update": last_update_time}), 500
+        return jsonify({"articles": {}, "last_update": last_update_time, "has_updates": False}), 500
 
 @app.route('/get_all_articles/<category>')
 def get_all_articles(category):
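The sorted traversal in compute_data_hash is what keeps the digest stable: the same set of articles hashes to the same value regardless of the order the Chroma DBs return them in, so /get_updates only reports a change when the content actually changed. A minimal sketch of that property (the function body is copied from the diff above; the sample articles are made up):

import hashlib

def compute_data_hash(categorized_articles):
    """Compute a hash of the current articles to detect changes."""
    if not categorized_articles:
        return ""
    data_str = ""
    for cat, articles in sorted(categorized_articles.items()):
        for article in sorted(articles, key=lambda x: x["published"]):
            data_str += f"{cat}|{article['title']}|{article['link']}|{article['published']}|"
    return hashlib.sha256(data_str.encode('utf-8')).hexdigest()

# Two orderings of the same articles produce the same digest.
articles = {"Tech": [
    {"title": "A", "link": "a", "published": "2025-01-01"},
    {"title": "B", "link": "b", "published": "2025-01-02"},
]}
shuffled = {"Tech": list(reversed(articles["Tech"]))}
assert compute_data_hash(articles) == compute_data_hash(shuffled)

One caveat: the inner sort key is published alone, so two articles sharing a timestamp can serialize in either order across polls (Python's sort is stable, preserving input order on ties), which would flip the hash and report a spurious update.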
 
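On the consumer side, a poller only needs to branch on the new has_updates flag, since articles is sent empty when nothing changed. A rough sketch of such a client, assuming the requests library and a locally hosted instance (the base URL, poll interval, and render stub are hypothetical, not part of this commit):

import time
import requests

BASE_URL = "http://localhost:5000"  # hypothetical host for the Flask app

def render(categorized_articles):
    # Stand-in for whatever the frontend does with fresh data
    for cat, items in categorized_articles.items():
        print(f"{cat}: {len(items)} articles")

def poll_updates(interval=30):
    """Poll /get_updates and re-render only when the server reports changes."""
    while True:
        data = requests.get(f"{BASE_URL}/get_updates", timeout=10).json()
        if data.get("has_updates"):
            render(data["articles"])  # populated only when has_updates is True
        time.sleep(interval)

if __name__ == "__main__":
    poll_updates()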