Update app.py
app.py CHANGED
@@ -19,6 +19,7 @@ logger = logging.getLogger(__name__)
 # Global flag to track background loading
 loading_complete = True  # Start as True to allow initial rendering
 last_update_time = time.time()
+last_data_hash = None  # Track the hash of the last data to detect changes
 
 def load_feeds_in_background():
     global loading_complete, last_update_time
@@ -62,9 +63,20 @@ def get_all_docs_from_dbs():
 
     return all_docs
 
+def compute_data_hash(categorized_articles):
+    """Compute a hash of the current articles to detect changes."""
+    if not categorized_articles:
+        return ""
+    # Create a sorted string representation of the articles for consistent hashing
+    data_str = ""
+    for cat, articles in sorted(categorized_articles.items()):
+        for article in sorted(articles, key=lambda x: x["published"]):
+            data_str += f"{cat}|{article['title']}|{article['link']}|{article['published']}|"
+    return hashlib.sha256(data_str.encode('utf-8')).hexdigest()
+
 @app.route('/')
 def index():
-    global loading_complete, last_update_time
+    global loading_complete, last_update_time, last_data_hash
 
     # Check if any DB exists; if not, download from Hugging Face
     db_exists = any(os.path.exists(db_path) for db_path in glob.glob("chroma_db*"))
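Since compute_data_hash() sorts categories and sorts each category's articles by published, the digest should not depend on insertion order (note that sorted() is stable, so articles that tie on published keep their incoming order, which can still change the hash for the same set delivered in a different order). A minimal sanity check, assuming app.py imports cleanly as a module:

# Order-insensitivity check for the digest (hypothetical sample data;
# assumes `from app import compute_data_hash` works without side effects).
from app import compute_data_hash

sample = {
    "Tech": [{"title": "A", "link": "a", "published": "2024-01-01"}],
    "Science": [{"title": "B", "link": "b", "published": "2024-01-02"}],
}
reordered = {k: sample[k] for k in reversed(list(sample))}

assert compute_data_hash(sample) == compute_data_hash(reordered)
assert compute_data_hash({}) == ""  # empty input is defined to hash to ""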
@@ -133,11 +145,14 @@ def index():
         if len(categorized_articles[cat]) >= 2:
             logger.debug(f"Category {cat} top 2: {categorized_articles[cat][0]['title']} | {categorized_articles[cat][1]['title']}")
 
+        # Compute initial data hash
+        last_data_hash = compute_data_hash(categorized_articles)
+
         logger.info(f"Displaying articles at startup: {sum(len(articles) for articles in categorized_articles.values())} total")
         return render_template("index.html",
                                categorized_articles=categorized_articles,
                                has_articles=True,
-                               loading=True)
+                               loading=True)
     except Exception as e:
         logger.error(f"Error retrieving articles at startup: {e}")
         return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
@@ -155,7 +170,6 @@ def search():
     if not all_docs.get('metadatas'):
         return jsonify({"categorized_articles": {}, "has_articles": False, "loading": False})
 
-    # Simple keyword search for now (can be improved with similarity_search later)
     enriched_articles = []
     seen_keys = set()
     for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
@@ -170,7 +184,6 @@ def search():
         link = clean_text(link)
         description = clean_text(description)
 
-        # Basic keyword match
        if query.lower() in title or query.lower() in description:
            description_hash = hashlib.sha256(description.encode('utf-8')).hexdigest()
            key = f"{title}|{link}|{published}|{description_hash}"
@@ -209,11 +222,11 @@ def check_loading():
 
 @app.route('/get_updates')
 def get_updates():
-    global last_update_time
+    global last_update_time, last_data_hash
     try:
         all_docs = get_all_docs_from_dbs()
         if not all_docs.get('metadatas'):
-            return jsonify({"articles": [], "last_update": last_update_time})
+            return jsonify({"articles": [], "last_update": last_update_time, "has_updates": False})
 
         enriched_articles = []
         seen_keys = set()
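Every /get_updates response now carries a has_updates flag next to articles and last_update. A quick way to inspect the new shape in-process, sketched with Flask's test client (assumes importing app.py has no unwanted side effects and that it exposes the Flask instance as app):

# Hit /get_updates without a running server via the test client.
from app import app

with app.test_client() as client:
    payload = client.get("/get_updates").get_json()
    # "articles" stays empty ([] or {}) unless "has_updates" is True.
    print(payload["has_updates"], payload["last_update"])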
@@ -266,10 +279,29 @@ def get_updates():
             unique_articles.append(article)
         categorized_articles[cat] = unique_articles[:10]
 
-        return jsonify({"articles": categorized_articles, "last_update": last_update_time})
+        # Compute hash of new data
+        current_data_hash = compute_data_hash(categorized_articles)
+
+        # Compare with last data hash to determine if there are updates
+        has_updates = last_data_hash != current_data_hash
+        if has_updates:
+            logger.info("New RSS data detected, sending updates to frontend")
+            last_data_hash = current_data_hash
+            return jsonify({
+                "articles": categorized_articles,
+                "last_update": last_update_time,
+                "has_updates": True
+            })
+        else:
+            logger.info("No new RSS data, skipping update")
+            return jsonify({
+                "articles": {},
+                "last_update": last_update_time,
+                "has_updates": False
+            })
     except Exception as e:
         logger.error(f"Error fetching updates: {e}")
-        return jsonify({"articles": {}, "last_update": last_update_time}), 500
+        return jsonify({"articles": {}, "last_update": last_update_time, "has_updates": False}), 500
 
 @app.route('/get_all_articles/<category>')
 def get_all_articles(category):
|