Spaces:

broadfield-dev
/

RSS_News

Sleeping

App Files Files Community

broadfield-dev committed on Feb 22

Commit

38e9099

verified ·

1 Parent(s): b5bbce9

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -11

app.py CHANGED Viewed

@@ -20,12 +20,12 @@ last_update_time = time.time()
 def load_feeds_in_background():
     global loading_complete, last_update_time
     try:
-        logger.info("Starting background RSS feed fetch")
         articles = fetch_rss_feeds()
         logger.info(f"Fetched {len(articles)} articles")
         process_and_store_articles(articles)
         last_update_time = time.time()
-        logger.info("Background feed processing complete")
         upload_to_hf_hub()
         loading_complete = True
     except Exception as e:
@@ -40,22 +40,29 @@ def index():
     db_exists = os.path.exists("chroma_db") and vector_db.get().get('documents')
     if not db_exists:
         loading_complete = False
-        logger.info("Downloading Chroma DB from Hugging Face Hub...")
         download_from_hf_hub()
-        threading.Thread(target=load_feeds_in_background, daemon=True).start()
-    elif not loading_complete:
-        pass  # Let background loading continue
-    else:
         loading_complete = True
     try:
-        # Retrieve all articles from Chroma DB
         all_docs = vector_db.get(include=['documents', 'metadatas'])
         if not all_docs.get('metadatas'):
-            logger.info("No articles in DB yet")
-            return render_template("index.html", categorized_articles={}, has_articles=False, loading=not loading_complete)
-        # Process and categorize articles with strict deduplication
         enriched_articles = []
         seen_keys = set()
         for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):

 def load_feeds_in_background():
     global loading_complete, last_update_time
     try:
+        logger.info("Starting background RSS feed fetch and database population")
         articles = fetch_rss_feeds()
         logger.info(f"Fetched {len(articles)} articles")
         process_and_store_articles(articles)
         last_update_time = time.time()
+        logger.info("Background feed processing and database population complete")
         upload_to_hf_hub()
         loading_complete = True
     except Exception as e:
     db_exists = os.path.exists("chroma_db") and vector_db.get().get('documents')
     if not db_exists:
         loading_complete = False
+        logger.info("Downloading Chroma DB from Hugging Face Hub or initializing empty DB...")
         download_from_hf_hub()
+        # Immediately populate the database with RSS feeds (synchronously for first load)
+        articles = fetch_rss_feeds()
+        process_and_store_articles(articles)
+        upload_to_hf_hub()
         loading_complete = True
+    else:
+        # Database exists, but check if loading is complete
+        if not loading_complete:
+            threading.Thread(target=load_feeds_in_background, daemon=True).start()
     try:
+        # Retrieve all articles from Chroma DB (always load from database)
         all_docs = vector_db.get(include=['documents', 'metadatas'])
         if not all_docs.get('metadatas'):
+            logger.info("No articles in DB yet, initializing with RSS feeds...")
+            articles = fetch_rss_feeds()
+            process_and_store_articles(articles)
+            upload_to_hf_hub()
+            all_docs = vector_db.get(include=['documents', 'metadatas'])
+        # Process and categorize articles with strict deduplication from the database
         enriched_articles = []
         seen_keys = set()
         for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):