Spaces:

shayan5422
/

back_rag_huggingface

Running

App Files Community

shayan5422 committed 20 days ago

Commit

3214c9b

verified ·

1 Parent(s): 21cad66

Upload 2 files

Browse files

Files changed (2) hide show

app.py +25 -1
build_index.py +33 -2

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import traceback
 app = Flask(__name__) # Create app object FIRST
 # Allow requests from the Vercel frontend and localhost for development
-CORS(app, origins=["http://127.0.0.1:3000", "http://localhost:3000", "https://rag-huggingface.vercel.app"], supports_credentials=True)
 # --- Configuration ---
 INDEX_FILE = "index.faiss"
@@ -161,6 +161,20 @@ def search():
                             with open(filepath, 'r', encoding='utf-8') as f:
                                 model_data = json.load(f)
                                 description = model_data.get('description')
                         except Exception as e:
                             print(f"Error reading description file {filepath}: {e}")
                             # Keep description as None
@@ -182,4 +196,14 @@ def search():
         traceback.print_exc() # Print full traceback for search errors
         return jsonify({"error": "An error occurred during search."}), 500
 # The if __name__ == '__main__': block remains removed.

 app = Flask(__name__) # Create app object FIRST
 # Allow requests from the Vercel frontend and localhost for development
+CORS(app, origins=["http://127.0.0.1:3000", "http://localhost:3000", "https://rag-huggingface.vercel.app", "https://rag-huggingface-frontend.vercel.app"], supports_credentials=True)
 # --- Configuration ---
 INDEX_FILE = "index.faiss"
                             with open(filepath, 'r', encoding='utf-8') as f:
                                 model_data = json.load(f)
                                 description = model_data.get('description')
+                                # Add additional metadata fields if not already present
+                                if 'model_explanation_gemini' not in metadata and 'model_explanation_gemini' in model_data:
+                                    metadata['model_explanation_gemini'] = model_data.get('model_explanation_gemini')
+                                if 'release_year' not in metadata and 'release_year' in model_data:
+                                    metadata['release_year'] = model_data.get('release_year')
+                                if 'parameter_count' not in metadata and 'parameter_count' in model_data:
+                                    metadata['parameter_count'] = model_data.get('parameter_count')
+                                if 'is_fine_tuned' not in metadata and 'is_fine_tuned' in model_data:
+                                    metadata['is_fine_tuned'] = model_data.get('is_fine_tuned')
+                                if 'category' not in metadata and 'category' in model_data:
+                                    metadata['category'] = model_data.get('category')
+                                if 'model_family' not in metadata and 'model_family' in model_data:
+                                    metadata['model_family'] = model_data.get('model_family')
                         except Exception as e:
                             print(f"Error reading description file {filepath}: {e}")
                             # Keep description as None
         traceback.print_exc() # Print full traceback for search errors
         return jsonify({"error": "An error occurred during search."}), 500
+@app.route('/health', methods=['GET'])  # GET-only route; takes no request parameters
+def health_check():
+    """Simple health check endpoint to verify the API is running."""
+    status = "ok" if RESOURCES_LOADED else "resources_not_loaded"  # RESOURCES_LOADED is a module-level flag set outside this hunk
+    return jsonify({  # NOTE(review): no explicit status code is passed, so this is presumably HTTP 200 even when resources are not loaded; confirm that probes inspect the body
+        "status": status,
+        "resources_loaded": RESOURCES_LOADED,
+        "model_data_dir_exists": os.path.exists(MODEL_DATA_DIR) if MODEL_DATA_DIR else False  # a falsy MODEL_DATA_DIR is reported as False without calling os.path.exists
+    })
 # The if __name__ == '__main__': block remains removed.

build_index.py CHANGED Viewed

@@ -53,6 +53,13 @@ def load_model_data(directory):
                         tag_string = " ".join(filtered_tags)
                         explanation = data.get(MODEL_EXPLANATION_KEY) # Get the new explanation
                         # --- Construct combined text with priority weighting ---
                         text_parts = []
                         # 1. Add explanation (repeated for emphasis) if available
@@ -64,14 +71,25 @@ def load_model_data(directory):
                         # 3. Add filtered tags if available
                         if tag_string:
                             text_parts.append(f"Tags: {tag_string}")
-                        # 4. Add original description
                         text_parts.append(f"Description: {description}")
                         combined_text = " ".join(text_parts).strip() # Join all parts
                         # --- End construction ---
                         all_texts.append(combined_text)
-                        # Add explanation to metadata as well for potential display
                         metadata_entry = {
                             "model_id": model_id,
                             "tags": original_tags, # Keep ORIGINAL tags in metadata
@@ -79,6 +97,19 @@ def load_model_data(directory):
                         }
                         if explanation and isinstance(explanation, str):
                             metadata_entry[MODEL_EXPLANATION_KEY] = explanation
                         all_metadata.append(metadata_entry)
                 else:
                     print(f"Warning: Skipping {filename}, missing 'description' or 'model_id' key.")

                         tag_string = " ".join(filtered_tags)
                         explanation = data.get(MODEL_EXPLANATION_KEY) # Get the new explanation
+                        # Get the new metadata fields
+                        release_year = data.get('release_year')
+                        parameter_count = data.get('parameter_count')
+                        is_fine_tuned = data.get('is_fine_tuned', False)
+                        category = data.get('category', 'Other')
+                        model_family = data.get('model_family')
                         # --- Construct combined text with priority weighting ---
                         text_parts = []
                         # 1. Add explanation (repeated for emphasis) if available
                         # 3. Add filtered tags if available
                         if tag_string:
                             text_parts.append(f"Tags: {tag_string}")
+                        # 4. Add category, model family, parameter count, release year and fine-tuned flag for better search
+                        if category:
+                            text_parts.append(f"Category: {category}")
+                        if model_family:
+                            text_parts.append(f"Family: {model_family}")
+                        if parameter_count:
+                            text_parts.append(f"Parameters: {parameter_count}")
+                        if release_year:
+                            text_parts.append(f"Year: {release_year}")
+                        if is_fine_tuned:
+                            text_parts.append("Fine-tuned model")
+                        # 5. Add original description
                         text_parts.append(f"Description: {description}")
                         combined_text = " ".join(text_parts).strip() # Join all parts
                         # --- End construction ---
                         all_texts.append(combined_text)
+                        # Add all metadata to the entry
                         metadata_entry = {
                             "model_id": model_id,
                             "tags": original_tags, # Keep ORIGINAL tags in metadata
                         }
                         if explanation and isinstance(explanation, str):
                             metadata_entry[MODEL_EXPLANATION_KEY] = explanation
+                        # Add the new metadata fields
+                        if release_year:
+                            metadata_entry["release_year"] = release_year
+                        if parameter_count:
+                            metadata_entry["parameter_count"] = parameter_count
+                        if is_fine_tuned is not None:
+                            metadata_entry["is_fine_tuned"] = is_fine_tuned
+                        if category:
+                            metadata_entry["category"] = category
+                        if model_family:
+                            metadata_entry["model_family"] = model_family
                         all_metadata.append(metadata_entry)
                 else:
                     print(f"Warning: Skipping {filename}, missing 'description' or 'model_id' key.")