Spaces:
Running
Running
Upload 2 files
Browse files
- app.py +25 -1
- build_index.py +33 -2
app.py
CHANGED
@@ -8,7 +8,7 @@ import traceback
|
|
8 |
|
9 |
app = Flask(__name__) # Create app object FIRST
|
10 |
# Allow requests from the Vercel frontend and localhost for development
|
11 |
-
CORS(app, origins=["http://127.0.0.1:3000", "http://localhost:3000", "https://rag-huggingface.vercel.app"], supports_credentials=True)
|
12 |
|
13 |
# --- Configuration ---
|
14 |
INDEX_FILE = "index.faiss"
|
@@ -161,6 +161,20 @@ def search():
|
|
161 |
with open(filepath, 'r', encoding='utf-8') as f:
|
162 |
model_data = json.load(f)
|
163 |
description = model_data.get('description')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
except Exception as e:
|
165 |
print(f"Error reading description file {filepath}: {e}")
|
166 |
# Keep description as None
|
@@ -182,4 +196,14 @@ def search():
|
|
182 |
traceback.print_exc() # Print full traceback for search errors
|
183 |
return jsonify({"error": "An error occurred during search."}), 500
|
184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
# The if __name__ == '__main__': block remains removed.
|
|
|
8 |
|
9 |
app = Flask(__name__) # Create app object FIRST
|
10 |
# Allow requests from the Vercel frontend and localhost for development
|
11 |
+
CORS(app, origins=["http://127.0.0.1:3000", "http://localhost:3000", "https://rag-huggingface.vercel.app", "https://rag-huggingface-frontend.vercel.app"], supports_credentials=True)
|
12 |
|
13 |
# --- Configuration ---
|
14 |
INDEX_FILE = "index.faiss"
|
|
|
161 |
with open(filepath, 'r', encoding='utf-8') as f:
|
162 |
model_data = json.load(f)
|
163 |
description = model_data.get('description')
|
164 |
+
|
165 |
+
# Add additional metadata fields if not already present
|
166 |
+
if 'model_explanation_gemini' not in metadata and 'model_explanation_gemini' in model_data:
|
167 |
+
metadata['model_explanation_gemini'] = model_data.get('model_explanation_gemini')
|
168 |
+
if 'release_year' not in metadata and 'release_year' in model_data:
|
169 |
+
metadata['release_year'] = model_data.get('release_year')
|
170 |
+
if 'parameter_count' not in metadata and 'parameter_count' in model_data:
|
171 |
+
metadata['parameter_count'] = model_data.get('parameter_count')
|
172 |
+
if 'is_fine_tuned' not in metadata and 'is_fine_tuned' in model_data:
|
173 |
+
metadata['is_fine_tuned'] = model_data.get('is_fine_tuned')
|
174 |
+
if 'category' not in metadata and 'category' in model_data:
|
175 |
+
metadata['category'] = model_data.get('category')
|
176 |
+
if 'model_family' not in metadata and 'model_family' in model_data:
|
177 |
+
metadata['model_family'] = model_data.get('model_family')
|
178 |
except Exception as e:
|
179 |
print(f"Error reading description file {filepath}: {e}")
|
180 |
# Keep description as None
|
|
|
196 |
traceback.print_exc() # Print full traceback for search errors
|
197 |
return jsonify({"error": "An error occurred during search."}), 500
|
198 |
|
199 |
+
@app.route('/health', methods=['GET'])
|
200 |
+
def health_check():
|
201 |
+
"""Simple health check endpoint to verify the API is running."""
|
202 |
+
status = "ok" if RESOURCES_LOADED else "resources_not_loaded"
|
203 |
+
return jsonify({
|
204 |
+
"status": status,
|
205 |
+
"resources_loaded": RESOURCES_LOADED,
|
206 |
+
"model_data_dir_exists": os.path.exists(MODEL_DATA_DIR) if MODEL_DATA_DIR else False
|
207 |
+
})
|
208 |
+
|
209 |
# The if __name__ == '__main__': block remains removed.
|
build_index.py
CHANGED
@@ -53,6 +53,13 @@ def load_model_data(directory):
|
|
53 |
tag_string = " ".join(filtered_tags)
|
54 |
explanation = data.get(MODEL_EXPLANATION_KEY) # Get the new explanation
|
55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
# --- Construct combined text with priority weighting ---
|
57 |
text_parts = []
|
58 |
# 1. Add explanation (repeated for emphasis) if available
|
@@ -64,14 +71,25 @@ def load_model_data(directory):
|
|
64 |
# 3. Add filtered tags if available
|
65 |
if tag_string:
|
66 |
text_parts.append(f"Tags: {tag_string}")
|
67 |
-
# 4. Add original description
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
text_parts.append(f"Description: {description}")
|
69 |
|
70 |
combined_text = " ".join(text_parts).strip() # Join all parts
|
71 |
# --- End construction ---
|
72 |
|
73 |
all_texts.append(combined_text)
|
74 |
-
# Add
|
75 |
metadata_entry = {
|
76 |
"model_id": model_id,
|
77 |
"tags": original_tags, # Keep ORIGINAL tags in metadata
|
@@ -79,6 +97,19 @@ def load_model_data(directory):
|
|
79 |
}
|
80 |
if explanation and isinstance(explanation, str):
|
81 |
metadata_entry[MODEL_EXPLANATION_KEY] = explanation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
all_metadata.append(metadata_entry)
|
83 |
else:
|
84 |
print(f"Warning: Skipping {filename}, missing 'description' or 'model_id' key.")
|
|
|
53 |
tag_string = " ".join(filtered_tags)
|
54 |
explanation = data.get(MODEL_EXPLANATION_KEY) # Get the new explanation
|
55 |
|
56 |
+
# Get the new metadata fields
|
57 |
+
release_year = data.get('release_year')
|
58 |
+
parameter_count = data.get('parameter_count')
|
59 |
+
is_fine_tuned = data.get('is_fine_tuned', False)
|
60 |
+
category = data.get('category', 'Other')
|
61 |
+
model_family = data.get('model_family')
|
62 |
+
|
63 |
# --- Construct combined text with priority weighting ---
|
64 |
text_parts = []
|
65 |
# 1. Add explanation (repeated for emphasis) if available
|
|
|
71 |
# 3. Add filtered tags if available
|
72 |
if tag_string:
|
73 |
text_parts.append(f"Tags: {tag_string}")
|
74 |
+
# 4. Add category, model family and parameter count for better search
|
75 |
+
if category:
|
76 |
+
text_parts.append(f"Category: {category}")
|
77 |
+
if model_family:
|
78 |
+
text_parts.append(f"Family: {model_family}")
|
79 |
+
if parameter_count:
|
80 |
+
text_parts.append(f"Parameters: {parameter_count}")
|
81 |
+
if release_year:
|
82 |
+
text_parts.append(f"Year: {release_year}")
|
83 |
+
if is_fine_tuned:
|
84 |
+
text_parts.append("Fine-tuned model")
|
85 |
+
# 5. Add original description
|
86 |
text_parts.append(f"Description: {description}")
|
87 |
|
88 |
combined_text = " ".join(text_parts).strip() # Join all parts
|
89 |
# --- End construction ---
|
90 |
|
91 |
all_texts.append(combined_text)
|
92 |
+
# Add all metadata to the entry
|
93 |
metadata_entry = {
|
94 |
"model_id": model_id,
|
95 |
"tags": original_tags, # Keep ORIGINAL tags in metadata
|
|
|
97 |
}
|
98 |
if explanation and isinstance(explanation, str):
|
99 |
metadata_entry[MODEL_EXPLANATION_KEY] = explanation
|
100 |
+
|
101 |
+
# Add the new metadata fields
|
102 |
+
if release_year:
|
103 |
+
metadata_entry["release_year"] = release_year
|
104 |
+
if parameter_count:
|
105 |
+
metadata_entry["parameter_count"] = parameter_count
|
106 |
+
if is_fine_tuned is not None:
|
107 |
+
metadata_entry["is_fine_tuned"] = is_fine_tuned
|
108 |
+
if category:
|
109 |
+
metadata_entry["category"] = category
|
110 |
+
if model_family:
|
111 |
+
metadata_entry["model_family"] = model_family
|
112 |
+
|
113 |
all_metadata.append(metadata_entry)
|
114 |
else:
|
115 |
print(f"Warning: Skipping {filename}, missing 'description' or 'model_id' key.")
|