Spaces: Running on CPU Upgrade

Commit bccef3b · Parent(s): 832b9bb
Refactor API auth and add management endpoints for model loading/updating

Files changed:
- api/dependencies.py +40 -0
- api/management.py +80 -0
- api/prediction.py +1 -1
- api/router.py +28 -131
- main.py +2 -1
api/dependencies.py
ADDED
@@ -0,0 +1,40 @@
+import os
+import logging
+from typing import Optional
+from fastapi import Header, HTTPException
+
+from config.settings import API_KEY, MANAGEMENT_API_KEY
+
+logger = logging.getLogger(__name__)
+
+async def verify_api_key(api_key: Optional[str] = Header(None, alias="X-API-Key")):
+    """Check whether the provided client API key matches the configured one."""
+    if not API_KEY:
+        logger.error("API_KEY environment variable is not set. Client endpoints are unsecured!")
+        # Block when no key is defined, to avoid unsecured access
+        raise HTTPException(status_code=503, detail="Server configuration error: API Key not set.")
+
+    # Support several possible keys separated by commas
+    valid_keys = [key.strip() for key in API_KEY.split(',')]
+
+    if api_key is None or api_key not in valid_keys:
+        logger.warning("Invalid or missing client API key attempt.")
+        # Use 403 Forbidden because authentication failed
+        raise HTTPException(
+            status_code=403,
+            detail="Invalid or missing API Key"
+        )
+    # logger.debug("Client API Key verified successfully.")  # Optional: can be noisy
+    return True  # Key is valid
+
+async def verify_management_api_key(x_api_key: Optional[str] = Header(None, alias="X-API-Key")):
+    """Check whether the provided management API key matches the configured one."""
+    if not MANAGEMENT_API_KEY:
+        logger.warning("MANAGEMENT_API_KEY is not set. Management endpoints are inaccessible!")
+        raise HTTPException(status_code=503, detail="Management API key not configured on server.")
+
+    if not x_api_key or x_api_key != MANAGEMENT_API_KEY:
+        logger.warning("Invalid or missing management API key attempt.")
+        raise HTTPException(status_code=403, detail="Invalid or missing Management API Key")
+    # logger.debug("Management API Key verified successfully.")  # Optional
+    return True  # Key is valid
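Note: a minimal sketch of how these dependencies are meant to be consumed via FastAPI's Depends (the endpoint name and handler body are illustrative, not part of this commit):

    # Hypothetical endpoint: attaching verify_api_key via Depends means the
    # request must carry a valid X-API-Key header before the handler runs.
    from fastapi import APIRouter, Depends
    from api.dependencies import verify_api_key

    example_router = APIRouter()

    @example_router.get("/example", dependencies=[Depends(verify_api_key)])
    async def example_endpoint():
        return {"ok": True}

Because verify_api_key splits API_KEY on commas, setting API_KEY="key1,key2" accepts either key.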
api/management.py
ADDED
@@ -0,0 +1,80 @@
+import logging
+from typing import Any, Dict, List
+from fastapi import APIRouter, HTTPException, Depends
+
+from api.dependencies import verify_management_api_key
+from db.models import fetch_model_by_id
+from models.loader import model_pipelines, _load_single_model_pipeline
+from models.schemas import ModelInfo
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/manage", tags=["Management"], dependencies=[Depends(verify_management_api_key)])
+
+@router.post("/load_model/{model_db_id}", summary="Load a specific model into memory")
+async def load_single_model(model_db_id: Any):
+    """Load a specific model into memory using its database ID."""
+    if model_db_id in model_pipelines:
+        raise HTTPException(status_code=400, detail=f"Model ID {model_db_id} is already loaded.")
+
+    try:
+        model_data = await fetch_model_by_id(model_db_id)
+        if not model_data:
+            raise HTTPException(status_code=404, detail=f"Model ID {model_db_id} not found in database.")
+
+        logger.info(f"Loading model {model_db_id} ({model_data['model_name']}) on demand...")
+        success = await _load_single_model_pipeline(model_data)
+        if success:
+            logger.info(f"Model {model_db_id} ({model_data['model_name']}) loaded successfully.")
+            return {"message": f"Model {model_db_id} loaded successfully."}
+        else:
+            # The specific error should be logged inside _load_single_model_pipeline
+            raise HTTPException(status_code=500, detail=f"Failed to load model {model_db_id}.")
+
+    except HTTPException as http_exc:
+        raise http_exc  # Re-raise HTTP exceptions that are already formatted
+    except Exception as e:
+        logger.exception(f"Error loading model {model_db_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"Internal server error while loading model {model_db_id}.")
+
+@router.post("/update_model/{model_db_id}", summary="Update (reload) an already loaded model")
+async def update_single_model(model_db_id: Any):
+    """Update an already loaded model by reloading it from the Hub.
+    The model must currently be loaded in order to be updated.
+    """
+    if model_db_id not in model_pipelines:
+        raise HTTPException(status_code=404, detail=f"Model ID {model_db_id} is not currently loaded. Use load_model first.")
+
+    try:
+        model_data = await fetch_model_by_id(model_db_id)
+        if not model_data:
+            # Unlikely if the model is loaded, but check as a safeguard
+            raise HTTPException(status_code=404, detail=f"Model ID {model_db_id} not found in database despite being loaded.")
+
+        logger.info(f"Updating (reloading) model {model_db_id} ({model_data['model_name']})...")
+
+        # Remove the old model from memory
+        if model_db_id in model_pipelines:
+            del model_pipelines[model_db_id]
+            logger.info(f"Removed old instance of model {model_db_id} from memory.")
+
+        # Attempt the reload
+        success = await _load_single_model_pipeline(model_data, force_download=True)  # Force a re-download
+
+        if success:
+            logger.info(f"Model {model_db_id} ({model_data['model_name']}) updated successfully.")
+            return {"message": f"Model {model_db_id} updated successfully."}
+        else:
+            # If the reload fails, the model is no longer in memory
+            logger.error(f"Failed to update model {model_db_id}. It has been unloaded.")
+            raise HTTPException(status_code=500, detail=f"Failed to update model {model_db_id}. Model is now unloaded.")
+
+    except HTTPException as http_exc:
+        raise http_exc
+    except Exception as e:
+        logger.exception(f"Error updating model {model_db_id}: {e}")
+        # The model may be in an indeterminate state (unloaded or internal error)
+        if model_db_id in model_pipelines:
+            del model_pipelines[model_db_id]  # Ensure it is unloaded on unexpected errors
+        raise HTTPException(status_code=500, detail=f"Internal server error while updating model {model_db_id}. Model has been unloaded.")
+
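Note: with this router mounted under the /manage prefix and guarded by verify_management_api_key, loading a model by database ID would look roughly like this (host, port, and model ID are illustrative):

    # Sketch of a management client call; requires the requests package.
    import requests

    resp = requests.post(
        "http://localhost:7860/manage/load_model/42",
        headers={"X-API-Key": "<MANAGEMENT_API_KEY value>"},  # must match the server's key
    )
    print(resp.status_code, resp.json())

A successful call returns {"message": "Model 42 loaded successfully."}; /manage/update_model/42 re-downloads and reloads a model that is already in memory.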
api/prediction.py
CHANGED
@@ -12,7 +12,7 @@ from schemas.requests import BatchPredictRequest
 from models.loader import get_model
 from steps.preprocess import process_image
 from config.settings import IMAGE_SIZE, NUM_THREADS
-from api.
+from api.dependencies import verify_api_key

 logger = logging.getLogger(__name__)
 router = APIRouter()
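Note: prediction routes now authenticate through the shared verify_api_key dependency, so clients must send an X-API-Key header. An illustrative call (the /batch_predict path comes from the router comments below; the payload shape is an assumption, not from this commit):

    import requests

    resp = requests.post(
        "http://localhost:7860/batch_predict",
        headers={"X-API-Key": "<one of the comma-separated API_KEY values>"},
        json={"images": ["<base64-encoded image>"]},  # hypothetical payload shape
    )
    print(resp.status_code)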
api/router.py
CHANGED
@@ -1,149 +1,46 @@
-from fastapi import APIRouter, Request, HTTPException
 import logging
 import os
-
-from
-
+from typing import Any, Optional, Dict, List
+from fastapi import APIRouter, HTTPException, Depends, Header, Request, Response
+
+# Import the dependencies
+from api.dependencies import verify_api_key, verify_management_api_key
+# Import the sub-routers
+from api import prediction, management
+from config.settings import MANAGEMENT_API_KEY
 from db.models import fetch_model_by_id
 from models.loader import model_pipelines, _load_single_model_pipeline, get_model
-from models.schemas import PredictionRequest, PredictionResponse

 logger = logging.getLogger(__name__)

-#
+# Main router
 router = APIRouter()

-# Middleware
-
-
+# --- Middleware (left here for now) ---
+@router.middleware("http")
+async def api_key_middleware(request: Request, call_next):
     # Skip if we're in debug mode or during startup
     if os.environ.get("DEBUG") == "1":
         return await call_next(request)

-    # For Hugging Face Space routes
-    #
-
-
-        or request.url.path.startswith('/queue/'):
-        return await call_next(request)
-
-    if request.url.path == "/" and "logs=container" in request.url.query:
+    # For Hugging Face Space, Gradio, etc. routes
+    # (Simplified list, adjust if necessary)
+    public_paths = ['/assets/', '/file=', '/queue/', '/startup-logs', '/config', '/info', '/gradio', '/favicon.ico', '/']
+    if any(request.url.path.startswith(p) for p in public_paths):
         return await call_next(request)

-
-
-
-
+    # Optional: global API key check (if not handled by Depends)
+    # Disabled by default, relying on the Depends attached to the endpoints
+    # try:
+    #     api_key = request.headers.get("X-API-Key")
+    #     await verify_api_key(api_key)  # Will not work directly here
+    # except HTTPException as e:
+    #     return Response(content=e.detail, status_code=e.status_code)
+
     response = await call_next(request)
     return response

-#
-
-
-
-        logger.warning("MANAGEMENT_API_KEY is not set. Management endpoints are unsecured!")
-        # Decide whether to block or to allow when no key is defined
-        # For security, it is better to block by default
-        raise HTTPException(status_code=500, detail="Management API key not configured on server.")
-    if x_api_key != MANAGEMENT_API_KEY:
-        logger.warning(f"Invalid or missing API key attempt for management endpoint.")
-        raise HTTPException(status_code=401, detail="Invalid or missing API Key")
-    return True  # Key is valid
-
-# Include the routes from the other modules
-router.include_router(prediction.router, tags=["Prediction"])
-
-# New management endpoint
-@router.post(
-    "/manage/load_model/{model_db_id}",
-    summary="Load a specific model into memory",
-    dependencies=[Depends(verify_management_api_key)]  # Secures the endpoint
-)
-async def load_single_model(model_db_id: Any):  # The ID can be int or str
-    """Load a specific model into memory using its database ID."""
-    logger.info(f"Received request to load model with DB ID: {model_db_id}")
-
-    # 1. Check whether the model is already loaded
-    if model_db_id in model_pipelines:
-        logger.info(f"Model ID {model_db_id} is already loaded.")
-        return {"status": "success", "message": f"Model ID {model_db_id} is already loaded."}
-
-    # 2. Fetch the model information from the DB
-    try:
-        model_data = await fetch_model_by_id(model_db_id)
-        if not model_data:
-            logger.error(f"Model ID {model_db_id} not found in database.")
-            raise HTTPException(status_code=404, detail=f"Model ID {model_db_id} not found in database.")
-    except Exception as e:
-        logger.exception(f"Database error fetching model ID {model_db_id}: {e}")
-        raise HTTPException(status_code=500, detail=f"Database error checking model ID {model_db_id}.")
-
-    # 3. Load the model
-    try:
-        logger.info(f"Attempting to load model ID {model_db_id} ('{model_data.get('name', 'N/A')}') into memory...")
-        pipeline = await _load_single_model_pipeline(model_data)
-
-        # 4. Add it to the dictionary of loaded models
-        model_pipelines[model_db_id] = pipeline
-        logger.info(f"Successfully loaded and added model ID {model_db_id} to running pipelines.")
-        return {"status": "success", "message": f"Model ID {model_db_id} loaded successfully."}
-
-    except Exception as e:
-        logger.exception(f"Failed to load model ID {model_db_id}: {e}")
-        # Do not leave a potentially corrupted pipeline in the dictionary
-        if model_db_id in model_pipelines:
-            del model_pipelines[model_db_id]
-        raise HTTPException(status_code=500, detail=f"Failed to load model ID {model_db_id}. Check server logs for details.")
-
-@router.post(
-    "/manage/update_model/{model_db_id}",
-    summary="Reload/Update a specific model already in memory",
-    dependencies=[Depends(verify_management_api_key)]  # Secures the endpoint
-)
-async def update_single_model(model_db_id: Any):
-    """Re-downloads and updates a specific model that is already loaded in memory."""
-    logger.info(f"Received request to update model with DB ID: {model_db_id}")
-
-    # 1. Check whether the model is currently loaded
-    if model_db_id not in model_pipelines:
-        logger.error(f"Attempted to update model ID {model_db_id}, but it is not loaded.")
-        raise HTTPException(
-            status_code=404,
-            detail=f"Model ID {model_db_id} is not currently loaded. Use load_model first."
-        )
-
-    # 2. Fetch the model information from the DB (to make sure it is up to date if needed)
-    try:
-        model_data = await fetch_model_by_id(model_db_id)
-        if not model_data:
-            # This would indicate an inconsistency: the model is in model_pipelines but not in the DB
-            logger.error(f"Inconsistency: Model ID {model_db_id} loaded but not found in database during update.")
-            raise HTTPException(status_code=500, detail=f"Inconsistency: Model ID {model_db_id} not found in database.")
-    except Exception as e:
-        logger.exception(f"Database error fetching model ID {model_db_id} during update: {e}")
-        raise HTTPException(status_code=500, detail=f"Database error checking model ID {model_db_id} for update.")
-
-    # 3. Reload the model
-    try:
-        logger.info(f"Attempting to reload model ID {model_db_id} ('{model_data.get('name', 'N/A')}') from source...")
-        # Remove the old model from memory before loading the new one, to free GPU/CPU resources if possible
-        # Warning: this may cause a brief unavailability of the model during the reload.
-        # An alternative strategy would be to load the new one first, then swap.
-        if model_db_id in model_pipelines:
-            del model_pipelines[model_db_id]
-            # Potentially force GPU memory cleanup here if needed (torch.cuda.empty_cache() - use with caution)
-            logger.debug(f"Removed old instance of model ID {model_db_id} from memory before update.")
-
-        pipeline = await _load_single_model_pipeline(model_data)
-
-        # 4. Update the dictionary with the new pipeline
-        model_pipelines[model_db_id] = pipeline
-        logger.info(f"Successfully updated model ID {model_db_id} in running pipelines.")
-        return {"status": "success", "message": f"Model ID {model_db_id} updated successfully."}
-
-    except Exception as e:
-        logger.exception(f"Failed to reload model ID {model_db_id}: {e}")
-        # If the reload fails, the old model has already been removed.
-        # Either try to reload the old one, or leave it unloaded.
-        # For now, we leave it unloaded and report the error.
-        raise HTTPException(status_code=500, detail=f"Failed to reload model ID {model_db_id}. Model is now unloaded. Check server logs.")
+# --- Include the sub-routers ---
+router.include_router(prediction.router)  # Prediction routes (/batch_predict)
+router.include_router(management.router)  # Management routes (/manage/load_model, /manage/update_model)
+router.include_router(management.public_router)  # Public info routes (/models, /models/{id})
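Note: FastAPI's APIRouter does not itself expose a .middleware() decorator, so the @router.middleware("http") registration above may fail at import time; HTTP middleware is normally attached to the application object. A minimal sketch of the equivalent app-level registration (assuming the FastAPI app instance created in main.py):

    # Sketch: register the same middleware on the FastAPI app instead of the router.
    from fastapi import FastAPI, Request

    app = FastAPI()

    @app.middleware("http")
    async def api_key_middleware(request: Request, call_next):
        # Same body as in api/router.py
        return await call_next(request)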
main.py
CHANGED
@@ -3,7 +3,8 @@ from fastapi import FastAPI
 import gradio as gr
 from gradio.routes import mount_gradio_app

-from api.router import router
+from api.router import router
+from api.dependencies import verify_api_key
 from db.models import fetch_models_for_group
 from models.loader import load_models, model_pipelines
 from config.settings import RESOURCE_GROUP
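Note: for completeness, a minimal sketch of the config.settings values these modules rely on. The names come from the imports above, and dependencies.py's error message confirms API_KEY is read from the environment; the rest of the file is an assumption:

    # config/settings.py (sketch)
    import os

    API_KEY = os.environ.get("API_KEY")                        # may hold several comma-separated client keys
    MANAGEMENT_API_KEY = os.environ.get("MANAGEMENT_API_KEY")  # single key guarding the /manage endpoints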