Spaces:

pagezyhf
/

inference-cache-explorer

Sleeping

App Files Files Community

pagezyhf HF Staff committed on 28 days ago

Commit

d6a1cfd

1 Parent(s): 5d99e77

test

Browse files

Files changed (2) hide show

app/main.py +26 -4
requirements.txt +2 -1

app/main.py CHANGED Viewed

@@ -7,6 +7,7 @@ from optimum.neuron import utils
 import logging
 import sys
 import os
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
@@ -102,12 +103,33 @@ async def get_model_list():
 async def get_model_info_endpoint(model_id: str):
     logger.info(f"Fetching configurations for model: {model_id}")
     try:
-        configs = utils.get_hub_cached_entries(model_id=model_id, mode="inference")
         logger.info(f"Found {len(configs)} configurations for model {model_id}")
-        # Return empty list if no configurations found
-        if not configs:
-            return JSONResponse(content={"configurations": []})
         return JSONResponse(content={"configurations": configs})
     except Exception as e:
         logger.error(f"Error fetching configurations for model {model_id}: {str(e)}", exc_info=True)
         return JSONResponse(

 import logging
 import sys
 import os
+import httpx
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
 async def get_model_info_endpoint(model_id: str):
     logger.info(f"Fetching configurations for model: {model_id}")
     try:
+        # Define the base URL for the HuggingFace API
+        base_url = "https://huggingface.co/api/integrations/aws/v1/lookup"
+        api_url = f"{base_url}/{model_id}"
+        # Make async HTTP request with timeout
+        timeout = httpx.Timeout(15.0, connect=5.0)  # 15s default for read/write/pool phases, 5s to establish the connection
+        async with httpx.AsyncClient(timeout=timeout) as client:
+            response = await client.get(api_url)
+            response.raise_for_status()
+            data = response.json()
+            configs = data.get("cached_configs", [])
         logger.info(f"Found {len(configs)} configurations for model {model_id}")
         return JSONResponse(content={"configurations": configs})
+    except httpx.TimeoutException as e:
+        logger.error(f"Timeout while fetching configurations for model {model_id}: {str(e)}", exc_info=True)
+        return JSONResponse(
+            status_code=504,  # Gateway Timeout
+            content={"error": "Request timed out while fetching model configurations"}
+        )
+    except httpx.HTTPError as e:
+        logger.error(f"HTTP error fetching configurations for model {model_id}: {str(e)}", exc_info=True)
+        return JSONResponse(
+            status_code=500,
+            content={"error": f"Failed to fetch model configurations: {str(e)}"}
+        )
     except Exception as e:
         logger.error(f"Error fetching configurations for model {model_id}: {str(e)}", exc_info=True)
         return JSONResponse(

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ uvicorn==0.24.0
 jinja2==3.1.2
 optimum-neuron==0.1.0
 python-multipart==0.0.6
-optimum==1.23.3

 jinja2==3.1.2
 optimum-neuron==0.1.0
 python-multipart==0.0.6
+optimum==1.23.3
+httpx>=0.25.0