Daniel Kantor committed
Commit: 58582d3
1 Parent(s): 39651ed

set up ruff for formatting
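The Ruff configuration itself is not part of this diff, so the exact settings and commands used are not visible; running `ruff format backend/` with a `[tool.ruff]` table in pyproject.toml is the usual arrangement and is assumed here. The changes below are dominated by what the formatter does mechanically: two blank lines before top-level definitions, double quotes, trailing commas in multi-line literals and parameter lists, re-wrapping long calls to fit the line length, and stripped trailing whitespace. The removed unused imports (uvicorn, sys, datetime, HfApi, Optional, Path) look like Ruff's lint autofix rather than the formatter, which is an inference, not something the commit states. A minimal sketch of the resulting style, using made-up code rather than code from this repository:

# Illustrative sketch only; not code from this repository.
# Roughly the shape `ruff format` settles on: double quotes, trailing commas
# in multi-line literals, two blank lines between top-level definitions,
# and calls wrapped to the configured line length.
from typing import Any, Dict


def build_submission(payload: Dict[str, Any], user: str = "anon") -> Dict[str, Any]:
    """Return a normalized submission record."""
    return {
        "user": user,
        "payload": payload,
        "source": "api",  # trailing comma kept by the formatter
    }


if __name__ == "__main__":
    print(build_submission({"model_id": "org/model-a"}, user="demo"))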
Files changed:
- .gitignore +0 -1
- Dockerfile +1 -1
- backend/Dockerfile.dev +1 -1
- backend/app/api/dependencies.py +4 -2
- backend/app/api/endpoints/leaderboard.py +7 -2
- backend/app/api/endpoints/models.py +27 -22
- backend/app/api/endpoints/votes.py +34 -34
- backend/app/api/router.py +1 -1
- backend/app/asgi.py +20 -11
- backend/app/config/base.py +4 -2
- backend/app/config/hf_config.py +4 -4
- backend/app/config/logging_config.py +11 -7
- backend/app/core/cache.py +23 -21
- backend/app/core/fastapi_cache.py +15 -13
- backend/app/core/formatting.py +22 -21
- backend/app/main.py +4 -3
- backend/app/services/hf_service.py +10 -4
- backend/app/services/leaderboard.py +73 -50
- backend/app/services/models.py +242 -166
- backend/app/services/rate_limiter.py +0 -72
- backend/app/services/votes.py +161 -102
- backend/app/utils/logging.py +1 -1
- backend/app/utils/model_validation.py +118 -71
- backend/utils/analyze_prod_datasets.py +52 -59
- backend/utils/analyze_prod_models.py +40 -32
- backend/utils/fix_wrong_model_size.py +34 -30
- backend/utils/last_activity.py +50 -41
- backend/utils/sync_datasets_locally.py +25 -26
- docker-compose.yml +1 -1
- frontend/Dockerfile.dev +1 -1
- frontend/src/components/Logo/HFLogo.js +1 -1
- frontend/src/components/shared/CodeBlock.js +1 -1
- frontend/src/config/auth.js +1 -1
- frontend/src/hooks/useThemeMode.js +2 -2
- frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/modelTypes.js +5 -5
- frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/quickFilters.js +1 -1
- frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useBatchedState.js +1 -1
.gitignore
CHANGED
@@ -42,4 +42,3 @@ package-lock.json
 .env
 .env.*
 !.env.example
-
Dockerfile
CHANGED
@@ -59,4 +59,4 @@ USER user
 EXPOSE 7860

 # Start both servers with wait-for
-CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! nc -z localhost 7861; do sleep 1; done && cd frontend && npm run serve"]
+CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! nc -z localhost 7861; do sleep 1; done && cd frontend && npm run serve"]
backend/Dockerfile.dev
CHANGED
@@ -22,4 +22,4 @@ ENV PYTHONUNBUFFERED=1
 ENV LOG_LEVEL=INFO

 # In dev, mount volume directly
-CMD ["uvicorn", "app.asgi:app", "--host", "0.0.0.0", "--port", "7860", "--reload", "--log-level", "warning", "--no-access-log"]
+CMD ["uvicorn", "app.asgi:app", "--host", "0.0.0.0", "--port", "7860", "--reload", "--log-level", "warning", "--no-access-log"]
backend/app/api/dependencies.py
CHANGED
@@ -1,4 +1,4 @@
-from fastapi import
+from fastapi import HTTPException
 import logging
 from app.services.models import ModelService
 from app.services.votes import VoteService
@@ -9,6 +9,7 @@ logger = logging.getLogger(__name__)
 model_service = ModelService()
 vote_service = VoteService()

+
 async def get_model_service() -> ModelService:
     """Dependency to get ModelService instance"""
     try:
@@ -21,6 +22,7 @@ async def get_model_service() -> ModelService:
         logger.error(LogFormatter.error(error_msg, e))
         raise HTTPException(status_code=500, detail=str(e))

+
 async def get_vote_service() -> VoteService:
     """Dependency to get VoteService instance"""
     try:
@@ -31,4 +33,4 @@ async def get_vote_service() -> VoteService:
     except Exception as e:
         error_msg = "Failed to initialize vote service"
         logger.error(LogFormatter.error(error_msg, e))
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail=str(e))
backend/app/api/endpoints/leaderboard.py
CHANGED
@@ -9,6 +9,7 @@ logger = logging.getLogger(__name__)
 router = APIRouter()
 leaderboard_service = LeaderboardService()

+
 def leaderboard_key_builder(func, namespace: str = "leaderboard", **kwargs):
     """Build cache key for leaderboard data"""
     key_type = "raw" if func.__name__ == "get_leaderboard" else "formatted"
@@ -16,6 +17,7 @@ def leaderboard_key_builder(func, namespace: str = "leaderboard", **kwargs):
     logger.debug(LogFormatter.info(f"Built leaderboard cache key: {key}"))
     return key

+
 @router.get("")
 @cached(expire=300, key_builder=leaderboard_key_builder)
 async def get_leaderboard() -> List[Dict[str, Any]]:
@@ -32,6 +34,7 @@ async def get_leaderboard() -> List[Dict[str, Any]]:
         logger.error(LogFormatter.error("Failed to fetch raw leaderboard data", e))
         raise

+
 @router.get("/formatted")
 @cached(expire=300, key_builder=leaderboard_key_builder)
 async def get_formatted_leaderboard() -> List[Dict[str, Any]]:
@@ -45,5 +48,7 @@ async def get_formatted_leaderboard() -> List[Dict[str, Any]]:
         logger.info(LogFormatter.success(f"Retrieved {len(data)} formatted entries"))
         return data
     except Exception as e:
-        logger.error(
-
+        logger.error(
+            LogFormatter.error("Failed to fetch formatted leaderboard data", e)
+        )
+        raise
backend/app/api/endpoints/models.py
CHANGED
@@ -9,18 +9,17 @@ from app.core.formatting import LogFormatter
 logger = logging.getLogger(__name__)
 router = APIRouter(tags=["models"])

+
 @router.get("/status")
 @cached(expire=300)
 async def get_models_status(
-    model_service: ModelService = Depends(get_model_service)
+    model_service: ModelService = Depends(get_model_service),
 ) -> Dict[str, List[Dict[str, Any]]]:
     """Get all models grouped by status"""
     try:
         logger.info(LogFormatter.info("Fetching status for all models"))
         result = await model_service.get_models()
-        stats = {
-            status: len(models) for status, models in result.items()
-        }
+        stats = {status: len(models) for status, models in result.items()}
         for line in LogFormatter.stats(stats, "Models by Status"):
             logger.info(line)
         return result
@@ -28,10 +27,11 @@ async def get_models_status(
         logger.error(LogFormatter.error("Failed to get models status", e))
         raise HTTPException(status_code=500, detail=str(e))

+
 @router.get("/pending")
 @cached(expire=60)
 async def get_pending_models(
-    model_service: ModelService = Depends(get_model_service)
+    model_service: ModelService = Depends(get_model_service),
 ) -> List[Dict[str, Any]]:
     """Get all models waiting for evaluation"""
     try:
@@ -44,35 +44,35 @@ async def get_pending_models(
         logger.error(LogFormatter.error("Failed to get pending models", e))
         raise HTTPException(status_code=500, detail=str(e))

+
 @router.post("/submit")
 async def submit_model(
-    model_data: Dict[str, Any],
-    model_service: ModelService = Depends(get_model_service)
+    model_data: Dict[str, Any], model_service: ModelService = Depends(get_model_service)
 ) -> Dict[str, Any]:
     try:
         logger.info(LogFormatter.section("MODEL SUBMISSION"))
-
-        user_id = model_data.pop(
+
+        user_id = model_data.pop("user_id", None)
         if not user_id:
             error_msg = "user_id is required"
             logger.error(LogFormatter.error("Validation failed", error_msg))
             raise ValueError(error_msg)
-
+
         # Log submission details
         submission_info = {
             "Model_ID": model_data.get("model_id"),
             "User": user_id,
             "Base_Model": model_data.get("base_model"),
             "Precision": model_data.get("precision"),
-            "Model_Type": model_data.get("model_type")
+            "Model_Type": model_data.get("model_type"),
         }
         for line in LogFormatter.tree(submission_info, "Submission Details"):
             logger.info(line)
-
+
         result = await model_service.submit_model(model_data, user_id)
         logger.info(LogFormatter.success("Model submitted successfully"))
         return result
-
+
     except ValueError as e:
         logger.error(LogFormatter.error("Invalid submission data", e))
         raise HTTPException(status_code=400, detail=str(e))
@@ -80,37 +80,42 @@ async def submit_model(
         logger.error(LogFormatter.error("Submission failed", e))
         raise HTTPException(status_code=500, detail=str(e))

+
 @router.get("/organization/{organization}/submissions")
 async def get_organization_submissions(
     organization: str,
     days: int = Query(default=7, ge=1, le=30),
-    model_service: ModelService = Depends(get_model_service)
+    model_service: ModelService = Depends(get_model_service),
 ) -> List[Dict[str, Any]]:
     """Get all submissions from an organization in the last n days"""
     try:
-        submissions = await model_service.get_organization_submissions(
+        submissions = await model_service.get_organization_submissions(
+            organization, days
+        )
         return submissions
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

+
 @router.get("/{model_id}/status")
 async def get_model_status(
-    model_id: str,
-    model_service: ModelService = Depends(get_model_service)
+    model_id: str, model_service: ModelService = Depends(get_model_service)
 ) -> Dict[str, Any]:
     try:
         logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
         status = await model_service.get_model_status(model_id)
-
+
         if status["status"] != "not_found":
             logger.info(LogFormatter.success("Status found"))
             for line in LogFormatter.tree(status, "Model Status"):
                 logger.info(line)
         else:
-            logger.warning(
-
+            logger.warning(
+                LogFormatter.warning(f"No status found for model: {model_id}")
+            )
+
         return status
-
+
     except Exception as e:
         logger.error(LogFormatter.error("Failed to get model status", e))
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail=str(e))
backend/app/api/endpoints/votes.py
CHANGED
@@ -1,10 +1,9 @@
-from fastapi import APIRouter, HTTPException, Query,
+from fastapi import APIRouter, HTTPException, Query, Response
 from typing import Dict, Any, List
 from app.services.votes import VoteService
 from app.core.fastapi_cache import cached, build_cache_key, invalidate_cache_key
 import logging
 from app.core.formatting import LogFormatter
-from datetime import datetime, timezone

 logger = logging.getLogger(__name__)
 router = APIRouter()
@@ -12,28 +11,31 @@ vote_service = VoteService()

 CACHE_TTL = 30  # 30 seconds cache

+
 def model_votes_key_builder(func, namespace: str = "model_votes", **kwargs):
     """Build cache key for model votes"""
-    provider = kwargs.get(
-    model = kwargs.get(
+    provider = kwargs.get("provider")
+    model = kwargs.get("model")
     key = build_cache_key(namespace, provider, model)
     logger.debug(LogFormatter.info(f"Built model votes cache key: {key}"))
     return key

+
 def user_votes_key_builder(func, namespace: str = "user_votes", **kwargs):
     """Build cache key for user votes"""
-    user_id = kwargs.get(
+    user_id = kwargs.get("user_id")
     key = build_cache_key(namespace, user_id)
     logger.debug(LogFormatter.info(f"Built user votes cache key: {key}"))
     return key

+
 @router.post("/{model_id:path}")
 async def add_vote(
     response: Response,
     model_id: str,
     vote_type: str = Query(..., description="Type of vote (up/down)"),
     user_id: str = Query(..., description="HuggingFace username"),
-    vote_data: Dict[str, Any] = None
+    vote_data: Dict[str, Any] = None,
 ) -> Dict[str, Any]:
     try:
         logger.info(LogFormatter.section("ADDING VOTE"))
@@ -41,50 +43,46 @@ async def add_vote(
             "Model": model_id,
             "User": user_id,
             "Type": vote_type,
-            "Config": vote_data or {}
+            "Config": vote_data or {},
         }
         for line in LogFormatter.tree(stats, "Vote Details"):
             logger.info(line)
-
+
         await vote_service.initialize()
         result = await vote_service.add_vote(model_id, user_id, vote_type, vote_data)
-
+
         # Invalidate affected caches
         try:
             logger.info(LogFormatter.subsection("CACHE INVALIDATION"))
-            provider, model = model_id.split(
-
+            provider, model = model_id.split("/", 1)
+
             # Build and invalidate cache keys
             model_cache_key = build_cache_key("model_votes", provider, model)
             user_cache_key = build_cache_key("user_votes", user_id)
-
+
             await invalidate_cache_key(model_cache_key)
             await invalidate_cache_key(user_cache_key)
-
-            cache_stats = {
-                "Model_Cache": model_cache_key,
-                "User_Cache": user_cache_key
-            }
+
+            cache_stats = {"Model_Cache": model_cache_key, "User_Cache": user_cache_key}
             for line in LogFormatter.tree(cache_stats, "Invalidated Caches"):
                 logger.info(line)
-
+
         except Exception as e:
             logger.error(LogFormatter.error("Failed to invalidate cache", e))
-
+
         # Add cache control headers
         response.headers["Cache-Control"] = "no-cache"
-
+
         return result
     except Exception as e:
         logger.error(LogFormatter.error("Failed to add vote", e))
         raise HTTPException(status_code=400, detail=str(e))

+
 @router.get("/model/{provider}/{model}")
 @cached(expire=CACHE_TTL, key_builder=model_votes_key_builder)
 async def get_model_votes(
-    response: Response,
-    provider: str,
-    model: str
+    response: Response, provider: str, model: str
 ) -> Dict[str, Any]:
     """Get all votes for a specific model"""
     try:
@@ -92,35 +90,37 @@ async def get_model_votes(
         await vote_service.initialize()
         model_id = f"{provider}/{model}"
         result = await vote_service.get_model_votes(model_id)
-
+
         # Add cache control headers
         response.headers["Cache-Control"] = f"max-age={CACHE_TTL}"
-        response.headers["Last-Modified"] = vote_service._last_sync.strftime(
-
+        response.headers["Last-Modified"] = vote_service._last_sync.strftime(
+            "%a, %d %b %Y %H:%M:%S GMT"
+        )
+
         logger.info(LogFormatter.success(f"Found {result.get('total_votes', 0)} votes"))
         return result
     except Exception as e:
         logger.error(LogFormatter.error("Failed to get model votes", e))
         raise HTTPException(status_code=400, detail=str(e))

+
 @router.get("/user/{user_id}")
 @cached(expire=CACHE_TTL, key_builder=user_votes_key_builder)
-async def get_user_votes(
-    response: Response,
-    user_id: str
-) -> List[Dict[str, Any]]:
+async def get_user_votes(response: Response, user_id: str) -> List[Dict[str, Any]]:
     """Get all votes from a specific user"""
     try:
         logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
         await vote_service.initialize()
         votes = await vote_service.get_user_votes(user_id)
-
+
         # Add cache control headers
         response.headers["Cache-Control"] = f"max-age={CACHE_TTL}"
-        response.headers["Last-Modified"] = vote_service._last_sync.strftime(
-
+        response.headers["Last-Modified"] = vote_service._last_sync.strftime(
+            "%a, %d %b %Y %H:%M:%S GMT"
+        )
+
         logger.info(LogFormatter.success(f"Found {len(votes)} votes"))
         return votes
     except Exception as e:
         logger.error(LogFormatter.error("Failed to get user votes", e))
-        raise HTTPException(status_code=400, detail=str(e))
+        raise HTTPException(status_code=400, detail=str(e))
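The invalidation in add_vote above only works because it rebuilds exactly the same strings that the @cached key builders produce for get_model_votes and get_user_votes. A small sketch of that invariant, reusing the build_cache_key logic defined in app/core/fastapi_cache.py (the provider and model values are made up):

# Same joining logic as app.core.fastapi_cache.build_cache_key.
def build_cache_key(*args) -> str:
    return ":".join(str(arg) for arg in args if arg is not None)


# Key under which a cached get_model_votes(provider="org", model="model-a") lives...
read_key = build_cache_key("model_votes", "org", "model-a")

# ...and the key add_vote rebuilds after splitting model_id="org/model-a" once on "/".
provider, model = "org/model-a".split("/", 1)
write_key = build_cache_key("model_votes", provider, model)

assert read_key == write_key == "model_votes:org:model-a"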
backend/app/api/router.py
CHANGED
@@ -6,4 +6,4 @@ router = APIRouter()

 router.include_router(leaderboard.router, prefix="/leaderboard", tags=["leaderboard"])
 router.include_router(votes.router, prefix="/votes", tags=["votes"])
-router.include_router(models.router, prefix="/models", tags=["models"])
+router.include_router(models.router, prefix="/models", tags=["models"])
backend/app/asgi.py
CHANGED
@@ -1,14 +1,12 @@
 """
 ASGI entry point for the Open LLM Leaderboard API.
 """
-
-import uvicorn
+
 import logging
 import logging.config
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.middleware.gzip import GZipMiddleware
-import sys

 from app.api.router import router
 from app.core.fastapi_cache import setup_cache
@@ -51,12 +49,12 @@ LOGGING_CONFIG = {
             "handlers": ["default"],
             "level": "WARNING",
             "propagate": False,
-        }
+        },
     },
     "root": {
         "handlers": ["default"],
         "level": "WARNING",
-    }
+    },
 }

 # Apply logging configuration
@@ -85,22 +83,33 @@ app.add_middleware(GZipMiddleware, minimum_size=500)
 # Include API router
 app.include_router(router, prefix="/api")

+
 @app.on_event("startup")
 async def startup_event():
     """Initialize services on startup"""
     logger.info("\n")
     logger.info(LogFormatter.section("APPLICATION STARTUP"))
-
+
     # Log HF configuration
     logger.info(LogFormatter.section("HUGGING FACE CONFIGURATION"))
     logger.info(LogFormatter.info(f"Organization: {hf_config.HF_ORGANIZATION}"))
-    logger.info(
-
+    logger.info(
+        LogFormatter.info(
+            f"Token Status: {'Present' if hf_config.HF_TOKEN else 'Missing'}"
+        )
+    )
+    logger.info(LogFormatter.info("Using repositories:"))
     logger.info(LogFormatter.info(f" - Queue: {hf_config.QUEUE_REPO}"))
     logger.info(LogFormatter.info(f" - Aggregated: {hf_config.AGGREGATED_REPO}"))
     logger.info(LogFormatter.info(f" - Votes: {hf_config.VOTES_REPO}"))
-    logger.info(
-
+    logger.info(
+        LogFormatter.info(
+            f" - Official Providers: {hf_config.OFFICIAL_PROVIDERS_REPO}"
+        )
+    )
+
     # Setup cache
     setup_cache()
-    logger.info(
+    logger.info(
+        LogFormatter.success("FastAPI Cache initialized with in-memory backend")
+    )
backend/app/config/base.py
CHANGED
@@ -8,7 +8,9 @@ WORKERS = 4
 RELOAD = True if os.environ.get("ENVIRONMENT") == "development" else False

 # CORS configuration
-ORIGINS =
+ORIGINS = (
+    ["http://localhost:3000"] if os.getenv("ENVIRONMENT") == "development" else ["*"]
+)

 # Cache configuration
 CACHE_TTL = int(os.environ.get("CACHE_TTL", 300))  # 5 minutes default
@@ -23,7 +25,7 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
 HF_ORGANIZATION = "stacklok"
 API = {
     "INFERENCE": "https://api-inference.huggingface.co/models",
-    "HUB": "https://huggingface.co"
+    "HUB": "https://huggingface.co",
 }

 # Cache paths
backend/app/config/hf_config.py
CHANGED
@@ -1,8 +1,6 @@
 import os
 import logging
-from typing import Optional
 from huggingface_hub import HfApi
-from pathlib import Path
 from app.core.cache import cache_config

 logger = logging.getLogger(__name__)
@@ -13,7 +11,9 @@ HF_ORGANIZATION = "stacklok"
 # Get HF token directly from environment
 HF_TOKEN = os.environ.get("HF_TOKEN")
 if not HF_TOKEN:
-    logger.warning(
+    logger.warning(
+        "HF_TOKEN not found in environment variables. Some features may be limited."
+    )

 # Initialize HF API
 API = HfApi(token=HF_TOKEN)
@@ -22,7 +22,7 @@ API = HfApi(token=HF_TOKEN)
 QUEUE_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-requests"
 AGGREGATED_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-contents"
 VOTES_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-votes"
-OFFICIAL_PROVIDERS_REPO =
+OFFICIAL_PROVIDERS_REPO = "open-llm-leaderboard/official-providers"

 logger.info(f"QUEUE_REPO: {QUEUE_REPO}")
 logger.info(f"AGGREGATED_REPO: {AGGREGATED_REPO}")
backend/app/config/logging_config.py
CHANGED
@@ -1,11 +1,12 @@
 import logging
-import sys
 from tqdm import tqdm

+
 def get_tqdm_handler():
     """
     Creates a special handler for tqdm that doesn't interfere with other logs.
     """
+
     class TqdmLoggingHandler(logging.Handler):
         def emit(self, record):
             try:
@@ -17,22 +18,25 @@ def get_tqdm_handler():

     return TqdmLoggingHandler()

+
 def setup_service_logger(service_name: str) -> logging.Logger:
     """
     Configure a specific logger for a given service.
     """
     logger = logging.getLogger(f"app.services.{service_name}")
-
+
     # If the logger already has handlers, don't reconfigure it
     if logger.handlers:
         return logger
-
+
     # Add tqdm handler for this service
     tqdm_handler = get_tqdm_handler()
-    tqdm_handler.setFormatter(
+    tqdm_handler.setFormatter(
+        logging.Formatter("%(name)s - %(levelname)s - %(message)s")
+    )
     logger.addHandler(tqdm_handler)
-
+
     # Don't propagate logs to parent loggers
     logger.propagate = False
-
-    return logger
+
+    return logger
backend/app/core/cache.py
CHANGED
@@ -10,11 +10,12 @@ from app.config.base import (
     MODELS_CACHE,
     VOTES_CACHE,
     EVAL_CACHE,
-    CACHE_TTL
+    CACHE_TTL,
 )

 logger = logging.getLogger(__name__)

+
 class CacheConfig:
     def __init__(self):
         # Get cache paths from config
@@ -23,59 +24,60 @@ class CacheConfig:
         self.models_cache = MODELS_CACHE
         self.votes_cache = VOTES_CACHE
         self.eval_cache = EVAL_CACHE
-
+
         # Specific files
         self.votes_file = self.votes_cache / "votes_data.jsonl"
         self.eval_requests_file = self.eval_cache / "eval_requests.jsonl"
-
+
         # Cache TTL
         self.cache_ttl = timedelta(seconds=CACHE_TTL)
-
+
         self._initialize_cache_dirs()
         self._setup_environment()
-
+
     def _initialize_cache_dirs(self):
         """Initialize all necessary cache directories"""
         try:
             logger.info(LogFormatter.section("CACHE INITIALIZATION"))
-
+
             cache_dirs = {
                 "Root": self.cache_root,
                 "Datasets": self.datasets_cache,
                 "Models": self.models_cache,
                 "Votes": self.votes_cache,
-                "Eval": self.eval_cache
+                "Eval": self.eval_cache,
             }
-
+
             for name, cache_dir in cache_dirs.items():
                 cache_dir.mkdir(parents=True, exist_ok=True)
-                logger.info(
-
+                logger.info(
+                    LogFormatter.success(f"{name} cache directory: {cache_dir}")
+                )
+
         except Exception as e:
             logger.error(LogFormatter.error("Failed to create cache directories", e))
             raise
-
+
     def _setup_environment(self):
         """Configure HuggingFace environment variables"""
         logger.info(LogFormatter.subsection("ENVIRONMENT SETUP"))

         env_vars = {
             "HF_HOME": str(self.cache_root),
-            "HF_DATASETS_CACHE": str(self.datasets_cache)
+            "HF_DATASETS_CACHE": str(self.datasets_cache),
         }

         for var, value in env_vars.items():
             os.environ[var] = value
             logger.info(LogFormatter.info(f"Set {var}={value}"))

-
     def get_cache_path(self, cache_type: str) -> Path:
         """Returns the path for a specific cache type"""
         cache_paths = {
             "datasets": self.datasets_cache,
             "models": self.models_cache,
             "votes": self.votes_cache,
-            "eval": self.eval_cache
+            "eval": self.eval_cache,
         }
         return cache_paths.get(cache_type, self.cache_root)

@@ -83,13 +85,12 @@ class CacheConfig:
         """Flush specified cache or all caches if no type is specified"""
         try:
             if cache_type:
-                logger.info(
+                logger.info(
+                    LogFormatter.section(f"FLUSHING {cache_type.upper()} CACHE")
+                )
                 cache_dir = self.get_cache_path(cache_type)
                 if cache_dir.exists():
-                    stats = {
-                        "Cache_Type": cache_type,
-                        "Directory": str(cache_dir)
-                    }
+                    stats = {"Cache_Type": cache_type, "Directory": str(cache_dir)}
                     for line in LogFormatter.tree(stats, "Cache Details"):
                         logger.info(line)
                     shutil.rmtree(cache_dir)
@@ -100,10 +101,11 @@ class CacheConfig:
                 for cache_type in ["datasets", "models", "votes", "eval"]:
                     self.flush_cache(cache_type)
                 logger.info(LogFormatter.success("All caches cleared successfully"))
-
+
         except Exception as e:
             logger.error(LogFormatter.error("Failed to flush cache", e))
             raise

+
 # Singleton instance of cache configuration
-cache_config = CacheConfig()
+cache_config = CacheConfig()
backend/app/core/fastapi_cache.py
CHANGED
@@ -1,7 +1,6 @@
 from fastapi_cache import FastAPICache
 from fastapi_cache.backends.inmemory import InMemoryBackend
 from fastapi_cache.decorator import cache
-from datetime import timedelta
 from app.config import CACHE_TTL
 import logging
 from app.core.formatting import LogFormatter
@@ -9,6 +8,7 @@ from typing import Optional, Any

 logger = logging.getLogger(__name__)

+
 class CustomInMemoryBackend(InMemoryBackend):
     def __init__(self):
         """Initialize the cache backend"""
@@ -23,7 +23,9 @@ class CustomInMemoryBackend(InMemoryBackend):
                 return True
             return False
         except Exception as e:
-            logger.error(
+            logger.error(
+                LogFormatter.error(f"Failed to delete key {key} from cache", e)
+            )
             return False

     async def get(self, key: str) -> Any:
@@ -34,43 +36,43 @@ class CustomInMemoryBackend(InMemoryBackend):
         """Set a value in the cache"""
         self.cache[key] = value

+
 def setup_cache():
     """Initialize FastAPI Cache with in-memory backend"""
     try:
         logger.info(LogFormatter.section("CACHE INITIALIZATION"))
-        FastAPICache.init(
-            backend=CustomInMemoryBackend(),
-            prefix="fastapi-cache"
-        )
+        FastAPICache.init(backend=CustomInMemoryBackend(), prefix="fastapi-cache")
         logger.info(LogFormatter.success("Cache initialized successfully"))
     except Exception as e:
         logger.error(LogFormatter.error("Failed to initialize cache", e))
         raise

+
 async def invalidate_cache_key(key: str):
     """Invalidate a specific cache key"""
     try:
         backend = FastAPICache.get_backend()
-        if hasattr(backend,
+        if hasattr(backend, "delete"):
             await backend.delete(key)
             logger.info(LogFormatter.success(f"Cache invalidated for key: {key}"))
         else:
-            logger.warning(
+            logger.warning(
+                LogFormatter.warning("Cache backend does not support deletion")
+            )
     except Exception as e:
         logger.error(LogFormatter.error(f"Failed to invalidate cache key: {key}", e))

+
 def build_cache_key(*args) -> str:
     """Build a cache key from multiple arguments"""
     return ":".join(str(arg) for arg in args if arg is not None)

+
 def cached(expire: int = CACHE_TTL, key_builder=None):
     """Decorator for caching endpoint responses
-
+
     Args:
         expire (int): Cache TTL in seconds
         key_builder (callable, optional): Custom key builder function
     """
-    return cache(
-        expire=expire,
-        key_builder=key_builder
-    )
+    return cache(expire=expire, key_builder=key_builder)
backend/app/core/formatting.py
CHANGED
@@ -3,48 +3,49 @@ from typing import Dict, Any, List, Optional

 logger = logging.getLogger(__name__)

+
 class LogFormatter:
     """Utility class for consistent log formatting across the application"""
-
+
     @staticmethod
     def section(title: str) -> str:
         """Create a section header"""
-        return f"\n{'='*20} {title.upper()} {'='*20}"
-
+        return f"\n{'=' * 20} {title.upper()} {'=' * 20}"
+
     @staticmethod
     def subsection(title: str) -> str:
         """Create a subsection header"""
-        return f"\n{'─'*20} {title} {'─'*20}"
-
+        return f"\n{'─' * 20} {title} {'─' * 20}"
+
     @staticmethod
     def tree(items: Dict[str, Any], title: str = None) -> List[str]:
         """Create a tree view of dictionary data"""
         lines = []
         if title:
             lines.append(f"📊 {title}:")
-
+
         # Get the maximum length for alignment
         max_key_length = max(len(str(k)) for k in items.keys())
-
+
         # Format each item
         for i, (key, value) in enumerate(items.items()):
             prefix = "└──" if i == len(items) - 1 else "├──"
             if isinstance(value, (int, float)):
                 value = f"{value:,}"  # Add thousand separators
             lines.append(f"{prefix} {str(key):<{max_key_length}}: {value}")
-
+
         return lines
-
+
     @staticmethod
     def stats(stats: Dict[str, int], title: str = None) -> List[str]:
         """Format statistics with icons"""
         lines = []
         if title:
             lines.append(f"📊 {title}:")
-
+
         # Get the maximum length for alignment
         max_key_length = max(len(str(k)) for k in stats.keys())
-
+
         # Format each stat with an appropriate icon
         icons = {
             "total": "📌",
@@ -59,19 +60,19 @@ class LogFormatter:
             "cached": "💾",
             "size": "📏",
             "time": "⏱️",
-            "rate": "🚀"
+            "rate": "🚀",
         }
-
+
         # Format each item
         for i, (key, value) in enumerate(stats.items()):
             prefix = "└──" if i == len(stats) - 1 else "├──"
-            icon = icons.get(key.lower().split(
+            icon = icons.get(key.lower().split("_")[0], "•")
             if isinstance(value, (int, float)):
                 value = f"{value:,}"  # Add thousand separators
             lines.append(f"{prefix} {icon} {str(key):<{max_key_length}}: {value}")
-
+
         return lines
-
+
     @staticmethod
     def progress_bar(current: int, total: int, width: int = 20) -> str:
         """Create a progress bar"""
@@ -79,7 +80,7 @@ class LogFormatter:
         filled = "█" * (percentage * width // 100)
         empty = "░" * (width - len(filled))
         return f"{filled}{empty} {percentage:3d}%"
-
+
     @staticmethod
     def error(message: str, error: Optional[Exception] = None) -> str:
         """Format error message"""
@@ -87,18 +88,18 @@ class LogFormatter:
         if error:
             error_msg += f"\n └── Details: {str(error)}"
         return error_msg
-
+
     @staticmethod
     def success(message: str) -> str:
         """Format success message"""
         return f"✅ {message}"
-
+
     @staticmethod
     def warning(message: str) -> str:
         """Format warning message"""
         return f"⚠️ {message}"
-
+
     @staticmethod
     def info(message: str) -> str:
         """Format info message"""
-        return f"ℹ️ {message}"
+        return f"ℹ️ {message}"
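For a sense of what these helpers emit, here is a small usage sketch of LogFormatter.tree; the expected output in the comments is inferred from the implementation above, and the sample dict is made up:

from app.core.formatting import LogFormatter

stats = {"Model_ID": "org/model-a", "Precision": "float16"}
for line in LogFormatter.tree(stats, "Submission Details"):
    print(line)
# 📊 Submission Details:
# ├── Model_ID : org/model-a
# └── Precision: float16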
backend/app/main.py
CHANGED
@@ -1,5 +1,6 @@
 from fastapi import FastAPI
 from app.config.logging_config import setup_logging
+from app.api import models, votes
 import logging

 # Initialize logging configuration
@@ -8,11 +9,11 @@ logger = logging.getLogger(__name__)

 app = FastAPI(title="Open LLM Leaderboard API")

+
 @app.on_event("startup")
 async def startup_event():
     logger.info("Starting up the application...")

-
-from app.api import models, votes
+
 app.include_router(models.router, prefix="/api", tags=["models"])
-app.include_router(votes.router, prefix="/api", tags=["votes"])
+app.include_router(votes.router, prefix="/api", tags=["votes"])
backend/app/services/hf_service.py
CHANGED
@@ -1,5 +1,4 @@
 from typing import Optional
-from huggingface_hub import HfApi
 from app.config import HF_TOKEN, API
 from app.core.cache import cache_config
 from app.core.formatting import LogFormatter
@@ -7,6 +6,7 @@ import logging

 logger = logging.getLogger(__name__)

+
 class HuggingFaceService:
     def __init__(self):
         self.api = API
@@ -31,7 +31,11 @@ class HuggingFaceService:
         try:
             logger.info(LogFormatter.info("Fetching user information..."))
             info = self.api.get_token_permission()
-            logger.info(
+            logger.info(
+                LogFormatter.success(
+                    f"User info retrieved for: {info.get('user', 'Unknown')}"
+                )
+            )
             return info
         except Exception as e:
             logger.error(LogFormatter.error("Failed to get user info", e))
@@ -39,7 +43,9 @@ class HuggingFaceService:

     def _log_repo_operation(self, operation: str, repo: str, details: str = None):
         """Helper to log repository operations"""
-        logger.info(
+        logger.info(
+            LogFormatter.section(f"HF REPOSITORY OPERATION - {operation.upper()}")
+        )
         stats = {
             "Operation": operation,
             "Repository": repo,
@@ -47,4 +53,4 @@ class HuggingFaceService:
         if details:
             stats["Details"] = details
         for line in LogFormatter.tree(stats):
-            logger.info(line)
+            logger.info(line)
backend/app/services/leaderboard.py
CHANGED
@@ -1,5 +1,4 @@
 from app.core.cache import cache_config
-from datetime import datetime
 from typing import List, Dict, Any
 import datasets
 from fastapi import HTTPException
@@ -9,33 +8,38 @@ from app.core.formatting import LogFormatter

 logger = logging.getLogger(__name__)

 class LeaderboardService:
     def __init__(self):
         pass
-
     async def fetch_raw_data(self) -> List[Dict[str, Any]]:
         """Fetch raw leaderboard data from HuggingFace dataset"""
         try:
             logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
-            logger.info(
-
             dataset = datasets.load_dataset(
                 f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
-                cache_dir=cache_config.get_cache_path("datasets")
             )["train"]
-
             df = dataset.to_pandas()
-            data = df.to_dict(
-
             stats = {
                 "Total_Entries": len(data),
-                "Dataset_Size": f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.1f}MB"
             }
             for line in LogFormatter.stats(stats, "Dataset Statistics"):
                 logger.info(line)
-
             return data
-
         except Exception as e:
             logger.error(LogFormatter.error("Failed to fetch leaderboard data", e))
             raise HTTPException(status_code=500, detail=str(e))
@@ -44,53 +48,60 @@
         """Get formatted leaderboard data"""
         try:
             logger.info(LogFormatter.section("FORMATTING LEADERBOARD DATA"))
-
             raw_data = await self.fetch_raw_data()
             formatted_data = []
             type_counts = {}
             error_count = 0
-
             # Initialize progress tracking
             total_items = len(raw_data)
             logger.info(LogFormatter.info(f"Processing {total_items:,} entries..."))
-
             for i, item in enumerate(raw_data, 1):
                 try:
                     formatted_item = await self.transform_data(item)
                     formatted_data.append(formatted_item)
-
                     # Count model types
                     model_type = formatted_item["model"]["type"]
                     type_counts[model_type] = type_counts.get(model_type, 0) + 1
-
                 except Exception as e:
                     error_count += 1
-                    logger.error(
                     continue
-
                 # Log progress every 10%
                 if i % max(1, total_items // 10) == 0:
-
-
-
             # Log final statistics
             stats = {
                 "Total_Processed": total_items,
                 "Successful": len(formatted_data),
-                "Failed": error_count
             }
             logger.info(LogFormatter.section("PROCESSING SUMMARY"))
             for line in LogFormatter.stats(stats, "Processing Statistics"):
                 logger.info(line)
-
             # Log model type distribution
             type_stats = {f"Type_{k}": v for k, v in type_counts.items()}
             logger.info(LogFormatter.subsection("MODEL TYPE DISTRIBUTION"))
             for line in LogFormatter.stats(type_stats):
                 logger.info(line)
-
             return formatted_data
-
         except Exception as e:
             logger.error(LogFormatter.error("Failed to format leaderboard data", e))
             raise HTTPException(status_code=500, detail=str(e))
@@ -100,42 +111,44 @@
         try:
             # Extract model name for logging
             model_name = data.get("fullname", "Unknown")
-            logger.debug(
-
             # Create unique ID combining model name, precision, sha and chat template status
             unique_id = f"{data.get('fullname', 'Unknown')}_{data.get('Precision', 'Unknown')}_{data.get('Model sha', 'Unknown')}_{str(data.get('Chat Template', False))}"
-
             evaluations = {
                 "ifeval": {
                     "name": "IFEval",
                     "value": data.get("IFEval Raw", 0),
-                    "normalized_score": data.get("IFEval", 0)
                 },
                 "bbh": {
                     "name": "BBH",
                     "value": data.get("BBH Raw", 0),
-                    "normalized_score": data.get("BBH", 0)
                 },
                 "math": {
                     "name": "MATH Level 5",
|
121 |
"value": data.get("MATH Lvl 5 Raw", 0),
|
122 |
-
"normalized_score": data.get("MATH Lvl 5", 0)
|
123 |
},
|
124 |
"gpqa": {
|
125 |
"name": "GPQA",
|
126 |
"value": data.get("GPQA Raw", 0),
|
127 |
-
"normalized_score": data.get("GPQA", 0)
|
128 |
},
|
129 |
"musr": {
|
130 |
"name": "MUSR",
|
131 |
"value": data.get("MUSR Raw", 0),
|
132 |
-
"normalized_score": data.get("MUSR", 0)
|
133 |
},
|
134 |
"mmlu_pro": {
|
135 |
"name": "MMLU-PRO",
|
136 |
"value": data.get("MMLU-PRO Raw", 0),
|
137 |
-
"normalized_score": data.get("MMLU-PRO", 0)
|
138 |
-
}
|
139 |
}
|
140 |
|
141 |
features = {
|
@@ -143,7 +156,7 @@ class LeaderboardService:
|
|
143 |
"is_merged": data.get("Merged", False),
|
144 |
"is_moe": data.get("MoE", False),
|
145 |
"is_flagged": data.get("Flagged", False),
|
146 |
-
"is_official_provider": data.get("Official Providers", False)
|
147 |
}
|
148 |
|
149 |
metadata = {
|
@@ -154,18 +167,18 @@ class LeaderboardService:
|
|
154 |
"hub_license": data.get("Hub License"),
|
155 |
"hub_hearts": data.get("Hub ❤️"),
|
156 |
"params_billions": data.get("#Params (B)"),
|
157 |
-
"co2_cost": data.get("CO₂ cost (kg)", 0)
|
158 |
}
|
159 |
|
160 |
# Clean model type by removing emojis if present
|
161 |
original_type = data.get("Type", "")
|
162 |
model_type = original_type.lower().strip()
|
163 |
-
|
164 |
# Remove emojis and parentheses
|
165 |
if "(" in model_type:
|
166 |
model_type = model_type.split("(")[0].strip()
|
167 |
-
model_type =
|
168 |
-
|
169 |
# Map old model types to new ones
|
170 |
model_type_mapping = {
|
171 |
"fine-tuned": "fined-tuned-on-domain-specific-dataset",
|
@@ -175,14 +188,18 @@ class LeaderboardService:
|
|
175 |
"ft": "fined-tuned-on-domain-specific-dataset",
|
176 |
"finetuning": "fined-tuned-on-domain-specific-dataset",
|
177 |
"fine tuning": "fined-tuned-on-domain-specific-dataset",
|
178 |
-
"fine-tuning": "fined-tuned-on-domain-specific-dataset"
|
179 |
}
|
180 |
|
181 |
mapped_type = model_type_mapping.get(model_type.lower().strip(), model_type)
|
182 |
-
|
183 |
if mapped_type != model_type:
|
184 |
-
logger.debug(
|
185 |
-
|
|
|
|
|
|
|
|
|
186 |
transformed_data = {
|
187 |
"id": unique_id,
|
188 |
"model": {
|
@@ -193,16 +210,22 @@ class LeaderboardService:
|
|
193 |
"weight_type": data.get("Weight type"),
|
194 |
"architecture": data.get("Architecture"),
|
195 |
"average_score": data.get("Average ⬆️"),
|
196 |
-
"has_chat_template": data.get("Chat Template", False)
|
197 |
},
|
198 |
"evaluations": evaluations,
|
199 |
"features": features,
|
200 |
-
"metadata": metadata
|
201 |
}
|
202 |
-
|
203 |
-
logger.debug(
|
|
|
|
|
204 |
return transformed_data
|
205 |
-
|
206 |
except Exception as e:
|
207 |
-
logger.error(
|
|
|
|
|
|
|
|
|
208 |
raise
|
|
|
1 |
from app.core.cache import cache_config
|
|
|
2 |
from typing import List, Dict, Any
|
3 |
import datasets
|
4 |
from fastapi import HTTPException
|
|
|
8 |
|
9 |
logger = logging.getLogger(__name__)
|
10 |
|
11 |
+
|
12 |
class LeaderboardService:
|
13 |
def __init__(self):
|
14 |
pass
|
15 |
+
|
16 |
async def fetch_raw_data(self) -> List[Dict[str, Any]]:
|
17 |
"""Fetch raw leaderboard data from HuggingFace dataset"""
|
18 |
try:
|
19 |
logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
|
20 |
+
logger.info(
|
21 |
+
LogFormatter.info(
|
22 |
+
f"Loading dataset from {HF_ORGANIZATION}/llm-security-leaderboard-contents"
|
23 |
+
)
|
24 |
+
)
|
25 |
+
|
26 |
dataset = datasets.load_dataset(
|
27 |
f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
|
28 |
+
cache_dir=cache_config.get_cache_path("datasets"),
|
29 |
)["train"]
|
30 |
+
|
31 |
df = dataset.to_pandas()
|
32 |
+
data = df.to_dict("records")
|
33 |
+
|
34 |
stats = {
|
35 |
"Total_Entries": len(data),
|
36 |
+
"Dataset_Size": f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.1f}MB",
|
37 |
}
|
38 |
for line in LogFormatter.stats(stats, "Dataset Statistics"):
|
39 |
logger.info(line)
|
40 |
+
|
41 |
return data
|
42 |
+
|
43 |
except Exception as e:
|
44 |
logger.error(LogFormatter.error("Failed to fetch leaderboard data", e))
|
45 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
48 |
"""Get formatted leaderboard data"""
|
49 |
try:
|
50 |
logger.info(LogFormatter.section("FORMATTING LEADERBOARD DATA"))
|
51 |
+
|
52 |
raw_data = await self.fetch_raw_data()
|
53 |
formatted_data = []
|
54 |
type_counts = {}
|
55 |
error_count = 0
|
56 |
+
|
57 |
# Initialize progress tracking
|
58 |
total_items = len(raw_data)
|
59 |
logger.info(LogFormatter.info(f"Processing {total_items:,} entries..."))
|
60 |
+
|
61 |
for i, item in enumerate(raw_data, 1):
|
62 |
try:
|
63 |
formatted_item = await self.transform_data(item)
|
64 |
formatted_data.append(formatted_item)
|
65 |
+
|
66 |
# Count model types
|
67 |
model_type = formatted_item["model"]["type"]
|
68 |
type_counts[model_type] = type_counts.get(model_type, 0) + 1
|
69 |
+
|
70 |
except Exception as e:
|
71 |
error_count += 1
|
72 |
+
logger.error(
|
73 |
+
LogFormatter.error(
|
74 |
+
f"Failed to format entry {i}/{total_items}", e
|
75 |
+
)
|
76 |
+
)
|
77 |
continue
|
78 |
+
|
79 |
# Log progress every 10%
|
80 |
if i % max(1, total_items // 10) == 0:
|
81 |
+
logger.info(
|
82 |
+
LogFormatter.info(
|
83 |
+
f"Progress: {LogFormatter.progress_bar(i, total_items)}"
|
84 |
+
)
|
85 |
+
)
|
86 |
+
|
87 |
# Log final statistics
|
88 |
stats = {
|
89 |
"Total_Processed": total_items,
|
90 |
"Successful": len(formatted_data),
|
91 |
+
"Failed": error_count,
|
92 |
}
|
93 |
logger.info(LogFormatter.section("PROCESSING SUMMARY"))
|
94 |
for line in LogFormatter.stats(stats, "Processing Statistics"):
|
95 |
logger.info(line)
|
96 |
+
|
97 |
# Log model type distribution
|
98 |
type_stats = {f"Type_{k}": v for k, v in type_counts.items()}
|
99 |
logger.info(LogFormatter.subsection("MODEL TYPE DISTRIBUTION"))
|
100 |
for line in LogFormatter.stats(type_stats):
|
101 |
logger.info(line)
|
102 |
+
|
103 |
return formatted_data
|
104 |
+
|
105 |
except Exception as e:
|
106 |
logger.error(LogFormatter.error("Failed to format leaderboard data", e))
|
107 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
111 |
try:
|
112 |
# Extract model name for logging
|
113 |
model_name = data.get("fullname", "Unknown")
|
114 |
+
logger.debug(
|
115 |
+
LogFormatter.info(f"Transforming data for model: {model_name}")
|
116 |
+
)
|
117 |
+
|
118 |
# Create unique ID combining model name, precision, sha and chat template status
|
119 |
unique_id = f"{data.get('fullname', 'Unknown')}_{data.get('Precision', 'Unknown')}_{data.get('Model sha', 'Unknown')}_{str(data.get('Chat Template', False))}"
|
120 |
+
|
121 |
evaluations = {
|
122 |
"ifeval": {
|
123 |
"name": "IFEval",
|
124 |
"value": data.get("IFEval Raw", 0),
|
125 |
+
"normalized_score": data.get("IFEval", 0),
|
126 |
},
|
127 |
"bbh": {
|
128 |
"name": "BBH",
|
129 |
"value": data.get("BBH Raw", 0),
|
130 |
+
"normalized_score": data.get("BBH", 0),
|
131 |
},
|
132 |
"math": {
|
133 |
"name": "MATH Level 5",
|
134 |
"value": data.get("MATH Lvl 5 Raw", 0),
|
135 |
+
"normalized_score": data.get("MATH Lvl 5", 0),
|
136 |
},
|
137 |
"gpqa": {
|
138 |
"name": "GPQA",
|
139 |
"value": data.get("GPQA Raw", 0),
|
140 |
+
"normalized_score": data.get("GPQA", 0),
|
141 |
},
|
142 |
"musr": {
|
143 |
"name": "MUSR",
|
144 |
"value": data.get("MUSR Raw", 0),
|
145 |
+
"normalized_score": data.get("MUSR", 0),
|
146 |
},
|
147 |
"mmlu_pro": {
|
148 |
"name": "MMLU-PRO",
|
149 |
"value": data.get("MMLU-PRO Raw", 0),
|
150 |
+
"normalized_score": data.get("MMLU-PRO", 0),
|
151 |
+
},
|
152 |
}
|
153 |
|
154 |
features = {
|
|
|
156 |
"is_merged": data.get("Merged", False),
|
157 |
"is_moe": data.get("MoE", False),
|
158 |
"is_flagged": data.get("Flagged", False),
|
159 |
+
"is_official_provider": data.get("Official Providers", False),
|
160 |
}
|
161 |
|
162 |
metadata = {
|
|
|
167 |
"hub_license": data.get("Hub License"),
|
168 |
"hub_hearts": data.get("Hub ❤️"),
|
169 |
"params_billions": data.get("#Params (B)"),
|
170 |
+
"co2_cost": data.get("CO₂ cost (kg)", 0),
|
171 |
}
|
172 |
|
173 |
# Clean model type by removing emojis if present
|
174 |
original_type = data.get("Type", "")
|
175 |
model_type = original_type.lower().strip()
|
176 |
+
|
177 |
# Remove emojis and parentheses
|
178 |
if "(" in model_type:
|
179 |
model_type = model_type.split("(")[0].strip()
|
180 |
+
model_type = "".join(c for c in model_type if c not in "🔶🟢🟩💬🤝🌸 ")
|
181 |
+
|
182 |
# Map old model types to new ones
|
183 |
model_type_mapping = {
|
184 |
"fine-tuned": "fined-tuned-on-domain-specific-dataset",
|
|
|
188 |
"ft": "fined-tuned-on-domain-specific-dataset",
|
189 |
"finetuning": "fined-tuned-on-domain-specific-dataset",
|
190 |
"fine tuning": "fined-tuned-on-domain-specific-dataset",
|
191 |
+
"fine-tuning": "fined-tuned-on-domain-specific-dataset",
|
192 |
}
|
193 |
|
194 |
mapped_type = model_type_mapping.get(model_type.lower().strip(), model_type)
|
195 |
+
|
196 |
if mapped_type != model_type:
|
197 |
+
logger.debug(
|
198 |
+
LogFormatter.info(
|
199 |
+
f"Model type mapped: {original_type} -> {mapped_type}"
|
200 |
+
)
|
201 |
+
)
|
202 |
+
|
203 |
transformed_data = {
|
204 |
"id": unique_id,
|
205 |
"model": {
|
|
|
210 |
"weight_type": data.get("Weight type"),
|
211 |
"architecture": data.get("Architecture"),
|
212 |
"average_score": data.get("Average ⬆️"),
|
213 |
+
"has_chat_template": data.get("Chat Template", False),
|
214 |
},
|
215 |
"evaluations": evaluations,
|
216 |
"features": features,
|
217 |
+
"metadata": metadata,
|
218 |
}
|
219 |
+
|
220 |
+
logger.debug(
|
221 |
+
LogFormatter.success(f"Successfully transformed data for {model_name}")
|
222 |
+
)
|
223 |
return transformed_data
|
224 |
+
|
225 |
except Exception as e:
|
226 |
+
logger.error(
|
227 |
+
LogFormatter.error(
|
228 |
+
f"Failed to transform data for {data.get('fullname', 'Unknown')}", e
|
229 |
+
)
|
230 |
+
)
|
231 |
raise
|
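`transform_data` keeps the same model-type normalization: lowercase the raw label, drop any parenthesized suffix, strip emoji decorations, then map legacy names onto the new canonical ones. A standalone sketch of that logic, showing only a subset of the mapping table used above:

```python
# Sketch of the model-type normalization in transform_data (subset of the mapping).
MODEL_TYPE_MAPPING = {
    "fine-tuned": "fined-tuned-on-domain-specific-dataset",
    "fine-tuning": "fined-tuned-on-domain-specific-dataset",
}


def normalize_model_type(raw_type: str) -> str:
    model_type = raw_type.lower().strip()
    # Drop any parenthesized suffix, e.g. "fine-tuned (chat)".
    if "(" in model_type:
        model_type = model_type.split("(")[0].strip()
    # Strip emoji decorations and spaces, as in the service code.
    model_type = "".join(c for c in model_type if c not in "🔶🟢🟩💬🤝🌸 ")
    return MODEL_TYPE_MAPPING.get(model_type, model_type)


assert normalize_model_type("🔶 fine-tuned") == "fined-tuned-on-domain-specific-dataset"
assert normalize_model_type("pretrained 🟢") == "pretrained"
```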
backend/app/services/models.py
CHANGED
@@ -5,22 +5,16 @@ import os
|
|
5 |
from pathlib import Path
|
6 |
import logging
|
7 |
import aiohttp
|
8 |
-
import asyncio
|
9 |
import time
|
10 |
-
from huggingface_hub import HfApi
|
11 |
from huggingface_hub.utils import build_hf_headers
|
12 |
from datasets import disable_progress_bar
|
13 |
import sys
|
14 |
import contextlib
|
15 |
-
from concurrent.futures import ThreadPoolExecutor
|
16 |
import tempfile
|
17 |
|
18 |
-
from app.config import
|
19 |
-
|
20 |
-
HF_TOKEN,
|
21 |
-
EVAL_REQUESTS_PATH
|
22 |
-
)
|
23 |
-
from app.config.hf_config import HF_ORGANIZATION, QUEUE_REPO
|
24 |
from app.services.hf_service import HuggingFaceService
|
25 |
from app.utils.model_validation import ModelValidator
|
26 |
from app.services.votes import VoteService
|
@@ -32,12 +26,13 @@ disable_progress_bar()
|
|
32 |
|
33 |
logger = logging.getLogger(__name__)
|
34 |
|
|
|
35 |
# Context manager to temporarily disable stdout and stderr
|
36 |
@contextlib.contextmanager
|
37 |
def suppress_output():
|
38 |
stdout = sys.stdout
|
39 |
stderr = sys.stderr
|
40 |
-
devnull = open(os.devnull,
|
41 |
try:
|
42 |
sys.stdout = devnull
|
43 |
sys.stderr = devnull
|
@@ -47,6 +42,7 @@ def suppress_output():
|
|
47 |
sys.stderr = stderr
|
48 |
devnull.close()
|
49 |
|
|
|
50 |
class ProgressTracker:
|
51 |
def __init__(self, total: int, desc: str = "Progress", update_frequency: int = 10):
|
52 |
self.total = total
|
@@ -55,57 +51,63 @@ class ProgressTracker:
|
|
55 |
self.start_time = time.time()
|
56 |
self.update_frequency = update_frequency # Percentage steps
|
57 |
self.last_update = -1
|
58 |
-
|
59 |
# Initial log with fancy formatting
|
60 |
logger.info(LogFormatter.section(desc))
|
61 |
logger.info(LogFormatter.info(f"Starting processing of {total:,} items..."))
|
62 |
sys.stdout.flush()
|
63 |
-
|
64 |
def update(self, n: int = 1):
|
65 |
self.current += n
|
66 |
current_percentage = (self.current * 100) // self.total
|
67 |
-
|
68 |
# Only update on frequency steps (e.g., 0%, 10%, 20%, etc.)
|
69 |
-
if
|
|
|
|
|
|
|
70 |
elapsed = time.time() - self.start_time
|
71 |
rate = self.current / elapsed if elapsed > 0 else 0
|
72 |
remaining = (self.total - self.current) / rate if rate > 0 else 0
|
73 |
-
|
74 |
# Create progress stats
|
75 |
stats = {
|
76 |
"Progress": LogFormatter.progress_bar(self.current, self.total),
|
77 |
"Items": f"{self.current:,}/{self.total:,}",
|
78 |
"Time": f"⏱️ {elapsed:.1f}s elapsed, {remaining:.1f}s remaining",
|
79 |
-
"Rate": f"🚀 {rate:.1f} items/s"
|
80 |
}
|
81 |
-
|
82 |
# Log progress using tree format
|
83 |
for line in LogFormatter.tree(stats):
|
84 |
logger.info(line)
|
85 |
sys.stdout.flush()
|
86 |
-
|
87 |
-
self.last_update = (
|
88 |
-
|
|
|
|
|
89 |
def close(self):
|
90 |
elapsed = time.time() - self.start_time
|
91 |
rate = self.total / elapsed if elapsed > 0 else 0
|
92 |
-
|
93 |
# Final summary with fancy formatting
|
94 |
logger.info(LogFormatter.section("COMPLETED"))
|
95 |
stats = {
|
96 |
"Total": f"{self.total:,} items",
|
97 |
"Time": f"{elapsed:.1f}s",
|
98 |
-
"Rate": f"{rate:.1f} items/s"
|
99 |
}
|
100 |
for line in LogFormatter.stats(stats):
|
101 |
logger.info(line)
|
102 |
-
logger.info("="*50)
|
103 |
sys.stdout.flush()
|
104 |
|
|
|
105 |
class ModelService(HuggingFaceService):
|
106 |
-
_instance: Optional[
|
107 |
_initialized = False
|
108 |
-
|
109 |
def __new__(cls):
|
110 |
if cls._instance is None:
|
111 |
logger.info(LogFormatter.info("Creating new ModelService instance"))
|
@@ -113,14 +115,18 @@ class ModelService(HuggingFaceService):
|
|
113 |
return cls._instance
|
114 |
|
115 |
def __init__(self):
|
116 |
-
if not hasattr(self,
|
117 |
logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
|
118 |
super().__init__()
|
119 |
self.validator = ModelValidator()
|
120 |
self.vote_service = VoteService()
|
121 |
self.eval_requests_path = cache_config.eval_requests_file
|
122 |
-
logger.info(
|
123 |
-
|
|
|
|
|
|
|
|
|
124 |
self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
|
125 |
self.hf_api = HfApi(token=HF_TOKEN)
|
126 |
self.cached_models = None
|
@@ -129,56 +135,66 @@ class ModelService(HuggingFaceService):
|
|
129 |
self._init_done = True
|
130 |
logger.info(LogFormatter.success("Initialization complete"))
|
131 |
|
132 |
-
async def _download_and_process_file(
|
|
|
|
|
133 |
"""Download and process a file asynchronously"""
|
134 |
try:
|
135 |
# Build file URL
|
136 |
url = f"https://huggingface.co/datasets/{QUEUE_REPO}/resolve/main/{file}"
|
137 |
headers = build_hf_headers(token=self.token)
|
138 |
-
|
139 |
# Download file
|
140 |
async with session.get(url, headers=headers) as response:
|
141 |
if response.status != 200:
|
142 |
-
logger.error(
|
|
|
|
|
|
|
|
|
143 |
progress.update()
|
144 |
return None
|
145 |
-
|
146 |
try:
|
147 |
# First read content as text
|
148 |
text_content = await response.text()
|
149 |
# Then parse JSON
|
150 |
content = json.loads(text_content)
|
151 |
except json.JSONDecodeError as e:
|
152 |
-
logger.error(
|
|
|
|
|
153 |
progress.update()
|
154 |
return None
|
155 |
-
|
156 |
# Get status and determine target status
|
157 |
status = content.get("status", "PENDING").upper()
|
158 |
target_status = None
|
159 |
status_map = {
|
160 |
"PENDING": ["PENDING"],
|
161 |
"EVALUATING": ["RUNNING"],
|
162 |
-
"FINISHED": ["FINISHED"]
|
163 |
}
|
164 |
-
|
165 |
for target, source_statuses in status_map.items():
|
166 |
if status in source_statuses:
|
167 |
target_status = target
|
168 |
break
|
169 |
-
|
170 |
if not target_status:
|
171 |
progress.update()
|
172 |
return None
|
173 |
-
|
174 |
# Calculate wait time
|
175 |
try:
|
176 |
-
submit_time = datetime.fromisoformat(
|
|
|
|
|
177 |
if submit_time.tzinfo is None:
|
178 |
submit_time = submit_time.replace(tzinfo=timezone.utc)
|
179 |
current_time = datetime.now(timezone.utc)
|
180 |
wait_time = current_time - submit_time
|
181 |
-
|
182 |
model_info = {
|
183 |
"name": content["model"],
|
184 |
"submitter": content.get("sender", "Unknown"),
|
@@ -186,17 +202,17 @@ class ModelService(HuggingFaceService):
|
|
186 |
"wait_time": f"{wait_time.total_seconds():.1f}s",
|
187 |
"submission_time": content["submitted_time"],
|
188 |
"status": target_status,
|
189 |
-
"precision": content.get("precision", "Unknown")
|
190 |
}
|
191 |
-
|
192 |
progress.update()
|
193 |
return model_info
|
194 |
-
|
195 |
except (ValueError, TypeError) as e:
|
196 |
logger.error(LogFormatter.error(f"Failed to process {file}", e))
|
197 |
progress.update()
|
198 |
return None
|
199 |
-
|
200 |
except Exception as e:
|
201 |
logger.error(LogFormatter.error(f"Failed to load {file}", e))
|
202 |
progress.update()
|
@@ -207,31 +223,25 @@ class ModelService(HuggingFaceService):
|
|
207 |
try:
|
208 |
logger.info(LogFormatter.section("CACHE REFRESH"))
|
209 |
self._log_repo_operation("read", QUEUE_REPO, "Refreshing models cache")
|
210 |
-
|
211 |
# Initialize models dictionary
|
212 |
-
models = {
|
213 |
-
|
214 |
-
"evaluating": [],
|
215 |
-
"pending": []
|
216 |
-
}
|
217 |
-
|
218 |
try:
|
219 |
logger.info(LogFormatter.subsection("DATASET LOADING"))
|
220 |
logger.info(LogFormatter.info("Loading dataset..."))
|
221 |
-
|
222 |
# Download entire dataset snapshot
|
223 |
with suppress_output():
|
224 |
local_dir = self.hf_api.snapshot_download(
|
225 |
-
repo_id=QUEUE_REPO,
|
226 |
-
repo_type="dataset",
|
227 |
-
token=self.token
|
228 |
)
|
229 |
-
|
230 |
# List JSON files in local directory
|
231 |
local_path = Path(local_dir)
|
232 |
json_files = list(local_path.glob("**/*.json"))
|
233 |
total_files = len(json_files)
|
234 |
-
|
235 |
# Log repository stats
|
236 |
stats = {
|
237 |
"Total_Files": total_files,
|
@@ -239,46 +249,48 @@ class ModelService(HuggingFaceService):
|
|
239 |
}
|
240 |
for line in LogFormatter.stats(stats, "Repository Statistics"):
|
241 |
logger.info(line)
|
242 |
-
|
243 |
if not json_files:
|
244 |
raise Exception("No JSON files found in repository")
|
245 |
-
|
246 |
# Initialize progress tracker
|
247 |
progress = ProgressTracker(total_files, "PROCESSING FILES")
|
248 |
-
|
249 |
# Process local files
|
250 |
model_submissions = {} # Dict to track latest submission for each (model_id, revision, precision)
|
251 |
for file_path in json_files:
|
252 |
try:
|
253 |
-
with open(file_path,
|
254 |
content = json.load(f)
|
255 |
-
|
256 |
# Get status and determine target status
|
257 |
status = content.get("status", "PENDING").upper()
|
258 |
target_status = None
|
259 |
status_map = {
|
260 |
"PENDING": ["PENDING"],
|
261 |
"EVALUATING": ["RUNNING"],
|
262 |
-
"FINISHED": ["FINISHED"]
|
263 |
}
|
264 |
-
|
265 |
for target, source_statuses in status_map.items():
|
266 |
if status in source_statuses:
|
267 |
target_status = target
|
268 |
break
|
269 |
-
|
270 |
if not target_status:
|
271 |
progress.update()
|
272 |
continue
|
273 |
-
|
274 |
# Calculate wait time
|
275 |
try:
|
276 |
-
submit_time = datetime.fromisoformat(
|
|
|
|
|
277 |
if submit_time.tzinfo is None:
|
278 |
submit_time = submit_time.replace(tzinfo=timezone.utc)
|
279 |
current_time = datetime.now(timezone.utc)
|
280 |
wait_time = current_time - submit_time
|
281 |
-
|
282 |
model_info = {
|
283 |
"name": content["model"],
|
284 |
"submitter": content.get("sender", "Unknown"),
|
@@ -286,50 +298,68 @@ class ModelService(HuggingFaceService):
|
|
286 |
"wait_time": f"{wait_time.total_seconds():.1f}s",
|
287 |
"submission_time": content["submitted_time"],
|
288 |
"status": target_status,
|
289 |
-
"precision": content.get("precision", "Unknown")
|
290 |
}
|
291 |
-
|
292 |
# Use (model_id, revision, precision) as key to track latest submission
|
293 |
-
key = (
|
294 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
model_submissions[key] = model_info
|
296 |
-
|
297 |
except (ValueError, TypeError) as e:
|
298 |
-
logger.error(
|
299 |
-
|
|
|
|
|
|
|
|
|
300 |
except Exception as e:
|
301 |
-
logger.error(
|
|
|
|
|
302 |
finally:
|
303 |
progress.update()
|
304 |
-
|
305 |
# Populate models dict with deduplicated submissions
|
306 |
for model_info in model_submissions.values():
|
307 |
models[model_info["status"].lower()].append(model_info)
|
308 |
-
|
309 |
progress.close()
|
310 |
-
|
311 |
# Final summary with fancy formatting
|
312 |
logger.info(LogFormatter.section("CACHE SUMMARY"))
|
313 |
stats = {
|
314 |
"Finished": len(models["finished"]),
|
315 |
"Evaluating": len(models["evaluating"]),
|
316 |
-
"Pending": len(models["pending"])
|
317 |
}
|
318 |
for line in LogFormatter.stats(stats, "Models by Status"):
|
319 |
logger.info(line)
|
320 |
-
logger.info("="*50)
|
321 |
-
|
322 |
except Exception as e:
|
323 |
logger.error(LogFormatter.error("Error processing files", e))
|
324 |
raise
|
325 |
-
|
326 |
# Update cache
|
327 |
self.cached_models = models
|
328 |
self.last_cache_update = time.time()
|
329 |
logger.info(LogFormatter.success("Cache updated successfully"))
|
330 |
-
|
331 |
return models
|
332 |
-
|
333 |
except Exception as e:
|
334 |
logger.error(LogFormatter.error("Cache refresh failed", e))
|
335 |
raise
|
@@ -337,40 +367,48 @@ class ModelService(HuggingFaceService):
|
|
337 |
async def initialize(self):
|
338 |
"""Initialize the model service"""
|
339 |
if self._initialized:
|
340 |
-
logger.info(
|
|
|
|
|
341 |
return
|
342 |
-
|
343 |
try:
|
344 |
logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
|
345 |
-
|
346 |
# Check if cache already exists
|
347 |
cache_path = cache_config.get_cache_path("datasets")
|
348 |
if not cache_path.exists() or not any(cache_path.iterdir()):
|
349 |
-
logger.info(
|
|
|
|
|
|
|
|
|
350 |
cache_config.flush_cache("datasets")
|
351 |
else:
|
352 |
logger.info(LogFormatter.info("Using existing datasets cache"))
|
353 |
-
|
354 |
# Ensure eval requests directory exists
|
355 |
self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
|
356 |
-
logger.info(
|
357 |
-
|
|
|
|
|
358 |
# List existing files
|
359 |
if self.eval_requests_path.exists():
|
360 |
files = list(self.eval_requests_path.glob("**/*.json"))
|
361 |
stats = {
|
362 |
"Total_Files": len(files),
|
363 |
-
"Directory": str(self.eval_requests_path)
|
364 |
}
|
365 |
for line in LogFormatter.stats(stats, "Eval Requests"):
|
366 |
logger.info(line)
|
367 |
-
|
368 |
# Load initial cache
|
369 |
await self._refresh_models_cache()
|
370 |
-
|
371 |
self._initialized = True
|
372 |
logger.info(LogFormatter.success("Model service initialization complete"))
|
373 |
-
|
374 |
except Exception as e:
|
375 |
logger.error(LogFormatter.error("Initialization failed", e))
|
376 |
raise
|
@@ -378,44 +416,59 @@ class ModelService(HuggingFaceService):
|
|
378 |
async def get_models(self) -> Dict[str, List[Dict[str, Any]]]:
|
379 |
"""Get all models with their status"""
|
380 |
if not self._initialized:
|
381 |
-
logger.info(
|
|
|
|
|
382 |
await self.initialize()
|
383 |
-
|
384 |
current_time = time.time()
|
385 |
cache_age = current_time - self.last_cache_update
|
386 |
-
|
387 |
# Check if cache needs refresh
|
388 |
if not self.cached_models:
|
389 |
-
logger.info(
|
|
|
|
|
390 |
return await self._refresh_models_cache()
|
391 |
elif cache_age > self.cache_ttl:
|
392 |
-
logger.info(
|
|
|
|
|
|
|
|
|
393 |
return await self._refresh_models_cache()
|
394 |
else:
|
395 |
logger.info(LogFormatter.info(f"Using cached data ({cache_age:.1f}s old)"))
|
396 |
return self.cached_models
|
397 |
|
398 |
async def submit_model(
|
399 |
-
self,
|
400 |
-
model_data: Dict[str, Any],
|
401 |
-
user_id: str
|
402 |
) -> Dict[str, Any]:
|
403 |
logger.info(LogFormatter.section("MODEL SUBMISSION"))
|
404 |
-
self._log_repo_operation(
|
|
|
|
|
|
|
|
|
405 |
stats = {
|
406 |
"Model": model_data["model_id"],
|
407 |
"User": user_id,
|
408 |
"Revision": model_data["revision"],
|
409 |
"Precision": model_data["precision"],
|
410 |
-
"Type": model_data["model_type"]
|
411 |
}
|
412 |
for line in LogFormatter.tree(stats, "Submission Details"):
|
413 |
logger.info(line)
|
414 |
-
|
415 |
# Validate required fields
|
416 |
required_fields = [
|
417 |
-
"model_id",
|
418 |
-
"
|
|
|
|
|
|
|
|
|
|
|
419 |
]
|
420 |
for field in required_fields:
|
421 |
if field not in model_data:
|
@@ -424,23 +477,25 @@ class ModelService(HuggingFaceService):
|
|
424 |
# Get model info and validate it exists on HuggingFace
|
425 |
try:
|
426 |
logger.info(LogFormatter.subsection("MODEL VALIDATION"))
|
427 |
-
|
428 |
# Get the model info to check if it exists
|
429 |
model_info = self.hf_api.model_info(
|
430 |
model_data["model_id"],
|
431 |
revision=model_data["revision"],
|
432 |
-
token=self.token
|
433 |
)
|
434 |
-
|
435 |
if not model_info:
|
436 |
-
raise Exception(
|
437 |
-
|
|
|
|
|
438 |
logger.info(LogFormatter.success("Model exists on HuggingFace Hub"))
|
439 |
-
|
440 |
except Exception as e:
|
441 |
logger.error(LogFormatter.error("Model validation failed", e))
|
442 |
raise
|
443 |
-
|
444 |
# Update model revision with commit sha
|
445 |
model_data["revision"] = model_info.sha
|
446 |
|
@@ -448,11 +503,13 @@ class ModelService(HuggingFaceService):
|
|
448 |
try:
|
449 |
logger.info(LogFormatter.subsection("CHECKING EXISTING SUBMISSIONS"))
|
450 |
existing_models = await self.get_models()
|
451 |
-
|
452 |
# Call the official provider status check
|
453 |
-
|
454 |
-
|
455 |
-
|
|
|
|
|
456 |
)
|
457 |
if not is_valid:
|
458 |
raise ValueError(error_message)
|
@@ -460,11 +517,16 @@ class ModelService(HuggingFaceService):
|
|
460 |
# Check in all statuses (pending, evaluating, finished)
|
461 |
for status, models in existing_models.items():
|
462 |
for model in models:
|
463 |
-
if
|
|
|
|
|
|
|
464 |
error_msg = f"Model {model_data['model_id']} revision {model_data['revision']} is already in the system with status: {status}"
|
465 |
-
logger.error(
|
|
|
|
|
466 |
raise ValueError(error_msg)
|
467 |
-
|
468 |
logger.info(LogFormatter.success("No existing submission found"))
|
469 |
except ValueError:
|
470 |
raise
|
@@ -474,9 +536,7 @@ class ModelService(HuggingFaceService):
|
|
474 |
|
475 |
# Check that model on hub and valid
|
476 |
valid, error, model_config = await self.validator.is_model_on_hub(
|
477 |
-
model_data["model_id"],
|
478 |
-
model_data["revision"],
|
479 |
-
test_tokenizer=True
|
480 |
)
|
481 |
if not valid:
|
482 |
logger.error(LogFormatter.error("Model on hub validation failed", error))
|
@@ -497,12 +557,14 @@ class ModelService(HuggingFaceService):
|
|
497 |
model_info,
|
498 |
model_data["precision"],
|
499 |
model_data["base_model"],
|
500 |
-
revision=model_data["revision"]
|
501 |
)
|
502 |
if model_size is None:
|
503 |
logger.error(LogFormatter.error("Model size validation failed", error))
|
504 |
raise Exception(error)
|
505 |
-
logger.info(
|
|
|
|
|
506 |
|
507 |
# Size limits based on precision
|
508 |
if model_data["precision"] in ["float16", "bfloat16"] and model_size > 100:
|
@@ -513,16 +575,16 @@ class ModelService(HuggingFaceService):
|
|
513 |
# Chat template validation if requested
|
514 |
if model_data["use_chat_template"]:
|
515 |
valid, error = await self.validator.check_chat_template(
|
516 |
-
model_data["model_id"],
|
517 |
-
model_data["revision"]
|
518 |
)
|
519 |
if not valid:
|
520 |
-
logger.error(
|
|
|
|
|
521 |
raise Exception(error)
|
522 |
logger.info(LogFormatter.success("Chat template validation passed"))
|
523 |
|
524 |
-
|
525 |
-
architectures = model_info.config.get("architectures", "")
|
526 |
if architectures:
|
527 |
architectures = ";".join(architectures)
|
528 |
|
@@ -541,9 +603,9 @@ class ModelService(HuggingFaceService):
|
|
541 |
"job_id": -1,
|
542 |
"job_start_time": None,
|
543 |
"use_chat_template": model_data["use_chat_template"],
|
544 |
-
"sender": user_id
|
545 |
}
|
546 |
-
|
547 |
logger.info(LogFormatter.subsection("EVALUATION ENTRY"))
|
548 |
for line in LogFormatter.tree(eval_entry):
|
549 |
logger.info(line)
|
@@ -552,18 +614,24 @@ class ModelService(HuggingFaceService):
|
|
552 |
try:
|
553 |
logger.info(LogFormatter.subsection("UPLOADING TO HUGGINGFACE"))
|
554 |
logger.info(LogFormatter.info(f"Uploading to {QUEUE_REPO}..."))
|
555 |
-
|
556 |
# Construct the path in the dataset
|
557 |
-
org_or_user =
|
|
|
|
|
|
|
|
|
558 |
model_path = model_data["model_id"].split("/")[-1]
|
559 |
relative_path = f"{org_or_user}/{model_path}_eval_request_False_{model_data['precision']}_{model_data['weight_type']}.json"
|
560 |
-
|
561 |
# Create a temporary file with the request
|
562 |
-
with tempfile.NamedTemporaryFile(
|
|
|
|
|
563 |
json.dump(eval_entry, temp_file, indent=2)
|
564 |
temp_file.flush()
|
565 |
temp_path = temp_file.name
|
566 |
-
|
567 |
# Upload file directly
|
568 |
self.hf_api.upload_file(
|
569 |
path_or_fileobj=temp_path,
|
@@ -571,14 +639,14 @@ class ModelService(HuggingFaceService):
|
|
571 |
repo_id=QUEUE_REPO,
|
572 |
repo_type="dataset",
|
573 |
commit_message=f"Add {model_data['model_id']} to eval queue",
|
574 |
-
token=self.token
|
575 |
)
|
576 |
-
|
577 |
# Clean up temp file
|
578 |
os.unlink(temp_path)
|
579 |
-
|
580 |
logger.info(LogFormatter.success("Upload successful"))
|
581 |
-
|
582 |
except Exception as e:
|
583 |
logger.error(LogFormatter.error("Upload failed", e))
|
584 |
raise
|
@@ -586,15 +654,19 @@ class ModelService(HuggingFaceService):
|
|
586 |
# Add automatic vote
|
587 |
try:
|
588 |
logger.info(LogFormatter.subsection("AUTOMATIC VOTE"))
|
589 |
-
logger.info(
|
|
|
|
|
|
|
|
|
590 |
await self.vote_service.add_vote(
|
591 |
model_data["model_id"],
|
592 |
user_id,
|
593 |
"up",
|
594 |
{
|
595 |
"precision": model_data["precision"],
|
596 |
-
"revision": model_data["revision"]
|
597 |
-
}
|
598 |
)
|
599 |
logger.info(LogFormatter.success("Vote recorded successfully"))
|
600 |
except Exception as e:
|
@@ -603,14 +675,14 @@ class ModelService(HuggingFaceService):
|
|
603 |
|
604 |
return {
|
605 |
"status": "success",
|
606 |
-
"message": "The model was submitted successfully, and the vote has been recorded"
|
607 |
}
|
608 |
|
609 |
async def get_model_status(self, model_id: str) -> Dict[str, Any]:
|
610 |
"""Get evaluation status of a model"""
|
611 |
logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
|
612 |
eval_path = self.eval_requests_path
|
613 |
-
|
614 |
for user_folder in eval_path.iterdir():
|
615 |
if user_folder.is_dir():
|
616 |
for file in user_folder.glob("*.json"):
|
@@ -620,24 +692,26 @@ class ModelService(HuggingFaceService):
|
|
620 |
status = {
|
621 |
"status": data["status"],
|
622 |
"submitted_time": data["submitted_time"],
|
623 |
-
"job_id": data.get("job_id", -1)
|
624 |
}
|
625 |
logger.info(LogFormatter.success("Status found"))
|
626 |
for line in LogFormatter.tree(status, "Model Status"):
|
627 |
logger.info(line)
|
628 |
return status
|
629 |
-
|
630 |
logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
|
631 |
return {"status": "not_found"}
|
632 |
|
633 |
-
async def get_organization_submissions(
|
|
|
|
|
634 |
"""Get all submissions from a user in the last n days"""
|
635 |
try:
|
636 |
# Get all models
|
637 |
all_models = await self.get_models()
|
638 |
current_time = datetime.now(timezone.utc)
|
639 |
cutoff_time = current_time - timedelta(days=days)
|
640 |
-
|
641 |
# Filter models by submitter and submission time
|
642 |
user_submissions = []
|
643 |
for status, models in all_models.items():
|
@@ -650,19 +724,21 @@ class ModelService(HuggingFaceService):
|
|
650 |
)
|
651 |
# Check if within time window
|
652 |
if submit_time > cutoff_time:
|
653 |
-
user_submissions.append(
|
654 |
-
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
|
|
|
|
660 |
return sorted(
|
661 |
-
user_submissions,
|
662 |
-
key=lambda x: x["submission_time"],
|
663 |
-
reverse=True
|
664 |
)
|
665 |
-
|
666 |
except Exception as e:
|
667 |
-
logger.error(
|
668 |
-
|
|
|
|
|
|
5 |
from pathlib import Path
|
6 |
import logging
|
7 |
import aiohttp
|
|
|
8 |
import time
|
9 |
+
from huggingface_hub import HfApi
|
10 |
from huggingface_hub.utils import build_hf_headers
|
11 |
from datasets import disable_progress_bar
|
12 |
import sys
|
13 |
import contextlib
|
|
|
14 |
import tempfile
|
15 |
|
16 |
+
from app.config import HF_TOKEN
|
17 |
+
from app.config.hf_config import QUEUE_REPO
|
|
|
|
|
|
|
|
|
18 |
from app.services.hf_service import HuggingFaceService
|
19 |
from app.utils.model_validation import ModelValidator
|
20 |
from app.services.votes import VoteService
|
|
|
26 |
|
27 |
logger = logging.getLogger(__name__)
|
28 |
|
29 |
+
|
30 |
# Context manager to temporarily disable stdout and stderr
|
31 |
@contextlib.contextmanager
|
32 |
def suppress_output():
|
33 |
stdout = sys.stdout
|
34 |
stderr = sys.stderr
|
35 |
+
devnull = open(os.devnull, "w")
|
36 |
try:
|
37 |
sys.stdout = devnull
|
38 |
sys.stderr = devnull
|
|
|
42 |
sys.stderr = stderr
|
43 |
devnull.close()
|
44 |
|
45 |
+
|
46 |
class ProgressTracker:
|
47 |
def __init__(self, total: int, desc: str = "Progress", update_frequency: int = 10):
|
48 |
self.total = total
|
|
|
51 |
self.start_time = time.time()
|
52 |
self.update_frequency = update_frequency # Percentage steps
|
53 |
self.last_update = -1
|
54 |
+
|
55 |
# Initial log with fancy formatting
|
56 |
logger.info(LogFormatter.section(desc))
|
57 |
logger.info(LogFormatter.info(f"Starting processing of {total:,} items..."))
|
58 |
sys.stdout.flush()
|
59 |
+
|
60 |
def update(self, n: int = 1):
|
61 |
self.current += n
|
62 |
current_percentage = (self.current * 100) // self.total
|
63 |
+
|
64 |
# Only update on frequency steps (e.g., 0%, 10%, 20%, etc.)
|
65 |
+
if (
|
66 |
+
current_percentage >= self.last_update + self.update_frequency
|
67 |
+
or current_percentage == 100
|
68 |
+
):
|
69 |
elapsed = time.time() - self.start_time
|
70 |
rate = self.current / elapsed if elapsed > 0 else 0
|
71 |
remaining = (self.total - self.current) / rate if rate > 0 else 0
|
72 |
+
|
73 |
# Create progress stats
|
74 |
stats = {
|
75 |
"Progress": LogFormatter.progress_bar(self.current, self.total),
|
76 |
"Items": f"{self.current:,}/{self.total:,}",
|
77 |
"Time": f"⏱️ {elapsed:.1f}s elapsed, {remaining:.1f}s remaining",
|
78 |
+
"Rate": f"🚀 {rate:.1f} items/s",
|
79 |
}
|
80 |
+
|
81 |
# Log progress using tree format
|
82 |
for line in LogFormatter.tree(stats):
|
83 |
logger.info(line)
|
84 |
sys.stdout.flush()
|
85 |
+
|
86 |
+
self.last_update = (
|
87 |
+
current_percentage // self.update_frequency
|
88 |
+
) * self.update_frequency
|
89 |
+
|
90 |
def close(self):
|
91 |
elapsed = time.time() - self.start_time
|
92 |
rate = self.total / elapsed if elapsed > 0 else 0
|
93 |
+
|
94 |
# Final summary with fancy formatting
|
95 |
logger.info(LogFormatter.section("COMPLETED"))
|
96 |
stats = {
|
97 |
"Total": f"{self.total:,} items",
|
98 |
"Time": f"{elapsed:.1f}s",
|
99 |
+
"Rate": f"{rate:.1f} items/s",
|
100 |
}
|
101 |
for line in LogFormatter.stats(stats):
|
102 |
logger.info(line)
|
103 |
+
logger.info("=" * 50)
|
104 |
sys.stdout.flush()
|
105 |
|
106 |
+
|
107 |
class ModelService(HuggingFaceService):
|
108 |
+
_instance: Optional["ModelService"] = None
|
109 |
_initialized = False
|
110 |
+
|
111 |
def __new__(cls):
|
112 |
if cls._instance is None:
|
113 |
logger.info(LogFormatter.info("Creating new ModelService instance"))
|
|
|
115 |
return cls._instance
|
116 |
|
117 |
def __init__(self):
|
118 |
+
if not hasattr(self, "_init_done"):
|
119 |
logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
|
120 |
super().__init__()
|
121 |
self.validator = ModelValidator()
|
122 |
self.vote_service = VoteService()
|
123 |
self.eval_requests_path = cache_config.eval_requests_file
|
124 |
+
logger.info(
|
125 |
+
LogFormatter.info(
|
126 |
+
f"Using eval requests path: {self.eval_requests_path}"
|
127 |
+
)
|
128 |
+
)
|
129 |
+
|
130 |
self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
|
131 |
self.hf_api = HfApi(token=HF_TOKEN)
|
132 |
self.cached_models = None
|
|
|
135 |
self._init_done = True
|
136 |
logger.info(LogFormatter.success("Initialization complete"))
|
137 |
|
138 |
+
async def _download_and_process_file(
|
139 |
+
self, file: str, session: aiohttp.ClientSession, progress: ProgressTracker
|
140 |
+
) -> Optional[Dict]:
|
141 |
"""Download and process a file asynchronously"""
|
142 |
try:
|
143 |
# Build file URL
|
144 |
url = f"https://huggingface.co/datasets/{QUEUE_REPO}/resolve/main/{file}"
|
145 |
headers = build_hf_headers(token=self.token)
|
146 |
+
|
147 |
# Download file
|
148 |
async with session.get(url, headers=headers) as response:
|
149 |
if response.status != 200:
|
150 |
+
logger.error(
|
151 |
+
LogFormatter.error(
|
152 |
+
f"Failed to download {file}", f"HTTP {response.status}"
|
153 |
+
)
|
154 |
+
)
|
155 |
progress.update()
|
156 |
return None
|
157 |
+
|
158 |
try:
|
159 |
# First read content as text
|
160 |
text_content = await response.text()
|
161 |
# Then parse JSON
|
162 |
content = json.loads(text_content)
|
163 |
except json.JSONDecodeError as e:
|
164 |
+
logger.error(
|
165 |
+
LogFormatter.error(f"Failed to decode JSON from {file}", e)
|
166 |
+
)
|
167 |
progress.update()
|
168 |
return None
|
169 |
+
|
170 |
# Get status and determine target status
|
171 |
status = content.get("status", "PENDING").upper()
|
172 |
target_status = None
|
173 |
status_map = {
|
174 |
"PENDING": ["PENDING"],
|
175 |
"EVALUATING": ["RUNNING"],
|
176 |
+
"FINISHED": ["FINISHED"],
|
177 |
}
|
178 |
+
|
179 |
for target, source_statuses in status_map.items():
|
180 |
if status in source_statuses:
|
181 |
target_status = target
|
182 |
break
|
183 |
+
|
184 |
if not target_status:
|
185 |
progress.update()
|
186 |
return None
|
187 |
+
|
188 |
# Calculate wait time
|
189 |
try:
|
190 |
+
submit_time = datetime.fromisoformat(
|
191 |
+
content["submitted_time"].replace("Z", "+00:00")
|
192 |
+
)
|
193 |
if submit_time.tzinfo is None:
|
194 |
submit_time = submit_time.replace(tzinfo=timezone.utc)
|
195 |
current_time = datetime.now(timezone.utc)
|
196 |
wait_time = current_time - submit_time
|
197 |
+
|
198 |
model_info = {
|
199 |
"name": content["model"],
|
200 |
"submitter": content.get("sender", "Unknown"),
|
|
|
202 |
"wait_time": f"{wait_time.total_seconds():.1f}s",
|
203 |
"submission_time": content["submitted_time"],
|
204 |
"status": target_status,
|
205 |
+
"precision": content.get("precision", "Unknown"),
|
206 |
}
|
207 |
+
|
208 |
progress.update()
|
209 |
return model_info
|
210 |
+
|
211 |
except (ValueError, TypeError) as e:
|
212 |
logger.error(LogFormatter.error(f"Failed to process {file}", e))
|
213 |
progress.update()
|
214 |
return None
|
215 |
+
|
216 |
except Exception as e:
|
217 |
logger.error(LogFormatter.error(f"Failed to load {file}", e))
|
218 |
progress.update()
|
|
|
223 |
try:
|
224 |
logger.info(LogFormatter.section("CACHE REFRESH"))
|
225 |
self._log_repo_operation("read", QUEUE_REPO, "Refreshing models cache")
|
226 |
+
|
227 |
# Initialize models dictionary
|
228 |
+
models = {"finished": [], "evaluating": [], "pending": []}
|
229 |
+
|
|
|
|
|
|
|
|
|
230 |
try:
|
231 |
logger.info(LogFormatter.subsection("DATASET LOADING"))
|
232 |
logger.info(LogFormatter.info("Loading dataset..."))
|
233 |
+
|
234 |
# Download entire dataset snapshot
|
235 |
with suppress_output():
|
236 |
local_dir = self.hf_api.snapshot_download(
|
237 |
+
repo_id=QUEUE_REPO, repo_type="dataset", token=self.token
|
|
|
|
|
238 |
)
|
239 |
+
|
240 |
# List JSON files in local directory
|
241 |
local_path = Path(local_dir)
|
242 |
json_files = list(local_path.glob("**/*.json"))
|
243 |
total_files = len(json_files)
|
244 |
+
|
245 |
# Log repository stats
|
246 |
stats = {
|
247 |
"Total_Files": total_files,
|
|
|
249 |
}
|
250 |
for line in LogFormatter.stats(stats, "Repository Statistics"):
|
251 |
logger.info(line)
|
252 |
+
|
253 |
if not json_files:
|
254 |
raise Exception("No JSON files found in repository")
|
255 |
+
|
256 |
# Initialize progress tracker
|
257 |
progress = ProgressTracker(total_files, "PROCESSING FILES")
|
258 |
+
|
259 |
# Process local files
|
260 |
model_submissions = {} # Dict to track latest submission for each (model_id, revision, precision)
|
261 |
for file_path in json_files:
|
262 |
try:
|
263 |
+
with open(file_path, "r") as f:
|
264 |
content = json.load(f)
|
265 |
+
|
266 |
# Get status and determine target status
|
267 |
status = content.get("status", "PENDING").upper()
|
268 |
target_status = None
|
269 |
status_map = {
|
270 |
"PENDING": ["PENDING"],
|
271 |
"EVALUATING": ["RUNNING"],
|
272 |
+
"FINISHED": ["FINISHED"],
|
273 |
}
|
274 |
+
|
275 |
for target, source_statuses in status_map.items():
|
276 |
if status in source_statuses:
|
277 |
target_status = target
|
278 |
break
|
279 |
+
|
280 |
if not target_status:
|
281 |
progress.update()
|
282 |
continue
|
283 |
+
|
284 |
# Calculate wait time
|
285 |
try:
|
286 |
+
submit_time = datetime.fromisoformat(
|
287 |
+
content["submitted_time"].replace("Z", "+00:00")
|
288 |
+
)
|
289 |
if submit_time.tzinfo is None:
|
290 |
submit_time = submit_time.replace(tzinfo=timezone.utc)
|
291 |
current_time = datetime.now(timezone.utc)
|
292 |
wait_time = current_time - submit_time
|
293 |
+
|
294 |
model_info = {
|
295 |
"name": content["model"],
|
296 |
"submitter": content.get("sender", "Unknown"),
|
|
|
298 |
"wait_time": f"{wait_time.total_seconds():.1f}s",
|
299 |
"submission_time": content["submitted_time"],
|
300 |
"status": target_status,
|
301 |
+
"precision": content.get("precision", "Unknown"),
|
302 |
}
|
303 |
+
|
304 |
# Use (model_id, revision, precision) as key to track latest submission
|
305 |
+
key = (
|
306 |
+
content["model"],
|
307 |
+
content["revision"],
|
308 |
+
content.get("precision", "Unknown"),
|
309 |
+
)
|
310 |
+
if (
|
311 |
+
key not in model_submissions
|
312 |
+
or submit_time
|
313 |
+
> datetime.fromisoformat(
|
314 |
+
model_submissions[key]["submission_time"].replace(
|
315 |
+
"Z", "+00:00"
|
316 |
+
)
|
317 |
+
)
|
318 |
+
):
|
319 |
model_submissions[key] = model_info
|
320 |
+
|
321 |
except (ValueError, TypeError) as e:
|
322 |
+
logger.error(
|
323 |
+
LogFormatter.error(
|
324 |
+
f"Failed to process {file_path.name}", e
|
325 |
+
)
|
326 |
+
)
|
327 |
+
|
328 |
except Exception as e:
|
329 |
+
logger.error(
|
330 |
+
LogFormatter.error(f"Failed to load {file_path.name}", e)
|
331 |
+
)
|
332 |
finally:
|
333 |
progress.update()
|
334 |
+
|
335 |
# Populate models dict with deduplicated submissions
|
336 |
for model_info in model_submissions.values():
|
337 |
models[model_info["status"].lower()].append(model_info)
|
338 |
+
|
339 |
progress.close()
|
340 |
+
|
341 |
# Final summary with fancy formatting
|
342 |
logger.info(LogFormatter.section("CACHE SUMMARY"))
|
343 |
stats = {
|
344 |
"Finished": len(models["finished"]),
|
345 |
"Evaluating": len(models["evaluating"]),
|
346 |
+
"Pending": len(models["pending"]),
|
347 |
}
|
348 |
for line in LogFormatter.stats(stats, "Models by Status"):
|
349 |
logger.info(line)
|
350 |
+
logger.info("=" * 50)
|
351 |
+
|
352 |
except Exception as e:
|
353 |
logger.error(LogFormatter.error("Error processing files", e))
|
354 |
raise
|
355 |
+
|
356 |
# Update cache
|
357 |
self.cached_models = models
|
358 |
self.last_cache_update = time.time()
|
359 |
logger.info(LogFormatter.success("Cache updated successfully"))
|
360 |
+
|
361 |
return models
|
362 |
+
|
363 |
except Exception as e:
|
364 |
logger.error(LogFormatter.error("Cache refresh failed", e))
|
365 |
raise
|
|
|
367 |
async def initialize(self):
|
368 |
"""Initialize the model service"""
|
369 |
if self._initialized:
|
370 |
+
logger.info(
|
371 |
+
LogFormatter.info("Service already initialized, using cached data")
|
372 |
+
)
|
373 |
return
|
374 |
+
|
375 |
try:
|
376 |
logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
|
377 |
+
|
378 |
# Check if cache already exists
|
379 |
cache_path = cache_config.get_cache_path("datasets")
|
380 |
if not cache_path.exists() or not any(cache_path.iterdir()):
|
381 |
+
logger.info(
|
382 |
+
LogFormatter.info(
|
383 |
+
"No existing cache found, initializing datasets cache..."
|
384 |
+
)
|
385 |
+
)
|
386 |
cache_config.flush_cache("datasets")
|
387 |
else:
|
388 |
logger.info(LogFormatter.info("Using existing datasets cache"))
|
389 |
+
|
390 |
# Ensure eval requests directory exists
|
391 |
self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
|
392 |
+
logger.info(
|
393 |
+
LogFormatter.info(f"Eval requests directory: {self.eval_requests_path}")
|
394 |
+
)
|
395 |
+
|
396 |
# List existing files
|
397 |
if self.eval_requests_path.exists():
|
398 |
files = list(self.eval_requests_path.glob("**/*.json"))
|
399 |
stats = {
|
400 |
"Total_Files": len(files),
|
401 |
+
"Directory": str(self.eval_requests_path),
|
402 |
}
|
403 |
for line in LogFormatter.stats(stats, "Eval Requests"):
|
404 |
logger.info(line)
|
405 |
+
|
406 |
# Load initial cache
|
407 |
await self._refresh_models_cache()
|
408 |
+
|
409 |
self._initialized = True
|
410 |
logger.info(LogFormatter.success("Model service initialization complete"))
|
411 |
+
|
412 |
except Exception as e:
|
413 |
logger.error(LogFormatter.error("Initialization failed", e))
|
414 |
raise
|
|
|
416 |
async def get_models(self) -> Dict[str, List[Dict[str, Any]]]:
|
417 |
"""Get all models with their status"""
|
418 |
if not self._initialized:
|
419 |
+
logger.info(
|
420 |
+
LogFormatter.info("Service not initialized, initializing now...")
|
421 |
+
)
|
422 |
await self.initialize()
|
423 |
+
|
424 |
current_time = time.time()
|
425 |
cache_age = current_time - self.last_cache_update
|
426 |
+
|
427 |
# Check if cache needs refresh
|
428 |
if not self.cached_models:
|
429 |
+
logger.info(
|
430 |
+
LogFormatter.info("No cached data available, refreshing cache...")
|
431 |
+
)
|
432 |
return await self._refresh_models_cache()
|
433 |
elif cache_age > self.cache_ttl:
|
434 |
+
logger.info(
|
435 |
+
LogFormatter.info(
|
436 |
+
f"Cache expired ({cache_age:.1f}s old, TTL: {self.cache_ttl}s)"
|
437 |
+
)
|
438 |
+
)
|
439 |
return await self._refresh_models_cache()
|
440 |
else:
|
441 |
logger.info(LogFormatter.info(f"Using cached data ({cache_age:.1f}s old)"))
|
442 |
return self.cached_models
|
443 |
|
444 |
async def submit_model(
|
445 |
+
self, model_data: Dict[str, Any], user_id: str
|
|
|
|
|
446 |
) -> Dict[str, Any]:
|
447 |
logger.info(LogFormatter.section("MODEL SUBMISSION"))
|
448 |
+
self._log_repo_operation(
|
449 |
+
"write",
|
450 |
+
QUEUE_REPO,
|
451 |
+
f"Submitting model {model_data['model_id']} by {user_id}",
|
452 |
+
)
|
453 |
stats = {
|
454 |
"Model": model_data["model_id"],
|
455 |
"User": user_id,
|
456 |
"Revision": model_data["revision"],
|
457 |
"Precision": model_data["precision"],
|
458 |
+
"Type": model_data["model_type"],
|
459 |
}
|
460 |
for line in LogFormatter.tree(stats, "Submission Details"):
|
461 |
logger.info(line)
|
462 |
+
|
463 |
# Validate required fields
|
464 |
required_fields = [
|
465 |
+
"model_id",
|
466 |
+
"base_model",
|
467 |
+
"revision",
|
468 |
+
"precision",
|
469 |
+
"weight_type",
|
470 |
+
"model_type",
|
471 |
+
"use_chat_template",
|
472 |
]
|
473 |
for field in required_fields:
|
474 |
if field not in model_data:
|
|
|
477 |
# Get model info and validate it exists on HuggingFace
|
478 |
try:
|
479 |
logger.info(LogFormatter.subsection("MODEL VALIDATION"))
|
480 |
+
|
481 |
# Get the model info to check if it exists
|
482 |
model_info = self.hf_api.model_info(
|
483 |
model_data["model_id"],
|
484 |
revision=model_data["revision"],
|
485 |
+
token=self.token,
|
486 |
)
|
487 |
+
|
488 |
if not model_info:
|
489 |
+
raise Exception(
|
490 |
+
f"Model {model_data['model_id']} not found on HuggingFace Hub"
|
491 |
+
)
|
492 |
+
|
493 |
logger.info(LogFormatter.success("Model exists on HuggingFace Hub"))
|
494 |
+
|
495 |
except Exception as e:
|
496 |
logger.error(LogFormatter.error("Model validation failed", e))
|
497 |
raise
|
498 |
+
|
499 |
# Update model revision with commit sha
|
500 |
model_data["revision"] = model_info.sha
|
501 |
|
|
|
503 |
try:
|
504 |
logger.info(LogFormatter.subsection("CHECKING EXISTING SUBMISSIONS"))
|
505 |
existing_models = await self.get_models()
|
506 |
+
|
507 |
# Call the official provider status check
|
508 |
+
(
|
509 |
+
is_valid,
|
510 |
+
error_message,
|
511 |
+
) = await self.validator.check_official_provider_status(
|
512 |
+
model_data["model_id"], existing_models
|
513 |
)
|
514 |
if not is_valid:
|
515 |
raise ValueError(error_message)
|
|
|
517 |
# Check in all statuses (pending, evaluating, finished)
|
518 |
for status, models in existing_models.items():
|
519 |
for model in models:
|
520 |
+
if (
|
521 |
+
model["name"] == model_data["model_id"]
|
522 |
+
and model["revision"] == model_data["revision"]
|
523 |
+
):
|
524 |
error_msg = f"Model {model_data['model_id']} revision {model_data['revision']} is already in the system with status: {status}"
|
525 |
+
logger.error(
|
526 |
+
LogFormatter.error("Submission rejected", error_msg)
|
527 |
+
)
|
528 |
raise ValueError(error_msg)
|
529 |
+
|
530 |
logger.info(LogFormatter.success("No existing submission found"))
|
531 |
except ValueError:
|
532 |
raise
|
|
|
536 |
|
537 |
# Check that model on hub and valid
|
538 |
valid, error, model_config = await self.validator.is_model_on_hub(
|
539 |
+
model_data["model_id"], model_data["revision"], test_tokenizer=True
|
|
|
|
|
540 |
)
|
541 |
if not valid:
|
542 |
logger.error(LogFormatter.error("Model on hub validation failed", error))
|
|
|
557 |
model_info,
|
558 |
model_data["precision"],
|
559 |
model_data["base_model"],
|
560 |
+
revision=model_data["revision"],
|
561 |
)
|
562 |
if model_size is None:
|
563 |
logger.error(LogFormatter.error("Model size validation failed", error))
|
564 |
raise Exception(error)
565 +         logger.info(
566 +             LogFormatter.success(f"Model size validation passed: {model_size:.1f}B")
567 +         )
568
569           # Size limits based on precision
570           if model_data["precision"] in ["float16", "bfloat16"] and model_size > 100:

575           # Chat template validation if requested
576           if model_data["use_chat_template"]:
577               valid, error = await self.validator.check_chat_template(
578 +                 model_data["model_id"], model_data["revision"]
579               )
580               if not valid:
581 +                 logger.error(
582 +                     LogFormatter.error("Chat template validation failed", error)
583 +                 )
584                   raise Exception(error)
585               logger.info(LogFormatter.success("Chat template validation passed"))
586
587 +         architectures = model_info.config.get("architectures", "")
588           if architectures:
589               architectures = ";".join(architectures)
590

603               "job_id": -1,
604               "job_start_time": None,
605               "use_chat_template": model_data["use_chat_template"],
606 +             "sender": user_id,
607           }
608 +
609           logger.info(LogFormatter.subsection("EVALUATION ENTRY"))
610           for line in LogFormatter.tree(eval_entry):
611               logger.info(line)

614           try:
615               logger.info(LogFormatter.subsection("UPLOADING TO HUGGINGFACE"))
616               logger.info(LogFormatter.info(f"Uploading to {QUEUE_REPO}..."))
617 +
618               # Construct the path in the dataset
619 +             org_or_user = (
620 +                 model_data["model_id"].split("/")[0]
621 +                 if "/" in model_data["model_id"]
622 +                 else ""
623 +             )
624               model_path = model_data["model_id"].split("/")[-1]
625               relative_path = f"{org_or_user}/{model_path}_eval_request_False_{model_data['precision']}_{model_data['weight_type']}.json"
626 +
627               # Create a temporary file with the request
628 +             with tempfile.NamedTemporaryFile(
629 +                 mode="w", suffix=".json", delete=False
630 +             ) as temp_file:
631                   json.dump(eval_entry, temp_file, indent=2)
632                   temp_file.flush()
633                   temp_path = temp_file.name
634 +
635               # Upload file directly
636               self.hf_api.upload_file(
637                   path_or_fileobj=temp_path,

639                   repo_id=QUEUE_REPO,
640                   repo_type="dataset",
641                   commit_message=f"Add {model_data['model_id']} to eval queue",
642 +                 token=self.token,
643               )
644 +
645               # Clean up temp file
646               os.unlink(temp_path)
647 +
648               logger.info(LogFormatter.success("Upload successful"))
649 +
650           except Exception as e:
651               logger.error(LogFormatter.error("Upload failed", e))
652               raise

654           # Add automatic vote
655           try:
656               logger.info(LogFormatter.subsection("AUTOMATIC VOTE"))
657 +             logger.info(
658 +                 LogFormatter.info(
659 +                     f"Adding upvote for {model_data['model_id']} by {user_id}"
660 +                 )
661 +             )
662               await self.vote_service.add_vote(
663                   model_data["model_id"],
664                   user_id,
665                   "up",
666                   {
667                       "precision": model_data["precision"],
668 +                     "revision": model_data["revision"],
669 +                 },
670               )
671               logger.info(LogFormatter.success("Vote recorded successfully"))
672           except Exception as e:

675
676           return {
677               "status": "success",
678 +             "message": "The model was submitted successfully, and the vote has been recorded",
679           }
680
681       async def get_model_status(self, model_id: str) -> Dict[str, Any]:
682           """Get evaluation status of a model"""
683           logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
684           eval_path = self.eval_requests_path
685 +
686           for user_folder in eval_path.iterdir():
687               if user_folder.is_dir():
688                   for file in user_folder.glob("*.json"):

692                           status = {
693                               "status": data["status"],
694                               "submitted_time": data["submitted_time"],
695 +                             "job_id": data.get("job_id", -1),
696                           }
697                           logger.info(LogFormatter.success("Status found"))
698                           for line in LogFormatter.tree(status, "Model Status"):
699                               logger.info(line)
700                           return status
701 +
702           logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
703           return {"status": "not_found"}
704
705 +     async def get_organization_submissions(
706 +         self, organization: str, days: int = 7
707 +     ) -> List[Dict[str, Any]]:
708           """Get all submissions from a user in the last n days"""
709           try:
710               # Get all models
711               all_models = await self.get_models()
712               current_time = datetime.now(timezone.utc)
713               cutoff_time = current_time - timedelta(days=days)
714 +
715               # Filter models by submitter and submission time
716               user_submissions = []
717               for status, models in all_models.items():

724                       )
725                       # Check if within time window
726                       if submit_time > cutoff_time:
727 +                         user_submissions.append(
728 +                             {
729 +                                 "name": model["name"],
730 +                                 "status": status,
731 +                                 "submission_time": model["submission_time"],
732 +                                 "precision": model["precision"],
733 +                             }
734 +                         )
735 +
736               return sorted(
737 +                 user_submissions, key=lambda x: x["submission_time"], reverse=True
738               )
739 +
740           except Exception as e:
741 +             logger.error(
742 +                 LogFormatter.error(f"Failed to get submissions for {organization}", e)
743 +             )
744 +             raise
|
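For context, a minimal self-contained sketch of the eval-request path that lines 618-625 above assemble; the model data below is illustrative only.

model_data = {
    "model_id": "example-org/example-model",
    "precision": "float16",
    "weight_type": "Original",
}

# Same construction as in the diff: org prefix (if any), then the request filename.
org_or_user = (
    model_data["model_id"].split("/")[0]
    if "/" in model_data["model_id"]
    else ""
)
model_path = model_data["model_id"].split("/")[-1]
relative_path = (
    f"{org_or_user}/{model_path}_eval_request_False_"
    f"{model_data['precision']}_{model_data['weight_type']}.json"
)

print(relative_path)
# example-org/example-model_eval_request_False_float16_Original.json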
backend/app/services/rate_limiter.py
DELETED
@@ -1,72 +0,0 @@
 1 - """
 2 - import logging
 3 - from datetime import datetime, timedelta, timezone
 4 - from typing import Tuple, Dict, List
 5 -
 6 - logger = logging.getLogger(__name__)
 7 -
 8 - class RateLimiter:
 9 -     def __init__(self, period_days: int = 7, quota: int = 5):
10 -         self.period_days = period_days
11 -         self.quota = quota
12 -         self.submission_history: Dict[str, List[datetime]] = {}
13 -         self.higher_quota_users = set()  # Users with higher quotas
14 -         self.unlimited_users = set()  # Users with no quota limits
15 -
16 -     def add_unlimited_user(self, user_id: str):
17 -         """Add a user to the unlimited users list"""
18 -         self.unlimited_users.add(user_id)
19 -
20 -     def add_higher_quota_user(self, user_id: str):
21 -         """Add a user to the higher quota users list"""
22 -         self.higher_quota_users.add(user_id)
23 -
24 -     def record_submission(self, user_id: str):
25 -         """Record a new submission for a user"""
26 -         current_time = datetime.now(timezone.utc)
27 -         if user_id not in self.submission_history:
28 -             self.submission_history[user_id] = []
29 -         self.submission_history[user_id].append(current_time)
30 -
31 -     def clean_old_submissions(self, user_id: str):
32 -         """Remove submissions older than the period"""
33 -         if user_id not in self.submission_history:
34 -             return
35 -
36 -         current_time = datetime.now(timezone.utc)
37 -         cutoff_time = current_time - timedelta(days=self.period_days)
38 -
39 -         self.submission_history[user_id] = [
40 -             time for time in self.submission_history[user_id]
41 -             if time > cutoff_time
42 -         ]
43 -
44 -     async def check_rate_limit(self, user_id: str) -> Tuple[bool, str]:
45 -         """Check if a user has exceeded their rate limit
46 -
47 -         Returns:
48 -             Tuple[bool, str]: (is_allowed, error_message)
49 -         """
50 -         # Unlimited users bypass all checks
51 -         if user_id in self.unlimited_users:
52 -             return True, ""
53 -
54 -         # Clean old submissions
55 -         self.clean_old_submissions(user_id)
56 -
57 -         # Get current submission count
58 -         submission_count = len(self.submission_history.get(user_id, []))
59 -
60 -         # Calculate user's quota
61 -         user_quota = self.quota * 2 if user_id in self.higher_quota_users else self.quota
62 -
63 -         # Check if user has exceeded their quota
64 -         if submission_count >= user_quota:
65 -             error_msg = (
66 -                 f"User '{user_id}' has reached the limit of {user_quota} submissions "
67 -                 f"in the last {self.period_days} days. Please wait before submitting again."
68 -             )
69 -             return False, error_msg
70 -
71 -         return True, ""
72 - """
|
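For context, a self-contained sketch of the sliding-window check the deleted (and already commented-out) RateLimiter implemented; the 7-day window and 5-submission quota follow the removed file, while the user id below is illustrative.

from datetime import datetime, timedelta, timezone
from typing import Dict, List, Tuple

PERIOD_DAYS = 7  # window length used by the removed RateLimiter
QUOTA = 5        # default submissions allowed per window

submission_history: Dict[str, List[datetime]] = {}

def record_submission(user_id: str) -> None:
    submission_history.setdefault(user_id, []).append(datetime.now(timezone.utc))

def check_rate_limit(user_id: str) -> Tuple[bool, str]:
    # Drop submissions older than the window, then compare against the quota.
    cutoff = datetime.now(timezone.utc) - timedelta(days=PERIOD_DAYS)
    recent = [t for t in submission_history.get(user_id, []) if t > cutoff]
    submission_history[user_id] = recent
    if len(recent) >= QUOTA:
        return False, (
            f"User '{user_id}' has reached the limit of {QUOTA} submissions "
            f"in the last {PERIOD_DAYS} days. Please wait before submitting again."
        )
    return True, ""

# The sixth submission inside one window is rejected.
for _ in range(6):
    allowed, message = check_rate_limit("example-user")
    if allowed:
        record_submission("example-user")
print(message)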
backend/app/services/votes.py
CHANGED
@@ -3,7 +3,6 @@ from typing import Dict, Any, List, Set, Tuple, Optional
|
|
3 |
import json
|
4 |
import logging
|
5 |
import asyncio
|
6 |
-
from pathlib import Path
|
7 |
import aiohttp
|
8 |
from huggingface_hub import HfApi
|
9 |
import tempfile
|
@@ -11,23 +10,24 @@ import os
|
|
11 |
|
12 |
from app.services.hf_service import HuggingFaceService
|
13 |
from app.config import HF_TOKEN
|
14 |
-
from app.config.hf_config import
|
15 |
from app.core.cache import cache_config
|
16 |
from app.core.formatting import LogFormatter
|
17 |
|
18 |
logger = logging.getLogger(__name__)
|
19 |
|
|
|
20 |
class VoteService(HuggingFaceService):
|
21 |
-
_instance: Optional[
|
22 |
_initialized = False
|
23 |
-
|
24 |
def __new__(cls):
|
25 |
if cls._instance is None:
|
26 |
cls._instance = super(VoteService, cls).__new__(cls)
|
27 |
return cls._instance
|
28 |
|
29 |
def __init__(self):
|
30 |
-
if not hasattr(self,
|
31 |
super().__init__()
|
32 |
self.votes_file = cache_config.votes_file
|
33 |
self.votes_to_upload: List[Dict[str, Any]] = []
|
@@ -48,41 +48,43 @@ class VoteService(HuggingFaceService):
|
|
48 |
if self._initialized:
|
49 |
await self._check_for_new_votes()
|
50 |
return
|
51 |
-
|
52 |
try:
|
53 |
logger.info(LogFormatter.section("VOTE SERVICE INITIALIZATION"))
|
54 |
-
|
55 |
# Ensure votes directory exists
|
56 |
self.votes_file.parent.mkdir(parents=True, exist_ok=True)
|
57 |
-
|
58 |
# Load remote votes
|
59 |
remote_votes = await self._fetch_remote_votes()
|
60 |
if remote_votes:
|
61 |
-
logger.info(
|
62 |
-
|
|
|
|
|
63 |
# Save to local file
|
64 |
-
with open(self.votes_file,
|
65 |
for vote in remote_votes:
|
66 |
json.dump(vote, f)
|
67 |
-
f.write(
|
68 |
-
|
69 |
# Load into memory
|
70 |
await self._load_existing_votes()
|
71 |
else:
|
72 |
logger.warning(LogFormatter.warning("No votes found on hub"))
|
73 |
-
|
74 |
self._initialized = True
|
75 |
self._last_sync = datetime.now(timezone.utc)
|
76 |
-
|
77 |
# Final summary
|
78 |
stats = {
|
79 |
"Total_Votes": self._total_votes,
|
80 |
-
"Last_Sync": self._last_sync.strftime("%Y-%m-%d %H:%M:%S UTC")
|
81 |
}
|
82 |
logger.info(LogFormatter.section("INITIALIZATION COMPLETE"))
|
83 |
for line in LogFormatter.stats(stats):
|
84 |
logger.info(line)
|
85 |
-
|
86 |
except Exception as e:
|
87 |
logger.error(LogFormatter.error("Initialization failed", e))
|
88 |
raise
|
@@ -91,7 +93,7 @@ class VoteService(HuggingFaceService):
|
|
91 |
"""Fetch votes from HF hub"""
|
92 |
url = f"https://huggingface.co/datasets/{VOTES_REPO}/raw/main/votes_data.jsonl"
|
93 |
headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
|
94 |
-
|
95 |
try:
|
96 |
async with aiohttp.ClientSession() as session:
|
97 |
async with session.get(url, headers=headers) as response:
|
@@ -106,7 +108,9 @@ class VoteService(HuggingFaceService):
|
|
106 |
continue
|
107 |
return votes
|
108 |
else:
|
109 |
-
logger.error(
|
|
|
|
|
110 |
return []
|
111 |
except Exception as e:
|
112 |
logger.error(f"Error fetching remote votes: {str(e)}")
|
@@ -117,18 +121,20 @@ class VoteService(HuggingFaceService):
|
|
117 |
try:
|
118 |
remote_votes = await self._fetch_remote_votes()
|
119 |
if len(remote_votes) != self._total_votes:
|
120 |
-
logger.info(
|
|
|
|
|
121 |
# Save to local file
|
122 |
-
with open(self.votes_file,
|
123 |
for vote in remote_votes:
|
124 |
json.dump(vote, f)
|
125 |
-
f.write(
|
126 |
-
|
127 |
# Reload into memory
|
128 |
await self._load_existing_votes()
|
129 |
else:
|
130 |
logger.info("Votes are in sync")
|
131 |
-
|
132 |
except Exception as e:
|
133 |
logger.error(f"Error checking for new votes: {str(e)}")
|
134 |
|
@@ -136,25 +142,31 @@ class VoteService(HuggingFaceService):
|
|
136 |
"""Sync votes with HuggingFace hub"""
|
137 |
try:
|
138 |
logger.info(LogFormatter.section("VOTE SYNC"))
|
139 |
-
|
140 |
# Get current remote votes
|
141 |
remote_votes = await self._fetch_remote_votes()
|
142 |
logger.info(LogFormatter.info(f"Loaded {len(remote_votes)} votes from hub"))
|
143 |
-
|
144 |
# If we have pending votes to upload
|
145 |
if self.votes_to_upload:
|
146 |
-
logger.info(
|
147 |
-
|
|
|
|
|
|
|
|
|
148 |
# Add new votes to remote votes
|
149 |
remote_votes.extend(self.votes_to_upload)
|
150 |
-
|
151 |
# Create temporary file with all votes
|
152 |
-
with tempfile.NamedTemporaryFile(
|
|
|
|
|
153 |
for vote in remote_votes:
|
154 |
json.dump(vote, temp_file)
|
155 |
-
temp_file.write(
|
156 |
temp_path = temp_file.name
|
157 |
-
|
158 |
try:
|
159 |
# Upload JSONL file directly
|
160 |
self.hf_api.upload_file(
|
@@ -163,32 +175,34 @@ class VoteService(HuggingFaceService):
|
|
163 |
repo_id=VOTES_REPO,
|
164 |
repo_type="dataset",
|
165 |
commit_message=f"Update votes: +{len(self.votes_to_upload)} new votes",
|
166 |
-
token=self.token
|
167 |
)
|
168 |
-
|
169 |
# Clear pending votes only if upload succeeded
|
170 |
self.votes_to_upload.clear()
|
171 |
-
logger.info(
|
172 |
-
|
|
|
|
|
173 |
except Exception as e:
|
174 |
logger.error(LogFormatter.error("Failed to upload votes to hub", e))
|
175 |
raise
|
176 |
finally:
|
177 |
# Clean up temp file
|
178 |
os.unlink(temp_path)
|
179 |
-
|
180 |
# Update local state
|
181 |
-
with open(self.votes_file,
|
182 |
for vote in remote_votes:
|
183 |
json.dump(vote, f)
|
184 |
-
f.write(
|
185 |
-
|
186 |
# Reload votes in memory
|
187 |
await self._load_existing_votes()
|
188 |
logger.info(LogFormatter.success("Sync completed successfully"))
|
189 |
|
190 |
self._last_sync = datetime.now(timezone.utc)
|
191 |
-
|
192 |
except Exception as e:
|
193 |
logger.error(LogFormatter.error("Sync failed", e))
|
194 |
raise
|
@@ -201,58 +215,73 @@ class VoteService(HuggingFaceService):
|
|
201 |
|
202 |
try:
|
203 |
logger.info(LogFormatter.section("LOADING VOTES"))
|
204 |
-
|
205 |
# Clear existing data structures
|
206 |
self.vote_check_set.clear()
|
207 |
self._votes_by_model.clear()
|
208 |
self._votes_by_user.clear()
|
209 |
-
|
210 |
vote_count = 0
|
211 |
latest_timestamp = None
|
212 |
-
|
213 |
with open(self.votes_file, "r") as f:
|
214 |
for line in f:
|
215 |
try:
|
216 |
vote = json.loads(line.strip())
|
217 |
vote_count += 1
|
218 |
-
|
219 |
# Track latest timestamp
|
220 |
try:
|
221 |
-
vote_timestamp = datetime.fromisoformat(
|
222 |
-
|
|
|
|
|
|
|
|
|
|
|
223 |
latest_timestamp = vote_timestamp
|
224 |
-
vote["timestamp"] = vote_timestamp.strftime(
|
|
|
|
|
225 |
except (KeyError, ValueError) as e:
|
226 |
-
logger.warning(
|
|
|
|
|
|
|
|
|
227 |
continue
|
228 |
-
|
229 |
if vote_count % 1000 == 0:
|
230 |
-
logger.info(
|
231 |
-
|
|
|
|
|
232 |
self._add_vote_to_memory(vote)
|
233 |
-
|
234 |
except json.JSONDecodeError as e:
|
235 |
logger.error(LogFormatter.error("Vote parsing failed", e))
|
236 |
continue
|
237 |
except Exception as e:
|
238 |
logger.error(LogFormatter.error("Vote processing failed", e))
|
239 |
continue
|
240 |
-
|
241 |
self._total_votes = vote_count
|
242 |
self._last_vote_timestamp = latest_timestamp
|
243 |
-
|
244 |
# Final summary
|
245 |
stats = {
|
246 |
"Total_Votes": vote_count,
|
247 |
-
"Latest_Vote": latest_timestamp.strftime("%Y-%m-%d %H:%M:%S UTC")
|
|
|
|
|
248 |
"Unique_Models": len(self._votes_by_model),
|
249 |
-
"Unique_Users": len(self._votes_by_user)
|
250 |
}
|
251 |
-
|
252 |
logger.info(LogFormatter.section("VOTE SUMMARY"))
|
253 |
for line in LogFormatter.stats(stats):
|
254 |
logger.info(line)
|
255 |
-
|
256 |
except Exception as e:
|
257 |
logger.error(LogFormatter.error("Failed to load votes", e))
|
258 |
raise
|
@@ -265,25 +294,25 @@ class VoteService(HuggingFaceService):
|
|
265 |
vote["model"],
|
266 |
vote.get("revision", "main"),
|
267 |
vote["username"],
|
268 |
-
vote.get("precision", "unknown")
|
269 |
)
|
270 |
-
|
271 |
# Skip if we already have this vote
|
272 |
if check_tuple in self.vote_check_set:
|
273 |
return
|
274 |
-
|
275 |
self.vote_check_set.add(check_tuple)
|
276 |
-
|
277 |
# Update model votes
|
278 |
if vote["model"] not in self._votes_by_model:
|
279 |
self._votes_by_model[vote["model"]] = []
|
280 |
self._votes_by_model[vote["model"]].append(vote)
|
281 |
-
|
282 |
# Update user votes
|
283 |
if vote["username"] not in self._votes_by_user:
|
284 |
self._votes_by_user[vote["username"]] = []
|
285 |
self._votes_by_user[vote["username"]].append(vote)
|
286 |
-
|
287 |
except KeyError as e:
|
288 |
logger.error(LogFormatter.error("Malformed vote data, missing key", str(e)))
|
289 |
except Exception as e:
|
@@ -292,12 +321,14 @@ class VoteService(HuggingFaceService):
|
|
292 |
async def get_user_votes(self, user_id: str) -> List[Dict[str, Any]]:
|
293 |
"""Get all votes from a specific user"""
|
294 |
logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
|
295 |
-
|
296 |
# Check if we need to refresh votes
|
297 |
-
if (
|
|
|
|
|
298 |
logger.info(LogFormatter.info("Cache expired, refreshing votes..."))
|
299 |
await self._check_for_new_votes()
|
300 |
-
|
301 |
votes = self._votes_by_user.get(user_id, [])
|
302 |
logger.info(LogFormatter.success(f"Found {len(votes):,} votes"))
|
303 |
return votes
|
@@ -305,14 +336,16 @@ class VoteService(HuggingFaceService):
|
|
305 |
async def get_model_votes(self, model_id: str) -> Dict[str, Any]:
|
306 |
"""Get all votes for a specific model"""
|
307 |
logger.info(LogFormatter.info(f"Fetching votes for model: {model_id}"))
|
308 |
-
|
309 |
# Check if we need to refresh votes
|
310 |
-
if (
|
|
|
|
|
311 |
logger.info(LogFormatter.info("Cache expired, refreshing votes..."))
|
312 |
await self._check_for_new_votes()
|
313 |
-
|
314 |
votes = self._votes_by_model.get(model_id, [])
|
315 |
-
|
316 |
# Group votes by revision and precision
|
317 |
votes_by_config = {}
|
318 |
for vote in votes:
|
@@ -323,23 +356,23 @@ class VoteService(HuggingFaceService):
|
|
323 |
votes_by_config[config_key] = {
|
324 |
"revision": revision,
|
325 |
"precision": precision,
|
326 |
-
"count": 0
|
327 |
}
|
328 |
votes_by_config[config_key]["count"] += 1
|
329 |
-
|
330 |
stats = {
|
331 |
"Total_Votes": len(votes),
|
332 |
-
**{f"Config_{k}": v["count"] for k, v in votes_by_config.items()}
|
333 |
}
|
334 |
-
|
335 |
logger.info(LogFormatter.section("VOTE STATISTICS"))
|
336 |
for line in LogFormatter.stats(stats):
|
337 |
logger.info(line)
|
338 |
-
|
339 |
return {
|
340 |
"total_votes": len(votes),
|
341 |
"votes_by_config": votes_by_config,
|
342 |
-
"votes": votes
|
343 |
}
|
344 |
|
345 |
async def _get_model_revision(self, model_id: str) -> str:
|
@@ -348,60 +381,86 @@ class VoteService(HuggingFaceService):
|
|
348 |
for attempt in range(self._max_retries):
|
349 |
try:
|
350 |
model_info = await asyncio.to_thread(self.hf_api.model_info, model_id)
|
351 |
-
logger.info(
|
|
|
|
|
352 |
return model_info.sha
|
353 |
except Exception as e:
|
354 |
-
logger.error(
|
|
|
|
|
355 |
if attempt < self._max_retries - 1:
|
356 |
retry_delay = self._retry_delay * (attempt + 1)
|
357 |
logger.info(f"Retrying in {retry_delay} seconds...")
|
358 |
await asyncio.sleep(retry_delay)
|
359 |
else:
|
360 |
-
logger.warning(
|
|
|
|
|
361 |
return "main"
|
362 |
|
363 |
-
async def add_vote(
|
|
|
|
|
|
|
|
|
|
|
|
|
364 |
"""Add a vote for a model"""
|
365 |
try:
|
366 |
-
self._log_repo_operation(
|
|
|
|
|
|
|
|
|
367 |
logger.info(LogFormatter.section("NEW VOTE"))
|
368 |
stats = {
|
369 |
"Model": model_id,
|
370 |
"User": user_id,
|
371 |
"Type": vote_type,
|
372 |
-
"Config": vote_data or {}
|
373 |
}
|
374 |
for line in LogFormatter.tree(stats, "Vote Details"):
|
375 |
logger.info(line)
|
376 |
-
|
377 |
# Use provided configuration or fallback to model info
|
378 |
precision = None
|
379 |
revision = None
|
380 |
-
|
381 |
if vote_data:
|
382 |
precision = vote_data.get("precision")
|
383 |
revision = vote_data.get("revision")
|
384 |
-
|
385 |
# If any info is missing, try to get it from model info
|
386 |
if not all([precision, revision]):
|
387 |
try:
|
388 |
-
model_info = await asyncio.to_thread(
|
389 |
-
|
390 |
-
|
|
|
|
|
|
|
|
|
391 |
if not precision:
|
392 |
precision = model_card_data.get("precision", "unknown")
|
393 |
if not revision:
|
394 |
revision = model_info.sha
|
395 |
except Exception as e:
|
396 |
-
logger.warning(
|
|
|
|
|
|
|
|
|
397 |
precision = precision or "unknown"
|
398 |
revision = revision or "main"
|
399 |
-
|
400 |
# Check if vote already exists with this configuration
|
401 |
check_tuple = (model_id, revision, user_id, precision)
|
402 |
-
|
403 |
if check_tuple in self.vote_check_set:
|
404 |
-
raise ValueError(
|
|
|
|
|
405 |
|
406 |
vote = {
|
407 |
"model": model_id,
|
@@ -409,33 +468,33 @@ class VoteService(HuggingFaceService):
|
|
409 |
"username": user_id,
|
410 |
"timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
411 |
"vote_type": vote_type,
|
412 |
-
"precision": precision
|
413 |
}
|
414 |
|
415 |
# Update local storage
|
416 |
with open(self.votes_file, "a") as f:
|
417 |
f.write(json.dumps(vote) + "\n")
|
418 |
-
|
419 |
self._add_vote_to_memory(vote)
|
420 |
self.votes_to_upload.append(vote)
|
421 |
-
|
422 |
stats = {
|
423 |
"Status": "Success",
|
424 |
"Queue_Size": len(self.votes_to_upload),
|
425 |
"Model_Config": {
|
426 |
"Precision": precision,
|
427 |
-
"Revision": revision[:7] if revision else "unknown"
|
428 |
-
}
|
429 |
}
|
430 |
for line in LogFormatter.stats(stats):
|
431 |
logger.info(line)
|
432 |
-
|
433 |
# Force immediate sync
|
434 |
logger.info(LogFormatter.info("Forcing immediate sync with hub"))
|
435 |
await self._sync_with_hub()
|
436 |
-
|
437 |
return {"status": "success", "message": "Vote added successfully"}
|
438 |
-
|
439 |
except Exception as e:
|
440 |
logger.error(LogFormatter.error("Failed to add vote", e))
|
441 |
raise
|
|
|
3 |
import json
|
4 |
import logging
|
5 |
import asyncio
|
|
|
6 |
import aiohttp
|
7 |
from huggingface_hub import HfApi
|
8 |
import tempfile
|
|
|
10 |
|
11 |
from app.services.hf_service import HuggingFaceService
|
12 |
from app.config import HF_TOKEN
|
13 |
+
from app.config.hf_config import VOTES_REPO
|
14 |
from app.core.cache import cache_config
|
15 |
from app.core.formatting import LogFormatter
|
16 |
|
17 |
logger = logging.getLogger(__name__)
|
18 |
|
19 |
+
|
20 |
class VoteService(HuggingFaceService):
|
21 |
+
_instance: Optional["VoteService"] = None
|
22 |
_initialized = False
|
23 |
+
|
24 |
def __new__(cls):
|
25 |
if cls._instance is None:
|
26 |
cls._instance = super(VoteService, cls).__new__(cls)
|
27 |
return cls._instance
|
28 |
|
29 |
def __init__(self):
|
30 |
+
if not hasattr(self, "_init_done"):
|
31 |
super().__init__()
|
32 |
self.votes_file = cache_config.votes_file
|
33 |
self.votes_to_upload: List[Dict[str, Any]] = []
|
|
|
48 |
if self._initialized:
|
49 |
await self._check_for_new_votes()
|
50 |
return
|
51 |
+
|
52 |
try:
|
53 |
logger.info(LogFormatter.section("VOTE SERVICE INITIALIZATION"))
|
54 |
+
|
55 |
# Ensure votes directory exists
|
56 |
self.votes_file.parent.mkdir(parents=True, exist_ok=True)
|
57 |
+
|
58 |
# Load remote votes
|
59 |
remote_votes = await self._fetch_remote_votes()
|
60 |
if remote_votes:
|
61 |
+
logger.info(
|
62 |
+
LogFormatter.info(f"Loaded {len(remote_votes)} votes from hub")
|
63 |
+
)
|
64 |
+
|
65 |
# Save to local file
|
66 |
+
with open(self.votes_file, "w") as f:
|
67 |
for vote in remote_votes:
|
68 |
json.dump(vote, f)
|
69 |
+
f.write("\n")
|
70 |
+
|
71 |
# Load into memory
|
72 |
await self._load_existing_votes()
|
73 |
else:
|
74 |
logger.warning(LogFormatter.warning("No votes found on hub"))
|
75 |
+
|
76 |
self._initialized = True
|
77 |
self._last_sync = datetime.now(timezone.utc)
|
78 |
+
|
79 |
# Final summary
|
80 |
stats = {
|
81 |
"Total_Votes": self._total_votes,
|
82 |
+
"Last_Sync": self._last_sync.strftime("%Y-%m-%d %H:%M:%S UTC"),
|
83 |
}
|
84 |
logger.info(LogFormatter.section("INITIALIZATION COMPLETE"))
|
85 |
for line in LogFormatter.stats(stats):
|
86 |
logger.info(line)
|
87 |
+
|
88 |
except Exception as e:
|
89 |
logger.error(LogFormatter.error("Initialization failed", e))
|
90 |
raise
|
|
|
93 |
"""Fetch votes from HF hub"""
|
94 |
url = f"https://huggingface.co/datasets/{VOTES_REPO}/raw/main/votes_data.jsonl"
|
95 |
headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
|
96 |
+
|
97 |
try:
|
98 |
async with aiohttp.ClientSession() as session:
|
99 |
async with session.get(url, headers=headers) as response:
|
|
|
108 |
continue
|
109 |
return votes
|
110 |
else:
|
111 |
+
logger.error(
|
112 |
+
f"Failed to get remote votes: HTTP {response.status}"
|
113 |
+
)
|
114 |
return []
|
115 |
except Exception as e:
|
116 |
logger.error(f"Error fetching remote votes: {str(e)}")
|
|
|
121 |
try:
|
122 |
remote_votes = await self._fetch_remote_votes()
|
123 |
if len(remote_votes) != self._total_votes:
|
124 |
+
logger.info(
|
125 |
+
f"Vote count changed: Local ({self._total_votes}) ≠ Remote ({len(remote_votes)})"
|
126 |
+
)
|
127 |
# Save to local file
|
128 |
+
with open(self.votes_file, "w") as f:
|
129 |
for vote in remote_votes:
|
130 |
json.dump(vote, f)
|
131 |
+
f.write("\n")
|
132 |
+
|
133 |
# Reload into memory
|
134 |
await self._load_existing_votes()
|
135 |
else:
|
136 |
logger.info("Votes are in sync")
|
137 |
+
|
138 |
except Exception as e:
|
139 |
logger.error(f"Error checking for new votes: {str(e)}")
|
140 |
|
|
|
142 |
"""Sync votes with HuggingFace hub"""
|
143 |
try:
|
144 |
logger.info(LogFormatter.section("VOTE SYNC"))
|
145 |
+
|
146 |
# Get current remote votes
|
147 |
remote_votes = await self._fetch_remote_votes()
|
148 |
logger.info(LogFormatter.info(f"Loaded {len(remote_votes)} votes from hub"))
|
149 |
+
|
150 |
# If we have pending votes to upload
|
151 |
if self.votes_to_upload:
|
152 |
+
logger.info(
|
153 |
+
LogFormatter.info(
|
154 |
+
f"Adding {len(self.votes_to_upload)} pending votes..."
|
155 |
+
)
|
156 |
+
)
|
157 |
+
|
158 |
# Add new votes to remote votes
|
159 |
remote_votes.extend(self.votes_to_upload)
|
160 |
+
|
161 |
# Create temporary file with all votes
|
162 |
+
with tempfile.NamedTemporaryFile(
|
163 |
+
mode="w", suffix=".jsonl", delete=False
|
164 |
+
) as temp_file:
|
165 |
for vote in remote_votes:
|
166 |
json.dump(vote, temp_file)
|
167 |
+
temp_file.write("\n")
|
168 |
temp_path = temp_file.name
|
169 |
+
|
170 |
try:
|
171 |
# Upload JSONL file directly
|
172 |
self.hf_api.upload_file(
|
|
|
175 |
repo_id=VOTES_REPO,
|
176 |
repo_type="dataset",
|
177 |
commit_message=f"Update votes: +{len(self.votes_to_upload)} new votes",
|
178 |
+
token=self.token,
|
179 |
)
|
180 |
+
|
181 |
# Clear pending votes only if upload succeeded
|
182 |
self.votes_to_upload.clear()
|
183 |
+
logger.info(
|
184 |
+
LogFormatter.success("Pending votes uploaded successfully")
|
185 |
+
)
|
186 |
+
|
187 |
except Exception as e:
|
188 |
logger.error(LogFormatter.error("Failed to upload votes to hub", e))
|
189 |
raise
|
190 |
finally:
|
191 |
# Clean up temp file
|
192 |
os.unlink(temp_path)
|
193 |
+
|
194 |
# Update local state
|
195 |
+
with open(self.votes_file, "w") as f:
|
196 |
for vote in remote_votes:
|
197 |
json.dump(vote, f)
|
198 |
+
f.write("\n")
|
199 |
+
|
200 |
# Reload votes in memory
|
201 |
await self._load_existing_votes()
|
202 |
logger.info(LogFormatter.success("Sync completed successfully"))
|
203 |
|
204 |
self._last_sync = datetime.now(timezone.utc)
|
205 |
+
|
206 |
except Exception as e:
|
207 |
logger.error(LogFormatter.error("Sync failed", e))
|
208 |
raise
|
|
|
215 |
|
216 |
try:
|
217 |
logger.info(LogFormatter.section("LOADING VOTES"))
|
218 |
+
|
219 |
# Clear existing data structures
|
220 |
self.vote_check_set.clear()
|
221 |
self._votes_by_model.clear()
|
222 |
self._votes_by_user.clear()
|
223 |
+
|
224 |
vote_count = 0
|
225 |
latest_timestamp = None
|
226 |
+
|
227 |
with open(self.votes_file, "r") as f:
|
228 |
for line in f:
|
229 |
try:
|
230 |
vote = json.loads(line.strip())
|
231 |
vote_count += 1
|
232 |
+
|
233 |
# Track latest timestamp
|
234 |
try:
|
235 |
+
vote_timestamp = datetime.fromisoformat(
|
236 |
+
vote["timestamp"].replace("Z", "+00:00")
|
237 |
+
)
|
238 |
+
if (
|
239 |
+
not latest_timestamp
|
240 |
+
or vote_timestamp > latest_timestamp
|
241 |
+
):
|
242 |
latest_timestamp = vote_timestamp
|
243 |
+
vote["timestamp"] = vote_timestamp.strftime(
|
244 |
+
"%Y-%m-%dT%H:%M:%SZ"
|
245 |
+
)
|
246 |
except (KeyError, ValueError) as e:
|
247 |
+
logger.warning(
|
248 |
+
LogFormatter.warning(
|
249 |
+
f"Invalid timestamp in vote: {str(e)}"
|
250 |
+
)
|
251 |
+
)
|
252 |
continue
|
253 |
+
|
254 |
if vote_count % 1000 == 0:
|
255 |
+
logger.info(
|
256 |
+
LogFormatter.info(f"Processed {vote_count:,} votes...")
|
257 |
+
)
|
258 |
+
|
259 |
self._add_vote_to_memory(vote)
|
260 |
+
|
261 |
except json.JSONDecodeError as e:
|
262 |
logger.error(LogFormatter.error("Vote parsing failed", e))
|
263 |
continue
|
264 |
except Exception as e:
|
265 |
logger.error(LogFormatter.error("Vote processing failed", e))
|
266 |
continue
|
267 |
+
|
268 |
self._total_votes = vote_count
|
269 |
self._last_vote_timestamp = latest_timestamp
|
270 |
+
|
271 |
# Final summary
|
272 |
stats = {
|
273 |
"Total_Votes": vote_count,
|
274 |
+
"Latest_Vote": latest_timestamp.strftime("%Y-%m-%d %H:%M:%S UTC")
|
275 |
+
if latest_timestamp
|
276 |
+
else "None",
|
277 |
"Unique_Models": len(self._votes_by_model),
|
278 |
+
"Unique_Users": len(self._votes_by_user),
|
279 |
}
|
280 |
+
|
281 |
logger.info(LogFormatter.section("VOTE SUMMARY"))
|
282 |
for line in LogFormatter.stats(stats):
|
283 |
logger.info(line)
|
284 |
+
|
285 |
except Exception as e:
|
286 |
logger.error(LogFormatter.error("Failed to load votes", e))
|
287 |
raise
|
|
|
294 |
vote["model"],
|
295 |
vote.get("revision", "main"),
|
296 |
vote["username"],
|
297 |
+
vote.get("precision", "unknown"),
|
298 |
)
|
299 |
+
|
300 |
# Skip if we already have this vote
|
301 |
if check_tuple in self.vote_check_set:
|
302 |
return
|
303 |
+
|
304 |
self.vote_check_set.add(check_tuple)
|
305 |
+
|
306 |
# Update model votes
|
307 |
if vote["model"] not in self._votes_by_model:
|
308 |
self._votes_by_model[vote["model"]] = []
|
309 |
self._votes_by_model[vote["model"]].append(vote)
|
310 |
+
|
311 |
# Update user votes
|
312 |
if vote["username"] not in self._votes_by_user:
|
313 |
self._votes_by_user[vote["username"]] = []
|
314 |
self._votes_by_user[vote["username"]].append(vote)
|
315 |
+
|
316 |
except KeyError as e:
|
317 |
logger.error(LogFormatter.error("Malformed vote data, missing key", str(e)))
|
318 |
except Exception as e:
|
|
|
321 |
async def get_user_votes(self, user_id: str) -> List[Dict[str, Any]]:
|
322 |
"""Get all votes from a specific user"""
|
323 |
logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
|
324 |
+
|
325 |
# Check if we need to refresh votes
|
326 |
+
if (
|
327 |
+
datetime.now(timezone.utc) - self._last_sync
|
328 |
+
).total_seconds() > self._sync_interval:
|
329 |
logger.info(LogFormatter.info("Cache expired, refreshing votes..."))
|
330 |
await self._check_for_new_votes()
|
331 |
+
|
332 |
votes = self._votes_by_user.get(user_id, [])
|
333 |
logger.info(LogFormatter.success(f"Found {len(votes):,} votes"))
|
334 |
return votes
|
|
|
336 |
async def get_model_votes(self, model_id: str) -> Dict[str, Any]:
|
337 |
"""Get all votes for a specific model"""
|
338 |
logger.info(LogFormatter.info(f"Fetching votes for model: {model_id}"))
|
339 |
+
|
340 |
# Check if we need to refresh votes
|
341 |
+
if (
|
342 |
+
datetime.now(timezone.utc) - self._last_sync
|
343 |
+
).total_seconds() > self._sync_interval:
|
344 |
logger.info(LogFormatter.info("Cache expired, refreshing votes..."))
|
345 |
await self._check_for_new_votes()
|
346 |
+
|
347 |
votes = self._votes_by_model.get(model_id, [])
|
348 |
+
|
349 |
# Group votes by revision and precision
|
350 |
votes_by_config = {}
|
351 |
for vote in votes:
|
|
|
356 |
votes_by_config[config_key] = {
|
357 |
"revision": revision,
|
358 |
"precision": precision,
|
359 |
+
"count": 0,
|
360 |
}
|
361 |
votes_by_config[config_key]["count"] += 1
|
362 |
+
|
363 |
stats = {
|
364 |
"Total_Votes": len(votes),
|
365 |
+
**{f"Config_{k}": v["count"] for k, v in votes_by_config.items()},
|
366 |
}
|
367 |
+
|
368 |
logger.info(LogFormatter.section("VOTE STATISTICS"))
|
369 |
for line in LogFormatter.stats(stats):
|
370 |
logger.info(line)
|
371 |
+
|
372 |
return {
|
373 |
"total_votes": len(votes),
|
374 |
"votes_by_config": votes_by_config,
|
375 |
+
"votes": votes,
|
376 |
}
|
377 |
|
378 |
async def _get_model_revision(self, model_id: str) -> str:
|
|
|
381 |
for attempt in range(self._max_retries):
|
382 |
try:
|
383 |
model_info = await asyncio.to_thread(self.hf_api.model_info, model_id)
|
384 |
+
logger.info(
|
385 |
+
f"Successfully got revision {model_info.sha} for model {model_id}"
|
386 |
+
)
|
387 |
return model_info.sha
|
388 |
except Exception as e:
|
389 |
+
logger.error(
|
390 |
+
f"Error getting model revision for {model_id} (attempt {attempt + 1}): {str(e)}"
|
391 |
+
)
|
392 |
if attempt < self._max_retries - 1:
|
393 |
retry_delay = self._retry_delay * (attempt + 1)
|
394 |
logger.info(f"Retrying in {retry_delay} seconds...")
|
395 |
await asyncio.sleep(retry_delay)
|
396 |
else:
|
397 |
+
logger.warning(
|
398 |
+
f"Using 'main' as fallback revision for {model_id} after {self._max_retries} failed attempts"
|
399 |
+
)
|
400 |
return "main"
|
401 |
|
402 |
+
async def add_vote(
|
403 |
+
self,
|
404 |
+
model_id: str,
|
405 |
+
user_id: str,
|
406 |
+
vote_type: str,
|
407 |
+
vote_data: Dict[str, Any] = None,
|
408 |
+
) -> Dict[str, Any]:
|
409 |
"""Add a vote for a model"""
|
410 |
try:
|
411 |
+
self._log_repo_operation(
|
412 |
+
"add",
|
413 |
+
VOTES_REPO,
|
414 |
+
f"Adding {vote_type} vote for {model_id} by {user_id}",
|
415 |
+
)
|
416 |
logger.info(LogFormatter.section("NEW VOTE"))
|
417 |
stats = {
|
418 |
"Model": model_id,
|
419 |
"User": user_id,
|
420 |
"Type": vote_type,
|
421 |
+
"Config": vote_data or {},
|
422 |
}
|
423 |
for line in LogFormatter.tree(stats, "Vote Details"):
|
424 |
logger.info(line)
|
425 |
+
|
426 |
# Use provided configuration or fallback to model info
|
427 |
precision = None
|
428 |
revision = None
|
429 |
+
|
430 |
if vote_data:
|
431 |
precision = vote_data.get("precision")
|
432 |
revision = vote_data.get("revision")
|
433 |
+
|
434 |
# If any info is missing, try to get it from model info
|
435 |
if not all([precision, revision]):
|
436 |
try:
|
437 |
+
model_info = await asyncio.to_thread(
|
438 |
+
self.hf_api.model_info, model_id
|
439 |
+
)
|
440 |
+
model_card_data = (
|
441 |
+
model_info.cardData if hasattr(model_info, "cardData") else {}
|
442 |
+
)
|
443 |
+
|
444 |
if not precision:
|
445 |
precision = model_card_data.get("precision", "unknown")
|
446 |
if not revision:
|
447 |
revision = model_info.sha
|
448 |
except Exception as e:
|
449 |
+
logger.warning(
|
450 |
+
LogFormatter.warning(
|
451 |
+
f"Failed to get model info: {str(e)}. Using default values."
|
452 |
+
)
|
453 |
+
)
|
454 |
precision = precision or "unknown"
|
455 |
revision = revision or "main"
|
456 |
+
|
457 |
# Check if vote already exists with this configuration
|
458 |
check_tuple = (model_id, revision, user_id, precision)
|
459 |
+
|
460 |
if check_tuple in self.vote_check_set:
|
461 |
+
raise ValueError(
|
462 |
+
f"Vote already recorded for this model configuration (precision: {precision}, revision: {revision[:7] if revision else 'unknown'})"
|
463 |
+
)
|
464 |
|
465 |
vote = {
|
466 |
"model": model_id,
|
|
|
468 |
"username": user_id,
|
469 |
"timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
470 |
"vote_type": vote_type,
|
471 |
+
"precision": precision,
|
472 |
}
|
473 |
|
474 |
# Update local storage
|
475 |
with open(self.votes_file, "a") as f:
|
476 |
f.write(json.dumps(vote) + "\n")
|
477 |
+
|
478 |
self._add_vote_to_memory(vote)
|
479 |
self.votes_to_upload.append(vote)
|
480 |
+
|
481 |
stats = {
|
482 |
"Status": "Success",
|
483 |
"Queue_Size": len(self.votes_to_upload),
|
484 |
"Model_Config": {
|
485 |
"Precision": precision,
|
486 |
+
"Revision": revision[:7] if revision else "unknown",
|
487 |
+
},
|
488 |
}
|
489 |
for line in LogFormatter.stats(stats):
|
490 |
logger.info(line)
|
491 |
+
|
492 |
# Force immediate sync
|
493 |
logger.info(LogFormatter.info("Forcing immediate sync with hub"))
|
494 |
await self._sync_with_hub()
|
495 |
+
|
496 |
return {"status": "success", "message": "Vote added successfully"}
|
497 |
+
|
498 |
except Exception as e:
|
499 |
logger.error(LogFormatter.error("Failed to add vote", e))
|
500 |
raise
|
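For context, a small self-contained sketch of the JSONL round trip that _sync_with_hub and _load_existing_votes rely on: one JSON object per line on write, line-by-line parsing with malformed lines skipped on read. The vote fields mirror the diff above; the concrete values are illustrative.

import json
import tempfile
from datetime import datetime, timezone

# Illustrative vote record, shaped like the ones add_vote() builds.
votes = [
    {
        "model": "example-org/example-model",
        "revision": "main",
        "username": "example-user",
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "vote_type": "up",
        "precision": "float16",
    }
]

# Write: one JSON object per line, as the sync code does before uploading.
with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as temp_file:
    for vote in votes:
        json.dump(vote, temp_file)
        temp_file.write("\n")
    temp_path = temp_file.name

# Read back: parse line by line and skip anything malformed.
loaded = []
with open(temp_path, "r") as f:
    for line in f:
        try:
            loaded.append(json.loads(line.strip()))
        except json.JSONDecodeError:
            continue

print(f"Loaded {len(loaded)} vote(s)")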
backend/app/utils/logging.py
CHANGED
@@ -1,3 +1,3 @@
1   from app.core.formatting import LogFormatter
2
3 - __all__ = [
3 + __all__ = ["LogFormatter"]
backend/app/utils/model_validation.py
CHANGED
@@ -12,50 +12,56 @@ from app.core.formatting import LogFormatter
|
|
12 |
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
|
|
15 |
class ModelValidator:
|
16 |
def __init__(self):
|
17 |
self.token = HF_TOKEN
|
18 |
self.api = HfApi(token=self.token)
|
19 |
self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
|
20 |
-
|
21 |
-
async def check_model_card(
|
|
|
|
|
22 |
"""Check if model has a valid model card"""
|
23 |
try:
|
24 |
logger.info(LogFormatter.info(f"Checking model card for {model_id}"))
|
25 |
-
|
26 |
# Get model card content using ModelCard.load
|
27 |
try:
|
28 |
-
model_card = await asyncio.to_thread(
|
29 |
-
ModelCard.load,
|
30 |
-
model_id
|
31 |
-
)
|
32 |
logger.info(LogFormatter.success("Model card found"))
|
33 |
except Exception as e:
|
34 |
error_msg = "Please add a model card to your model to explain how you trained/fine-tuned it."
|
35 |
logger.error(LogFormatter.error(error_msg, e))
|
36 |
return False, error_msg, None
|
37 |
-
|
38 |
# Check license in model card data
|
39 |
-
if model_card.data.license is None and not (
|
|
|
|
|
40 |
error_msg = "License not found. Please add a license to your model card using the `license` metadata or a `license_name`/`license_link` pair."
|
41 |
logger.warning(LogFormatter.warning(error_msg))
|
42 |
return False, error_msg, None
|
43 |
|
44 |
# Enforce card content length
|
45 |
if len(model_card.text) < 200:
|
46 |
-
error_msg =
|
|
|
|
|
47 |
logger.warning(LogFormatter.warning(error_msg))
|
48 |
return False, error_msg, None
|
49 |
-
|
50 |
logger.info(LogFormatter.success("Model card validation passed"))
|
51 |
return True, "", model_card
|
52 |
-
|
53 |
except Exception as e:
|
54 |
error_msg = "Failed to validate model card"
|
55 |
logger.error(LogFormatter.error(error_msg, e))
|
56 |
return False, str(e), None
|
57 |
-
|
58 |
-
async def get_safetensors_metadata(
|
|
|
|
|
59 |
"""Get metadata from a safetensors file"""
|
60 |
try:
|
61 |
if is_adapter:
|
@@ -80,26 +86,32 @@ class ModelValidator:
|
|
80 |
return None
|
81 |
|
82 |
async def get_model_size(
|
83 |
-
self,
|
84 |
-
model_info: Any,
|
85 |
-
precision: str,
|
86 |
-
base_model: str,
|
87 |
-
revision: str
|
88 |
) -> Tuple[Optional[float], Optional[str]]:
|
89 |
"""Get model size in billions of parameters"""
|
90 |
try:
|
91 |
-
logger.info(
|
|
|
|
|
92 |
|
93 |
# Check if model is adapter
|
94 |
-
is_adapter = any(
|
|
|
|
|
|
|
|
|
95 |
|
96 |
# Try to get size from safetensors first
|
97 |
model_size = None
|
98 |
|
99 |
if is_adapter and base_model:
|
100 |
# For adapters, we need both adapter and base model sizes
|
101 |
-
adapter_meta = await self.get_safetensors_metadata(
|
102 |
-
|
|
|
|
|
|
|
|
|
103 |
|
104 |
if adapter_meta and base_meta:
|
105 |
adapter_size = sum(adapter_meta.parameter_count.values())
|
@@ -107,16 +119,20 @@ class ModelValidator:
|
|
107 |
model_size = adapter_size + base_size
|
108 |
else:
|
109 |
# For regular models, just get the model size
|
110 |
-
meta = await self.get_safetensors_metadata(
|
|
|
|
|
111 |
if meta:
|
112 |
-
model_size = sum(meta.parameter_count.values())
|
113 |
|
114 |
if model_size is None:
|
115 |
# If model size could not be determined, return an error
|
116 |
return None, "Model size could not be determined"
|
117 |
|
118 |
# Adjust size for GPTQ models
|
119 |
-
size_factor =
|
|
|
|
|
120 |
model_size = model_size / 1e9 # Convert to billions, assuming float16
|
121 |
model_size = round(size_factor * model_size, 3)
|
122 |
|
@@ -127,52 +143,49 @@ class ModelValidator:
|
|
127 |
logger.error(LogFormatter.error(f"Error while determining model size: {e}"))
|
128 |
return None, str(e)
|
129 |
|
130 |
-
|
131 |
async def check_chat_template(
|
132 |
-
self,
|
133 |
-
model_id: str,
|
134 |
-
revision: str
|
135 |
) -> Tuple[bool, Optional[str]]:
|
136 |
"""Check if model has a valid chat template"""
|
137 |
try:
|
138 |
logger.info(LogFormatter.info(f"Checking chat template for {model_id}"))
|
139 |
-
|
140 |
try:
|
141 |
config_file = await asyncio.to_thread(
|
142 |
hf_hub_download,
|
143 |
repo_id=model_id,
|
144 |
filename="tokenizer_config.json",
|
145 |
revision=revision,
|
146 |
-
repo_type="model"
|
147 |
)
|
148 |
-
|
149 |
-
with open(config_file,
|
150 |
tokenizer_config = json.load(f)
|
151 |
-
|
152 |
-
if
|
153 |
error_msg = f"The model {model_id} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting or submit without it."
|
154 |
logger.error(LogFormatter.error(error_msg))
|
155 |
return False, error_msg
|
156 |
-
|
157 |
logger.info(LogFormatter.success("Valid chat template found"))
|
158 |
return True, None
|
159 |
-
|
160 |
except Exception as e:
|
161 |
error_msg = f"Error checking chat_template: {str(e)}"
|
162 |
logger.error(LogFormatter.error(error_msg))
|
163 |
return False, error_msg
|
164 |
-
|
165 |
except Exception as e:
|
166 |
error_msg = "Failed to check chat template"
|
167 |
logger.error(LogFormatter.error(error_msg, e))
|
168 |
return False, str(e)
|
169 |
-
|
170 |
async def is_model_on_hub(
|
171 |
self,
|
172 |
model_name: str,
|
173 |
revision: str,
|
174 |
test_tokenizer: bool = False,
|
175 |
-
trust_remote_code: bool = False
|
176 |
) -> Tuple[bool, Optional[str], Optional[Any]]:
|
177 |
"""Check if model exists and is properly configured on the Hub"""
|
178 |
try:
|
@@ -182,9 +195,9 @@ class ModelValidator:
|
|
182 |
revision=revision,
|
183 |
trust_remote_code=trust_remote_code,
|
184 |
token=self.token,
|
185 |
-
force_download=True
|
186 |
)
|
187 |
-
|
188 |
if test_tokenizer:
|
189 |
try:
|
190 |
await asyncio.to_thread(
|
@@ -192,56 +205,80 @@ class ModelValidator:
|
|
192 |
model_name,
|
193 |
revision=revision,
|
194 |
trust_remote_code=trust_remote_code,
|
195 |
-
token=self.token
|
196 |
)
|
197 |
except ValueError as e:
|
198 |
-
return
|
|
|
|
|
|
|
|
|
199 |
except Exception:
|
200 |
-
return
|
201 |
-
|
|
|
|
|
|
|
|
|
202 |
return True, None, config
|
203 |
-
|
204 |
except ValueError:
|
205 |
-
return
|
|
|
|
|
|
|
|
|
206 |
except Exception as e:
|
207 |
if "You are trying to access a gated repo." in str(e):
|
208 |
-
return
|
209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
async def check_official_provider_status(
|
212 |
-
self,
|
213 |
-
model_id: str,
|
214 |
-
existing_models: Dict[str, list]
|
215 |
) -> Tuple[bool, Optional[str]]:
|
216 |
"""
|
217 |
Check if model is from official provider and has finished submission.
|
218 |
-
|
219 |
Args:
|
220 |
model_id: The model identifier (org/model-name)
|
221 |
existing_models: Dictionary of models by status from get_models()
|
222 |
-
|
223 |
Returns:
|
224 |
Tuple[bool, Optional[str]]: (is_valid, error_message)
|
225 |
"""
|
226 |
try:
|
227 |
-
logger.info(
|
228 |
-
|
|
|
|
|
229 |
# Get model organization
|
230 |
-
model_org = model_id.split(
|
231 |
-
|
232 |
if not model_org:
|
233 |
return True, None
|
234 |
-
|
235 |
# Load official providers dataset
|
236 |
dataset = load_dataset(OFFICIAL_PROVIDERS_REPO)
|
237 |
official_providers = dataset["train"][0]["CURATED_SET"]
|
238 |
-
|
239 |
# Check if model org is in official providers
|
240 |
is_official = model_org in official_providers
|
241 |
-
|
242 |
if is_official:
|
243 |
-
logger.info(
|
244 |
-
|
|
|
|
|
|
|
|
|
245 |
# Check for finished submissions
|
246 |
if "finished" in existing_models:
|
247 |
for model in existing_models["finished"]:
|
@@ -251,15 +288,25 @@ class ModelValidator:
|
|
251 |
f"with a completed evaluation. "
|
252 |
f"To re-evaluate, please open a discussion."
|
253 |
)
|
254 |
-
logger.error(
|
|
|
|
|
255 |
return False, error_msg
|
256 |
-
|
257 |
-
logger.info(
|
|
|
|
|
|
|
|
|
258 |
else:
|
259 |
-
logger.info(
|
260 |
-
|
|
|
|
|
|
|
|
|
261 |
return True, None
|
262 |
-
|
263 |
except Exception as e:
|
264 |
error_msg = f"Failed to check official provider status: {str(e)}"
|
265 |
logger.error(LogFormatter.error(error_msg))
|
|
|
12 |
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
15 |
+
|
16 |
class ModelValidator:
|
17 |
def __init__(self):
|
18 |
self.token = HF_TOKEN
|
19 |
self.api = HfApi(token=self.token)
|
20 |
self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
|
21 |
+
|
22 |
+
async def check_model_card(
|
23 |
+
self, model_id: str
|
24 |
+
) -> Tuple[bool, str, Optional[Dict[str, Any]]]:
|
25 |
"""Check if model has a valid model card"""
|
26 |
try:
|
27 |
logger.info(LogFormatter.info(f"Checking model card for {model_id}"))
|
28 |
+
|
29 |
# Get model card content using ModelCard.load
|
30 |
try:
|
31 |
+
model_card = await asyncio.to_thread(ModelCard.load, model_id)
|
|
|
|
|
|
|
32 |
logger.info(LogFormatter.success("Model card found"))
|
33 |
except Exception as e:
|
34 |
error_msg = "Please add a model card to your model to explain how you trained/fine-tuned it."
|
35 |
logger.error(LogFormatter.error(error_msg, e))
|
36 |
return False, error_msg, None
|
37 |
+
|
38 |
# Check license in model card data
|
39 |
+
if model_card.data.license is None and not (
|
40 |
+
"license_name" in model_card.data and "license_link" in model_card.data
|
41 |
+
):
|
42 |
error_msg = "License not found. Please add a license to your model card using the `license` metadata or a `license_name`/`license_link` pair."
|
43 |
logger.warning(LogFormatter.warning(error_msg))
|
44 |
return False, error_msg, None
|
45 |
|
46 |
# Enforce card content length
|
47 |
if len(model_card.text) < 200:
|
48 |
+
error_msg = (
|
49 |
+
"Please add a description to your model card, it is too short."
|
50 |
+
)
|
51 |
logger.warning(LogFormatter.warning(error_msg))
|
52 |
return False, error_msg, None
|
53 |
+
|
54 |
logger.info(LogFormatter.success("Model card validation passed"))
|
55 |
return True, "", model_card
|
56 |
+
|
57 |
except Exception as e:
|
58 |
error_msg = "Failed to validate model card"
|
59 |
logger.error(LogFormatter.error(error_msg, e))
|
60 |
return False, str(e), None
|
61 |
+
|
62 |
+
async def get_safetensors_metadata(
|
63 |
+
self, model_id: str, is_adapter: bool = False, revision: str = "main"
|
64 |
+
) -> Optional[Dict]:
|
65 |
"""Get metadata from a safetensors file"""
|
66 |
try:
|
67 |
if is_adapter:
|
|
|
86 |
return None
|
87 |
|
88 |
async def get_model_size(
|
89 |
+
self, model_info: Any, precision: str, base_model: str, revision: str
|
|
|
|
|
|
|
|
|
90 |
) -> Tuple[Optional[float], Optional[str]]:
|
91 |
"""Get model size in billions of parameters"""
|
92 |
try:
|
93 |
+
logger.info(
|
94 |
+
LogFormatter.info(f"Checking model size for {model_info.modelId}")
|
95 |
+
)
|
96 |
|
97 |
# Check if model is adapter
|
98 |
+
is_adapter = any(
|
99 |
+
s.rfilename == "adapter_config.json"
|
100 |
+
for s in model_info.siblings
|
101 |
+
if hasattr(s, "rfilename")
|
102 |
+
)
|
103 |
|
104 |
# Try to get size from safetensors first
|
105 |
model_size = None
|
106 |
|
107 |
if is_adapter and base_model:
|
108 |
# For adapters, we need both adapter and base model sizes
|
109 |
+
adapter_meta = await self.get_safetensors_metadata(
|
110 |
+
model_info.id, is_adapter=True, revision=revision
|
111 |
+
)
|
112 |
+
base_meta = await self.get_safetensors_metadata(
|
113 |
+
base_model, revision="main"
|
114 |
+
)
|
115 |
|
116 |
if adapter_meta and base_meta:
|
117 |
adapter_size = sum(adapter_meta.parameter_count.values())
|
|
|
119 |
model_size = adapter_size + base_size
|
120 |
else:
|
121 |
# For regular models, just get the model size
|
122 |
+
meta = await self.get_safetensors_metadata(
|
123 |
+
model_info.id, revision=revision
|
124 |
+
)
|
125 |
if meta:
|
126 |
+
model_size = sum(meta.parameter_count.values()) # total params
|
127 |
|
128 |
if model_size is None:
|
129 |
# If model size could not be determined, return an error
|
130 |
return None, "Model size could not be determined"
|
131 |
|
132 |
# Adjust size for GPTQ models
|
133 |
+
size_factor = (
|
134 |
+
8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
|
135 |
+
)
|
136 |
model_size = model_size / 1e9 # Convert to billions, assuming float16
|
137 |
model_size = round(size_factor * model_size, 3)
|
138 |
|
|
|
143 |
logger.error(LogFormatter.error(f"Error while determining model size: {e}"))
|
144 |
return None, str(e)
|
145 |
|
|
|
146 |
async def check_chat_template(
|
147 |
+
self, model_id: str, revision: str
|
|
|
|
|
148 |
) -> Tuple[bool, Optional[str]]:
|
149 |
"""Check if model has a valid chat template"""
|
150 |
try:
|
151 |
logger.info(LogFormatter.info(f"Checking chat template for {model_id}"))
|
152 |
+
|
153 |
try:
|
154 |
config_file = await asyncio.to_thread(
|
155 |
hf_hub_download,
|
156 |
repo_id=model_id,
|
157 |
filename="tokenizer_config.json",
|
158 |
revision=revision,
|
159 |
+
repo_type="model",
|
160 |
)
|
161 |
+
|
162 |
+
with open(config_file, "r") as f:
|
163 |
tokenizer_config = json.load(f)
|
164 |
+
|
165 |
+
if "chat_template" not in tokenizer_config:
|
166 |
error_msg = f"The model {model_id} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting or submit without it."
|
167 |
logger.error(LogFormatter.error(error_msg))
|
168 |
return False, error_msg
|
169 |
+
|
170 |
logger.info(LogFormatter.success("Valid chat template found"))
|
171 |
return True, None
|
172 |
+
|
173 |
except Exception as e:
|
174 |
error_msg = f"Error checking chat_template: {str(e)}"
|
175 |
logger.error(LogFormatter.error(error_msg))
|
176 |
return False, error_msg
|
177 |
+
|
178 |
except Exception as e:
|
179 |
error_msg = "Failed to check chat template"
|
180 |
logger.error(LogFormatter.error(error_msg, e))
|
181 |
return False, str(e)
|
182 |
+
|
183 |
async def is_model_on_hub(
|
184 |
self,
|
185 |
model_name: str,
|
186 |
revision: str,
|
187 |
test_tokenizer: bool = False,
|
188 |
+
trust_remote_code: bool = False,
|
189 |
) -> Tuple[bool, Optional[str], Optional[Any]]:
|
190 |
"""Check if model exists and is properly configured on the Hub"""
|
191 |
try:
|
|
|
195 |
revision=revision,
|
196 |
trust_remote_code=trust_remote_code,
|
197 |
token=self.token,
|
198 |
+
force_download=True,
|
199 |
)
|
200 |
+
|
201 |
if test_tokenizer:
|
202 |
try:
|
203 |
await asyncio.to_thread(
|
|
|
205 |
model_name,
|
206 |
revision=revision,
|
207 |
trust_remote_code=trust_remote_code,
|
208 |
+
token=self.token,
|
209 |
)
|
210 |
except ValueError as e:
|
211 |
+
return (
|
212 |
+
False,
|
213 |
+
f"The tokenizer is not available in an official Transformers release: {e}",
|
214 |
+
None,
|
215 |
+
)
|
216 |
except Exception:
|
217 |
+
return (
|
218 |
+
False,
|
219 |
+
"The tokenizer cannot be loaded. Ensure the tokenizer class is part of a stable Transformers release and correctly configured.",
|
220 |
+
None,
|
221 |
+
)
|
222 |
+
|
223 |
return True, None, config
|
224 |
+
|
225 |
except ValueError:
|
226 |
+
return (
|
227 |
+
False,
|
228 |
+
"The model requires `trust_remote_code=True` to launch, and for safety reasons, we don't accept such models automatically.",
|
229 |
+
None,
|
230 |
+
)
|
231 |
except Exception as e:
|
232 |
if "You are trying to access a gated repo." in str(e):
|
233 |
+
return (
|
234 |
+
True,
|
235 |
+
"The model is gated and requires special access permissions.",
|
236 |
+
None,
|
237 |
+
)
|
238 |
+
return (
|
239 |
+
False,
|
240 |
+
f"The model was not found or is misconfigured on the Hub. Error: {e.args[0]}",
|
241 |
+
None,
|
242 |
+
)
|
243 |
|
244 |
async def check_official_provider_status(
|
245 |
+
self, model_id: str, existing_models: Dict[str, list]
|
|
|
|
|
246 |
) -> Tuple[bool, Optional[str]]:
|
247 |
"""
|
248 |
Check if model is from official provider and has finished submission.
|
249 |
+
|
250 |
Args:
|
251 |
model_id: The model identifier (org/model-name)
|
252 |
existing_models: Dictionary of models by status from get_models()
|
253 |
+
|
254 |
Returns:
|
255 |
Tuple[bool, Optional[str]]: (is_valid, error_message)
|
256 |
"""
|
257 |
try:
|
258 |
+
logger.info(
|
259 |
+
LogFormatter.info(f"Checking official provider status for {model_id}")
|
260 |
+
)
|
261 |
+
|
262 |
# Get model organization
|
263 |
+
model_org = model_id.split("/")[0] if "/" in model_id else None
|
264 |
+
|
265 |
if not model_org:
|
266 |
return True, None
|
267 |
+
|
268 |
# Load official providers dataset
|
269 |
dataset = load_dataset(OFFICIAL_PROVIDERS_REPO)
|
270 |
official_providers = dataset["train"][0]["CURATED_SET"]
|
271 |
+
|
272 |
# Check if model org is in official providers
|
273 |
is_official = model_org in official_providers
|
274 |
+
|
275 |
if is_official:
|
276 |
+
logger.info(
|
277 |
+
LogFormatter.info(
|
278 |
+
f"Model organization '{model_org}' is an official provider"
|
279 |
+
)
|
280 |
+
)
|
281 |
+
|
282 |
# Check for finished submissions
|
283 |
if "finished" in existing_models:
|
284 |
for model in existing_models["finished"]:
|
|
|
288 |
f"with a completed evaluation. "
|
289 |
f"To re-evaluate, please open a discussion."
|
290 |
)
|
291 |
+
logger.error(
|
292 |
+
LogFormatter.error("Validation failed", error_msg)
|
293 |
+
)
|
294 |
return False, error_msg
|
295 |
+
|
296 |
+
logger.info(
|
297 |
+
LogFormatter.success(
|
298 |
+
"No finished submission found for this official provider model"
|
299 |
+
)
|
300 |
+
)
|
301 |
else:
|
302 |
+
logger.info(
|
303 |
+
LogFormatter.info(
|
304 |
+
f"Model organization '{model_org}' is not an official provider"
|
305 |
+
)
|
306 |
+
)
|
307 |
+
|
308 |
return True, None
|
309 |
+
|
310 |
except Exception as e:
|
311 |
error_msg = f"Failed to check official provider status: {str(e)}"
|
312 |
logger.error(LogFormatter.error(error_msg))
|
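For context, a self-contained sketch of the size computation that get_model_size performs on the safetensors parameter counts, including the 8x factor it applies to GPTQ checkpoints; the parameter counts and model ids below are made up.

from typing import Dict

def size_in_billions(parameter_count: Dict[str, int], precision: str, model_id: str) -> float:
    # Sum all parameter buckets reported by the safetensors metadata.
    total_params = sum(parameter_count.values())
    # 8x factor for GPTQ checkpoints, as in the validator above.
    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_id.lower()) else 1
    size_b = total_params / 1e9  # convert to billions, assuming float16 weights
    return round(size_factor * size_b, 3)

# A hypothetical 7B model in float16 vs. the same checkpoint flagged as GPTQ.
print(size_in_billions({"F16": 7_000_000_000}, "float16", "example-org/model-7b"))    # 7.0
print(size_in_billions({"I32": 7_000_000_000}, "GPTQ", "example-org/model-7b-gptq"))  # 56.0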
backend/utils/analyze_prod_datasets.py
CHANGED
@@ -3,7 +3,7 @@ import json
|
|
3 |
import logging
|
4 |
from datetime import datetime
|
5 |
from pathlib import Path
|
6 |
-
from typing import Dict, Any
|
7 |
from huggingface_hub import HfApi
|
8 |
from dotenv import load_dotenv
|
9 |
from app.config.hf_config import HF_ORGANIZATION
|
@@ -16,10 +16,7 @@ ROOT_DIR = BACKEND_DIR.parent
|
|
16 |
load_dotenv(ROOT_DIR / ".env")
|
17 |
|
18 |
# Configure logging
|
19 |
-
logging.basicConfig(
|
20 |
-
level=logging.INFO,
|
21 |
-
format='%(message)s'
|
22 |
-
)
|
23 |
logger = logging.getLogger(__name__)
|
24 |
|
25 |
# Initialize Hugging Face API
|
@@ -28,53 +25,50 @@ if not HF_TOKEN:
|
|
28 |
raise ValueError("HF_TOKEN not found in environment variables")
|
29 |
api = HfApi(token=HF_TOKEN)
|
30 |
|
|
|
31 |
def analyze_dataset(repo_id: str) -> Dict[str, Any]:
|
32 |
"""Analyze a dataset and return statistics"""
|
33 |
try:
|
34 |
# Get dataset info
|
35 |
dataset_info = api.dataset_info(repo_id=repo_id)
|
36 |
-
|
37 |
# Get file list
|
38 |
files = api.list_repo_files(repo_id, repo_type="dataset")
|
39 |
-
|
40 |
# Get last commit info
|
41 |
commits = api.list_repo_commits(repo_id, repo_type="dataset")
|
42 |
last_commit = next(commits, None)
|
43 |
-
|
44 |
# Count lines in jsonl files
|
45 |
total_entries = 0
|
46 |
for file in files:
|
47 |
-
if file.endswith(
|
48 |
try:
|
49 |
# Download file content
|
50 |
content = api.hf_hub_download(
|
51 |
-
repo_id=repo_id,
|
52 |
-
filename=file,
|
53 |
-
repo_type="dataset"
|
54 |
)
|
55 |
-
|
56 |
# Count lines
|
57 |
-
with open(content,
|
58 |
for _ in f:
|
59 |
total_entries += 1
|
60 |
-
|
61 |
except Exception as e:
|
62 |
logger.error(f"Error processing file {file}: {str(e)}")
|
63 |
continue
|
64 |
-
|
65 |
         # Special handling for requests dataset
         if repo_id == f"{HF_ORGANIZATION}/requests":
             pending_count = 0
             completed_count = 0
-
             try:
                 content = api.hf_hub_download(
-                    repo_id=repo_id,
-                    filename="eval_requests.jsonl",
-                    repo_type="dataset"
                 )
-
-                with open(content, 'r') as f:
                     for line in f:
                         try:
                             entry = json.loads(line)
@@ -84,10 +78,10 @@ def analyze_dataset(repo_id: str) -> Dict[str, Any]:
                                 completed_count += 1
                         except json.JSONDecodeError:
                             continue
-
             except Exception as e:
                 logger.error(f"Error analyzing requests: {str(e)}")
-
         # Build response
         response = {
             "id": repo_id,
@@ -97,22 +91,22 @@ def analyze_dataset(repo_id: str) -> Dict[str, Any]:
             "size_bytes": dataset_info.size_in_bytes,
             "downloads": dataset_info.downloads,
         }
-
         # Add request-specific info if applicable
         if repo_id == f"{HF_ORGANIZATION}/requests":
-            response.update(
-
-
-
-
         return response
-
     except Exception as e:
         logger.error(f"Error analyzing dataset {repo_id}: {str(e)}")
-        return {
-            "id": repo_id,
-            "error": str(e)
-        }

 def main():
     """Main function to analyze all datasets"""
@@ -121,50 +115,49 @@ def main():
     datasets = [
         {
             "id": f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
-            "description": "Aggregated results"
-        },
-        {
-            "id": f"{HF_ORGANIZATION}/requests",
-            "description": "Evaluation requests"
         },
         {
-            "id": f"{HF_ORGANIZATION}/votes",
-            "description": "User votes"
         },
-        {
-            "id": f"open-llm-leaderboard/official-providers",
-            "description": "Highlighted models"
-        }
     ]
-
     # Analyze each dataset
     results = []
     for dataset in datasets:
         logger.info(f"\nAnalyzing {dataset['description']} ({dataset['id']})...")
-        result = analyze_dataset(dataset['id'])
         results.append(result)
-
-        if 'error' in result:
             logger.error(f"❌ Error: {result['error']}")
         else:
             logger.info(f"✓ {result['total_entries']} entries")
             logger.info(f"✓ {result['file_count']} files")
             logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
             logger.info(f"✓ {result['downloads']} downloads")
-
-            if 'pending_requests' in result:
                 logger.info(f"✓ {result['pending_requests']} pending requests")
                 logger.info(f"✓ {result['completed_requests']} completed requests")
-
-            if result['last_modified']:
-                last_modified = datetime.fromisoformat(
-
-
         return results
-
     except Exception as e:
         logger.error(f"Global error: {str(e)}")
         return []

 if __name__ == "__main__":
-    main()

 import logging
 from datetime import datetime
 from pathlib import Path
+from typing import Dict, Any
 from huggingface_hub import HfApi
 from dotenv import load_dotenv
 from app.config.hf_config import HF_ORGANIZATION

 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
+logging.basicConfig(level=logging.INFO, format="%(message)s")
 logger = logging.getLogger(__name__)

 # Initialize Hugging Face API

     raise ValueError("HF_TOKEN not found in environment variables")
 api = HfApi(token=HF_TOKEN)

+

 def analyze_dataset(repo_id: str) -> Dict[str, Any]:
     """Analyze a dataset and return statistics"""
     try:
         # Get dataset info
         dataset_info = api.dataset_info(repo_id=repo_id)
+
         # Get file list
         files = api.list_repo_files(repo_id, repo_type="dataset")
+
         # Get last commit info
         commits = api.list_repo_commits(repo_id, repo_type="dataset")
         last_commit = next(commits, None)
+
         # Count lines in jsonl files
         total_entries = 0
         for file in files:
+            if file.endswith(".jsonl"):
                 try:
                     # Download file content
                     content = api.hf_hub_download(
+                        repo_id=repo_id, filename=file, repo_type="dataset"
                     )
+
                     # Count lines
+                    with open(content, "r") as f:
                         for _ in f:
                             total_entries += 1
+
                 except Exception as e:
                     logger.error(f"Error processing file {file}: {str(e)}")
                     continue
+
         # Special handling for requests dataset
         if repo_id == f"{HF_ORGANIZATION}/requests":
             pending_count = 0
             completed_count = 0
+
             try:
                 content = api.hf_hub_download(
+                    repo_id=repo_id, filename="eval_requests.jsonl", repo_type="dataset"
                 )
+
+                with open(content, "r") as f:
                     for line in f:
                         try:
                             entry = json.loads(line)

                                 completed_count += 1
                         except json.JSONDecodeError:
                             continue
+
             except Exception as e:
                 logger.error(f"Error analyzing requests: {str(e)}")
+
         # Build response
         response = {
             "id": repo_id,

             "size_bytes": dataset_info.size_in_bytes,
             "downloads": dataset_info.downloads,
         }
+
         # Add request-specific info if applicable
         if repo_id == f"{HF_ORGANIZATION}/requests":
+            response.update(
+                {
+                    "pending_requests": pending_count,
+                    "completed_requests": completed_count,
+                }
+            )
+
         return response
+
     except Exception as e:
         logger.error(f"Error analyzing dataset {repo_id}: {str(e)}")
+        return {"id": repo_id, "error": str(e)}
+

 def main():
     """Main function to analyze all datasets"""

     datasets = [
         {
             "id": f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
+            "description": "Aggregated results",
         },
+        {"id": f"{HF_ORGANIZATION}/requests", "description": "Evaluation requests"},
+        {"id": f"{HF_ORGANIZATION}/votes", "description": "User votes"},
         {
+            "id": "open-llm-leaderboard/official-providers",
+            "description": "Highlighted models",
         },
     ]
+
     # Analyze each dataset
     results = []
     for dataset in datasets:
         logger.info(f"\nAnalyzing {dataset['description']} ({dataset['id']})...")
+        result = analyze_dataset(dataset["id"])
         results.append(result)
+
+        if "error" in result:
             logger.error(f"❌ Error: {result['error']}")
         else:
             logger.info(f"✓ {result['total_entries']} entries")
             logger.info(f"✓ {result['file_count']} files")
             logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
             logger.info(f"✓ {result['downloads']} downloads")
+
+            if "pending_requests" in result:
                 logger.info(f"✓ {result['pending_requests']} pending requests")
                 logger.info(f"✓ {result['completed_requests']} completed requests")
+
+            if result["last_modified"]:
+                last_modified = datetime.fromisoformat(
+                    result["last_modified"].replace("Z", "+00:00")
+                )
+                logger.info(
+                    f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}"
+                )
+
         return results
+
     except Exception as e:
         logger.error(f"Global error: {str(e)}")
         return []

+
 if __name__ == "__main__":
+    main()

backend/utils/analyze_prod_models.py
CHANGED
@@ -1,5 +1,4 @@
 import os
-import json
 import logging
 from datetime import datetime
 from pathlib import Path
@@ -15,10 +14,7 @@ ROOT_DIR = BACKEND_DIR.parent
 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(message)s'
-)
 logger = logging.getLogger(__name__)

 # Initialize Hugging Face API
@@ -27,80 +23,92 @@ if not HF_TOKEN:
     raise ValueError("HF_TOKEN not found in environment variables")
 api = HfApi(token=HF_TOKEN)

 def count_evaluated_models():
     """Count the number of evaluated models"""
     try:
         # Get dataset info
-        dataset_info = api.dataset_info(
-
         # Get file list
-        files = api.list_repo_files(
-
         # Get last commit info
-        commits = api.list_repo_commits(
         last_commit = next(commits, None)
-
         # Count lines in jsonl files
         total_entries = 0
         for file in files:
-            if file.endswith('.jsonl'):
                 try:
                     # Download file content
                     content = api.hf_hub_download(
                         repo_id=f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
                         filename=file,
-                        repo_type="dataset"
                     )
-
                     # Count lines
-                    with open(content, 'r') as f:
                         for _ in f:
                             total_entries += 1
-
                 except Exception as e:
                     logger.error(f"Error processing file {file}: {str(e)}")
                     continue
-
         # Build response
         response = {
             "total_models": total_entries,
             "last_modified": last_commit.created_at if last_commit else None,
             "file_count": len(files),
             "size_bytes": dataset_info.size_in_bytes,
-            "downloads": dataset_info.downloads
         }
-
         return response
-
     except Exception as e:
         logger.error(f"Error counting evaluated models: {str(e)}")
-        return {
-            "error": str(e)
-        }

 def main():
     """Main function to count evaluated models"""
     try:
         logger.info("\nAnalyzing evaluated models...")
         result = count_evaluated_models()
-
-        if 'error' in result:
             logger.error(f"❌ Error: {result['error']}")
         else:
             logger.info(f"✓ {result['total_models']} models evaluated")
             logger.info(f"✓ {result['file_count']} files")
             logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
             logger.info(f"✓ {result['downloads']} downloads")
-
-            if result['last_modified']:
-                last_modified = datetime.fromisoformat(
-
-
         return result
-
     except Exception as e:
         logger.error(f"Global error: {str(e)}")
         return {"error": str(e)}

 if __name__ == "__main__":
-    main()

 import os
 import logging
 from datetime import datetime
 from pathlib import Path

 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
+logging.basicConfig(level=logging.INFO, format="%(message)s")
 logger = logging.getLogger(__name__)

 # Initialize Hugging Face API

     raise ValueError("HF_TOKEN not found in environment variables")
 api = HfApi(token=HF_TOKEN)

+

 def count_evaluated_models():
     """Count the number of evaluated models"""
     try:
         # Get dataset info
+        dataset_info = api.dataset_info(
+            repo_id=f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
+            repo_type="dataset",
+        )
+
         # Get file list
+        files = api.list_repo_files(
+            f"{HF_ORGANIZATION}/llm-security-leaderboard-contents", repo_type="dataset"
+        )
+
         # Get last commit info
+        commits = api.list_repo_commits(
+            f"{HF_ORGANIZATION}/llm-security-leaderboard-contents", repo_type="dataset"
+        )
         last_commit = next(commits, None)
+
         # Count lines in jsonl files
         total_entries = 0
         for file in files:
+            if file.endswith(".jsonl"):
                 try:
                     # Download file content
                     content = api.hf_hub_download(
                         repo_id=f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
                         filename=file,
+                        repo_type="dataset",
                     )
+
                     # Count lines
+                    with open(content, "r") as f:
                         for _ in f:
                             total_entries += 1
+
                 except Exception as e:
                     logger.error(f"Error processing file {file}: {str(e)}")
                     continue
+
         # Build response
         response = {
             "total_models": total_entries,
             "last_modified": last_commit.created_at if last_commit else None,
             "file_count": len(files),
             "size_bytes": dataset_info.size_in_bytes,
+            "downloads": dataset_info.downloads,
         }
+
         return response
+
     except Exception as e:
         logger.error(f"Error counting evaluated models: {str(e)}")
+        return {"error": str(e)}
+

 def main():
     """Main function to count evaluated models"""
     try:
         logger.info("\nAnalyzing evaluated models...")
         result = count_evaluated_models()
+
+        if "error" in result:
             logger.error(f"❌ Error: {result['error']}")
         else:
             logger.info(f"✓ {result['total_models']} models evaluated")
             logger.info(f"✓ {result['file_count']} files")
             logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
             logger.info(f"✓ {result['downloads']} downloads")
+
+            if result["last_modified"]:
+                last_modified = datetime.fromisoformat(
+                    result["last_modified"].replace("Z", "+00:00")
+                )
+                logger.info(
+                    f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}"
+                )
+
         return result
+
     except Exception as e:
         logger.error(f"Global error: {str(e)}")
         return {"error": str(e)}

+
 if __name__ == "__main__":
+    main()

backend/utils/fix_wrong_model_size.py
CHANGED
@@ -1,15 +1,11 @@
-import os
 import json
-import pytz
 import logging
 import asyncio
 from datetime import datetime
-from pathlib import Path
 import huggingface_hub
 from huggingface_hub.errors import RepositoryNotFoundError, RevisionNotFoundError
 from dotenv import load_dotenv
 from git import Repo
-from datetime import datetime
 from tqdm.auto import tqdm
 from tqdm.contrib.logging import logging_redirect_tqdm

@@ -20,22 +16,22 @@ from app.utils.model_validation import ModelValidator
 huggingface_hub.logging.set_verbosity_error()
 huggingface_hub.utils.disable_progress_bars()

-logging.basicConfig(
-    level=logging.ERROR,
-    format='%(message)s'
-)
 logger = logging.getLogger(__name__)
 load_dotenv()

 validator = ModelValidator()

 def get_changed_files(repo_path, start_date, end_date):
     repo = Repo(repo_path)
-    start = datetime.strptime(start_date, '%Y-%m-%d')
-    end = datetime.strptime(end_date, '%Y-%m-%d')
-
     changed_files = set()
-    pbar = tqdm(
     for commit in pbar:
         commit_date = datetime.fromtimestamp(commit.committed_date)
         pbar.set_postfix_str(f"Commit date: {commit_date}")
@@ -62,44 +58,52 @@ def main():
     requests_path = "/requests"
     start_date = "2024-12-09"
     end_date = "2025-01-07"
-
     changed_files = get_changed_files(requests_path, start_date, end_date)

     for file in tqdm(changed_files):
         try:
             request_data = read_json(requests_path, file)
-        except FileNotFoundError
             tqdm.write(f"File {file} not found")
             continue
-
         try:
             model_info = API.model_info(
                 repo_id=request_data["model"],
                 revision=request_data["revision"],
-                token=HF_TOKEN
             )
-        except (RepositoryNotFoundError, RevisionNotFoundError)
-            tqdm.write(f"Model info for {request_data['model']} not found")
             continue
-
         with logging_redirect_tqdm():
-            new_model_size, error = asyncio.run(
-
-
-
-
-

         if error:
-            tqdm.write(f"Error getting model size info for {request_data['model']}, {error}")
             continue
-
         old_model_size = request_data["params"]
         if old_model_size != new_model_size:
             if new_model_size > 100:
-                tqdm.write(f"Model: {request_data['model']}, size is more 100B: {new_model_size}")
-
-
             tqdm.write(f"Updating request file {file}")

             request_data["params"] = new_model_size

 import json
 import logging
 import asyncio
 from datetime import datetime
 import huggingface_hub
 from huggingface_hub.errors import RepositoryNotFoundError, RevisionNotFoundError
 from dotenv import load_dotenv
 from git import Repo
 from tqdm.auto import tqdm
 from tqdm.contrib.logging import logging_redirect_tqdm

 huggingface_hub.logging.set_verbosity_error()
 huggingface_hub.utils.disable_progress_bars()

+logging.basicConfig(level=logging.ERROR, format="%(message)s")
 logger = logging.getLogger(__name__)
 load_dotenv()

 validator = ModelValidator()

+
 def get_changed_files(repo_path, start_date, end_date):
     repo = Repo(repo_path)
+    start = datetime.strptime(start_date, "%Y-%m-%d")
+    end = datetime.strptime(end_date, "%Y-%m-%d")
+
     changed_files = set()
+    pbar = tqdm(
+        repo.iter_commits(), desc=f"Reading commits from {end_date} to {start_date}"
+    )
     for commit in pbar:
         commit_date = datetime.fromtimestamp(commit.committed_date)
         pbar.set_postfix_str(f"Commit date: {commit_date}")

     requests_path = "/requests"
     start_date = "2024-12-09"
     end_date = "2025-01-07"
+
     changed_files = get_changed_files(requests_path, start_date, end_date)

     for file in tqdm(changed_files):
         try:
             request_data = read_json(requests_path, file)
+        except FileNotFoundError:
             tqdm.write(f"File {file} not found")
             continue
+
         try:
             model_info = API.model_info(
                 repo_id=request_data["model"],
                 revision=request_data["revision"],
+                token=HF_TOKEN,
             )
+        except (RepositoryNotFoundError, RevisionNotFoundError):
+            tqdm.write(f"Model info for {request_data['model']} not found")
             continue
+
         with logging_redirect_tqdm():
+            new_model_size, error = asyncio.run(
+                validator.get_model_size(
+                    model_info=model_info,
+                    precision=request_data["precision"],
+                    base_model=request_data["base_model"],
+                    revision=request_data["revision"],
+                )
+            )

         if error:
+            tqdm.write(
+                f"Error getting model size info for {request_data['model']}, {error}"
+            )
             continue
+
         old_model_size = request_data["params"]
         if old_model_size != new_model_size:
             if new_model_size > 100:
+                tqdm.write(
+                    f"Model: {request_data['model']}, size is more 100B: {new_model_size}"
+                )
+
+            tqdm.write(
+                f"Model: {request_data['model']}, old size: {request_data['params']} new size: {new_model_size}"
+            )
             tqdm.write(f"Updating request file {file}")

             request_data["params"] = new_model_size

backend/utils/last_activity.py
CHANGED
@@ -1,9 +1,8 @@
 import os
 import json
 import logging
-from datetime import datetime
 from pathlib import Path
-from typing import Dict,
 from huggingface_hub import HfApi
 from dotenv import load_dotenv

@@ -15,10 +14,7 @@ ROOT_DIR = BACKEND_DIR.parent
 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(message)s'
-)
 logger = logging.getLogger(__name__)

 # Initialize Hugging Face API
@@ -28,107 +24,117 @@ if not HF_TOKEN:
 api = HfApi(token=HF_TOKEN)

 # Default organization
-HF_ORGANIZATION = os.getenv('HF_ORGANIZATION', 'stacklok')

 def get_last_votes(limit: int = 5) -> List[Dict]:
     """Get the last votes from the votes dataset"""
     try:
         logger.info("\nFetching last votes...")
-
         # Download and read votes file
         logger.info("Downloading votes file...")
         votes_file = api.hf_hub_download(
             repo_id=f"{HF_ORGANIZATION}/votes",
             filename="votes_data.jsonl",
-            repo_type="dataset"
         )
-
         logger.info("Reading votes file...")
         votes = []
-        with open(votes_file, 'r') as f:
             for line in f:
                 try:
                     vote = json.loads(line)
                     votes.append(vote)
                 except json.JSONDecodeError:
                     continue
-
         # Sort by timestamp and get last n votes
         logger.info("Sorting votes...")
-        votes.sort(key=lambda x: x.get('timestamp', ''), reverse=True)
         last_votes = votes[:limit]
-
         logger.info(f"✓ Found {len(last_votes)} recent votes")
         return last_votes
-
     except Exception as e:
         logger.error(f"Error reading votes: {str(e)}")
         return []

 def get_last_models(limit: int = 5) -> List[Dict]:
     """Get the last models from the requests dataset using commit history"""
     try:
         logger.info("\nFetching last model submissions...")
-
         # Get commit history
         logger.info("Getting commit history...")
-        commits = list(
-
-
-
         logger.info(f"Found {len(commits)} commits")
-
         # Track processed files to avoid duplicates
         processed_files = set()
         models = []
-
         # Process commits until we have enough models
         for i, commit in enumerate(commits):
-            logger.info(
-
             # Look at added/modified files in this commit
-            files_to_process = [
             if files_to_process:
                 logger.info(f"Found {len(files_to_process)} JSON files in commit")
-
             for file in files_to_process:
                 if file in processed_files:
                     continue
-
                 processed_files.add(file)
                 logger.info(f"Downloading {file}...")
-
                 try:
                     # Download and read the file
                     content = api.hf_hub_download(
                         repo_id=f"{HF_ORGANIZATION}/requests",
                         filename=file,
-                        repo_type="dataset"
                     )
-
-                    with open(content, 'r') as f:
                         model_data = json.load(f)
                         models.append(model_data)
-                    logger.info(
-
                     if len(models) >= limit:
                         logger.info("Reached desired number of models")
                         break
-
                 except Exception as e:
                     logger.error(f"Error reading file {file}: {str(e)}")
                     continue
-
             if len(models) >= limit:
                 break
-
         logger.info(f"✓ Found {len(models)} recent model submissions")
         return models
-
     except Exception as e:
         logger.error(f"Error reading models: {str(e)}")
         return []

 def main():
     """Display last activities from the leaderboard"""
     try:
@@ -142,7 +148,7 @@ def main():
             logger.info(f"Timestamp: {vote.get('timestamp')}")
         else:
             logger.info("No votes found")
-
         # Get last model submissions
         logger.info("\n=== Last Model Submissions ===")
         last_models = get_last_models()
@@ -151,14 +157,17 @@ def main():
                 logger.info(f"\nModel: {model.get('model')}")
                 logger.info(f"Submitter: {model.get('sender', 'Unknown')}")
                 logger.info(f"Status: {model.get('status', 'Unknown')}")
-                logger.info(
                 logger.info(f"Precision: {model.get('precision', 'Unknown')}")
                 logger.info(f"Weight Type: {model.get('weight_type', 'Unknown')}")
             else:
                 logger.info("No models found")
-
     except Exception as e:
         logger.error(f"Global error: {str(e)}")

 if __name__ == "__main__":
-    main()

 import os
 import json
 import logging
 from pathlib import Path
+from typing import Dict, List
 from huggingface_hub import HfApi
 from dotenv import load_dotenv

 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
+logging.basicConfig(level=logging.INFO, format="%(message)s")
 logger = logging.getLogger(__name__)

 # Initialize Hugging Face API

 api = HfApi(token=HF_TOKEN)

 # Default organization
+HF_ORGANIZATION = os.getenv("HF_ORGANIZATION", "stacklok")
+

 def get_last_votes(limit: int = 5) -> List[Dict]:
     """Get the last votes from the votes dataset"""
     try:
         logger.info("\nFetching last votes...")
+
         # Download and read votes file
         logger.info("Downloading votes file...")
         votes_file = api.hf_hub_download(
             repo_id=f"{HF_ORGANIZATION}/votes",
             filename="votes_data.jsonl",
+            repo_type="dataset",
         )
+
         logger.info("Reading votes file...")
         votes = []
+        with open(votes_file, "r") as f:
             for line in f:
                 try:
                     vote = json.loads(line)
                     votes.append(vote)
                 except json.JSONDecodeError:
                     continue
+
         # Sort by timestamp and get last n votes
         logger.info("Sorting votes...")
+        votes.sort(key=lambda x: x.get("timestamp", ""), reverse=True)
         last_votes = votes[:limit]
+
         logger.info(f"✓ Found {len(last_votes)} recent votes")
         return last_votes
+
     except Exception as e:
         logger.error(f"Error reading votes: {str(e)}")
         return []

+
 def get_last_models(limit: int = 5) -> List[Dict]:
     """Get the last models from the requests dataset using commit history"""
     try:
         logger.info("\nFetching last model submissions...")
+
         # Get commit history
         logger.info("Getting commit history...")
+        commits = list(
+            api.list_repo_commits(
+                repo_id=f"{HF_ORGANIZATION}/requests", repo_type="dataset"
+            )
+        )
         logger.info(f"Found {len(commits)} commits")
+
         # Track processed files to avoid duplicates
         processed_files = set()
         models = []
+
         # Process commits until we have enough models
         for i, commit in enumerate(commits):
+            logger.info(
+                f"Processing commit {i + 1}/{len(commits)} ({commit.created_at})"
+            )
+
             # Look at added/modified files in this commit
+            files_to_process = [
+                f for f in (commit.added + commit.modified) if f.endswith(".json")
+            ]
             if files_to_process:
                 logger.info(f"Found {len(files_to_process)} JSON files in commit")
+
             for file in files_to_process:
                 if file in processed_files:
                     continue
+
                 processed_files.add(file)
                 logger.info(f"Downloading {file}...")
+
                 try:
                     # Download and read the file
                     content = api.hf_hub_download(
                         repo_id=f"{HF_ORGANIZATION}/requests",
                         filename=file,
+                        repo_type="dataset",
                     )
+
+                    with open(content, "r") as f:
                         model_data = json.load(f)
                         models.append(model_data)
+                    logger.info(
+                        f"✓ Added model {model_data.get('model', 'Unknown')}"
+                    )
+
                     if len(models) >= limit:
                         logger.info("Reached desired number of models")
                         break
+
                 except Exception as e:
                     logger.error(f"Error reading file {file}: {str(e)}")
                     continue
+
             if len(models) >= limit:
                 break
+
         logger.info(f"✓ Found {len(models)} recent model submissions")
         return models
+
     except Exception as e:
         logger.error(f"Error reading models: {str(e)}")
         return []

+
 def main():
     """Display last activities from the leaderboard"""
     try:

             logger.info(f"Timestamp: {vote.get('timestamp')}")
         else:
             logger.info("No votes found")
+
         # Get last model submissions
         logger.info("\n=== Last Model Submissions ===")
         last_models = get_last_models()

                 logger.info(f"\nModel: {model.get('model')}")
                 logger.info(f"Submitter: {model.get('sender', 'Unknown')}")
                 logger.info(f"Status: {model.get('status', 'Unknown')}")
+                logger.info(
+                    f"Submission Time: {model.get('submitted_time', 'Unknown')}"
+                )
                 logger.info(f"Precision: {model.get('precision', 'Unknown')}")
                 logger.info(f"Weight Type: {model.get('weight_type', 'Unknown')}")
             else:
                 logger.info("No models found")
+
     except Exception as e:
         logger.error(f"Global error: {str(e)}")

+
 if __name__ == "__main__":
+    main()

backend/utils/sync_datasets_locally.py
CHANGED
@@ -1,9 +1,8 @@
 import os
-import shutil
 import tempfile
 import logging
 from pathlib import Path
-from huggingface_hub import HfApi, snapshot_download,
 from dotenv import load_dotenv

 # Configure source and destination usernames
@@ -18,10 +17,7 @@ ROOT_DIR = BACKEND_DIR.parent
 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(message)s'
-)
 logger = logging.getLogger(__name__)

 # List of dataset names to sync
@@ -38,12 +34,17 @@ DATASETS = [
     (name, f"{SOURCE_USERNAME}/{name}", f"{DESTINATION_USERNAME}/{name}")
     for name in DATASET_NAMES
 ] + [
-    (
-

 # Initialize Hugging Face API
 api = HfApi()

 def ensure_repo_exists(repo_id, token):
     """Ensure the repository exists, create it if it doesn't"""
     try:
@@ -51,23 +52,19 @@ def ensure_repo_exists(repo_id, token):
         logger.info(f"✓ Repository {repo_id} already exists")
     except Exception:
         logger.info(f"Creating repository {repo_id}...")
-        create_repo(
-            repo_id=repo_id,
-            repo_type="dataset",
-            token=token,
-            private=True
-        )
         logger.info(f"✓ Repository {repo_id} created")

 def process_dataset(dataset_info, token):
     """Process a single dataset"""
     name, source_dataset, destination_dataset = dataset_info
     try:
         logger.info(f"\n📥 Processing dataset: {name}")
-
         # Ensure destination repository exists
         ensure_repo_exists(destination_dataset, token)
-
         # Create a temporary directory for this dataset
         with tempfile.TemporaryDirectory() as temp_dir:
             try:
@@ -75,28 +72,28 @@ def process_dataset(dataset_info, token):
                 logger.info(f"Listing files in {source_dataset}...")
                 files = api.list_repo_files(source_dataset, repo_type="dataset")
                 logger.info(f"Detected structure: {len(files)} files")
-
                 # Download dataset
                 logger.info(f"Downloading from {source_dataset}...")
                 local_dir = snapshot_download(
                     repo_id=source_dataset,
                     repo_type="dataset",
                     local_dir=temp_dir,
-                    token=token
                 )
-                logger.info(
-
                 # Upload to destination while preserving structure
                 logger.info(f"📤 Uploading to {destination_dataset}...")
                 api.upload_folder(
                     folder_path=local_dir,
                     repo_id=destination_dataset,
                     repo_type="dataset",
-                    token=token
                 )
                 logger.info(f"✅ {name} copied successfully!")
                 return True
-
             except Exception as e:
                 logger.error(f"❌ Error processing {name}: {str(e)}")
                 return False
@@ -105,6 +102,7 @@ def process_dataset(dataset_info, token):
         logger.error(f"❌ Error for {name}: {str(e)}")
         return False

 def copy_datasets():
     try:
         logger.info("🔑 Checking authentication...")
@@ -112,21 +110,22 @@ def copy_datasets():
         token = os.getenv("HF_TOKEN")
         if not token:
             raise ValueError("HF_TOKEN not found in .env file")
-
         # Process datasets sequentially
         results = []
         for dataset_info in DATASETS:
             success = process_dataset(dataset_info, token)
             results.append((dataset_info[0], success))
-
         # Print final summary
         logger.info("\n📊 Final summary:")
         for dataset, success in results:
             status = "✅ Success" if success else "❌ Failure"
             logger.info(f"{dataset}: {status}")
-
     except Exception as e:
         logger.error(f"❌ Global error: {str(e)}")

 if __name__ == "__main__":
-    copy_datasets()

 import os
 import tempfile
 import logging
 from pathlib import Path
+from huggingface_hub import HfApi, snapshot_download, create_repo
 from dotenv import load_dotenv

 # Configure source and destination usernames

 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
+logging.basicConfig(level=logging.INFO, format="%(message)s")
 logger = logging.getLogger(__name__)

 # List of dataset names to sync

     (name, f"{SOURCE_USERNAME}/{name}", f"{DESTINATION_USERNAME}/{name}")
     for name in DATASET_NAMES
 ] + [
+    (
+        "official-providers",
+        "open-llm-leaderboard/official-providers",
+        f"{DESTINATION_USERNAME}/official-providers",
+    )
+]

 # Initialize Hugging Face API
 api = HfApi()

+
 def ensure_repo_exists(repo_id, token):
     """Ensure the repository exists, create it if it doesn't"""
     try:

         logger.info(f"✓ Repository {repo_id} already exists")
     except Exception:
         logger.info(f"Creating repository {repo_id}...")
+        create_repo(repo_id=repo_id, repo_type="dataset", token=token, private=True)
         logger.info(f"✓ Repository {repo_id} created")

+
 def process_dataset(dataset_info, token):
     """Process a single dataset"""
     name, source_dataset, destination_dataset = dataset_info
     try:
         logger.info(f"\n📥 Processing dataset: {name}")
+
         # Ensure destination repository exists
         ensure_repo_exists(destination_dataset, token)
+
         # Create a temporary directory for this dataset
         with tempfile.TemporaryDirectory() as temp_dir:
             try:

                 logger.info(f"Listing files in {source_dataset}...")
                 files = api.list_repo_files(source_dataset, repo_type="dataset")
                 logger.info(f"Detected structure: {len(files)} files")
+
                 # Download dataset
                 logger.info(f"Downloading from {source_dataset}...")
                 local_dir = snapshot_download(
                     repo_id=source_dataset,
                     repo_type="dataset",
                     local_dir=temp_dir,
+                    token=token,
                 )
+                logger.info("✓ Download complete")
+
                 # Upload to destination while preserving structure
                 logger.info(f"📤 Uploading to {destination_dataset}...")
                 api.upload_folder(
                     folder_path=local_dir,
                     repo_id=destination_dataset,
                     repo_type="dataset",
+                    token=token,
                 )
                 logger.info(f"✅ {name} copied successfully!")
                 return True
+
             except Exception as e:
                 logger.error(f"❌ Error processing {name}: {str(e)}")
                 return False

         logger.error(f"❌ Error for {name}: {str(e)}")
         return False

+
 def copy_datasets():
     try:
         logger.info("🔑 Checking authentication...")

         token = os.getenv("HF_TOKEN")
         if not token:
             raise ValueError("HF_TOKEN not found in .env file")
+
         # Process datasets sequentially
         results = []
         for dataset_info in DATASETS:
             success = process_dataset(dataset_info, token)
             results.append((dataset_info[0], success))
+
         # Print final summary
         logger.info("\n📊 Final summary:")
         for dataset, success in results:
             status = "✅ Success" if success else "❌ Failure"
             logger.info(f"{dataset}: {status}")
+
     except Exception as e:
         logger.error(f"❌ Global error: {str(e)}")

+
 if __name__ == "__main__":
+    copy_datasets()

docker-compose.yml
CHANGED
@@ -30,4 +30,4 @@ services:
       - PORT=${FRONTEND_PORT:-7860}
     command: npm start
     stdin_open: true
-    tty: true
+    tty: true

frontend/Dockerfile.dev
CHANGED
@@ -12,4 +12,4 @@ COPY package*.json ./
 RUN npm install

 # Volume will be mounted here, no need for COPY
-CMD ["npm", "start"]
+CMD ["npm", "start"]

frontend/src/components/Logo/HFLogo.js
CHANGED
@@ -16,4 +16,4 @@ const HFLogo = () => (
   </svg>
 );

-export default HFLogo;
+export default HFLogo;

frontend/src/components/shared/CodeBlock.js
CHANGED
@@ -34,4 +34,4 @@ const CodeBlock = ({ code }) => (
   </Box>
 );

-export default CodeBlock;
+export default CodeBlock;

frontend/src/config/auth.js
CHANGED
@@ -4,4 +4,4 @@ export const HF_CONFIG = {
   SCOPE: "openid profile",
   PROD_URL: "https://open-llm-leaderboard-open-llm-leaderboard.hf.space",
   DEV_URL: "http://localhost:7860"
-};
+};

frontend/src/hooks/useThemeMode.js
CHANGED
@@ -15,7 +15,7 @@ export const useThemeMode = () => {
     const handleChange = (e) => {
       setMode(e.matches ? 'dark' : 'light');
     };
-
+
     mediaQuery.addEventListener('change', handleChange);
     return () => mediaQuery.removeEventListener('change', handleChange);
   }, []);
@@ -25,4 +25,4 @@ export const useThemeMode = () => {
   };

   return { mode, toggleTheme };
-};
+};

frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/modelTypes.js
CHANGED
@@ -48,7 +48,7 @@ export const MODEL_TYPES = {

 export const getModelTypeIcon = (type) => {
   const cleanType = type.toLowerCase().trim();
-  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
+  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
     cleanType.includes(key)
   );
   return matchedType ? matchedType[1].icon : '❓';
@@ -56,7 +56,7 @@ export const getModelTypeIcon = (type) => {

 export const getModelTypeLabel = (type) => {
   const cleanType = type.toLowerCase().trim();
-  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
+  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
     cleanType.includes(key)
   );
   return matchedType ? matchedType[1].label : type;
@@ -64,7 +64,7 @@ export const getModelTypeLabel = (type) => {

 export const getModelTypeDescription = (type) => {
   const cleanType = type.toLowerCase().trim();
-  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
+  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
     cleanType.includes(key)
   );
   return matchedType ? matchedType[1].description : 'Unknown model type';
@@ -72,8 +72,8 @@ export const getModelTypeDescription = (type) => {

 export const getModelTypeOrder = (type) => {
   const cleanType = type.toLowerCase().trim();
-  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
+  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
     cleanType.includes(key)
   );
   return matchedType ? matchedType[1].order : Infinity;
-};
+};

frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/quickFilters.js
CHANGED
@@ -48,4 +48,4 @@ export const QUICK_FILTER_PRESETS = [
       selectedBooleanFilters: ['is_official_provider']
     }
   }
-];
+];

frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useBatchedState.js
CHANGED
@@ -28,4 +28,4 @@ export const useBatchedState = (initialState, options = {}) => {
   }, [batchDelay, useTransitions]);

   return [state, setBatchedState, isPending];
-};
+};