Daniel Kantor committed
Commit 58582d3 · 1 Parent(s): 39651ed

set up ruff for formatting

Files changed (37)
  1. .gitignore +0 -1
  2. Dockerfile +1 -1
  3. backend/Dockerfile.dev +1 -1
  4. backend/app/api/dependencies.py +4 -2
  5. backend/app/api/endpoints/leaderboard.py +7 -2
  6. backend/app/api/endpoints/models.py +27 -22
  7. backend/app/api/endpoints/votes.py +34 -34
  8. backend/app/api/router.py +1 -1
  9. backend/app/asgi.py +20 -11
  10. backend/app/config/base.py +4 -2
  11. backend/app/config/hf_config.py +4 -4
  12. backend/app/config/logging_config.py +11 -7
  13. backend/app/core/cache.py +23 -21
  14. backend/app/core/fastapi_cache.py +15 -13
  15. backend/app/core/formatting.py +22 -21
  16. backend/app/main.py +4 -3
  17. backend/app/services/hf_service.py +10 -4
  18. backend/app/services/leaderboard.py +73 -50
  19. backend/app/services/models.py +242 -166
  20. backend/app/services/rate_limiter.py +0 -72
  21. backend/app/services/votes.py +161 -102
  22. backend/app/utils/logging.py +1 -1
  23. backend/app/utils/model_validation.py +118 -71
  24. backend/utils/analyze_prod_datasets.py +52 -59
  25. backend/utils/analyze_prod_models.py +40 -32
  26. backend/utils/fix_wrong_model_size.py +34 -30
  27. backend/utils/last_activity.py +50 -41
  28. backend/utils/sync_datasets_locally.py +25 -26
  29. docker-compose.yml +1 -1
  30. frontend/Dockerfile.dev +1 -1
  31. frontend/src/components/Logo/HFLogo.js +1 -1
  32. frontend/src/components/shared/CodeBlock.js +1 -1
  33. frontend/src/config/auth.js +1 -1
  34. frontend/src/hooks/useThemeMode.js +2 -2
  35. frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/modelTypes.js +5 -5
  36. frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/quickFilters.js +1 -1
  37. frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useBatchedState.js +1 -1
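Every edit in this commit is mechanical. Ruff's formatter normalizes quote style, trailing commas, blank lines around top-level definitions, and the wrapping of over-long calls, while its lint pass drops now-unused imports (Depends, datetime, os, sys, uvicorn below). As a sketch of how such a setup is typically driven (the configuration file itself is not part of this diff, so the exact options are an assumption), ruff reads a [tool.ruff] table from pyproject.toml and is invoked as "ruff format ." and "ruff check . --fix". The recurring before/after pattern, distilled into a small self-contained Python example:

# Dummy inputs; the names mirror backend/app/api/endpoints/models.py
# but are illustrative only.
model_data = {"user_id": "alice", "model_id": "org/model"}
result = {"pending": [1, 2], "finished": [3]}

# Style before this commit: single quotes, a short dict comprehension
# spread over three lines, no trailing comma.
user_id_before = model_data.get('user_id', None)
stats_before = {
    status: len(models) for status, models in result.items()
}

# Style after ruff format: double quotes, short literals collapsed onto
# one line, trailing commas wherever a literal stays multi-line.
user_id_after = model_data.get("user_id", None)
stats_after = {status: len(models) for status, models in result.items()}

# Formatting never changes behavior, only layout.
assert user_id_before == user_id_after
assert stats_before == stats_after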
.gitignore CHANGED
@@ -42,4 +42,3 @@ package-lock.json
 .env
 .env.*
 !.env.example
-
Dockerfile CHANGED
@@ -59,4 +59,4 @@ USER user
 EXPOSE 7860
 
 # Start both servers with wait-for
-CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! nc -z localhost 7861; do sleep 1; done && cd frontend && npm run serve"]
+CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! nc -z localhost 7861; do sleep 1; done && cd frontend && npm run serve"]
backend/Dockerfile.dev CHANGED
@@ -22,4 +22,4 @@ ENV PYTHONUNBUFFERED=1
 ENV LOG_LEVEL=INFO
 
 # In dev, mount volume directly
-CMD ["uvicorn", "app.asgi:app", "--host", "0.0.0.0", "--port", "7860", "--reload", "--log-level", "warning", "--no-access-log"]
+CMD ["uvicorn", "app.asgi:app", "--host", "0.0.0.0", "--port", "7860", "--reload", "--log-level", "warning", "--no-access-log"]
backend/app/api/dependencies.py CHANGED
@@ -1,4 +1,4 @@
-from fastapi import Depends, HTTPException
+from fastapi import HTTPException
 import logging
 from app.services.models import ModelService
 from app.services.votes import VoteService
@@ -9,6 +9,7 @@ logger = logging.getLogger(__name__)
 model_service = ModelService()
 vote_service = VoteService()
 
+
 async def get_model_service() -> ModelService:
     """Dependency to get ModelService instance"""
     try:
@@ -21,6 +22,7 @@ async def get_model_service() -> ModelService:
         logger.error(LogFormatter.error(error_msg, e))
         raise HTTPException(status_code=500, detail=str(e))
 
+
 async def get_vote_service() -> VoteService:
     """Dependency to get VoteService instance"""
     try:
@@ -31,4 +33,4 @@ async def get_vote_service() -> VoteService:
     except Exception as e:
         error_msg = "Failed to initialize vote service"
         logger.error(LogFormatter.error(error_msg, e))
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail=str(e))
backend/app/api/endpoints/leaderboard.py CHANGED
@@ -9,6 +9,7 @@ logger = logging.getLogger(__name__)
 router = APIRouter()
 leaderboard_service = LeaderboardService()
 
+
 def leaderboard_key_builder(func, namespace: str = "leaderboard", **kwargs):
     """Build cache key for leaderboard data"""
     key_type = "raw" if func.__name__ == "get_leaderboard" else "formatted"
@@ -16,6 +17,7 @@ def leaderboard_key_builder(func, namespace: str = "leaderboard", **kwargs):
     logger.debug(LogFormatter.info(f"Built leaderboard cache key: {key}"))
     return key
 
+
 @router.get("")
 @cached(expire=300, key_builder=leaderboard_key_builder)
 async def get_leaderboard() -> List[Dict[str, Any]]:
@@ -32,6 +34,7 @@ async def get_leaderboard() -> List[Dict[str, Any]]:
         logger.error(LogFormatter.error("Failed to fetch raw leaderboard data", e))
         raise
 
+
 @router.get("/formatted")
 @cached(expire=300, key_builder=leaderboard_key_builder)
 async def get_formatted_leaderboard() -> List[Dict[str, Any]]:
@@ -45,5 +48,7 @@ async def get_formatted_leaderboard() -> List[Dict[str, Any]]:
         logger.info(LogFormatter.success(f"Retrieved {len(data)} formatted entries"))
         return data
     except Exception as e:
-        logger.error(LogFormatter.error("Failed to fetch formatted leaderboard data", e))
-        raise
+        logger.error(
+            LogFormatter.error("Failed to fetch formatted leaderboard data", e)
+        )
+        raise
backend/app/api/endpoints/models.py CHANGED
@@ -9,18 +9,17 @@ from app.core.formatting import LogFormatter
 logger = logging.getLogger(__name__)
 router = APIRouter(tags=["models"])
 
+
 @router.get("/status")
 @cached(expire=300)
 async def get_models_status(
-    model_service: ModelService = Depends(get_model_service)
+    model_service: ModelService = Depends(get_model_service),
 ) -> Dict[str, List[Dict[str, Any]]]:
     """Get all models grouped by status"""
     try:
         logger.info(LogFormatter.info("Fetching status for all models"))
         result = await model_service.get_models()
-        stats = {
-            status: len(models) for status, models in result.items()
-        }
+        stats = {status: len(models) for status, models in result.items()}
         for line in LogFormatter.stats(stats, "Models by Status"):
             logger.info(line)
         return result
@@ -28,10 +27,11 @@ async def get_models_status(
         logger.error(LogFormatter.error("Failed to get models status", e))
         raise HTTPException(status_code=500, detail=str(e))
 
+
 @router.get("/pending")
 @cached(expire=60)
 async def get_pending_models(
-    model_service: ModelService = Depends(get_model_service)
+    model_service: ModelService = Depends(get_model_service),
 ) -> List[Dict[str, Any]]:
     """Get all models waiting for evaluation"""
     try:
@@ -44,35 +44,35 @@ async def get_pending_models(
         logger.error(LogFormatter.error("Failed to get pending models", e))
         raise HTTPException(status_code=500, detail=str(e))
 
+
 @router.post("/submit")
 async def submit_model(
-    model_data: Dict[str, Any],
-    model_service: ModelService = Depends(get_model_service)
+    model_data: Dict[str, Any], model_service: ModelService = Depends(get_model_service)
 ) -> Dict[str, Any]:
     try:
         logger.info(LogFormatter.section("MODEL SUBMISSION"))
-
-        user_id = model_data.pop('user_id', None)
+
+        user_id = model_data.pop("user_id", None)
         if not user_id:
             error_msg = "user_id is required"
             logger.error(LogFormatter.error("Validation failed", error_msg))
             raise ValueError(error_msg)
-
+
         # Log submission details
         submission_info = {
             "Model_ID": model_data.get("model_id"),
             "User": user_id,
             "Base_Model": model_data.get("base_model"),
             "Precision": model_data.get("precision"),
-            "Model_Type": model_data.get("model_type")
+            "Model_Type": model_data.get("model_type"),
         }
         for line in LogFormatter.tree(submission_info, "Submission Details"):
             logger.info(line)
-
+
         result = await model_service.submit_model(model_data, user_id)
         logger.info(LogFormatter.success("Model submitted successfully"))
         return result
-
+
     except ValueError as e:
         logger.error(LogFormatter.error("Invalid submission data", e))
         raise HTTPException(status_code=400, detail=str(e))
@@ -80,37 +80,42 @@ async def submit_model(
         logger.error(LogFormatter.error("Submission failed", e))
         raise HTTPException(status_code=500, detail=str(e))
 
+
 @router.get("/organization/{organization}/submissions")
 async def get_organization_submissions(
     organization: str,
     days: int = Query(default=7, ge=1, le=30),
-    model_service: ModelService = Depends(get_model_service)
+    model_service: ModelService = Depends(get_model_service),
 ) -> List[Dict[str, Any]]:
     """Get all submissions from an organization in the last n days"""
     try:
-        submissions = await model_service.get_organization_submissions(organization, days)
+        submissions = await model_service.get_organization_submissions(
+            organization, days
+        )
         return submissions
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
+
 @router.get("/{model_id}/status")
 async def get_model_status(
-    model_id: str,
-    model_service: ModelService = Depends(get_model_service)
+    model_id: str, model_service: ModelService = Depends(get_model_service)
 ) -> Dict[str, Any]:
     try:
         logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
         status = await model_service.get_model_status(model_id)
-
+
         if status["status"] != "not_found":
             logger.info(LogFormatter.success("Status found"))
             for line in LogFormatter.tree(status, "Model Status"):
                 logger.info(line)
         else:
-            logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
-
+            logger.warning(
+                LogFormatter.warning(f"No status found for model: {model_id}")
+            )
+
         return status
-
+
     except Exception as e:
         logger.error(LogFormatter.error("Failed to get model status", e))
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail=str(e))
backend/app/api/endpoints/votes.py CHANGED
@@ -1,10 +1,9 @@
-from fastapi import APIRouter, HTTPException, Query, Depends, Response
+from fastapi import APIRouter, HTTPException, Query, Response
 from typing import Dict, Any, List
 from app.services.votes import VoteService
 from app.core.fastapi_cache import cached, build_cache_key, invalidate_cache_key
 import logging
 from app.core.formatting import LogFormatter
-from datetime import datetime, timezone
 
 logger = logging.getLogger(__name__)
 router = APIRouter()
@@ -12,28 +11,31 @@ vote_service = VoteService()
 
 CACHE_TTL = 30  # 30 seconds cache
 
+
 def model_votes_key_builder(func, namespace: str = "model_votes", **kwargs):
     """Build cache key for model votes"""
-    provider = kwargs.get('provider')
-    model = kwargs.get('model')
+    provider = kwargs.get("provider")
+    model = kwargs.get("model")
     key = build_cache_key(namespace, provider, model)
     logger.debug(LogFormatter.info(f"Built model votes cache key: {key}"))
     return key
 
+
 def user_votes_key_builder(func, namespace: str = "user_votes", **kwargs):
     """Build cache key for user votes"""
-    user_id = kwargs.get('user_id')
+    user_id = kwargs.get("user_id")
     key = build_cache_key(namespace, user_id)
     logger.debug(LogFormatter.info(f"Built user votes cache key: {key}"))
     return key
 
+
 @router.post("/{model_id:path}")
 async def add_vote(
     response: Response,
     model_id: str,
     vote_type: str = Query(..., description="Type of vote (up/down)"),
     user_id: str = Query(..., description="HuggingFace username"),
-    vote_data: Dict[str, Any] = None
+    vote_data: Dict[str, Any] = None,
 ) -> Dict[str, Any]:
     try:
         logger.info(LogFormatter.section("ADDING VOTE"))
@@ -41,50 +43,46 @@ async def add_vote(
             "Model": model_id,
             "User": user_id,
             "Type": vote_type,
-            "Config": vote_data or {}
+            "Config": vote_data or {},
         }
         for line in LogFormatter.tree(stats, "Vote Details"):
             logger.info(line)
-
+
         await vote_service.initialize()
        result = await vote_service.add_vote(model_id, user_id, vote_type, vote_data)
-
+
         # Invalidate affected caches
         try:
             logger.info(LogFormatter.subsection("CACHE INVALIDATION"))
-            provider, model = model_id.split('/', 1)
-
+            provider, model = model_id.split("/", 1)
+
             # Build and invalidate cache keys
             model_cache_key = build_cache_key("model_votes", provider, model)
             user_cache_key = build_cache_key("user_votes", user_id)
-
+
             await invalidate_cache_key(model_cache_key)
             await invalidate_cache_key(user_cache_key)
-
-            cache_stats = {
-                "Model_Cache": model_cache_key,
-                "User_Cache": user_cache_key
-            }
+
+            cache_stats = {"Model_Cache": model_cache_key, "User_Cache": user_cache_key}
             for line in LogFormatter.tree(cache_stats, "Invalidated Caches"):
                 logger.info(line)
-
+
         except Exception as e:
             logger.error(LogFormatter.error("Failed to invalidate cache", e))
-
+
         # Add cache control headers
         response.headers["Cache-Control"] = "no-cache"
-
+
         return result
     except Exception as e:
         logger.error(LogFormatter.error("Failed to add vote", e))
         raise HTTPException(status_code=400, detail=str(e))
 
+
 @router.get("/model/{provider}/{model}")
 @cached(expire=CACHE_TTL, key_builder=model_votes_key_builder)
 async def get_model_votes(
-    response: Response,
-    provider: str,
-    model: str
+    response: Response, provider: str, model: str
 ) -> Dict[str, Any]:
     """Get all votes for a specific model"""
     try:
@@ -92,35 +90,37 @@ async def get_model_votes(
         await vote_service.initialize()
         model_id = f"{provider}/{model}"
         result = await vote_service.get_model_votes(model_id)
-
+
         # Add cache control headers
         response.headers["Cache-Control"] = f"max-age={CACHE_TTL}"
-        response.headers["Last-Modified"] = vote_service._last_sync.strftime("%a, %d %b %Y %H:%M:%S GMT")
-
+        response.headers["Last-Modified"] = vote_service._last_sync.strftime(
+            "%a, %d %b %Y %H:%M:%S GMT"
+        )
+
         logger.info(LogFormatter.success(f"Found {result.get('total_votes', 0)} votes"))
         return result
     except Exception as e:
         logger.error(LogFormatter.error("Failed to get model votes", e))
         raise HTTPException(status_code=400, detail=str(e))
 
+
 @router.get("/user/{user_id}")
 @cached(expire=CACHE_TTL, key_builder=user_votes_key_builder)
-async def get_user_votes(
-    response: Response,
-    user_id: str
-) -> List[Dict[str, Any]]:
+async def get_user_votes(response: Response, user_id: str) -> List[Dict[str, Any]]:
     """Get all votes from a specific user"""
     try:
         logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
         await vote_service.initialize()
         votes = await vote_service.get_user_votes(user_id)
-
+
         # Add cache control headers
         response.headers["Cache-Control"] = f"max-age={CACHE_TTL}"
-        response.headers["Last-Modified"] = vote_service._last_sync.strftime("%a, %d %b %Y %H:%M:%S GMT")
-
+        response.headers["Last-Modified"] = vote_service._last_sync.strftime(
+            "%a, %d %b %Y %H:%M:%S GMT"
+        )
+
         logger.info(LogFormatter.success(f"Found {len(votes)} votes"))
         return votes
     except Exception as e:
         logger.error(LogFormatter.error("Failed to get user votes", e))
-        raise HTTPException(status_code=400, detail=str(e))
+        raise HTTPException(status_code=400, detail=str(e))
backend/app/api/router.py CHANGED
@@ -6,4 +6,4 @@ router = APIRouter()
 
 router.include_router(leaderboard.router, prefix="/leaderboard", tags=["leaderboard"])
 router.include_router(votes.router, prefix="/votes", tags=["votes"])
-router.include_router(models.router, prefix="/models", tags=["models"])
+router.include_router(models.router, prefix="/models", tags=["models"])
backend/app/asgi.py CHANGED
@@ -1,14 +1,12 @@
 """
 ASGI entry point for the Open LLM Leaderboard API.
 """
-import os
-import uvicorn
+
 import logging
 import logging.config
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.middleware.gzip import GZipMiddleware
-import sys
 
 from app.api.router import router
 from app.core.fastapi_cache import setup_cache
@@ -51,12 +49,12 @@ LOGGING_CONFIG = {
             "handlers": ["default"],
             "level": "WARNING",
             "propagate": False,
-        }
+        },
     },
     "root": {
         "handlers": ["default"],
         "level": "WARNING",
-    }
+    },
 }
 
 # Apply logging configuration
@@ -85,22 +83,33 @@ app.add_middleware(GZipMiddleware, minimum_size=500)
 # Include API router
 app.include_router(router, prefix="/api")
 
+
 @app.on_event("startup")
 async def startup_event():
     """Initialize services on startup"""
     logger.info("\n")
     logger.info(LogFormatter.section("APPLICATION STARTUP"))
-
+
     # Log HF configuration
     logger.info(LogFormatter.section("HUGGING FACE CONFIGURATION"))
     logger.info(LogFormatter.info(f"Organization: {hf_config.HF_ORGANIZATION}"))
-    logger.info(LogFormatter.info(f"Token Status: {'Present' if hf_config.HF_TOKEN else 'Missing'}"))
-    logger.info(LogFormatter.info(f"Using repositories:"))
+    logger.info(
+        LogFormatter.info(
+            f"Token Status: {'Present' if hf_config.HF_TOKEN else 'Missing'}"
+        )
+    )
+    logger.info(LogFormatter.info("Using repositories:"))
     logger.info(LogFormatter.info(f" - Queue: {hf_config.QUEUE_REPO}"))
     logger.info(LogFormatter.info(f" - Aggregated: {hf_config.AGGREGATED_REPO}"))
     logger.info(LogFormatter.info(f" - Votes: {hf_config.VOTES_REPO}"))
-    logger.info(LogFormatter.info(f" - Official Providers: {hf_config.OFFICIAL_PROVIDERS_REPO}"))
-
+    logger.info(
+        LogFormatter.info(
+            f" - Official Providers: {hf_config.OFFICIAL_PROVIDERS_REPO}"
+        )
+    )
+
     # Setup cache
     setup_cache()
-    logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend"))
+    logger.info(
+        LogFormatter.success("FastAPI Cache initialized with in-memory backend")
+    )
backend/app/config/base.py CHANGED
@@ -8,7 +8,9 @@ WORKERS = 4
 RELOAD = True if os.environ.get("ENVIRONMENT") == "development" else False
 
 # CORS configuration
-ORIGINS = ["http://localhost:3000"] if os.getenv("ENVIRONMENT") == "development" else ["*"]
+ORIGINS = (
+    ["http://localhost:3000"] if os.getenv("ENVIRONMENT") == "development" else ["*"]
+)
 
 # Cache configuration
 CACHE_TTL = int(os.environ.get("CACHE_TTL", 300))  # 5 minutes default
@@ -23,7 +25,7 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
 HF_ORGANIZATION = "stacklok"
 API = {
     "INFERENCE": "https://api-inference.huggingface.co/models",
-    "HUB": "https://huggingface.co"
+    "HUB": "https://huggingface.co",
 }
 
 # Cache paths
backend/app/config/hf_config.py CHANGED
@@ -1,8 +1,6 @@
 import os
 import logging
-from typing import Optional
 from huggingface_hub import HfApi
-from pathlib import Path
 from app.core.cache import cache_config
 
 logger = logging.getLogger(__name__)
@@ -13,7 +11,9 @@ HF_ORGANIZATION = "stacklok"
 # Get HF token directly from environment
 HF_TOKEN = os.environ.get("HF_TOKEN")
 if not HF_TOKEN:
-    logger.warning("HF_TOKEN not found in environment variables. Some features may be limited.")
+    logger.warning(
+        "HF_TOKEN not found in environment variables. Some features may be limited."
+    )
 
 # Initialize HF API
 API = HfApi(token=HF_TOKEN)
@@ -22,7 +22,7 @@ API = HfApi(token=HF_TOKEN)
 QUEUE_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-requests"
 AGGREGATED_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-contents"
 VOTES_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-votes"
-OFFICIAL_PROVIDERS_REPO = f"open-llm-leaderboard/official-providers"
+OFFICIAL_PROVIDERS_REPO = "open-llm-leaderboard/official-providers"
 
 logger.info(f"QUEUE_REPO: {QUEUE_REPO}")
 logger.info(f"AGGREGATED_REPO: {AGGREGATED_REPO}")
backend/app/config/logging_config.py CHANGED
@@ -1,11 +1,12 @@
 import logging
-import sys
 from tqdm import tqdm
 
+
 def get_tqdm_handler():
     """
     Creates a special handler for tqdm that doesn't interfere with other logs.
     """
+
     class TqdmLoggingHandler(logging.Handler):
         def emit(self, record):
             try:
@@ -17,22 +18,25 @@ def get_tqdm_handler():
 
     return TqdmLoggingHandler()
 
+
 def setup_service_logger(service_name: str) -> logging.Logger:
     """
     Configure a specific logger for a given service.
     """
     logger = logging.getLogger(f"app.services.{service_name}")
-
+
     # If the logger already has handlers, don't reconfigure it
     if logger.handlers:
         return logger
-
+
     # Add tqdm handler for this service
     tqdm_handler = get_tqdm_handler()
-    tqdm_handler.setFormatter(logging.Formatter('%(name)s - %(levelname)s - %(message)s'))
+    tqdm_handler.setFormatter(
+        logging.Formatter("%(name)s - %(levelname)s - %(message)s")
+    )
     logger.addHandler(tqdm_handler)
-
+
     # Don't propagate logs to parent loggers
     logger.propagate = False
-
-    return logger
+
+    return logger
backend/app/core/cache.py CHANGED
@@ -10,11 +10,12 @@ from app.config.base import (
     MODELS_CACHE,
     VOTES_CACHE,
     EVAL_CACHE,
-    CACHE_TTL
+    CACHE_TTL,
 )
 
 logger = logging.getLogger(__name__)
 
+
 class CacheConfig:
     def __init__(self):
         # Get cache paths from config
@@ -23,59 +24,60 @@ class CacheConfig:
         self.models_cache = MODELS_CACHE
         self.votes_cache = VOTES_CACHE
         self.eval_cache = EVAL_CACHE
-
+
         # Specific files
         self.votes_file = self.votes_cache / "votes_data.jsonl"
         self.eval_requests_file = self.eval_cache / "eval_requests.jsonl"
-
+
         # Cache TTL
         self.cache_ttl = timedelta(seconds=CACHE_TTL)
-
+
         self._initialize_cache_dirs()
         self._setup_environment()
-
+
     def _initialize_cache_dirs(self):
         """Initialize all necessary cache directories"""
         try:
            logger.info(LogFormatter.section("CACHE INITIALIZATION"))
-
+
             cache_dirs = {
                 "Root": self.cache_root,
                 "Datasets": self.datasets_cache,
                 "Models": self.models_cache,
                 "Votes": self.votes_cache,
-                "Eval": self.eval_cache
+                "Eval": self.eval_cache,
             }
-
+
             for name, cache_dir in cache_dirs.items():
                 cache_dir.mkdir(parents=True, exist_ok=True)
-                logger.info(LogFormatter.success(f"{name} cache directory: {cache_dir}"))
-
+                logger.info(
+                    LogFormatter.success(f"{name} cache directory: {cache_dir}")
+                )
+
         except Exception as e:
             logger.error(LogFormatter.error("Failed to create cache directories", e))
             raise
-
+
     def _setup_environment(self):
         """Configure HuggingFace environment variables"""
         logger.info(LogFormatter.subsection("ENVIRONMENT SETUP"))
 
         env_vars = {
             "HF_HOME": str(self.cache_root),
-            "HF_DATASETS_CACHE": str(self.datasets_cache)
+            "HF_DATASETS_CACHE": str(self.datasets_cache),
         }
 
         for var, value in env_vars.items():
             os.environ[var] = value
             logger.info(LogFormatter.info(f"Set {var}={value}"))
 
-
     def get_cache_path(self, cache_type: str) -> Path:
         """Returns the path for a specific cache type"""
         cache_paths = {
             "datasets": self.datasets_cache,
             "models": self.models_cache,
             "votes": self.votes_cache,
-            "eval": self.eval_cache
+            "eval": self.eval_cache,
         }
         return cache_paths.get(cache_type, self.cache_root)
 
@@ -83,13 +85,12 @@ class CacheConfig:
        """Flush specified cache or all caches if no type is specified"""
         try:
             if cache_type:
-                logger.info(LogFormatter.section(f"FLUSHING {cache_type.upper()} CACHE"))
+                logger.info(
+                    LogFormatter.section(f"FLUSHING {cache_type.upper()} CACHE")
+                )
                 cache_dir = self.get_cache_path(cache_type)
                 if cache_dir.exists():
-                    stats = {
-                        "Cache_Type": cache_type,
-                        "Directory": str(cache_dir)
-                    }
+                    stats = {"Cache_Type": cache_type, "Directory": str(cache_dir)}
                     for line in LogFormatter.tree(stats, "Cache Details"):
                         logger.info(line)
                     shutil.rmtree(cache_dir)
@@ -100,10 +101,11 @@ class CacheConfig:
             for cache_type in ["datasets", "models", "votes", "eval"]:
                 self.flush_cache(cache_type)
             logger.info(LogFormatter.success("All caches cleared successfully"))
-
+
        except Exception as e:
             logger.error(LogFormatter.error("Failed to flush cache", e))
             raise
 
+
 # Singleton instance of cache configuration
-cache_config = CacheConfig()
+cache_config = CacheConfig()
backend/app/core/fastapi_cache.py CHANGED
@@ -1,7 +1,6 @@
 from fastapi_cache import FastAPICache
 from fastapi_cache.backends.inmemory import InMemoryBackend
 from fastapi_cache.decorator import cache
-from datetime import timedelta
 from app.config import CACHE_TTL
 import logging
 from app.core.formatting import LogFormatter
@@ -9,6 +8,7 @@ from typing import Optional, Any
 
 logger = logging.getLogger(__name__)
 
+
 class CustomInMemoryBackend(InMemoryBackend):
     def __init__(self):
         """Initialize the cache backend"""
@@ -23,7 +23,9 @@ class CustomInMemoryBackend(InMemoryBackend):
                 return True
             return False
         except Exception as e:
-            logger.error(LogFormatter.error(f"Failed to delete key {key} from cache", e))
+            logger.error(
+                LogFormatter.error(f"Failed to delete key {key} from cache", e)
+            )
             return False
 
     async def get(self, key: str) -> Any:
@@ -34,43 +36,43 @@ class CustomInMemoryBackend(InMemoryBackend):
         """Set a value in the cache"""
         self.cache[key] = value
 
+
 def setup_cache():
     """Initialize FastAPI Cache with in-memory backend"""
     try:
         logger.info(LogFormatter.section("CACHE INITIALIZATION"))
-        FastAPICache.init(
-            backend=CustomInMemoryBackend(),
-            prefix="fastapi-cache"
-        )
+        FastAPICache.init(backend=CustomInMemoryBackend(), prefix="fastapi-cache")
         logger.info(LogFormatter.success("Cache initialized successfully"))
     except Exception as e:
         logger.error(LogFormatter.error("Failed to initialize cache", e))
         raise
 
+
 async def invalidate_cache_key(key: str):
     """Invalidate a specific cache key"""
     try:
         backend = FastAPICache.get_backend()
-        if hasattr(backend, 'delete'):
+        if hasattr(backend, "delete"):
             await backend.delete(key)
             logger.info(LogFormatter.success(f"Cache invalidated for key: {key}"))
         else:
-            logger.warning(LogFormatter.warning("Cache backend does not support deletion"))
+            logger.warning(
+                LogFormatter.warning("Cache backend does not support deletion")
+            )
     except Exception as e:
         logger.error(LogFormatter.error(f"Failed to invalidate cache key: {key}", e))
 
+
 def build_cache_key(*args) -> str:
     """Build a cache key from multiple arguments"""
     return ":".join(str(arg) for arg in args if arg is not None)
 
+
 def cached(expire: int = CACHE_TTL, key_builder=None):
     """Decorator for caching endpoint responses
-
+
     Args:
         expire (int): Cache TTL in seconds
         key_builder (callable, optional): Custom key builder function
     """
-    return cache(
-        expire=expire,
-        key_builder=key_builder
-    )
+    return cache(expire=expire, key_builder=key_builder)
backend/app/core/formatting.py CHANGED
@@ -3,48 +3,49 @@ from typing import Dict, Any, List, Optional
 
 logger = logging.getLogger(__name__)
 
+
 class LogFormatter:
     """Utility class for consistent log formatting across the application"""
-
+
     @staticmethod
     def section(title: str) -> str:
         """Create a section header"""
-        return f"\n{'='*20} {title.upper()} {'='*20}"
-
+        return f"\n{'=' * 20} {title.upper()} {'=' * 20}"
+
     @staticmethod
     def subsection(title: str) -> str:
         """Create a subsection header"""
-        return f"\n{'─'*20} {title} {'─'*20}"
-
+        return f"\n{'─' * 20} {title} {'─' * 20}"
+
     @staticmethod
     def tree(items: Dict[str, Any], title: str = None) -> List[str]:
         """Create a tree view of dictionary data"""
         lines = []
         if title:
             lines.append(f"📊 {title}:")
-
+
         # Get the maximum length for alignment
         max_key_length = max(len(str(k)) for k in items.keys())
-
+
         # Format each item
         for i, (key, value) in enumerate(items.items()):
             prefix = "└──" if i == len(items) - 1 else "├──"
             if isinstance(value, (int, float)):
                 value = f"{value:,}"  # Add thousand separators
             lines.append(f"{prefix} {str(key):<{max_key_length}}: {value}")
-
+
         return lines
-
+
     @staticmethod
     def stats(stats: Dict[str, int], title: str = None) -> List[str]:
         """Format statistics with icons"""
         lines = []
         if title:
             lines.append(f"📊 {title}:")
-
+
         # Get the maximum length for alignment
         max_key_length = max(len(str(k)) for k in stats.keys())
-
+
         # Format each stat with an appropriate icon
         icons = {
             "total": "📌",
@@ -59,19 +60,19 @@ class LogFormatter:
             "cached": "💾",
             "size": "📏",
             "time": "⏱️",
-            "rate": "🚀"
+            "rate": "🚀",
         }
-
+
         # Format each item
         for i, (key, value) in enumerate(stats.items()):
             prefix = "└──" if i == len(stats) - 1 else "├──"
-            icon = icons.get(key.lower().split('_')[0], "•")
+            icon = icons.get(key.lower().split("_")[0], "•")
             if isinstance(value, (int, float)):
                 value = f"{value:,}"  # Add thousand separators
             lines.append(f"{prefix} {icon} {str(key):<{max_key_length}}: {value}")
-
+
         return lines
-
+
     @staticmethod
     def progress_bar(current: int, total: int, width: int = 20) -> str:
         """Create a progress bar"""
@@ -79,7 +80,7 @@ class LogFormatter:
         filled = "█" * (percentage * width // 100)
         empty = "░" * (width - len(filled))
         return f"{filled}{empty} {percentage:3d}%"
-
+
     @staticmethod
     def error(message: str, error: Optional[Exception] = None) -> str:
         """Format error message"""
@@ -87,18 +88,18 @@ class LogFormatter:
         if error:
             error_msg += f"\n └── Details: {str(error)}"
         return error_msg
-
+
     @staticmethod
     def success(message: str) -> str:
         """Format success message"""
         return f"✅ {message}"
-
+
     @staticmethod
     def warning(message: str) -> str:
         """Format warning message"""
         return f"⚠️ {message}"
-
+
     @staticmethod
     def info(message: str) -> str:
         """Format info message"""
-        return f"ℹ️ {message}"
+        return f"ℹ️ {message}"
backend/app/main.py CHANGED
@@ -1,5 +1,6 @@
 from fastapi import FastAPI
 from app.config.logging_config import setup_logging
+from app.api import models, votes
 import logging
 
 # Initialize logging configuration
@@ -8,11 +9,11 @@ logger = logging.getLogger(__name__)
 
 app = FastAPI(title="Open LLM Leaderboard API")
 
+
 @app.on_event("startup")
 async def startup_event():
     logger.info("Starting up the application...")
 
-# Import and include routers after app initialization
-from app.api import models, votes
+
 app.include_router(models.router, prefix="/api", tags=["models"])
-app.include_router(votes.router, prefix="/api", tags=["votes"])
+app.include_router(votes.router, prefix="/api", tags=["votes"])
backend/app/services/hf_service.py CHANGED
@@ -1,5 +1,4 @@
 from typing import Optional
-from huggingface_hub import HfApi
 from app.config import HF_TOKEN, API
 from app.core.cache import cache_config
 from app.core.formatting import LogFormatter
@@ -7,6 +6,7 @@ import logging
 
 logger = logging.getLogger(__name__)
 
+
 class HuggingFaceService:
     def __init__(self):
         self.api = API
@@ -31,7 +31,11 @@ class HuggingFaceService:
         try:
             logger.info(LogFormatter.info("Fetching user information..."))
             info = self.api.get_token_permission()
-            logger.info(LogFormatter.success(f"User info retrieved for: {info.get('user', 'Unknown')}"))
+            logger.info(
+                LogFormatter.success(
+                    f"User info retrieved for: {info.get('user', 'Unknown')}"
+                )
+            )
             return info
         except Exception as e:
             logger.error(LogFormatter.error("Failed to get user info", e))
@@ -39,7 +43,9 @@ class HuggingFaceService:
 
     def _log_repo_operation(self, operation: str, repo: str, details: str = None):
         """Helper to log repository operations"""
-        logger.info(LogFormatter.section(f"HF REPOSITORY OPERATION - {operation.upper()}"))
+        logger.info(
+            LogFormatter.section(f"HF REPOSITORY OPERATION - {operation.upper()}")
+        )
         stats = {
             "Operation": operation,
             "Repository": repo,
@@ -47,4 +53,4 @@ class HuggingFaceService:
         if details:
             stats["Details"] = details
         for line in LogFormatter.tree(stats):
-            logger.info(line)
+            logger.info(line)
backend/app/services/leaderboard.py CHANGED
@@ -1,5 +1,4 @@
1
  from app.core.cache import cache_config
2
- from datetime import datetime
3
  from typing import List, Dict, Any
4
  import datasets
5
  from fastapi import HTTPException
@@ -9,33 +8,38 @@ from app.core.formatting import LogFormatter
9
 
10
  logger = logging.getLogger(__name__)
11
 
 
12
  class LeaderboardService:
13
  def __init__(self):
14
  pass
15
-
16
  async def fetch_raw_data(self) -> List[Dict[str, Any]]:
17
  """Fetch raw leaderboard data from HuggingFace dataset"""
18
  try:
19
  logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
20
- logger.info(LogFormatter.info(f"Loading dataset from {HF_ORGANIZATION}/llm-security-leaderboard-contents"))
21
-
 
 
 
 
22
  dataset = datasets.load_dataset(
23
  f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
24
- cache_dir=cache_config.get_cache_path("datasets")
25
  )["train"]
26
-
27
  df = dataset.to_pandas()
28
- data = df.to_dict('records')
29
-
30
  stats = {
31
  "Total_Entries": len(data),
32
- "Dataset_Size": f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.1f}MB"
33
  }
34
  for line in LogFormatter.stats(stats, "Dataset Statistics"):
35
  logger.info(line)
36
-
37
  return data
38
-
39
  except Exception as e:
40
  logger.error(LogFormatter.error("Failed to fetch leaderboard data", e))
41
  raise HTTPException(status_code=500, detail=str(e))
@@ -44,53 +48,60 @@ class LeaderboardService:
44
  """Get formatted leaderboard data"""
45
  try:
46
  logger.info(LogFormatter.section("FORMATTING LEADERBOARD DATA"))
47
-
48
  raw_data = await self.fetch_raw_data()
49
  formatted_data = []
50
  type_counts = {}
51
  error_count = 0
52
-
53
  # Initialize progress tracking
54
  total_items = len(raw_data)
55
  logger.info(LogFormatter.info(f"Processing {total_items:,} entries..."))
56
-
57
  for i, item in enumerate(raw_data, 1):
58
  try:
59
  formatted_item = await self.transform_data(item)
60
  formatted_data.append(formatted_item)
61
-
62
  # Count model types
63
  model_type = formatted_item["model"]["type"]
64
  type_counts[model_type] = type_counts.get(model_type, 0) + 1
65
-
66
  except Exception as e:
67
  error_count += 1
68
- logger.error(LogFormatter.error(f"Failed to format entry {i}/{total_items}", e))
 
 
 
 
69
  continue
70
-
71
  # Log progress every 10%
72
  if i % max(1, total_items // 10) == 0:
73
- progress = (i / total_items) * 100
74
- logger.info(LogFormatter.info(f"Progress: {LogFormatter.progress_bar(i, total_items)}"))
75
-
 
 
 
76
  # Log final statistics
77
  stats = {
78
  "Total_Processed": total_items,
79
  "Successful": len(formatted_data),
80
- "Failed": error_count
81
  }
82
  logger.info(LogFormatter.section("PROCESSING SUMMARY"))
83
  for line in LogFormatter.stats(stats, "Processing Statistics"):
84
  logger.info(line)
85
-
86
  # Log model type distribution
87
  type_stats = {f"Type_{k}": v for k, v in type_counts.items()}
88
  logger.info(LogFormatter.subsection("MODEL TYPE DISTRIBUTION"))
89
  for line in LogFormatter.stats(type_stats):
90
  logger.info(line)
91
-
92
  return formatted_data
93
-
94
  except Exception as e:
95
  logger.error(LogFormatter.error("Failed to format leaderboard data", e))
96
  raise HTTPException(status_code=500, detail=str(e))
@@ -100,42 +111,44 @@ class LeaderboardService:
100
  try:
101
  # Extract model name for logging
102
  model_name = data.get("fullname", "Unknown")
103
- logger.debug(LogFormatter.info(f"Transforming data for model: {model_name}"))
104
-
105
  # Create unique ID combining model name, precision, sha and chat template status
106
  unique_id = f"{data.get('fullname', 'Unknown')}_{data.get('Precision', 'Unknown')}_{data.get('Model sha', 'Unknown')}_{str(data.get('Chat Template', False))}"
107
-
108
  evaluations = {
109
  "ifeval": {
110
  "name": "IFEval",
111
  "value": data.get("IFEval Raw", 0),
112
- "normalized_score": data.get("IFEval", 0)
113
  },
114
  "bbh": {
115
  "name": "BBH",
116
  "value": data.get("BBH Raw", 0),
117
- "normalized_score": data.get("BBH", 0)
118
  },
119
  "math": {
120
  "name": "MATH Level 5",
121
  "value": data.get("MATH Lvl 5 Raw", 0),
122
- "normalized_score": data.get("MATH Lvl 5", 0)
123
  },
124
  "gpqa": {
125
  "name": "GPQA",
126
  "value": data.get("GPQA Raw", 0),
127
- "normalized_score": data.get("GPQA", 0)
128
  },
129
  "musr": {
130
  "name": "MUSR",
131
  "value": data.get("MUSR Raw", 0),
132
- "normalized_score": data.get("MUSR", 0)
133
  },
134
  "mmlu_pro": {
135
  "name": "MMLU-PRO",
136
  "value": data.get("MMLU-PRO Raw", 0),
137
- "normalized_score": data.get("MMLU-PRO", 0)
138
- }
139
  }
140
 
141
  features = {
@@ -143,7 +156,7 @@ class LeaderboardService:
143
  "is_merged": data.get("Merged", False),
144
  "is_moe": data.get("MoE", False),
145
  "is_flagged": data.get("Flagged", False),
146
- "is_official_provider": data.get("Official Providers", False)
147
  }
148
 
149
  metadata = {
@@ -154,18 +167,18 @@ class LeaderboardService:
154
  "hub_license": data.get("Hub License"),
155
  "hub_hearts": data.get("Hub ❤️"),
156
  "params_billions": data.get("#Params (B)"),
157
- "co2_cost": data.get("CO₂ cost (kg)", 0)
158
  }
159
 
160
  # Clean model type by removing emojis if present
161
  original_type = data.get("Type", "")
162
  model_type = original_type.lower().strip()
163
-
164
  # Remove emojis and parentheses
165
  if "(" in model_type:
166
  model_type = model_type.split("(")[0].strip()
167
- model_type = ''.join(c for c in model_type if not c in '🔶🟢🟩💬🤝🌸 ')
168
-
169
  # Map old model types to new ones
170
  model_type_mapping = {
171
  "fine-tuned": "fined-tuned-on-domain-specific-dataset",
@@ -175,14 +188,18 @@ class LeaderboardService:
175
  "ft": "fined-tuned-on-domain-specific-dataset",
176
  "finetuning": "fined-tuned-on-domain-specific-dataset",
177
  "fine tuning": "fined-tuned-on-domain-specific-dataset",
178
- "fine-tuning": "fined-tuned-on-domain-specific-dataset"
179
  }
180
 
181
  mapped_type = model_type_mapping.get(model_type.lower().strip(), model_type)
182
-
183
  if mapped_type != model_type:
184
- logger.debug(LogFormatter.info(f"Model type mapped: {original_type} -> {mapped_type}"))
185
-
186
  transformed_data = {
187
  "id": unique_id,
188
  "model": {
@@ -193,16 +210,22 @@ class LeaderboardService:
193
  "weight_type": data.get("Weight type"),
194
  "architecture": data.get("Architecture"),
195
  "average_score": data.get("Average ⬆️"),
196
- "has_chat_template": data.get("Chat Template", False)
197
  },
198
  "evaluations": evaluations,
199
  "features": features,
200
- "metadata": metadata
201
  }
202
-
203
- logger.debug(LogFormatter.success(f"Successfully transformed data for {model_name}"))
204
  return transformed_data
205
-
206
  except Exception as e:
207
- logger.error(LogFormatter.error(f"Failed to transform data for {data.get('fullname', 'Unknown')}", e))
208
  raise
 
1
  from app.core.cache import cache_config
 
2
  from typing import List, Dict, Any
3
  import datasets
4
  from fastapi import HTTPException
 
8
 
9
  logger = logging.getLogger(__name__)
10
 
11
+
12
  class LeaderboardService:
13
  def __init__(self):
14
  pass
15
+
16
  async def fetch_raw_data(self) -> List[Dict[str, Any]]:
17
  """Fetch raw leaderboard data from HuggingFace dataset"""
18
  try:
19
  logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
20
+ logger.info(
21
+ LogFormatter.info(
22
+ f"Loading dataset from {HF_ORGANIZATION}/llm-security-leaderboard-contents"
23
+ )
24
+ )
25
+
26
  dataset = datasets.load_dataset(
27
  f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
28
+ cache_dir=cache_config.get_cache_path("datasets"),
29
  )["train"]
30
+
31
  df = dataset.to_pandas()
32
+ data = df.to_dict("records")
33
+
34
  stats = {
35
  "Total_Entries": len(data),
36
+ "Dataset_Size": f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.1f}MB",
37
  }
38
  for line in LogFormatter.stats(stats, "Dataset Statistics"):
39
  logger.info(line)
40
+
41
  return data
42
+
43
  except Exception as e:
44
  logger.error(LogFormatter.error("Failed to fetch leaderboard data", e))
45
  raise HTTPException(status_code=500, detail=str(e))
 
48
  """Get formatted leaderboard data"""
49
  try:
50
  logger.info(LogFormatter.section("FORMATTING LEADERBOARD DATA"))
51
+
52
  raw_data = await self.fetch_raw_data()
53
  formatted_data = []
54
  type_counts = {}
55
  error_count = 0
56
+
57
  # Initialize progress tracking
58
  total_items = len(raw_data)
59
  logger.info(LogFormatter.info(f"Processing {total_items:,} entries..."))
60
+
61
  for i, item in enumerate(raw_data, 1):
62
  try:
63
  formatted_item = await self.transform_data(item)
64
  formatted_data.append(formatted_item)
65
+
66
  # Count model types
67
  model_type = formatted_item["model"]["type"]
68
  type_counts[model_type] = type_counts.get(model_type, 0) + 1
69
+
70
  except Exception as e:
71
  error_count += 1
72
+ logger.error(
73
+ LogFormatter.error(
74
+ f"Failed to format entry {i}/{total_items}", e
75
+ )
76
+ )
77
  continue
78
+
79
  # Log progress every 10%
80
  if i % max(1, total_items // 10) == 0:
81
+ logger.info(
82
+ LogFormatter.info(
83
+ f"Progress: {LogFormatter.progress_bar(i, total_items)}"
84
+ )
85
+ )
86
+
87
  # Log final statistics
88
  stats = {
89
  "Total_Processed": total_items,
90
  "Successful": len(formatted_data),
91
+ "Failed": error_count,
92
  }
93
  logger.info(LogFormatter.section("PROCESSING SUMMARY"))
94
  for line in LogFormatter.stats(stats, "Processing Statistics"):
95
  logger.info(line)
96
+
97
  # Log model type distribution
98
  type_stats = {f"Type_{k}": v for k, v in type_counts.items()}
99
  logger.info(LogFormatter.subsection("MODEL TYPE DISTRIBUTION"))
100
  for line in LogFormatter.stats(type_stats):
101
  logger.info(line)
102
+
103
  return formatted_data
104
+
105
  except Exception as e:
106
  logger.error(LogFormatter.error("Failed to format leaderboard data", e))
107
  raise HTTPException(status_code=500, detail=str(e))
 
111
  try:
112
  # Extract model name for logging
113
  model_name = data.get("fullname", "Unknown")
114
+ logger.debug(
115
+ LogFormatter.info(f"Transforming data for model: {model_name}")
116
+ )
117
+
118
  # Create unique ID combining model name, precision, sha and chat template status
119
  unique_id = f"{data.get('fullname', 'Unknown')}_{data.get('Precision', 'Unknown')}_{data.get('Model sha', 'Unknown')}_{str(data.get('Chat Template', False))}"
120
+
121
  evaluations = {
122
  "ifeval": {
123
  "name": "IFEval",
124
  "value": data.get("IFEval Raw", 0),
125
+ "normalized_score": data.get("IFEval", 0),
126
  },
127
  "bbh": {
128
  "name": "BBH",
129
  "value": data.get("BBH Raw", 0),
130
+ "normalized_score": data.get("BBH", 0),
131
  },
132
  "math": {
133
  "name": "MATH Level 5",
134
  "value": data.get("MATH Lvl 5 Raw", 0),
135
+ "normalized_score": data.get("MATH Lvl 5", 0),
136
  },
137
  "gpqa": {
138
  "name": "GPQA",
139
  "value": data.get("GPQA Raw", 0),
140
+ "normalized_score": data.get("GPQA", 0),
141
  },
142
  "musr": {
143
  "name": "MUSR",
144
  "value": data.get("MUSR Raw", 0),
145
+ "normalized_score": data.get("MUSR", 0),
146
  },
147
  "mmlu_pro": {
148
  "name": "MMLU-PRO",
149
  "value": data.get("MMLU-PRO Raw", 0),
150
+ "normalized_score": data.get("MMLU-PRO", 0),
151
+ },
152
  }
153
 
154
  features = {
 
156
  "is_merged": data.get("Merged", False),
157
  "is_moe": data.get("MoE", False),
158
  "is_flagged": data.get("Flagged", False),
159
+ "is_official_provider": data.get("Official Providers", False),
160
  }
161
 
162
  metadata = {
 
167
  "hub_license": data.get("Hub License"),
168
  "hub_hearts": data.get("Hub ❤️"),
169
  "params_billions": data.get("#Params (B)"),
170
+ "co2_cost": data.get("CO₂ cost (kg)", 0),
171
  }
172
 
173
  # Clean model type by removing emojis if present
174
  original_type = data.get("Type", "")
175
  model_type = original_type.lower().strip()
176
+
177
  # Remove emojis and parentheses
178
  if "(" in model_type:
179
  model_type = model_type.split("(")[0].strip()
180
+ model_type = "".join(c for c in model_type if c not in "🔶🟢🟩💬🤝🌸 ")
181
+
182
  # Map old model types to new ones
183
  model_type_mapping = {
184
  "fine-tuned": "fined-tuned-on-domain-specific-dataset",
 
188
  "ft": "fined-tuned-on-domain-specific-dataset",
189
  "finetuning": "fined-tuned-on-domain-specific-dataset",
190
  "fine tuning": "fined-tuned-on-domain-specific-dataset",
191
+ "fine-tuning": "fined-tuned-on-domain-specific-dataset",
192
  }
193
 
194
  mapped_type = model_type_mapping.get(model_type.lower().strip(), model_type)
195
+
196
  if mapped_type != model_type:
197
+ logger.debug(
198
+ LogFormatter.info(
199
+ f"Model type mapped: {original_type} -> {mapped_type}"
200
+ )
201
+ )
202
+
203
  transformed_data = {
204
  "id": unique_id,
205
  "model": {
 
210
  "weight_type": data.get("Weight type"),
211
  "architecture": data.get("Architecture"),
212
  "average_score": data.get("Average ⬆️"),
213
+ "has_chat_template": data.get("Chat Template", False),
214
  },
215
  "evaluations": evaluations,
216
  "features": features,
217
+ "metadata": metadata,
218
  }
219
+
220
+ logger.debug(
221
+ LogFormatter.success(f"Successfully transformed data for {model_name}")
222
+ )
223
  return transformed_data
224
+
225
  except Exception as e:
226
+ logger.error(
227
+ LogFormatter.error(
228
+ f"Failed to transform data for {data.get('fullname', 'Unknown')}", e
229
+ )
230
+ )
231
  raise
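
The transform_data changes above keep the legacy type-label cleanup intact: parenthesized suffixes are dropped, emoji markers are stripped, and old labels are mapped onto the current taxonomy. A standalone sketch of that cleanup, assuming only the string handling shown above (the helper name is illustrative, not part of this commit):

    LEGACY_TYPES = {
        "fine-tuned": "fined-tuned-on-domain-specific-dataset",
        "fine-tuning": "fined-tuned-on-domain-specific-dataset",
    }

    def normalize_model_type(raw: str) -> str:
        t = raw.lower().strip()
        if "(" in t:
            t = t.split("(")[0].strip()  # drop parenthesized qualifiers
        t = "".join(c for c in t if c not in "🔶🟢🟩💬🤝🌸 ")  # strip emoji markers
        return LEGACY_TYPES.get(t, t)

    assert normalize_model_type("🔶 Fine-Tuned (chat)") == "fined-tuned-on-domain-specific-dataset"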
backend/app/services/models.py CHANGED
@@ -5,22 +5,16 @@ import os
5
  from pathlib import Path
6
  import logging
7
  import aiohttp
8
- import asyncio
9
  import time
10
- from huggingface_hub import HfApi, CommitOperationAdd
11
  from huggingface_hub.utils import build_hf_headers
12
  from datasets import disable_progress_bar
13
  import sys
14
  import contextlib
15
- from concurrent.futures import ThreadPoolExecutor
16
  import tempfile
17
 
18
- from app.config import (
19
- QUEUE_REPO,
20
- HF_TOKEN,
21
- EVAL_REQUESTS_PATH
22
- )
23
- from app.config.hf_config import HF_ORGANIZATION, QUEUE_REPO
24
  from app.services.hf_service import HuggingFaceService
25
  from app.utils.model_validation import ModelValidator
26
  from app.services.votes import VoteService
@@ -32,12 +26,13 @@ disable_progress_bar()
32
 
33
  logger = logging.getLogger(__name__)
34
 
 
35
  # Context manager to temporarily disable stdout and stderr
36
  @contextlib.contextmanager
37
  def suppress_output():
38
  stdout = sys.stdout
39
  stderr = sys.stderr
40
- devnull = open(os.devnull, 'w')
41
  try:
42
  sys.stdout = devnull
43
  sys.stderr = devnull
@@ -47,6 +42,7 @@ def suppress_output():
47
  sys.stderr = stderr
48
  devnull.close()
49
 
 
50
  class ProgressTracker:
51
  def __init__(self, total: int, desc: str = "Progress", update_frequency: int = 10):
52
  self.total = total
@@ -55,57 +51,63 @@ class ProgressTracker:
55
  self.start_time = time.time()
56
  self.update_frequency = update_frequency # Percentage steps
57
  self.last_update = -1
58
-
59
  # Initial log with fancy formatting
60
  logger.info(LogFormatter.section(desc))
61
  logger.info(LogFormatter.info(f"Starting processing of {total:,} items..."))
62
  sys.stdout.flush()
63
-
64
  def update(self, n: int = 1):
65
  self.current += n
66
  current_percentage = (self.current * 100) // self.total
67
-
68
  # Only update on frequency steps (e.g., 0%, 10%, 20%, etc.)
69
- if current_percentage >= self.last_update + self.update_frequency or current_percentage == 100:
70
  elapsed = time.time() - self.start_time
71
  rate = self.current / elapsed if elapsed > 0 else 0
72
  remaining = (self.total - self.current) / rate if rate > 0 else 0
73
-
74
  # Create progress stats
75
  stats = {
76
  "Progress": LogFormatter.progress_bar(self.current, self.total),
77
  "Items": f"{self.current:,}/{self.total:,}",
78
  "Time": f"⏱️ {elapsed:.1f}s elapsed, {remaining:.1f}s remaining",
79
- "Rate": f"🚀 {rate:.1f} items/s"
80
  }
81
-
82
  # Log progress using tree format
83
  for line in LogFormatter.tree(stats):
84
  logger.info(line)
85
  sys.stdout.flush()
86
-
87
- self.last_update = (current_percentage // self.update_frequency) * self.update_frequency
88
-
89
  def close(self):
90
  elapsed = time.time() - self.start_time
91
  rate = self.total / elapsed if elapsed > 0 else 0
92
-
93
  # Final summary with fancy formatting
94
  logger.info(LogFormatter.section("COMPLETED"))
95
  stats = {
96
  "Total": f"{self.total:,} items",
97
  "Time": f"{elapsed:.1f}s",
98
- "Rate": f"{rate:.1f} items/s"
99
  }
100
  for line in LogFormatter.stats(stats):
101
  logger.info(line)
102
- logger.info("="*50)
103
  sys.stdout.flush()
104
 
 
105
  class ModelService(HuggingFaceService):
106
- _instance: Optional['ModelService'] = None
107
  _initialized = False
108
-
109
  def __new__(cls):
110
  if cls._instance is None:
111
  logger.info(LogFormatter.info("Creating new ModelService instance"))
@@ -113,14 +115,18 @@ class ModelService(HuggingFaceService):
113
  return cls._instance
114
 
115
  def __init__(self):
116
- if not hasattr(self, '_init_done'):
117
  logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
118
  super().__init__()
119
  self.validator = ModelValidator()
120
  self.vote_service = VoteService()
121
  self.eval_requests_path = cache_config.eval_requests_file
122
- logger.info(LogFormatter.info(f"Using eval requests path: {self.eval_requests_path}"))
123
-
124
  self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
125
  self.hf_api = HfApi(token=HF_TOKEN)
126
  self.cached_models = None
@@ -129,56 +135,66 @@ class ModelService(HuggingFaceService):
129
  self._init_done = True
130
  logger.info(LogFormatter.success("Initialization complete"))
131
 
132
- async def _download_and_process_file(self, file: str, session: aiohttp.ClientSession, progress: ProgressTracker) -> Optional[Dict]:
133
  """Download and process a file asynchronously"""
134
  try:
135
  # Build file URL
136
  url = f"https://huggingface.co/datasets/{QUEUE_REPO}/resolve/main/{file}"
137
  headers = build_hf_headers(token=self.token)
138
-
139
  # Download file
140
  async with session.get(url, headers=headers) as response:
141
  if response.status != 200:
142
- logger.error(LogFormatter.error(f"Failed to download {file}", f"HTTP {response.status}"))
143
  progress.update()
144
  return None
145
-
146
  try:
147
  # First read content as text
148
  text_content = await response.text()
149
  # Then parse JSON
150
  content = json.loads(text_content)
151
  except json.JSONDecodeError as e:
152
- logger.error(LogFormatter.error(f"Failed to decode JSON from {file}", e))
153
  progress.update()
154
  return None
155
-
156
  # Get status and determine target status
157
  status = content.get("status", "PENDING").upper()
158
  target_status = None
159
  status_map = {
160
  "PENDING": ["PENDING"],
161
  "EVALUATING": ["RUNNING"],
162
- "FINISHED": ["FINISHED"]
163
  }
164
-
165
  for target, source_statuses in status_map.items():
166
  if status in source_statuses:
167
  target_status = target
168
  break
169
-
170
  if not target_status:
171
  progress.update()
172
  return None
173
-
174
  # Calculate wait time
175
  try:
176
- submit_time = datetime.fromisoformat(content["submitted_time"].replace("Z", "+00:00"))
177
  if submit_time.tzinfo is None:
178
  submit_time = submit_time.replace(tzinfo=timezone.utc)
179
  current_time = datetime.now(timezone.utc)
180
  wait_time = current_time - submit_time
181
-
182
  model_info = {
183
  "name": content["model"],
184
  "submitter": content.get("sender", "Unknown"),
@@ -186,17 +202,17 @@ class ModelService(HuggingFaceService):
186
  "wait_time": f"{wait_time.total_seconds():.1f}s",
187
  "submission_time": content["submitted_time"],
188
  "status": target_status,
189
- "precision": content.get("precision", "Unknown")
190
  }
191
-
192
  progress.update()
193
  return model_info
194
-
195
  except (ValueError, TypeError) as e:
196
  logger.error(LogFormatter.error(f"Failed to process {file}", e))
197
  progress.update()
198
  return None
199
-
200
  except Exception as e:
201
  logger.error(LogFormatter.error(f"Failed to load {file}", e))
202
  progress.update()
@@ -207,31 +223,25 @@ class ModelService(HuggingFaceService):
207
  try:
208
  logger.info(LogFormatter.section("CACHE REFRESH"))
209
  self._log_repo_operation("read", QUEUE_REPO, "Refreshing models cache")
210
-
211
  # Initialize models dictionary
212
- models = {
213
- "finished": [],
214
- "evaluating": [],
215
- "pending": []
216
- }
217
-
218
  try:
219
  logger.info(LogFormatter.subsection("DATASET LOADING"))
220
  logger.info(LogFormatter.info("Loading dataset..."))
221
-
222
  # Download entire dataset snapshot
223
  with suppress_output():
224
  local_dir = self.hf_api.snapshot_download(
225
- repo_id=QUEUE_REPO,
226
- repo_type="dataset",
227
- token=self.token
228
  )
229
-
230
  # List JSON files in local directory
231
  local_path = Path(local_dir)
232
  json_files = list(local_path.glob("**/*.json"))
233
  total_files = len(json_files)
234
-
235
  # Log repository stats
236
  stats = {
237
  "Total_Files": total_files,
@@ -239,46 +249,48 @@ class ModelService(HuggingFaceService):
239
  }
240
  for line in LogFormatter.stats(stats, "Repository Statistics"):
241
  logger.info(line)
242
-
243
  if not json_files:
244
  raise Exception("No JSON files found in repository")
245
-
246
  # Initialize progress tracker
247
  progress = ProgressTracker(total_files, "PROCESSING FILES")
248
-
249
  # Process local files
250
  model_submissions = {} # Dict to track latest submission for each (model_id, revision, precision)
251
  for file_path in json_files:
252
  try:
253
- with open(file_path, 'r') as f:
254
  content = json.load(f)
255
-
256
  # Get status and determine target status
257
  status = content.get("status", "PENDING").upper()
258
  target_status = None
259
  status_map = {
260
  "PENDING": ["PENDING"],
261
  "EVALUATING": ["RUNNING"],
262
- "FINISHED": ["FINISHED"]
263
  }
264
-
265
  for target, source_statuses in status_map.items():
266
  if status in source_statuses:
267
  target_status = target
268
  break
269
-
270
  if not target_status:
271
  progress.update()
272
  continue
273
-
274
  # Calculate wait time
275
  try:
276
- submit_time = datetime.fromisoformat(content["submitted_time"].replace("Z", "+00:00"))
277
  if submit_time.tzinfo is None:
278
  submit_time = submit_time.replace(tzinfo=timezone.utc)
279
  current_time = datetime.now(timezone.utc)
280
  wait_time = current_time - submit_time
281
-
282
  model_info = {
283
  "name": content["model"],
284
  "submitter": content.get("sender", "Unknown"),
@@ -286,50 +298,68 @@ class ModelService(HuggingFaceService):
286
  "wait_time": f"{wait_time.total_seconds():.1f}s",
287
  "submission_time": content["submitted_time"],
288
  "status": target_status,
289
- "precision": content.get("precision", "Unknown")
290
  }
291
-
292
  # Use (model_id, revision, precision) as key to track latest submission
293
- key = (content["model"], content["revision"], content.get("precision", "Unknown"))
294
- if key not in model_submissions or submit_time > datetime.fromisoformat(model_submissions[key]["submission_time"].replace("Z", "+00:00")):
295
  model_submissions[key] = model_info
296
-
297
  except (ValueError, TypeError) as e:
298
- logger.error(LogFormatter.error(f"Failed to process {file_path.name}", e))
299
-
300
  except Exception as e:
301
- logger.error(LogFormatter.error(f"Failed to load {file_path.name}", e))
302
  finally:
303
  progress.update()
304
-
305
  # Populate models dict with deduplicated submissions
306
  for model_info in model_submissions.values():
307
  models[model_info["status"].lower()].append(model_info)
308
-
309
  progress.close()
310
-
311
  # Final summary with fancy formatting
312
  logger.info(LogFormatter.section("CACHE SUMMARY"))
313
  stats = {
314
  "Finished": len(models["finished"]),
315
  "Evaluating": len(models["evaluating"]),
316
- "Pending": len(models["pending"])
317
  }
318
  for line in LogFormatter.stats(stats, "Models by Status"):
319
  logger.info(line)
320
- logger.info("="*50)
321
-
322
  except Exception as e:
323
  logger.error(LogFormatter.error("Error processing files", e))
324
  raise
325
-
326
  # Update cache
327
  self.cached_models = models
328
  self.last_cache_update = time.time()
329
  logger.info(LogFormatter.success("Cache updated successfully"))
330
-
331
  return models
332
-
333
  except Exception as e:
334
  logger.error(LogFormatter.error("Cache refresh failed", e))
335
  raise
@@ -337,40 +367,48 @@ class ModelService(HuggingFaceService):
337
  async def initialize(self):
338
  """Initialize the model service"""
339
  if self._initialized:
340
- logger.info(LogFormatter.info("Service already initialized, using cached data"))
341
  return
342
-
343
  try:
344
  logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
345
-
346
  # Check if cache already exists
347
  cache_path = cache_config.get_cache_path("datasets")
348
  if not cache_path.exists() or not any(cache_path.iterdir()):
349
- logger.info(LogFormatter.info("No existing cache found, initializing datasets cache..."))
350
  cache_config.flush_cache("datasets")
351
  else:
352
  logger.info(LogFormatter.info("Using existing datasets cache"))
353
-
354
  # Ensure eval requests directory exists
355
  self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
356
- logger.info(LogFormatter.info(f"Eval requests directory: {self.eval_requests_path}"))
357
-
358
  # List existing files
359
  if self.eval_requests_path.exists():
360
  files = list(self.eval_requests_path.glob("**/*.json"))
361
  stats = {
362
  "Total_Files": len(files),
363
- "Directory": str(self.eval_requests_path)
364
  }
365
  for line in LogFormatter.stats(stats, "Eval Requests"):
366
  logger.info(line)
367
-
368
  # Load initial cache
369
  await self._refresh_models_cache()
370
-
371
  self._initialized = True
372
  logger.info(LogFormatter.success("Model service initialization complete"))
373
-
374
  except Exception as e:
375
  logger.error(LogFormatter.error("Initialization failed", e))
376
  raise
@@ -378,44 +416,59 @@ class ModelService(HuggingFaceService):
378
  async def get_models(self) -> Dict[str, List[Dict[str, Any]]]:
379
  """Get all models with their status"""
380
  if not self._initialized:
381
- logger.info(LogFormatter.info("Service not initialized, initializing now..."))
382
  await self.initialize()
383
-
384
  current_time = time.time()
385
  cache_age = current_time - self.last_cache_update
386
-
387
  # Check if cache needs refresh
388
  if not self.cached_models:
389
- logger.info(LogFormatter.info("No cached data available, refreshing cache..."))
390
  return await self._refresh_models_cache()
391
  elif cache_age > self.cache_ttl:
392
- logger.info(LogFormatter.info(f"Cache expired ({cache_age:.1f}s old, TTL: {self.cache_ttl}s)"))
393
  return await self._refresh_models_cache()
394
  else:
395
  logger.info(LogFormatter.info(f"Using cached data ({cache_age:.1f}s old)"))
396
  return self.cached_models
397
 
398
  async def submit_model(
399
- self,
400
- model_data: Dict[str, Any],
401
- user_id: str
402
  ) -> Dict[str, Any]:
403
  logger.info(LogFormatter.section("MODEL SUBMISSION"))
404
- self._log_repo_operation("write", QUEUE_REPO, f"Submitting model {model_data['model_id']} by {user_id}")
405
  stats = {
406
  "Model": model_data["model_id"],
407
  "User": user_id,
408
  "Revision": model_data["revision"],
409
  "Precision": model_data["precision"],
410
- "Type": model_data["model_type"]
411
  }
412
  for line in LogFormatter.tree(stats, "Submission Details"):
413
  logger.info(line)
414
-
415
  # Validate required fields
416
  required_fields = [
417
- "model_id", "base_model", "revision", "precision",
418
- "weight_type", "model_type", "use_chat_template"
 
419
  ]
420
  for field in required_fields:
421
  if field not in model_data:
@@ -424,23 +477,25 @@ class ModelService(HuggingFaceService):
424
  # Get model info and validate it exists on HuggingFace
425
  try:
426
  logger.info(LogFormatter.subsection("MODEL VALIDATION"))
427
-
428
  # Get the model info to check if it exists
429
  model_info = self.hf_api.model_info(
430
  model_data["model_id"],
431
  revision=model_data["revision"],
432
- token=self.token
433
  )
434
-
435
  if not model_info:
436
- raise Exception(f"Model {model_data['model_id']} not found on HuggingFace Hub")
437
-
438
  logger.info(LogFormatter.success("Model exists on HuggingFace Hub"))
439
-
440
  except Exception as e:
441
  logger.error(LogFormatter.error("Model validation failed", e))
442
  raise
443
-
444
  # Update model revision with commit sha
445
  model_data["revision"] = model_info.sha
446
 
@@ -448,11 +503,13 @@ class ModelService(HuggingFaceService):
448
  try:
449
  logger.info(LogFormatter.subsection("CHECKING EXISTING SUBMISSIONS"))
450
  existing_models = await self.get_models()
451
-
452
  # Call the official provider status check
453
- is_valid, error_message = await self.validator.check_official_provider_status(
454
- model_data["model_id"],
455
- existing_models
456
  )
457
  if not is_valid:
458
  raise ValueError(error_message)
@@ -460,11 +517,16 @@ class ModelService(HuggingFaceService):
460
  # Check in all statuses (pending, evaluating, finished)
461
  for status, models in existing_models.items():
462
  for model in models:
463
- if model["name"] == model_data["model_id"] and model["revision"] == model_data["revision"]:
464
  error_msg = f"Model {model_data['model_id']} revision {model_data['revision']} is already in the system with status: {status}"
465
- logger.error(LogFormatter.error("Submission rejected", error_msg))
466
  raise ValueError(error_msg)
467
-
468
  logger.info(LogFormatter.success("No existing submission found"))
469
  except ValueError:
470
  raise
@@ -474,9 +536,7 @@ class ModelService(HuggingFaceService):
474
 
475
  # Check that model on hub and valid
476
  valid, error, model_config = await self.validator.is_model_on_hub(
477
- model_data["model_id"],
478
- model_data["revision"],
479
- test_tokenizer=True
480
  )
481
  if not valid:
482
  logger.error(LogFormatter.error("Model on hub validation failed", error))
@@ -497,12 +557,14 @@ class ModelService(HuggingFaceService):
497
  model_info,
498
  model_data["precision"],
499
  model_data["base_model"],
500
- revision=model_data["revision"]
501
  )
502
  if model_size is None:
503
  logger.error(LogFormatter.error("Model size validation failed", error))
504
  raise Exception(error)
505
- logger.info(LogFormatter.success(f"Model size validation passed: {model_size:.1f}B"))
506
 
507
  # Size limits based on precision
508
  if model_data["precision"] in ["float16", "bfloat16"] and model_size > 100:
@@ -513,16 +575,16 @@ class ModelService(HuggingFaceService):
513
  # Chat template validation if requested
514
  if model_data["use_chat_template"]:
515
  valid, error = await self.validator.check_chat_template(
516
- model_data["model_id"],
517
- model_data["revision"]
518
  )
519
  if not valid:
520
- logger.error(LogFormatter.error("Chat template validation failed", error))
521
  raise Exception(error)
522
  logger.info(LogFormatter.success("Chat template validation passed"))
523
 
524
-
525
- architectures = model_info.config.get("architectures", "")
526
  if architectures:
527
  architectures = ";".join(architectures)
528
 
@@ -541,9 +603,9 @@ class ModelService(HuggingFaceService):
541
  "job_id": -1,
542
  "job_start_time": None,
543
  "use_chat_template": model_data["use_chat_template"],
544
- "sender": user_id
545
  }
546
-
547
  logger.info(LogFormatter.subsection("EVALUATION ENTRY"))
548
  for line in LogFormatter.tree(eval_entry):
549
  logger.info(line)
@@ -552,18 +614,24 @@ class ModelService(HuggingFaceService):
552
  try:
553
  logger.info(LogFormatter.subsection("UPLOADING TO HUGGINGFACE"))
554
  logger.info(LogFormatter.info(f"Uploading to {QUEUE_REPO}..."))
555
-
556
  # Construct the path in the dataset
557
- org_or_user = model_data["model_id"].split("/")[0] if "/" in model_data["model_id"] else ""
558
  model_path = model_data["model_id"].split("/")[-1]
559
  relative_path = f"{org_or_user}/{model_path}_eval_request_False_{model_data['precision']}_{model_data['weight_type']}.json"
560
-
561
  # Create a temporary file with the request
562
- with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
563
  json.dump(eval_entry, temp_file, indent=2)
564
  temp_file.flush()
565
  temp_path = temp_file.name
566
-
567
  # Upload file directly
568
  self.hf_api.upload_file(
569
  path_or_fileobj=temp_path,
@@ -571,14 +639,14 @@ class ModelService(HuggingFaceService):
571
  repo_id=QUEUE_REPO,
572
  repo_type="dataset",
573
  commit_message=f"Add {model_data['model_id']} to eval queue",
574
- token=self.token
575
  )
576
-
577
  # Clean up temp file
578
  os.unlink(temp_path)
579
-
580
  logger.info(LogFormatter.success("Upload successful"))
581
-
582
  except Exception as e:
583
  logger.error(LogFormatter.error("Upload failed", e))
584
  raise
@@ -586,15 +654,19 @@ class ModelService(HuggingFaceService):
586
  # Add automatic vote
587
  try:
588
  logger.info(LogFormatter.subsection("AUTOMATIC VOTE"))
589
- logger.info(LogFormatter.info(f"Adding upvote for {model_data['model_id']} by {user_id}"))
590
  await self.vote_service.add_vote(
591
  model_data["model_id"],
592
  user_id,
593
  "up",
594
  {
595
  "precision": model_data["precision"],
596
- "revision": model_data["revision"]
597
- }
598
  )
599
  logger.info(LogFormatter.success("Vote recorded successfully"))
600
  except Exception as e:
@@ -603,14 +675,14 @@ class ModelService(HuggingFaceService):
603
 
604
  return {
605
  "status": "success",
606
- "message": "The model was submitted successfully, and the vote has been recorded"
607
  }
608
 
609
  async def get_model_status(self, model_id: str) -> Dict[str, Any]:
610
  """Get evaluation status of a model"""
611
  logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
612
  eval_path = self.eval_requests_path
613
-
614
  for user_folder in eval_path.iterdir():
615
  if user_folder.is_dir():
616
  for file in user_folder.glob("*.json"):
@@ -620,24 +692,26 @@ class ModelService(HuggingFaceService):
620
  status = {
621
  "status": data["status"],
622
  "submitted_time": data["submitted_time"],
623
- "job_id": data.get("job_id", -1)
624
  }
625
  logger.info(LogFormatter.success("Status found"))
626
  for line in LogFormatter.tree(status, "Model Status"):
627
  logger.info(line)
628
  return status
629
-
630
  logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
631
  return {"status": "not_found"}
632
 
633
- async def get_organization_submissions(self, organization: str, days: int = 7) -> List[Dict[str, Any]]:
634
  """Get all submissions from a user in the last n days"""
635
  try:
636
  # Get all models
637
  all_models = await self.get_models()
638
  current_time = datetime.now(timezone.utc)
639
  cutoff_time = current_time - timedelta(days=days)
640
-
641
  # Filter models by submitter and submission time
642
  user_submissions = []
643
  for status, models in all_models.items():
@@ -650,19 +724,21 @@ class ModelService(HuggingFaceService):
650
  )
651
  # Check if within time window
652
  if submit_time > cutoff_time:
653
- user_submissions.append({
654
- "name": model["name"],
655
- "status": status,
656
- "submission_time": model["submission_time"],
657
- "precision": model["precision"]
658
- })
659
-
 
  return sorted(
661
- user_submissions,
662
- key=lambda x: x["submission_time"],
663
- reverse=True
664
  )
665
-
666
  except Exception as e:
667
- logger.error(LogFormatter.error(f"Failed to get submissions for {organization}", e))
668
- raise
 
5
  from pathlib import Path
6
  import logging
7
  import aiohttp
 
8
  import time
9
+ from huggingface_hub import HfApi
10
  from huggingface_hub.utils import build_hf_headers
11
  from datasets import disable_progress_bar
12
  import sys
13
  import contextlib
 
14
  import tempfile
15
 
16
+ from app.config import HF_TOKEN
17
+ from app.config.hf_config import QUEUE_REPO
18
  from app.services.hf_service import HuggingFaceService
19
  from app.utils.model_validation import ModelValidator
20
  from app.services.votes import VoteService
 
26
 
27
  logger = logging.getLogger(__name__)
28
 
29
+
30
  # Context manager to temporarily disable stdout and stderr
31
  @contextlib.contextmanager
32
  def suppress_output():
33
  stdout = sys.stdout
34
  stderr = sys.stderr
35
+ devnull = open(os.devnull, "w")
36
  try:
37
  sys.stdout = devnull
38
  sys.stderr = devnull
 
42
  sys.stderr = stderr
43
  devnull.close()
44
 
45
+
46
  class ProgressTracker:
47
  def __init__(self, total: int, desc: str = "Progress", update_frequency: int = 10):
48
  self.total = total
 
51
  self.start_time = time.time()
52
  self.update_frequency = update_frequency # Percentage steps
53
  self.last_update = -1
54
+
55
  # Initial log with fancy formatting
56
  logger.info(LogFormatter.section(desc))
57
  logger.info(LogFormatter.info(f"Starting processing of {total:,} items..."))
58
  sys.stdout.flush()
59
+
60
  def update(self, n: int = 1):
61
  self.current += n
62
  current_percentage = (self.current * 100) // self.total
63
+
64
  # Only update on frequency steps (e.g., 0%, 10%, 20%, etc.)
65
+ if (
66
+ current_percentage >= self.last_update + self.update_frequency
67
+ or current_percentage == 100
68
+ ):
69
  elapsed = time.time() - self.start_time
70
  rate = self.current / elapsed if elapsed > 0 else 0
71
  remaining = (self.total - self.current) / rate if rate > 0 else 0
72
+
73
  # Create progress stats
74
  stats = {
75
  "Progress": LogFormatter.progress_bar(self.current, self.total),
76
  "Items": f"{self.current:,}/{self.total:,}",
77
  "Time": f"⏱️ {elapsed:.1f}s elapsed, {remaining:.1f}s remaining",
78
+ "Rate": f"🚀 {rate:.1f} items/s",
79
  }
80
+
81
  # Log progress using tree format
82
  for line in LogFormatter.tree(stats):
83
  logger.info(line)
84
  sys.stdout.flush()
85
+
86
+ self.last_update = (
87
+ current_percentage // self.update_frequency
88
+ ) * self.update_frequency
89
+
90
  def close(self):
91
  elapsed = time.time() - self.start_time
92
  rate = self.total / elapsed if elapsed > 0 else 0
93
+
94
  # Final summary with fancy formatting
95
  logger.info(LogFormatter.section("COMPLETED"))
96
  stats = {
97
  "Total": f"{self.total:,} items",
98
  "Time": f"{elapsed:.1f}s",
99
+ "Rate": f"{rate:.1f} items/s",
100
  }
101
  for line in LogFormatter.stats(stats):
102
  logger.info(line)
103
+ logger.info("=" * 50)
104
  sys.stdout.flush()
105
 
106
+
107
  class ModelService(HuggingFaceService):
108
+ _instance: Optional["ModelService"] = None
109
  _initialized = False
110
+
111
  def __new__(cls):
112
  if cls._instance is None:
113
  logger.info(LogFormatter.info("Creating new ModelService instance"))
 
115
  return cls._instance
116
 
117
  def __init__(self):
118
+ if not hasattr(self, "_init_done"):
119
  logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
120
  super().__init__()
121
  self.validator = ModelValidator()
122
  self.vote_service = VoteService()
123
  self.eval_requests_path = cache_config.eval_requests_file
124
+ logger.info(
125
+ LogFormatter.info(
126
+ f"Using eval requests path: {self.eval_requests_path}"
127
+ )
128
+ )
129
+
130
  self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
131
  self.hf_api = HfApi(token=HF_TOKEN)
132
  self.cached_models = None
 
135
  self._init_done = True
136
  logger.info(LogFormatter.success("Initialization complete"))
137
 
138
+ async def _download_and_process_file(
139
+ self, file: str, session: aiohttp.ClientSession, progress: ProgressTracker
140
+ ) -> Optional[Dict]:
141
  """Download and process a file asynchronously"""
142
  try:
143
  # Build file URL
144
  url = f"https://huggingface.co/datasets/{QUEUE_REPO}/resolve/main/{file}"
145
  headers = build_hf_headers(token=self.token)
146
+
147
  # Download file
148
  async with session.get(url, headers=headers) as response:
149
  if response.status != 200:
150
+ logger.error(
151
+ LogFormatter.error(
152
+ f"Failed to download {file}", f"HTTP {response.status}"
153
+ )
154
+ )
155
  progress.update()
156
  return None
157
+
158
  try:
159
  # First read content as text
160
  text_content = await response.text()
161
  # Then parse JSON
162
  content = json.loads(text_content)
163
  except json.JSONDecodeError as e:
164
+ logger.error(
165
+ LogFormatter.error(f"Failed to decode JSON from {file}", e)
166
+ )
167
  progress.update()
168
  return None
169
+
170
  # Get status and determine target status
171
  status = content.get("status", "PENDING").upper()
172
  target_status = None
173
  status_map = {
174
  "PENDING": ["PENDING"],
175
  "EVALUATING": ["RUNNING"],
176
+ "FINISHED": ["FINISHED"],
177
  }
178
+
179
  for target, source_statuses in status_map.items():
180
  if status in source_statuses:
181
  target_status = target
182
  break
183
+
184
  if not target_status:
185
  progress.update()
186
  return None
187
+
188
  # Calculate wait time
189
  try:
190
+ submit_time = datetime.fromisoformat(
191
+ content["submitted_time"].replace("Z", "+00:00")
192
+ )
193
  if submit_time.tzinfo is None:
194
  submit_time = submit_time.replace(tzinfo=timezone.utc)
195
  current_time = datetime.now(timezone.utc)
196
  wait_time = current_time - submit_time
197
+
198
  model_info = {
199
  "name": content["model"],
200
  "submitter": content.get("sender", "Unknown"),
 
202
  "wait_time": f"{wait_time.total_seconds():.1f}s",
203
  "submission_time": content["submitted_time"],
204
  "status": target_status,
205
+ "precision": content.get("precision", "Unknown"),
206
  }
207
+
208
  progress.update()
209
  return model_info
210
+
211
  except (ValueError, TypeError) as e:
212
  logger.error(LogFormatter.error(f"Failed to process {file}", e))
213
  progress.update()
214
  return None
215
+
216
  except Exception as e:
217
  logger.error(LogFormatter.error(f"Failed to load {file}", e))
218
  progress.update()
 
223
  try:
224
  logger.info(LogFormatter.section("CACHE REFRESH"))
225
  self._log_repo_operation("read", QUEUE_REPO, "Refreshing models cache")
226
+
227
  # Initialize models dictionary
228
+ models = {"finished": [], "evaluating": [], "pending": []}
229
+
230
  try:
231
  logger.info(LogFormatter.subsection("DATASET LOADING"))
232
  logger.info(LogFormatter.info("Loading dataset..."))
233
+
234
  # Download entire dataset snapshot
235
  with suppress_output():
236
  local_dir = self.hf_api.snapshot_download(
237
+ repo_id=QUEUE_REPO, repo_type="dataset", token=self.token
238
  )
239
+
240
  # List JSON files in local directory
241
  local_path = Path(local_dir)
242
  json_files = list(local_path.glob("**/*.json"))
243
  total_files = len(json_files)
244
+
245
  # Log repository stats
246
  stats = {
247
  "Total_Files": total_files,
 
249
  }
250
  for line in LogFormatter.stats(stats, "Repository Statistics"):
251
  logger.info(line)
252
+
253
  if not json_files:
254
  raise Exception("No JSON files found in repository")
255
+
256
  # Initialize progress tracker
257
  progress = ProgressTracker(total_files, "PROCESSING FILES")
258
+
259
  # Process local files
260
  model_submissions = {} # Dict to track latest submission for each (model_id, revision, precision)
261
  for file_path in json_files:
262
  try:
263
+ with open(file_path, "r") as f:
264
  content = json.load(f)
265
+
266
  # Get status and determine target status
267
  status = content.get("status", "PENDING").upper()
268
  target_status = None
269
  status_map = {
270
  "PENDING": ["PENDING"],
271
  "EVALUATING": ["RUNNING"],
272
+ "FINISHED": ["FINISHED"],
273
  }
274
+
275
  for target, source_statuses in status_map.items():
276
  if status in source_statuses:
277
  target_status = target
278
  break
279
+
280
  if not target_status:
281
  progress.update()
282
  continue
283
+
284
  # Calculate wait time
285
  try:
286
+ submit_time = datetime.fromisoformat(
287
+ content["submitted_time"].replace("Z", "+00:00")
288
+ )
289
  if submit_time.tzinfo is None:
290
  submit_time = submit_time.replace(tzinfo=timezone.utc)
291
  current_time = datetime.now(timezone.utc)
292
  wait_time = current_time - submit_time
293
+
294
  model_info = {
295
  "name": content["model"],
296
  "submitter": content.get("sender", "Unknown"),
 
298
  "wait_time": f"{wait_time.total_seconds():.1f}s",
299
  "submission_time": content["submitted_time"],
300
  "status": target_status,
301
+ "precision": content.get("precision", "Unknown"),
302
  }
303
+
304
  # Use (model_id, revision, precision) as key to track latest submission
305
+ key = (
306
+ content["model"],
307
+ content["revision"],
308
+ content.get("precision", "Unknown"),
309
+ )
310
+ if (
311
+ key not in model_submissions
312
+ or submit_time
313
+ > datetime.fromisoformat(
314
+ model_submissions[key]["submission_time"].replace(
315
+ "Z", "+00:00"
316
+ )
317
+ )
318
+ ):
319
  model_submissions[key] = model_info
320
+
321
  except (ValueError, TypeError) as e:
322
+ logger.error(
323
+ LogFormatter.error(
324
+ f"Failed to process {file_path.name}", e
325
+ )
326
+ )
327
+
328
  except Exception as e:
329
+ logger.error(
330
+ LogFormatter.error(f"Failed to load {file_path.name}", e)
331
+ )
332
  finally:
333
  progress.update()
334
+
335
  # Populate models dict with deduplicated submissions
336
  for model_info in model_submissions.values():
337
  models[model_info["status"].lower()].append(model_info)
338
+
339
  progress.close()
340
+
341
  # Final summary with fancy formatting
342
  logger.info(LogFormatter.section("CACHE SUMMARY"))
343
  stats = {
344
  "Finished": len(models["finished"]),
345
  "Evaluating": len(models["evaluating"]),
346
+ "Pending": len(models["pending"]),
347
  }
348
  for line in LogFormatter.stats(stats, "Models by Status"):
349
  logger.info(line)
350
+ logger.info("=" * 50)
351
+
352
  except Exception as e:
353
  logger.error(LogFormatter.error("Error processing files", e))
354
  raise
355
+
356
  # Update cache
357
  self.cached_models = models
358
  self.last_cache_update = time.time()
359
  logger.info(LogFormatter.success("Cache updated successfully"))
360
+
361
  return models
362
+
363
  except Exception as e:
364
  logger.error(LogFormatter.error("Cache refresh failed", e))
365
  raise
 
367
  async def initialize(self):
368
  """Initialize the model service"""
369
  if self._initialized:
370
+ logger.info(
371
+ LogFormatter.info("Service already initialized, using cached data")
372
+ )
373
  return
374
+
375
  try:
376
  logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
377
+
378
  # Check if cache already exists
379
  cache_path = cache_config.get_cache_path("datasets")
380
  if not cache_path.exists() or not any(cache_path.iterdir()):
381
+ logger.info(
382
+ LogFormatter.info(
383
+ "No existing cache found, initializing datasets cache..."
384
+ )
385
+ )
386
  cache_config.flush_cache("datasets")
387
  else:
388
  logger.info(LogFormatter.info("Using existing datasets cache"))
389
+
390
  # Ensure eval requests directory exists
391
  self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
392
+ logger.info(
393
+ LogFormatter.info(f"Eval requests directory: {self.eval_requests_path}")
394
+ )
395
+
396
  # List existing files
397
  if self.eval_requests_path.exists():
398
  files = list(self.eval_requests_path.glob("**/*.json"))
399
  stats = {
400
  "Total_Files": len(files),
401
+ "Directory": str(self.eval_requests_path),
402
  }
403
  for line in LogFormatter.stats(stats, "Eval Requests"):
404
  logger.info(line)
405
+
406
  # Load initial cache
407
  await self._refresh_models_cache()
408
+
409
  self._initialized = True
410
  logger.info(LogFormatter.success("Model service initialization complete"))
411
+
412
  except Exception as e:
413
  logger.error(LogFormatter.error("Initialization failed", e))
414
  raise
 
416
  async def get_models(self) -> Dict[str, List[Dict[str, Any]]]:
417
  """Get all models with their status"""
418
  if not self._initialized:
419
+ logger.info(
420
+ LogFormatter.info("Service not initialized, initializing now...")
421
+ )
422
  await self.initialize()
423
+
424
  current_time = time.time()
425
  cache_age = current_time - self.last_cache_update
426
+
427
  # Check if cache needs refresh
428
  if not self.cached_models:
429
+ logger.info(
430
+ LogFormatter.info("No cached data available, refreshing cache...")
431
+ )
432
  return await self._refresh_models_cache()
433
  elif cache_age > self.cache_ttl:
434
+ logger.info(
435
+ LogFormatter.info(
436
+ f"Cache expired ({cache_age:.1f}s old, TTL: {self.cache_ttl}s)"
437
+ )
438
+ )
439
  return await self._refresh_models_cache()
440
  else:
441
  logger.info(LogFormatter.info(f"Using cached data ({cache_age:.1f}s old)"))
442
  return self.cached_models
443
 
444
  async def submit_model(
445
+ self, model_data: Dict[str, Any], user_id: str
446
  ) -> Dict[str, Any]:
447
  logger.info(LogFormatter.section("MODEL SUBMISSION"))
448
+ self._log_repo_operation(
449
+ "write",
450
+ QUEUE_REPO,
451
+ f"Submitting model {model_data['model_id']} by {user_id}",
452
+ )
453
  stats = {
454
  "Model": model_data["model_id"],
455
  "User": user_id,
456
  "Revision": model_data["revision"],
457
  "Precision": model_data["precision"],
458
+ "Type": model_data["model_type"],
459
  }
460
  for line in LogFormatter.tree(stats, "Submission Details"):
461
  logger.info(line)
462
+
463
  # Validate required fields
464
  required_fields = [
465
+ "model_id",
466
+ "base_model",
467
+ "revision",
468
+ "precision",
469
+ "weight_type",
470
+ "model_type",
471
+ "use_chat_template",
472
  ]
473
  for field in required_fields:
474
  if field not in model_data:
 
477
  # Get model info and validate it exists on HuggingFace
478
  try:
479
  logger.info(LogFormatter.subsection("MODEL VALIDATION"))
480
+
481
  # Get the model info to check if it exists
482
  model_info = self.hf_api.model_info(
483
  model_data["model_id"],
484
  revision=model_data["revision"],
485
+ token=self.token,
486
  )
487
+
488
  if not model_info:
489
+ raise Exception(
490
+ f"Model {model_data['model_id']} not found on HuggingFace Hub"
491
+ )
492
+
493
  logger.info(LogFormatter.success("Model exists on HuggingFace Hub"))
494
+
495
  except Exception as e:
496
  logger.error(LogFormatter.error("Model validation failed", e))
497
  raise
498
+
499
  # Update model revision with commit sha
500
  model_data["revision"] = model_info.sha
501
 
 
503
  try:
504
  logger.info(LogFormatter.subsection("CHECKING EXISTING SUBMISSIONS"))
505
  existing_models = await self.get_models()
506
+
507
  # Call the official provider status check
508
+ (
509
+ is_valid,
510
+ error_message,
511
+ ) = await self.validator.check_official_provider_status(
512
+ model_data["model_id"], existing_models
513
  )
514
  if not is_valid:
515
  raise ValueError(error_message)
 
517
  # Check in all statuses (pending, evaluating, finished)
518
  for status, models in existing_models.items():
519
  for model in models:
520
+ if (
521
+ model["name"] == model_data["model_id"]
522
+ and model["revision"] == model_data["revision"]
523
+ ):
524
  error_msg = f"Model {model_data['model_id']} revision {model_data['revision']} is already in the system with status: {status}"
525
+ logger.error(
526
+ LogFormatter.error("Submission rejected", error_msg)
527
+ )
528
  raise ValueError(error_msg)
529
+
530
  logger.info(LogFormatter.success("No existing submission found"))
531
  except ValueError:
532
  raise
 
536
 
537
  # Check that model on hub and valid
538
  valid, error, model_config = await self.validator.is_model_on_hub(
539
+ model_data["model_id"], model_data["revision"], test_tokenizer=True
540
  )
541
  if not valid:
542
  logger.error(LogFormatter.error("Model on hub validation failed", error))
 
557
  model_info,
558
  model_data["precision"],
559
  model_data["base_model"],
560
+ revision=model_data["revision"],
561
  )
562
  if model_size is None:
563
  logger.error(LogFormatter.error("Model size validation failed", error))
564
  raise Exception(error)
565
+ logger.info(
566
+ LogFormatter.success(f"Model size validation passed: {model_size:.1f}B")
567
+ )
568
 
569
  # Size limits based on precision
570
  if model_data["precision"] in ["float16", "bfloat16"] and model_size > 100:
 
575
  # Chat template validation if requested
576
  if model_data["use_chat_template"]:
577
  valid, error = await self.validator.check_chat_template(
578
+ model_data["model_id"], model_data["revision"]
 
579
  )
580
  if not valid:
581
+ logger.error(
582
+ LogFormatter.error("Chat template validation failed", error)
583
+ )
584
  raise Exception(error)
585
  logger.info(LogFormatter.success("Chat template validation passed"))
586
 
587
+ architectures = model_info.config.get("architectures", "")
 
588
  if architectures:
589
  architectures = ";".join(architectures)
590
 
 
603
  "job_id": -1,
604
  "job_start_time": None,
605
  "use_chat_template": model_data["use_chat_template"],
606
+ "sender": user_id,
607
  }
608
+
609
  logger.info(LogFormatter.subsection("EVALUATION ENTRY"))
610
  for line in LogFormatter.tree(eval_entry):
611
  logger.info(line)
 
614
  try:
615
  logger.info(LogFormatter.subsection("UPLOADING TO HUGGINGFACE"))
616
  logger.info(LogFormatter.info(f"Uploading to {QUEUE_REPO}..."))
617
+
618
  # Construct the path in the dataset
619
+ org_or_user = (
620
+ model_data["model_id"].split("/")[0]
621
+ if "/" in model_data["model_id"]
622
+ else ""
623
+ )
624
  model_path = model_data["model_id"].split("/")[-1]
625
  relative_path = f"{org_or_user}/{model_path}_eval_request_False_{model_data['precision']}_{model_data['weight_type']}.json"
626
+
627
  # Create a temporary file with the request
628
+ with tempfile.NamedTemporaryFile(
629
+ mode="w", suffix=".json", delete=False
630
+ ) as temp_file:
631
  json.dump(eval_entry, temp_file, indent=2)
632
  temp_file.flush()
633
  temp_path = temp_file.name
634
+
635
  # Upload file directly
636
  self.hf_api.upload_file(
637
  path_or_fileobj=temp_path,
 
639
  repo_id=QUEUE_REPO,
640
  repo_type="dataset",
641
  commit_message=f"Add {model_data['model_id']} to eval queue",
642
+ token=self.token,
643
  )
644
+
645
  # Clean up temp file
646
  os.unlink(temp_path)
647
+
648
  logger.info(LogFormatter.success("Upload successful"))
649
+
650
  except Exception as e:
651
  logger.error(LogFormatter.error("Upload failed", e))
652
  raise
 
654
  # Add automatic vote
655
  try:
656
  logger.info(LogFormatter.subsection("AUTOMATIC VOTE"))
657
+ logger.info(
658
+ LogFormatter.info(
659
+ f"Adding upvote for {model_data['model_id']} by {user_id}"
660
+ )
661
+ )
662
  await self.vote_service.add_vote(
663
  model_data["model_id"],
664
  user_id,
665
  "up",
666
  {
667
  "precision": model_data["precision"],
668
+ "revision": model_data["revision"],
669
+ },
670
  )
671
  logger.info(LogFormatter.success("Vote recorded successfully"))
672
  except Exception as e:
 
675
 
676
  return {
677
  "status": "success",
678
+ "message": "The model was submitted successfully, and the vote has been recorded",
679
  }
680
 
681
  async def get_model_status(self, model_id: str) -> Dict[str, Any]:
682
  """Get evaluation status of a model"""
683
  logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
684
  eval_path = self.eval_requests_path
685
+
686
  for user_folder in eval_path.iterdir():
687
  if user_folder.is_dir():
688
  for file in user_folder.glob("*.json"):
 
692
  status = {
693
  "status": data["status"],
694
  "submitted_time": data["submitted_time"],
695
+ "job_id": data.get("job_id", -1),
696
  }
697
  logger.info(LogFormatter.success("Status found"))
698
  for line in LogFormatter.tree(status, "Model Status"):
699
  logger.info(line)
700
  return status
701
+
702
  logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
703
  return {"status": "not_found"}
704
 
705
+ async def get_organization_submissions(
706
+ self, organization: str, days: int = 7
707
+ ) -> List[Dict[str, Any]]:
708
  """Get all submissions from a user in the last n days"""
709
  try:
710
  # Get all models
711
  all_models = await self.get_models()
712
  current_time = datetime.now(timezone.utc)
713
  cutoff_time = current_time - timedelta(days=days)
714
+
715
  # Filter models by submitter and submission time
716
  user_submissions = []
717
  for status, models in all_models.items():
 
724
  )
725
  # Check if within time window
726
  if submit_time > cutoff_time:
727
+ user_submissions.append(
728
+ {
729
+ "name": model["name"],
730
+ "status": status,
731
+ "submission_time": model["submission_time"],
732
+ "precision": model["precision"],
733
+ }
734
+ )
735
+
736
  return sorted(
737
+ user_submissions, key=lambda x: x["submission_time"], reverse=True
738
  )
739
+
740
  except Exception as e:
741
+ logger.error(
742
+ LogFormatter.error(f"Failed to get submissions for {organization}", e)
743
+ )
744
+ raise
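
The reformatted _refresh_models_cache above still deduplicates queue files by keeping only the newest submission per (model, revision, precision) key. A minimal sketch of that reduction, assuming entries shaped like the queue JSON shown above (ISO-8601 submitted_time, possibly with a trailing Z):

    from datetime import datetime

    def latest_submissions(entries):
        latest = {}
        for e in entries:
            key = (e["model"], e["revision"], e.get("precision", "Unknown"))
            ts = datetime.fromisoformat(e["submitted_time"].replace("Z", "+00:00"))
            if key not in latest or ts > latest[key][0]:
                latest[key] = (ts, e)  # newer timestamp wins
        return [e for _, e in latest.values()]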
backend/app/services/rate_limiter.py DELETED
@@ -1,72 +0,0 @@
1
- """
2
- import logging
3
- from datetime import datetime, timedelta, timezone
4
- from typing import Tuple, Dict, List
5
-
6
- logger = logging.getLogger(__name__)
7
-
8
- class RateLimiter:
9
- def __init__(self, period_days: int = 7, quota: int = 5):
10
- self.period_days = period_days
11
- self.quota = quota
12
- self.submission_history: Dict[str, List[datetime]] = {}
13
- self.higher_quota_users = set() # Users with higher quotas
14
- self.unlimited_users = set() # Users with no quota limits
15
-
16
- def add_unlimited_user(self, user_id: str):
17
- """Add a user to the unlimited users list"""
18
- self.unlimited_users.add(user_id)
19
-
20
- def add_higher_quota_user(self, user_id: str):
21
- """Add a user to the higher quota users list"""
22
- self.higher_quota_users.add(user_id)
23
-
24
- def record_submission(self, user_id: str):
25
- """Record a new submission for a user"""
26
- current_time = datetime.now(timezone.utc)
27
- if user_id not in self.submission_history:
28
- self.submission_history[user_id] = []
29
- self.submission_history[user_id].append(current_time)
30
-
31
- def clean_old_submissions(self, user_id: str):
32
- """Remove submissions older than the period"""
33
- if user_id not in self.submission_history:
34
- return
35
-
36
- current_time = datetime.now(timezone.utc)
37
- cutoff_time = current_time - timedelta(days=self.period_days)
38
-
39
- self.submission_history[user_id] = [
40
- time for time in self.submission_history[user_id]
41
- if time > cutoff_time
42
- ]
43
-
44
- async def check_rate_limit(self, user_id: str) -> Tuple[bool, str]:
45
- """Check if a user has exceeded their rate limit
46
-
47
- Returns:
48
- Tuple[bool, str]: (is_allowed, error_message)
49
- """
50
- # Unlimited users bypass all checks
51
- if user_id in self.unlimited_users:
52
- return True, ""
53
-
54
- # Clean old submissions
55
- self.clean_old_submissions(user_id)
56
-
57
- # Get current submission count
58
- submission_count = len(self.submission_history.get(user_id, []))
59
-
60
- # Calculate user's quota
61
- user_quota = self.quota * 2 if user_id in self.higher_quota_users else self.quota
62
-
63
- # Check if user has exceeded their quota
64
- if submission_count >= user_quota:
65
- error_msg = (
66
- f"User '{user_id}' has reached the limit of {user_quota} submissions "
67
- f"in the last {self.period_days} days. Please wait before submitting again."
68
- )
69
- return False, error_msg
70
-
71
- return True, ""
72
- """
backend/app/services/votes.py CHANGED
@@ -3,7 +3,6 @@ from typing import Dict, Any, List, Set, Tuple, Optional
3
  import json
4
  import logging
5
  import asyncio
6
- from pathlib import Path
7
  import aiohttp
8
  from huggingface_hub import HfApi
9
  import tempfile
@@ -11,23 +10,24 @@ import os
11
 
12
  from app.services.hf_service import HuggingFaceService
13
  from app.config import HF_TOKEN
14
- from app.config.hf_config import HF_ORGANIZATION, VOTES_REPO
15
  from app.core.cache import cache_config
16
  from app.core.formatting import LogFormatter
17
 
18
  logger = logging.getLogger(__name__)
19
 
 
20
  class VoteService(HuggingFaceService):
21
- _instance: Optional['VoteService'] = None
22
  _initialized = False
23
-
24
  def __new__(cls):
25
  if cls._instance is None:
26
  cls._instance = super(VoteService, cls).__new__(cls)
27
  return cls._instance
28
 
29
  def __init__(self):
30
- if not hasattr(self, '_init_done'):
31
  super().__init__()
32
  self.votes_file = cache_config.votes_file
33
  self.votes_to_upload: List[Dict[str, Any]] = []
@@ -48,41 +48,43 @@ class VoteService(HuggingFaceService):
48
  if self._initialized:
49
  await self._check_for_new_votes()
50
  return
51
-
52
  try:
53
  logger.info(LogFormatter.section("VOTE SERVICE INITIALIZATION"))
54
-
55
  # Ensure votes directory exists
56
  self.votes_file.parent.mkdir(parents=True, exist_ok=True)
57
-
58
  # Load remote votes
59
  remote_votes = await self._fetch_remote_votes()
60
  if remote_votes:
61
- logger.info(LogFormatter.info(f"Loaded {len(remote_votes)} votes from hub"))
62
-
 
 
63
  # Save to local file
64
- with open(self.votes_file, 'w') as f:
65
  for vote in remote_votes:
66
  json.dump(vote, f)
67
- f.write('\n')
68
-
69
  # Load into memory
70
  await self._load_existing_votes()
71
  else:
72
  logger.warning(LogFormatter.warning("No votes found on hub"))
73
-
74
  self._initialized = True
75
  self._last_sync = datetime.now(timezone.utc)
76
-
77
  # Final summary
78
  stats = {
79
  "Total_Votes": self._total_votes,
80
- "Last_Sync": self._last_sync.strftime("%Y-%m-%d %H:%M:%S UTC")
81
  }
82
  logger.info(LogFormatter.section("INITIALIZATION COMPLETE"))
83
  for line in LogFormatter.stats(stats):
84
  logger.info(line)
85
-
86
  except Exception as e:
87
  logger.error(LogFormatter.error("Initialization failed", e))
88
  raise
@@ -91,7 +93,7 @@ class VoteService(HuggingFaceService):
91
  """Fetch votes from HF hub"""
92
  url = f"https://huggingface.co/datasets/{VOTES_REPO}/raw/main/votes_data.jsonl"
93
  headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
94
-
95
  try:
96
  async with aiohttp.ClientSession() as session:
97
  async with session.get(url, headers=headers) as response:
@@ -106,7 +108,9 @@ class VoteService(HuggingFaceService):
106
  continue
107
  return votes
108
  else:
109
- logger.error(f"Failed to get remote votes: HTTP {response.status}")
 
 
110
  return []
111
  except Exception as e:
112
  logger.error(f"Error fetching remote votes: {str(e)}")
@@ -117,18 +121,20 @@ class VoteService(HuggingFaceService):
117
  try:
118
  remote_votes = await self._fetch_remote_votes()
119
  if len(remote_votes) != self._total_votes:
120
- logger.info(f"Vote count changed: Local ({self._total_votes}) ≠ Remote ({len(remote_votes)})")
 
 
121
  # Save to local file
122
- with open(self.votes_file, 'w') as f:
123
  for vote in remote_votes:
124
  json.dump(vote, f)
125
- f.write('\n')
126
-
127
  # Reload into memory
128
  await self._load_existing_votes()
129
  else:
130
  logger.info("Votes are in sync")
131
-
132
  except Exception as e:
133
  logger.error(f"Error checking for new votes: {str(e)}")
134
 
@@ -136,25 +142,31 @@ class VoteService(HuggingFaceService):
136
  """Sync votes with HuggingFace hub"""
137
  try:
138
  logger.info(LogFormatter.section("VOTE SYNC"))
139
-
140
  # Get current remote votes
141
  remote_votes = await self._fetch_remote_votes()
142
  logger.info(LogFormatter.info(f"Loaded {len(remote_votes)} votes from hub"))
143
-
144
  # If we have pending votes to upload
145
  if self.votes_to_upload:
146
- logger.info(LogFormatter.info(f"Adding {len(self.votes_to_upload)} pending votes..."))
147
-
 
 
 
 
148
  # Add new votes to remote votes
149
  remote_votes.extend(self.votes_to_upload)
150
-
151
  # Create temporary file with all votes
152
- with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) as temp_file:
 
 
153
  for vote in remote_votes:
154
  json.dump(vote, temp_file)
155
- temp_file.write('\n')
156
  temp_path = temp_file.name
157
-
158
  try:
159
  # Upload JSONL file directly
160
  self.hf_api.upload_file(
@@ -163,32 +175,34 @@ class VoteService(HuggingFaceService):
163
  repo_id=VOTES_REPO,
164
  repo_type="dataset",
165
  commit_message=f"Update votes: +{len(self.votes_to_upload)} new votes",
166
- token=self.token
167
  )
168
-
169
  # Clear pending votes only if upload succeeded
170
  self.votes_to_upload.clear()
171
- logger.info(LogFormatter.success("Pending votes uploaded successfully"))
172
-
 
 
173
  except Exception as e:
174
  logger.error(LogFormatter.error("Failed to upload votes to hub", e))
175
  raise
176
  finally:
177
  # Clean up temp file
178
  os.unlink(temp_path)
179
-
180
  # Update local state
181
- with open(self.votes_file, 'w') as f:
182
  for vote in remote_votes:
183
  json.dump(vote, f)
184
- f.write('\n')
185
-
186
  # Reload votes in memory
187
  await self._load_existing_votes()
188
  logger.info(LogFormatter.success("Sync completed successfully"))
189
 
190
  self._last_sync = datetime.now(timezone.utc)
191
-
192
  except Exception as e:
193
  logger.error(LogFormatter.error("Sync failed", e))
194
  raise
@@ -201,58 +215,73 @@ class VoteService(HuggingFaceService):
201
 
202
  try:
203
  logger.info(LogFormatter.section("LOADING VOTES"))
204
-
205
  # Clear existing data structures
206
  self.vote_check_set.clear()
207
  self._votes_by_model.clear()
208
  self._votes_by_user.clear()
209
-
210
  vote_count = 0
211
  latest_timestamp = None
212
-
213
  with open(self.votes_file, "r") as f:
214
  for line in f:
215
  try:
216
  vote = json.loads(line.strip())
217
  vote_count += 1
218
-
219
  # Track latest timestamp
220
  try:
221
- vote_timestamp = datetime.fromisoformat(vote["timestamp"].replace("Z", "+00:00"))
222
- if not latest_timestamp or vote_timestamp > latest_timestamp:
 
 
 
 
 
223
  latest_timestamp = vote_timestamp
224
- vote["timestamp"] = vote_timestamp.strftime("%Y-%m-%dT%H:%M:%SZ")
 
 
225
  except (KeyError, ValueError) as e:
226
- logger.warning(LogFormatter.warning(f"Invalid timestamp in vote: {str(e)}"))
 
 
 
 
227
  continue
228
-
229
  if vote_count % 1000 == 0:
230
- logger.info(LogFormatter.info(f"Processed {vote_count:,} votes..."))
231
-
 
 
232
  self._add_vote_to_memory(vote)
233
-
234
  except json.JSONDecodeError as e:
235
  logger.error(LogFormatter.error("Vote parsing failed", e))
236
  continue
237
  except Exception as e:
238
  logger.error(LogFormatter.error("Vote processing failed", e))
239
  continue
240
-
241
  self._total_votes = vote_count
242
  self._last_vote_timestamp = latest_timestamp
243
-
244
  # Final summary
245
  stats = {
246
  "Total_Votes": vote_count,
247
- "Latest_Vote": latest_timestamp.strftime("%Y-%m-%d %H:%M:%S UTC") if latest_timestamp else "None",
 
 
248
  "Unique_Models": len(self._votes_by_model),
249
- "Unique_Users": len(self._votes_by_user)
250
  }
251
-
252
  logger.info(LogFormatter.section("VOTE SUMMARY"))
253
  for line in LogFormatter.stats(stats):
254
  logger.info(line)
255
-
256
  except Exception as e:
257
  logger.error(LogFormatter.error("Failed to load votes", e))
258
  raise
@@ -265,25 +294,25 @@ class VoteService(HuggingFaceService):
265
  vote["model"],
266
  vote.get("revision", "main"),
267
  vote["username"],
268
- vote.get("precision", "unknown")
269
  )
270
-
271
  # Skip if we already have this vote
272
  if check_tuple in self.vote_check_set:
273
  return
274
-
275
  self.vote_check_set.add(check_tuple)
276
-
277
  # Update model votes
278
  if vote["model"] not in self._votes_by_model:
279
  self._votes_by_model[vote["model"]] = []
280
  self._votes_by_model[vote["model"]].append(vote)
281
-
282
  # Update user votes
283
  if vote["username"] not in self._votes_by_user:
284
  self._votes_by_user[vote["username"]] = []
285
  self._votes_by_user[vote["username"]].append(vote)
286
-
287
  except KeyError as e:
288
  logger.error(LogFormatter.error("Malformed vote data, missing key", str(e)))
289
  except Exception as e:
@@ -292,12 +321,14 @@ class VoteService(HuggingFaceService):
292
  async def get_user_votes(self, user_id: str) -> List[Dict[str, Any]]:
293
  """Get all votes from a specific user"""
294
  logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
295
-
296
  # Check if we need to refresh votes
297
- if (datetime.now(timezone.utc) - self._last_sync).total_seconds() > self._sync_interval:
 
 
298
  logger.info(LogFormatter.info("Cache expired, refreshing votes..."))
299
  await self._check_for_new_votes()
300
-
301
  votes = self._votes_by_user.get(user_id, [])
302
  logger.info(LogFormatter.success(f"Found {len(votes):,} votes"))
303
  return votes
@@ -305,14 +336,16 @@ class VoteService(HuggingFaceService):
305
  async def get_model_votes(self, model_id: str) -> Dict[str, Any]:
306
  """Get all votes for a specific model"""
307
  logger.info(LogFormatter.info(f"Fetching votes for model: {model_id}"))
308
-
309
  # Check if we need to refresh votes
310
- if (datetime.now(timezone.utc) - self._last_sync).total_seconds() > self._sync_interval:
 
 
311
  logger.info(LogFormatter.info("Cache expired, refreshing votes..."))
312
  await self._check_for_new_votes()
313
-
314
  votes = self._votes_by_model.get(model_id, [])
315
-
316
  # Group votes by revision and precision
317
  votes_by_config = {}
318
  for vote in votes:
@@ -323,23 +356,23 @@ class VoteService(HuggingFaceService):
323
  votes_by_config[config_key] = {
324
  "revision": revision,
325
  "precision": precision,
326
- "count": 0
327
  }
328
  votes_by_config[config_key]["count"] += 1
329
-
330
  stats = {
331
  "Total_Votes": len(votes),
332
- **{f"Config_{k}": v["count"] for k, v in votes_by_config.items()}
333
  }
334
-
335
  logger.info(LogFormatter.section("VOTE STATISTICS"))
336
  for line in LogFormatter.stats(stats):
337
  logger.info(line)
338
-
339
  return {
340
  "total_votes": len(votes),
341
  "votes_by_config": votes_by_config,
342
- "votes": votes
343
  }
344
 
345
  async def _get_model_revision(self, model_id: str) -> str:
@@ -348,60 +381,86 @@ class VoteService(HuggingFaceService):
348
  for attempt in range(self._max_retries):
349
  try:
350
  model_info = await asyncio.to_thread(self.hf_api.model_info, model_id)
351
- logger.info(f"Successfully got revision {model_info.sha} for model {model_id}")
 
 
352
  return model_info.sha
353
  except Exception as e:
354
- logger.error(f"Error getting model revision for {model_id} (attempt {attempt + 1}): {str(e)}")
 
 
355
  if attempt < self._max_retries - 1:
356
  retry_delay = self._retry_delay * (attempt + 1)
357
  logger.info(f"Retrying in {retry_delay} seconds...")
358
  await asyncio.sleep(retry_delay)
359
  else:
360
- logger.warning(f"Using 'main' as fallback revision for {model_id} after {self._max_retries} failed attempts")
 
 
361
  return "main"
362
 
363
- async def add_vote(self, model_id: str, user_id: str, vote_type: str, vote_data: Dict[str, Any] = None) -> Dict[str, Any]:
 
 
 
 
 
 
364
  """Add a vote for a model"""
365
  try:
366
- self._log_repo_operation("add", VOTES_REPO, f"Adding {vote_type} vote for {model_id} by {user_id}")
 
 
 
 
367
  logger.info(LogFormatter.section("NEW VOTE"))
368
  stats = {
369
  "Model": model_id,
370
  "User": user_id,
371
  "Type": vote_type,
372
- "Config": vote_data or {}
373
  }
374
  for line in LogFormatter.tree(stats, "Vote Details"):
375
  logger.info(line)
376
-
377
  # Use provided configuration or fallback to model info
378
  precision = None
379
  revision = None
380
-
381
  if vote_data:
382
  precision = vote_data.get("precision")
383
  revision = vote_data.get("revision")
384
-
385
  # If any info is missing, try to get it from model info
386
  if not all([precision, revision]):
387
  try:
388
- model_info = await asyncio.to_thread(self.hf_api.model_info, model_id)
389
- model_card_data = model_info.cardData if hasattr(model_info, 'cardData') else {}
390
-
 
 
 
 
391
  if not precision:
392
  precision = model_card_data.get("precision", "unknown")
393
  if not revision:
394
  revision = model_info.sha
395
  except Exception as e:
396
- logger.warning(LogFormatter.warning(f"Failed to get model info: {str(e)}. Using default values."))
 
 
 
 
397
  precision = precision or "unknown"
398
  revision = revision or "main"
399
-
400
  # Check if vote already exists with this configuration
401
  check_tuple = (model_id, revision, user_id, precision)
402
-
403
  if check_tuple in self.vote_check_set:
404
- raise ValueError(f"Vote already recorded for this model configuration (precision: {precision}, revision: {revision[:7] if revision else 'unknown'})")
 
 
405
 
406
  vote = {
407
  "model": model_id,
@@ -409,33 +468,33 @@ class VoteService(HuggingFaceService):
409
  "username": user_id,
410
  "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
411
  "vote_type": vote_type,
412
- "precision": precision
413
  }
414
 
415
  # Update local storage
416
  with open(self.votes_file, "a") as f:
417
  f.write(json.dumps(vote) + "\n")
418
-
419
  self._add_vote_to_memory(vote)
420
  self.votes_to_upload.append(vote)
421
-
422
  stats = {
423
  "Status": "Success",
424
  "Queue_Size": len(self.votes_to_upload),
425
  "Model_Config": {
426
  "Precision": precision,
427
- "Revision": revision[:7] if revision else "unknown"
428
- }
429
  }
430
  for line in LogFormatter.stats(stats):
431
  logger.info(line)
432
-
433
  # Force immediate sync
434
  logger.info(LogFormatter.info("Forcing immediate sync with hub"))
435
  await self._sync_with_hub()
436
-
437
  return {"status": "success", "message": "Vote added successfully"}
438
-
439
  except Exception as e:
440
  logger.error(LogFormatter.error("Failed to add vote", e))
441
  raise
 
3
  import json
4
  import logging
5
  import asyncio
 
6
  import aiohttp
7
  from huggingface_hub import HfApi
8
  import tempfile
 
10
 
11
  from app.services.hf_service import HuggingFaceService
12
  from app.config import HF_TOKEN
13
+ from app.config.hf_config import VOTES_REPO
14
  from app.core.cache import cache_config
15
  from app.core.formatting import LogFormatter
16
 
17
  logger = logging.getLogger(__name__)
18
 
19
+
20
  class VoteService(HuggingFaceService):
21
+ _instance: Optional["VoteService"] = None
22
  _initialized = False
23
+
24
  def __new__(cls):
25
  if cls._instance is None:
26
  cls._instance = super(VoteService, cls).__new__(cls)
27
  return cls._instance
28
 
29
  def __init__(self):
30
+ if not hasattr(self, "_init_done"):
31
  super().__init__()
32
  self.votes_file = cache_config.votes_file
33
  self.votes_to_upload: List[Dict[str, Any]] = []
 
48
  if self._initialized:
49
  await self._check_for_new_votes()
50
  return
51
+
52
  try:
53
  logger.info(LogFormatter.section("VOTE SERVICE INITIALIZATION"))
54
+
55
  # Ensure votes directory exists
56
  self.votes_file.parent.mkdir(parents=True, exist_ok=True)
57
+
58
  # Load remote votes
59
  remote_votes = await self._fetch_remote_votes()
60
  if remote_votes:
61
+ logger.info(
62
+ LogFormatter.info(f"Loaded {len(remote_votes)} votes from hub")
63
+ )
64
+
65
  # Save to local file
66
+ with open(self.votes_file, "w") as f:
67
  for vote in remote_votes:
68
  json.dump(vote, f)
69
+ f.write("\n")
70
+
71
  # Load into memory
72
  await self._load_existing_votes()
73
  else:
74
  logger.warning(LogFormatter.warning("No votes found on hub"))
75
+
76
  self._initialized = True
77
  self._last_sync = datetime.now(timezone.utc)
78
+
79
  # Final summary
80
  stats = {
81
  "Total_Votes": self._total_votes,
82
+ "Last_Sync": self._last_sync.strftime("%Y-%m-%d %H:%M:%S UTC"),
83
  }
84
  logger.info(LogFormatter.section("INITIALIZATION COMPLETE"))
85
  for line in LogFormatter.stats(stats):
86
  logger.info(line)
87
+
88
  except Exception as e:
89
  logger.error(LogFormatter.error("Initialization failed", e))
90
  raise
 
93
  """Fetch votes from HF hub"""
94
  url = f"https://huggingface.co/datasets/{VOTES_REPO}/raw/main/votes_data.jsonl"
95
  headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
96
+
97
  try:
98
  async with aiohttp.ClientSession() as session:
99
  async with session.get(url, headers=headers) as response:
 
108
  continue
109
  return votes
110
  else:
111
+ logger.error(
112
+ f"Failed to get remote votes: HTTP {response.status}"
113
+ )
114
  return []
115
  except Exception as e:
116
  logger.error(f"Error fetching remote votes: {str(e)}")
 
121
  try:
122
  remote_votes = await self._fetch_remote_votes()
123
  if len(remote_votes) != self._total_votes:
124
+ logger.info(
125
+ f"Vote count changed: Local ({self._total_votes}) ≠ Remote ({len(remote_votes)})"
126
+ )
127
  # Save to local file
128
+ with open(self.votes_file, "w") as f:
129
  for vote in remote_votes:
130
  json.dump(vote, f)
131
+ f.write("\n")
132
+
133
  # Reload into memory
134
  await self._load_existing_votes()
135
  else:
136
  logger.info("Votes are in sync")
137
+
138
  except Exception as e:
139
  logger.error(f"Error checking for new votes: {str(e)}")
140
 
 
142
  """Sync votes with HuggingFace hub"""
143
  try:
144
  logger.info(LogFormatter.section("VOTE SYNC"))
145
+
146
  # Get current remote votes
147
  remote_votes = await self._fetch_remote_votes()
148
  logger.info(LogFormatter.info(f"Loaded {len(remote_votes)} votes from hub"))
149
+
150
  # If we have pending votes to upload
151
  if self.votes_to_upload:
152
+ logger.info(
153
+ LogFormatter.info(
154
+ f"Adding {len(self.votes_to_upload)} pending votes..."
155
+ )
156
+ )
157
+
158
  # Add new votes to remote votes
159
  remote_votes.extend(self.votes_to_upload)
160
+
161
  # Create temporary file with all votes
162
+ with tempfile.NamedTemporaryFile(
163
+ mode="w", suffix=".jsonl", delete=False
164
+ ) as temp_file:
165
  for vote in remote_votes:
166
  json.dump(vote, temp_file)
167
+ temp_file.write("\n")
168
  temp_path = temp_file.name
169
+
170
  try:
171
  # Upload JSONL file directly
172
  self.hf_api.upload_file(
 
175
  repo_id=VOTES_REPO,
176
  repo_type="dataset",
177
  commit_message=f"Update votes: +{len(self.votes_to_upload)} new votes",
178
+ token=self.token,
179
  )
180
+
181
  # Clear pending votes only if upload succeeded
182
  self.votes_to_upload.clear()
183
+ logger.info(
184
+ LogFormatter.success("Pending votes uploaded successfully")
185
+ )
186
+
187
  except Exception as e:
188
  logger.error(LogFormatter.error("Failed to upload votes to hub", e))
189
  raise
190
  finally:
191
  # Clean up temp file
192
  os.unlink(temp_path)
193
+
194
  # Update local state
195
+ with open(self.votes_file, "w") as f:
196
  for vote in remote_votes:
197
  json.dump(vote, f)
198
+ f.write("\n")
199
+
200
  # Reload votes in memory
201
  await self._load_existing_votes()
202
  logger.info(LogFormatter.success("Sync completed successfully"))
203
 
204
  self._last_sync = datetime.now(timezone.utc)
205
+
206
  except Exception as e:
207
  logger.error(LogFormatter.error("Sync failed", e))
208
  raise
 
215
 
216
  try:
217
  logger.info(LogFormatter.section("LOADING VOTES"))
218
+
219
  # Clear existing data structures
220
  self.vote_check_set.clear()
221
  self._votes_by_model.clear()
222
  self._votes_by_user.clear()
223
+
224
  vote_count = 0
225
  latest_timestamp = None
226
+
227
  with open(self.votes_file, "r") as f:
228
  for line in f:
229
  try:
230
  vote = json.loads(line.strip())
231
  vote_count += 1
232
+
233
  # Track latest timestamp
234
  try:
235
+ vote_timestamp = datetime.fromisoformat(
236
+ vote["timestamp"].replace("Z", "+00:00")
237
+ )
238
+ if (
239
+ not latest_timestamp
240
+ or vote_timestamp > latest_timestamp
241
+ ):
242
  latest_timestamp = vote_timestamp
243
+ vote["timestamp"] = vote_timestamp.strftime(
244
+ "%Y-%m-%dT%H:%M:%SZ"
245
+ )
246
  except (KeyError, ValueError) as e:
247
+ logger.warning(
248
+ LogFormatter.warning(
249
+ f"Invalid timestamp in vote: {str(e)}"
250
+ )
251
+ )
252
  continue
253
+
254
  if vote_count % 1000 == 0:
255
+ logger.info(
256
+ LogFormatter.info(f"Processed {vote_count:,} votes...")
257
+ )
258
+
259
  self._add_vote_to_memory(vote)
260
+
261
  except json.JSONDecodeError as e:
262
  logger.error(LogFormatter.error("Vote parsing failed", e))
263
  continue
264
  except Exception as e:
265
  logger.error(LogFormatter.error("Vote processing failed", e))
266
  continue
267
+
268
  self._total_votes = vote_count
269
  self._last_vote_timestamp = latest_timestamp
270
+
271
  # Final summary
272
  stats = {
273
  "Total_Votes": vote_count,
274
+ "Latest_Vote": latest_timestamp.strftime("%Y-%m-%d %H:%M:%S UTC")
275
+ if latest_timestamp
276
+ else "None",
277
  "Unique_Models": len(self._votes_by_model),
278
+ "Unique_Users": len(self._votes_by_user),
279
  }
280
+
281
  logger.info(LogFormatter.section("VOTE SUMMARY"))
282
  for line in LogFormatter.stats(stats):
283
  logger.info(line)
284
+
285
  except Exception as e:
286
  logger.error(LogFormatter.error("Failed to load votes", e))
287
  raise
 
294
  vote["model"],
295
  vote.get("revision", "main"),
296
  vote["username"],
297
+ vote.get("precision", "unknown"),
298
  )
299
+
300
  # Skip if we already have this vote
301
  if check_tuple in self.vote_check_set:
302
  return
303
+
304
  self.vote_check_set.add(check_tuple)
305
+
306
  # Update model votes
307
  if vote["model"] not in self._votes_by_model:
308
  self._votes_by_model[vote["model"]] = []
309
  self._votes_by_model[vote["model"]].append(vote)
310
+
311
  # Update user votes
312
  if vote["username"] not in self._votes_by_user:
313
  self._votes_by_user[vote["username"]] = []
314
  self._votes_by_user[vote["username"]].append(vote)
315
+
316
  except KeyError as e:
317
  logger.error(LogFormatter.error("Malformed vote data, missing key", str(e)))
318
  except Exception as e:
 
321
  async def get_user_votes(self, user_id: str) -> List[Dict[str, Any]]:
322
  """Get all votes from a specific user"""
323
  logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
324
+
325
  # Check if we need to refresh votes
326
+ if (
327
+ datetime.now(timezone.utc) - self._last_sync
328
+ ).total_seconds() > self._sync_interval:
329
  logger.info(LogFormatter.info("Cache expired, refreshing votes..."))
330
  await self._check_for_new_votes()
331
+
332
  votes = self._votes_by_user.get(user_id, [])
333
  logger.info(LogFormatter.success(f"Found {len(votes):,} votes"))
334
  return votes
 
336
  async def get_model_votes(self, model_id: str) -> Dict[str, Any]:
337
  """Get all votes for a specific model"""
338
  logger.info(LogFormatter.info(f"Fetching votes for model: {model_id}"))
339
+
340
  # Check if we need to refresh votes
341
+ if (
342
+ datetime.now(timezone.utc) - self._last_sync
343
+ ).total_seconds() > self._sync_interval:
344
  logger.info(LogFormatter.info("Cache expired, refreshing votes..."))
345
  await self._check_for_new_votes()
346
+
347
  votes = self._votes_by_model.get(model_id, [])
348
+
349
  # Group votes by revision and precision
350
  votes_by_config = {}
351
  for vote in votes:
 
356
  votes_by_config[config_key] = {
357
  "revision": revision,
358
  "precision": precision,
359
+ "count": 0,
360
  }
361
  votes_by_config[config_key]["count"] += 1
362
+
363
  stats = {
364
  "Total_Votes": len(votes),
365
+ **{f"Config_{k}": v["count"] for k, v in votes_by_config.items()},
366
  }
367
+
368
  logger.info(LogFormatter.section("VOTE STATISTICS"))
369
  for line in LogFormatter.stats(stats):
370
  logger.info(line)
371
+
372
  return {
373
  "total_votes": len(votes),
374
  "votes_by_config": votes_by_config,
375
+ "votes": votes,
376
  }
377
 
378
  async def _get_model_revision(self, model_id: str) -> str:
 
381
  for attempt in range(self._max_retries):
382
  try:
383
  model_info = await asyncio.to_thread(self.hf_api.model_info, model_id)
384
+ logger.info(
385
+ f"Successfully got revision {model_info.sha} for model {model_id}"
386
+ )
387
  return model_info.sha
388
  except Exception as e:
389
+ logger.error(
390
+ f"Error getting model revision for {model_id} (attempt {attempt + 1}): {str(e)}"
391
+ )
392
  if attempt < self._max_retries - 1:
393
  retry_delay = self._retry_delay * (attempt + 1)
394
  logger.info(f"Retrying in {retry_delay} seconds...")
395
  await asyncio.sleep(retry_delay)
396
  else:
397
+ logger.warning(
398
+ f"Using 'main' as fallback revision for {model_id} after {self._max_retries} failed attempts"
399
+ )
400
  return "main"
401
 
402
+ async def add_vote(
403
+ self,
404
+ model_id: str,
405
+ user_id: str,
406
+ vote_type: str,
407
+ vote_data: Dict[str, Any] = None,
408
+ ) -> Dict[str, Any]:
409
  """Add a vote for a model"""
410
  try:
411
+ self._log_repo_operation(
412
+ "add",
413
+ VOTES_REPO,
414
+ f"Adding {vote_type} vote for {model_id} by {user_id}",
415
+ )
416
  logger.info(LogFormatter.section("NEW VOTE"))
417
  stats = {
418
  "Model": model_id,
419
  "User": user_id,
420
  "Type": vote_type,
421
+ "Config": vote_data or {},
422
  }
423
  for line in LogFormatter.tree(stats, "Vote Details"):
424
  logger.info(line)
425
+
426
  # Use provided configuration or fallback to model info
427
  precision = None
428
  revision = None
429
+
430
  if vote_data:
431
  precision = vote_data.get("precision")
432
  revision = vote_data.get("revision")
433
+
434
  # If any info is missing, try to get it from model info
435
  if not all([precision, revision]):
436
  try:
437
+ model_info = await asyncio.to_thread(
438
+ self.hf_api.model_info, model_id
439
+ )
440
+ model_card_data = (
441
+ model_info.cardData if hasattr(model_info, "cardData") else {}
442
+ )
443
+
444
  if not precision:
445
  precision = model_card_data.get("precision", "unknown")
446
  if not revision:
447
  revision = model_info.sha
448
  except Exception as e:
449
+ logger.warning(
450
+ LogFormatter.warning(
451
+ f"Failed to get model info: {str(e)}. Using default values."
452
+ )
453
+ )
454
  precision = precision or "unknown"
455
  revision = revision or "main"
456
+
457
  # Check if vote already exists with this configuration
458
  check_tuple = (model_id, revision, user_id, precision)
459
+
460
  if check_tuple in self.vote_check_set:
461
+ raise ValueError(
462
+ f"Vote already recorded for this model configuration (precision: {precision}, revision: {revision[:7] if revision else 'unknown'})"
463
+ )
464
 
465
  vote = {
466
  "model": model_id,
 
468
  "username": user_id,
469
  "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
470
  "vote_type": vote_type,
471
+ "precision": precision,
472
  }
473
 
474
  # Update local storage
475
  with open(self.votes_file, "a") as f:
476
  f.write(json.dumps(vote) + "\n")
477
+
478
  self._add_vote_to_memory(vote)
479
  self.votes_to_upload.append(vote)
480
+
481
  stats = {
482
  "Status": "Success",
483
  "Queue_Size": len(self.votes_to_upload),
484
  "Model_Config": {
485
  "Precision": precision,
486
+ "Revision": revision[:7] if revision else "unknown",
487
+ },
488
  }
489
  for line in LogFormatter.stats(stats):
490
  logger.info(line)
491
+
492
  # Force immediate sync
493
  logger.info(LogFormatter.info("Forcing immediate sync with hub"))
494
  await self._sync_with_hub()
495
+
496
  return {"status": "success", "message": "Vote added successfully"}
497
+
498
  except Exception as e:
499
  logger.error(LogFormatter.error("Failed to add vote", e))
500
  raise
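
Beneath all the rewrapping, `VoteService` keeps its singleton construction: `__new__` hands back one shared instance, and `__init__` guards its body with an `_init_done` flag so the heavy setup runs only once. A minimal, self-contained sketch of that pattern (the class name here is hypothetical):

from typing import Optional

class SingletonService:
    _instance: Optional["SingletonService"] = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        if not hasattr(self, "_init_done"):  # setup body runs exactly once
            self.cache = {}
            self._init_done = True

assert SingletonService() is SingletonService()
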
backend/app/utils/logging.py CHANGED
@@ -1,3 +1,3 @@
1
  from app.core.formatting import LogFormatter
2
 
3
- __all__ = ['LogFormatter']
 
1
  from app.core.formatting import LogFormatter
2
 
3
+ __all__ = ["LogFormatter"]
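
The only change to this shim is quote style, but the line it reformats is worth a note: `__all__` declares the names a wildcard import re-exports. A tiny sketch using a hypothetical module built in memory:

import types

mod = types.ModuleType("mylib")  # hypothetical stand-in for the shim module
mod.LogFormatter = object()
mod._internal = object()
mod.__all__ = ["LogFormatter"]

# `from mylib import *` binds exactly the names listed in __all__:
exported = {name: getattr(mod, name) for name in mod.__all__}
print(sorted(exported))  # ['LogFormatter']
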
backend/app/utils/model_validation.py CHANGED
@@ -12,50 +12,56 @@ from app.core.formatting import LogFormatter
12
 
13
  logger = logging.getLogger(__name__)
14
 
 
15
  class ModelValidator:
16
  def __init__(self):
17
  self.token = HF_TOKEN
18
  self.api = HfApi(token=self.token)
19
  self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
20
-
21
- async def check_model_card(self, model_id: str) -> Tuple[bool, str, Optional[Dict[str, Any]]]:
 
 
22
  """Check if model has a valid model card"""
23
  try:
24
  logger.info(LogFormatter.info(f"Checking model card for {model_id}"))
25
-
26
  # Get model card content using ModelCard.load
27
  try:
28
- model_card = await asyncio.to_thread(
29
- ModelCard.load,
30
- model_id
31
- )
32
  logger.info(LogFormatter.success("Model card found"))
33
  except Exception as e:
34
  error_msg = "Please add a model card to your model to explain how you trained/fine-tuned it."
35
  logger.error(LogFormatter.error(error_msg, e))
36
  return False, error_msg, None
37
-
38
  # Check license in model card data
39
- if model_card.data.license is None and not ("license_name" in model_card.data and "license_link" in model_card.data):
 
 
40
  error_msg = "License not found. Please add a license to your model card using the `license` metadata or a `license_name`/`license_link` pair."
41
  logger.warning(LogFormatter.warning(error_msg))
42
  return False, error_msg, None
43
 
44
  # Enforce card content length
45
  if len(model_card.text) < 200:
46
- error_msg = "Please add a description to your model card, it is too short."
 
 
47
  logger.warning(LogFormatter.warning(error_msg))
48
  return False, error_msg, None
49
-
50
  logger.info(LogFormatter.success("Model card validation passed"))
51
  return True, "", model_card
52
-
53
  except Exception as e:
54
  error_msg = "Failed to validate model card"
55
  logger.error(LogFormatter.error(error_msg, e))
56
  return False, str(e), None
57
-
58
- async def get_safetensors_metadata(self, model_id: str, is_adapter: bool = False, revision: str = "main") -> Optional[Dict]:
 
 
59
  """Get metadata from a safetensors file"""
60
  try:
61
  if is_adapter:
@@ -80,26 +86,32 @@ class ModelValidator:
80
  return None
81
 
82
  async def get_model_size(
83
- self,
84
- model_info: Any,
85
- precision: str,
86
- base_model: str,
87
- revision: str
88
  ) -> Tuple[Optional[float], Optional[str]]:
89
  """Get model size in billions of parameters"""
90
  try:
91
- logger.info(LogFormatter.info(f"Checking model size for {model_info.modelId}"))
 
 
92
 
93
  # Check if model is adapter
94
- is_adapter = any(s.rfilename == "adapter_config.json" for s in model_info.siblings if hasattr(s, 'rfilename'))
 
 
 
 
95
 
96
  # Try to get size from safetensors first
97
  model_size = None
98
 
99
  if is_adapter and base_model:
100
  # For adapters, we need both adapter and base model sizes
101
- adapter_meta = await self.get_safetensors_metadata(model_info.id, is_adapter=True, revision=revision)
102
- base_meta = await self.get_safetensors_metadata(base_model, revision="main")
 
 
 
 
103
 
104
  if adapter_meta and base_meta:
105
  adapter_size = sum(adapter_meta.parameter_count.values())
@@ -107,16 +119,20 @@ class ModelValidator:
107
  model_size = adapter_size + base_size
108
  else:
109
  # For regular models, just get the model size
110
- meta = await self.get_safetensors_metadata(model_info.id, revision=revision)
 
 
111
  if meta:
112
- model_size = sum(meta.parameter_count.values()) # total params
113
 
114
  if model_size is None:
115
  # If model size could not be determined, return an error
116
  return None, "Model size could not be determined"
117
 
118
  # Adjust size for GPTQ models
119
- size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
 
 
120
  model_size = model_size / 1e9 # Convert to billions, assuming float16
121
  model_size = round(size_factor * model_size, 3)
122
 
@@ -127,52 +143,49 @@ class ModelValidator:
127
  logger.error(LogFormatter.error(f"Error while determining model size: {e}"))
128
  return None, str(e)
129
 
130
-
131
  async def check_chat_template(
132
- self,
133
- model_id: str,
134
- revision: str
135
  ) -> Tuple[bool, Optional[str]]:
136
  """Check if model has a valid chat template"""
137
  try:
138
  logger.info(LogFormatter.info(f"Checking chat template for {model_id}"))
139
-
140
  try:
141
  config_file = await asyncio.to_thread(
142
  hf_hub_download,
143
  repo_id=model_id,
144
  filename="tokenizer_config.json",
145
  revision=revision,
146
- repo_type="model"
147
  )
148
-
149
- with open(config_file, 'r') as f:
150
  tokenizer_config = json.load(f)
151
-
152
- if 'chat_template' not in tokenizer_config:
153
  error_msg = f"The model {model_id} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting or submit without it."
154
  logger.error(LogFormatter.error(error_msg))
155
  return False, error_msg
156
-
157
  logger.info(LogFormatter.success("Valid chat template found"))
158
  return True, None
159
-
160
  except Exception as e:
161
  error_msg = f"Error checking chat_template: {str(e)}"
162
  logger.error(LogFormatter.error(error_msg))
163
  return False, error_msg
164
-
165
  except Exception as e:
166
  error_msg = "Failed to check chat template"
167
  logger.error(LogFormatter.error(error_msg, e))
168
  return False, str(e)
169
-
170
  async def is_model_on_hub(
171
  self,
172
  model_name: str,
173
  revision: str,
174
  test_tokenizer: bool = False,
175
- trust_remote_code: bool = False
176
  ) -> Tuple[bool, Optional[str], Optional[Any]]:
177
  """Check if model exists and is properly configured on the Hub"""
178
  try:
@@ -182,9 +195,9 @@ class ModelValidator:
182
  revision=revision,
183
  trust_remote_code=trust_remote_code,
184
  token=self.token,
185
- force_download=True
186
  )
187
-
188
  if test_tokenizer:
189
  try:
190
  await asyncio.to_thread(
@@ -192,56 +205,80 @@ class ModelValidator:
192
  model_name,
193
  revision=revision,
194
  trust_remote_code=trust_remote_code,
195
- token=self.token
196
  )
197
  except ValueError as e:
198
- return False, f"The tokenizer is not available in an official Transformers release: {e}", None
 
 
 
 
199
  except Exception:
200
- return False, "The tokenizer cannot be loaded. Ensure the tokenizer class is part of a stable Transformers release and correctly configured.", None
201
-
 
 
 
 
202
  return True, None, config
203
-
204
  except ValueError:
205
- return False, "The model requires `trust_remote_code=True` to launch, and for safety reasons, we don't accept such models automatically.", None
 
 
 
 
206
  except Exception as e:
207
  if "You are trying to access a gated repo." in str(e):
208
- return True, "The model is gated and requires special access permissions.", None
209
- return False, f"The model was not found or is misconfigured on the Hub. Error: {e.args[0]}", None
 
 
 
 
 
 
 
 
210
 
211
  async def check_official_provider_status(
212
- self,
213
- model_id: str,
214
- existing_models: Dict[str, list]
215
  ) -> Tuple[bool, Optional[str]]:
216
  """
217
  Check if model is from official provider and has finished submission.
218
-
219
  Args:
220
  model_id: The model identifier (org/model-name)
221
  existing_models: Dictionary of models by status from get_models()
222
-
223
  Returns:
224
  Tuple[bool, Optional[str]]: (is_valid, error_message)
225
  """
226
  try:
227
- logger.info(LogFormatter.info(f"Checking official provider status for {model_id}"))
228
-
 
 
229
  # Get model organization
230
- model_org = model_id.split('/')[0] if '/' in model_id else None
231
-
232
  if not model_org:
233
  return True, None
234
-
235
  # Load official providers dataset
236
  dataset = load_dataset(OFFICIAL_PROVIDERS_REPO)
237
  official_providers = dataset["train"][0]["CURATED_SET"]
238
-
239
  # Check if model org is in official providers
240
  is_official = model_org in official_providers
241
-
242
  if is_official:
243
- logger.info(LogFormatter.info(f"Model organization '{model_org}' is an official provider"))
244
-
 
 
 
 
245
  # Check for finished submissions
246
  if "finished" in existing_models:
247
  for model in existing_models["finished"]:
@@ -251,15 +288,25 @@ class ModelValidator:
251
  f"with a completed evaluation. "
252
  f"To re-evaluate, please open a discussion."
253
  )
254
- logger.error(LogFormatter.error("Validation failed", error_msg))
 
 
255
  return False, error_msg
256
-
257
- logger.info(LogFormatter.success("No finished submission found for this official provider model"))
 
 
 
 
258
  else:
259
- logger.info(LogFormatter.info(f"Model organization '{model_org}' is not an official provider"))
260
-
 
 
 
 
261
  return True, None
262
-
263
  except Exception as e:
264
  error_msg = f"Failed to check official provider status: {str(e)}"
265
  logger.error(LogFormatter.error(error_msg))
 
12
 
13
  logger = logging.getLogger(__name__)
14
 
15
+
16
  class ModelValidator:
17
  def __init__(self):
18
  self.token = HF_TOKEN
19
  self.api = HfApi(token=self.token)
20
  self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
21
+
22
+ async def check_model_card(
23
+ self, model_id: str
24
+ ) -> Tuple[bool, str, Optional[Dict[str, Any]]]:
25
  """Check if model has a valid model card"""
26
  try:
27
  logger.info(LogFormatter.info(f"Checking model card for {model_id}"))
28
+
29
  # Get model card content using ModelCard.load
30
  try:
31
+ model_card = await asyncio.to_thread(ModelCard.load, model_id)
 
 
 
32
  logger.info(LogFormatter.success("Model card found"))
33
  except Exception as e:
34
  error_msg = "Please add a model card to your model to explain how you trained/fine-tuned it."
35
  logger.error(LogFormatter.error(error_msg, e))
36
  return False, error_msg, None
37
+
38
  # Check license in model card data
39
+ if model_card.data.license is None and not (
40
+ "license_name" in model_card.data and "license_link" in model_card.data
41
+ ):
42
  error_msg = "License not found. Please add a license to your model card using the `license` metadata or a `license_name`/`license_link` pair."
43
  logger.warning(LogFormatter.warning(error_msg))
44
  return False, error_msg, None
45
 
46
  # Enforce card content length
47
  if len(model_card.text) < 200:
48
+ error_msg = (
49
+ "Please add a description to your model card, it is too short."
50
+ )
51
  logger.warning(LogFormatter.warning(error_msg))
52
  return False, error_msg, None
53
+
54
  logger.info(LogFormatter.success("Model card validation passed"))
55
  return True, "", model_card
56
+
57
  except Exception as e:
58
  error_msg = "Failed to validate model card"
59
  logger.error(LogFormatter.error(error_msg, e))
60
  return False, str(e), None
61
+
62
+ async def get_safetensors_metadata(
63
+ self, model_id: str, is_adapter: bool = False, revision: str = "main"
64
+ ) -> Optional[Dict]:
65
  """Get metadata from a safetensors file"""
66
  try:
67
  if is_adapter:
 
86
  return None
87
 
88
  async def get_model_size(
89
+ self, model_info: Any, precision: str, base_model: str, revision: str
 
 
 
 
90
  ) -> Tuple[Optional[float], Optional[str]]:
91
  """Get model size in billions of parameters"""
92
  try:
93
+ logger.info(
94
+ LogFormatter.info(f"Checking model size for {model_info.modelId}")
95
+ )
96
 
97
  # Check if model is adapter
98
+ is_adapter = any(
99
+ s.rfilename == "adapter_config.json"
100
+ for s in model_info.siblings
101
+ if hasattr(s, "rfilename")
102
+ )
103
 
104
  # Try to get size from safetensors first
105
  model_size = None
106
 
107
  if is_adapter and base_model:
108
  # For adapters, we need both adapter and base model sizes
109
+ adapter_meta = await self.get_safetensors_metadata(
110
+ model_info.id, is_adapter=True, revision=revision
111
+ )
112
+ base_meta = await self.get_safetensors_metadata(
113
+ base_model, revision="main"
114
+ )
115
 
116
  if adapter_meta and base_meta:
117
  adapter_size = sum(adapter_meta.parameter_count.values())
 
119
  model_size = adapter_size + base_size
120
  else:
121
  # For regular models, just get the model size
122
+ meta = await self.get_safetensors_metadata(
123
+ model_info.id, revision=revision
124
+ )
125
  if meta:
126
+ model_size = sum(meta.parameter_count.values()) # total params
127
 
128
  if model_size is None:
129
  # If model size could not be determined, return an error
130
  return None, "Model size could not be determined"
131
 
132
  # Adjust size for GPTQ models
133
+ size_factor = (
134
+ 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
135
+ )
136
  model_size = model_size / 1e9 # Convert to billions, assuming float16
137
  model_size = round(size_factor * model_size, 3)
138
 
 
143
  logger.error(LogFormatter.error(f"Error while determining model size: {e}"))
144
  return None, str(e)
145
 
 
146
  async def check_chat_template(
147
+ self, model_id: str, revision: str
 
 
148
  ) -> Tuple[bool, Optional[str]]:
149
  """Check if model has a valid chat template"""
150
  try:
151
  logger.info(LogFormatter.info(f"Checking chat template for {model_id}"))
152
+
153
  try:
154
  config_file = await asyncio.to_thread(
155
  hf_hub_download,
156
  repo_id=model_id,
157
  filename="tokenizer_config.json",
158
  revision=revision,
159
+ repo_type="model",
160
  )
161
+
162
+ with open(config_file, "r") as f:
163
  tokenizer_config = json.load(f)
164
+
165
+ if "chat_template" not in tokenizer_config:
166
  error_msg = f"The model {model_id} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting or submit without it."
167
  logger.error(LogFormatter.error(error_msg))
168
  return False, error_msg
169
+
170
  logger.info(LogFormatter.success("Valid chat template found"))
171
  return True, None
172
+
173
  except Exception as e:
174
  error_msg = f"Error checking chat_template: {str(e)}"
175
  logger.error(LogFormatter.error(error_msg))
176
  return False, error_msg
177
+
178
  except Exception as e:
179
  error_msg = "Failed to check chat template"
180
  logger.error(LogFormatter.error(error_msg, e))
181
  return False, str(e)
182
+
183
  async def is_model_on_hub(
184
  self,
185
  model_name: str,
186
  revision: str,
187
  test_tokenizer: bool = False,
188
+ trust_remote_code: bool = False,
189
  ) -> Tuple[bool, Optional[str], Optional[Any]]:
190
  """Check if model exists and is properly configured on the Hub"""
191
  try:
 
195
  revision=revision,
196
  trust_remote_code=trust_remote_code,
197
  token=self.token,
198
+ force_download=True,
199
  )
200
+
201
  if test_tokenizer:
202
  try:
203
  await asyncio.to_thread(
 
205
  model_name,
206
  revision=revision,
207
  trust_remote_code=trust_remote_code,
208
+ token=self.token,
209
  )
210
  except ValueError as e:
211
+ return (
212
+ False,
213
+ f"The tokenizer is not available in an official Transformers release: {e}",
214
+ None,
215
+ )
216
  except Exception:
217
+ return (
218
+ False,
219
+ "The tokenizer cannot be loaded. Ensure the tokenizer class is part of a stable Transformers release and correctly configured.",
220
+ None,
221
+ )
222
+
223
  return True, None, config
224
+
225
  except ValueError:
226
+ return (
227
+ False,
228
+ "The model requires `trust_remote_code=True` to launch, and for safety reasons, we don't accept such models automatically.",
229
+ None,
230
+ )
231
  except Exception as e:
232
  if "You are trying to access a gated repo." in str(e):
233
+ return (
234
+ True,
235
+ "The model is gated and requires special access permissions.",
236
+ None,
237
+ )
238
+ return (
239
+ False,
240
+ f"The model was not found or is misconfigured on the Hub. Error: {e.args[0]}",
241
+ None,
242
+ )
243
 
244
  async def check_official_provider_status(
245
+ self, model_id: str, existing_models: Dict[str, list]
 
 
246
  ) -> Tuple[bool, Optional[str]]:
247
  """
248
  Check if model is from official provider and has finished submission.
249
+
250
  Args:
251
  model_id: The model identifier (org/model-name)
252
  existing_models: Dictionary of models by status from get_models()
253
+
254
  Returns:
255
  Tuple[bool, Optional[str]]: (is_valid, error_message)
256
  """
257
  try:
258
+ logger.info(
259
+ LogFormatter.info(f"Checking official provider status for {model_id}")
260
+ )
261
+
262
  # Get model organization
263
+ model_org = model_id.split("/")[0] if "/" in model_id else None
264
+
265
  if not model_org:
266
  return True, None
267
+
268
  # Load official providers dataset
269
  dataset = load_dataset(OFFICIAL_PROVIDERS_REPO)
270
  official_providers = dataset["train"][0]["CURATED_SET"]
271
+
272
  # Check if model org is in official providers
273
  is_official = model_org in official_providers
274
+
275
  if is_official:
276
+ logger.info(
277
+ LogFormatter.info(
278
+ f"Model organization '{model_org}' is an official provider"
279
+ )
280
+ )
281
+
282
  # Check for finished submissions
283
  if "finished" in existing_models:
284
  for model in existing_models["finished"]:
 
288
  f"with a completed evaluation. "
289
  f"To re-evaluate, please open a discussion."
290
  )
291
+ logger.error(
292
+ LogFormatter.error("Validation failed", error_msg)
293
+ )
294
  return False, error_msg
295
+
296
+ logger.info(
297
+ LogFormatter.success(
298
+ "No finished submission found for this official provider model"
299
+ )
300
+ )
301
  else:
302
+ logger.info(
303
+ LogFormatter.info(
304
+ f"Model organization '{model_org}' is not an official provider"
305
+ )
306
+ )
307
+
308
  return True, None
309
+
310
  except Exception as e:
311
  error_msg = f"Failed to check official provider status: {str(e)}"
312
  logger.error(LogFormatter.error(error_msg))
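
The size computation in `get_model_size` is unchanged through the rewrap: sum the per-dtype parameter counts from the safetensors metadata, convert to billions, and apply an 8x factor for GPTQ-quantized models. A worked sketch with a hypothetical metadata dict:

parameter_count = {"F16": 6_738_415_616}  # hypothetical dtype -> param count
precision, model_id = "GPTQ", "org/model-gptq"

size_factor = 8 if (precision == "GPTQ" or "gptq" in model_id.lower()) else 1
size_b = round(size_factor * sum(parameter_count.values()) / 1e9, 3)
print(size_b)  # 53.907
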
backend/utils/analyze_prod_datasets.py CHANGED
@@ -3,7 +3,7 @@ import json
3
  import logging
4
  from datetime import datetime
5
  from pathlib import Path
6
- from typing import Dict, Any, List
7
  from huggingface_hub import HfApi
8
  from dotenv import load_dotenv
9
  from app.config.hf_config import HF_ORGANIZATION
@@ -16,10 +16,7 @@ ROOT_DIR = BACKEND_DIR.parent
16
  load_dotenv(ROOT_DIR / ".env")
17
 
18
  # Configure logging
19
- logging.basicConfig(
20
- level=logging.INFO,
21
- format='%(message)s'
22
- )
23
  logger = logging.getLogger(__name__)
24
 
25
  # Initialize Hugging Face API
@@ -28,53 +25,50 @@ if not HF_TOKEN:
28
  raise ValueError("HF_TOKEN not found in environment variables")
29
  api = HfApi(token=HF_TOKEN)
30
 
 
31
  def analyze_dataset(repo_id: str) -> Dict[str, Any]:
32
  """Analyze a dataset and return statistics"""
33
  try:
34
  # Get dataset info
35
  dataset_info = api.dataset_info(repo_id=repo_id)
36
-
37
  # Get file list
38
  files = api.list_repo_files(repo_id, repo_type="dataset")
39
-
40
  # Get last commit info
41
  commits = api.list_repo_commits(repo_id, repo_type="dataset")
42
  last_commit = next(commits, None)
43
-
44
  # Count lines in jsonl files
45
  total_entries = 0
46
  for file in files:
47
- if file.endswith('.jsonl'):
48
  try:
49
  # Download file content
50
  content = api.hf_hub_download(
51
- repo_id=repo_id,
52
- filename=file,
53
- repo_type="dataset"
54
  )
55
-
56
  # Count lines
57
- with open(content, 'r') as f:
58
  for _ in f:
59
  total_entries += 1
60
-
61
  except Exception as e:
62
  logger.error(f"Error processing file {file}: {str(e)}")
63
  continue
64
-
65
  # Special handling for requests dataset
66
  if repo_id == f"{HF_ORGANIZATION}/requests":
67
  pending_count = 0
68
  completed_count = 0
69
-
70
  try:
71
  content = api.hf_hub_download(
72
- repo_id=repo_id,
73
- filename="eval_requests.jsonl",
74
- repo_type="dataset"
75
  )
76
-
77
- with open(content, 'r') as f:
78
  for line in f:
79
  try:
80
  entry = json.loads(line)
@@ -84,10 +78,10 @@ def analyze_dataset(repo_id: str) -> Dict[str, Any]:
84
  completed_count += 1
85
  except json.JSONDecodeError:
86
  continue
87
-
88
  except Exception as e:
89
  logger.error(f"Error analyzing requests: {str(e)}")
90
-
91
  # Build response
92
  response = {
93
  "id": repo_id,
@@ -97,22 +91,22 @@ def analyze_dataset(repo_id: str) -> Dict[str, Any]:
97
  "size_bytes": dataset_info.size_in_bytes,
98
  "downloads": dataset_info.downloads,
99
  }
100
-
101
  # Add request-specific info if applicable
102
  if repo_id == f"{HF_ORGANIZATION}/requests":
103
- response.update({
104
- "pending_requests": pending_count,
105
- "completed_requests": completed_count
106
- })
107
-
 
 
108
  return response
109
-
110
  except Exception as e:
111
  logger.error(f"Error analyzing dataset {repo_id}: {str(e)}")
112
- return {
113
- "id": repo_id,
114
- "error": str(e)
115
- }
116
 
117
  def main():
118
  """Main function to analyze all datasets"""
@@ -121,50 +115,49 @@ def main():
121
  datasets = [
122
  {
123
  "id": f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
124
- "description": "Aggregated results"
125
- },
126
- {
127
- "id": f"{HF_ORGANIZATION}/requests",
128
- "description": "Evaluation requests"
129
  },
 
 
130
  {
131
- "id": f"{HF_ORGANIZATION}/votes",
132
- "description": "User votes"
133
  },
134
- {
135
- "id": f"open-llm-leaderboard/official-providers",
136
- "description": "Highlighted models"
137
- }
138
  ]
139
-
140
  # Analyze each dataset
141
  results = []
142
  for dataset in datasets:
143
  logger.info(f"\nAnalyzing {dataset['description']} ({dataset['id']})...")
144
- result = analyze_dataset(dataset['id'])
145
  results.append(result)
146
-
147
- if 'error' in result:
148
  logger.error(f"❌ Error: {result['error']}")
149
  else:
150
  logger.info(f"✓ {result['total_entries']} entries")
151
  logger.info(f"✓ {result['file_count']} files")
152
  logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
153
  logger.info(f"✓ {result['downloads']} downloads")
154
-
155
- if 'pending_requests' in result:
156
  logger.info(f"✓ {result['pending_requests']} pending requests")
157
  logger.info(f"✓ {result['completed_requests']} completed requests")
158
-
159
- if result['last_modified']:
160
- last_modified = datetime.fromisoformat(result['last_modified'].replace('Z', '+00:00'))
161
- logger.info(f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}")
162
-
 
 
 
 
163
  return results
164
-
165
  except Exception as e:
166
  logger.error(f"Global error: {str(e)}")
167
  return []
168
 
 
169
  if __name__ == "__main__":
170
- main()
 
3
  import logging
4
  from datetime import datetime
5
  from pathlib import Path
6
+ from typing import Dict, Any
7
  from huggingface_hub import HfApi
8
  from dotenv import load_dotenv
9
  from app.config.hf_config import HF_ORGANIZATION
 
16
  load_dotenv(ROOT_DIR / ".env")
17
 
18
  # Configure logging
19
+ logging.basicConfig(level=logging.INFO, format="%(message)s")
 
 
 
20
  logger = logging.getLogger(__name__)
21
 
22
  # Initialize Hugging Face API
 
25
  raise ValueError("HF_TOKEN not found in environment variables")
26
  api = HfApi(token=HF_TOKEN)
27
 
28
+
29
  def analyze_dataset(repo_id: str) -> Dict[str, Any]:
30
  """Analyze a dataset and return statistics"""
31
  try:
32
  # Get dataset info
33
  dataset_info = api.dataset_info(repo_id=repo_id)
34
+
35
  # Get file list
36
  files = api.list_repo_files(repo_id, repo_type="dataset")
37
+
38
  # Get last commit info
39
  commits = api.list_repo_commits(repo_id, repo_type="dataset")
40
  last_commit = next(commits, None)
41
+
42
  # Count lines in jsonl files
43
  total_entries = 0
44
  for file in files:
45
+ if file.endswith(".jsonl"):
46
  try:
47
  # Download file content
48
  content = api.hf_hub_download(
49
+ repo_id=repo_id, filename=file, repo_type="dataset"
 
 
50
  )
51
+
52
  # Count lines
53
+ with open(content, "r") as f:
54
  for _ in f:
55
  total_entries += 1
56
+
57
  except Exception as e:
58
  logger.error(f"Error processing file {file}: {str(e)}")
59
  continue
60
+
61
  # Special handling for requests dataset
62
  if repo_id == f"{HF_ORGANIZATION}/requests":
63
  pending_count = 0
64
  completed_count = 0
65
+
66
  try:
67
  content = api.hf_hub_download(
68
+ repo_id=repo_id, filename="eval_requests.jsonl", repo_type="dataset"
 
 
69
  )
70
+
71
+ with open(content, "r") as f:
72
  for line in f:
73
  try:
74
  entry = json.loads(line)
 
78
  completed_count += 1
79
  except json.JSONDecodeError:
80
  continue
81
+
82
  except Exception as e:
83
  logger.error(f"Error analyzing requests: {str(e)}")
84
+
85
  # Build response
86
  response = {
87
  "id": repo_id,
 
91
  "size_bytes": dataset_info.size_in_bytes,
92
  "downloads": dataset_info.downloads,
93
  }
94
+
95
  # Add request-specific info if applicable
96
  if repo_id == f"{HF_ORGANIZATION}/requests":
97
+ response.update(
98
+ {
99
+ "pending_requests": pending_count,
100
+ "completed_requests": completed_count,
101
+ }
102
+ )
103
+
104
  return response
105
+
106
  except Exception as e:
107
  logger.error(f"Error analyzing dataset {repo_id}: {str(e)}")
108
+ return {"id": repo_id, "error": str(e)}
109
+
 
 
110
 
111
  def main():
112
  """Main function to analyze all datasets"""
 
115
  datasets = [
116
  {
117
  "id": f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
118
+ "description": "Aggregated results",
 
 
 
 
119
  },
120
+ {"id": f"{HF_ORGANIZATION}/requests", "description": "Evaluation requests"},
121
+ {"id": f"{HF_ORGANIZATION}/votes", "description": "User votes"},
122
  {
123
+ "id": "open-llm-leaderboard/official-providers",
124
+ "description": "Highlighted models",
125
  },
 
 
 
 
126
  ]
127
+
128
  # Analyze each dataset
129
  results = []
130
  for dataset in datasets:
131
  logger.info(f"\nAnalyzing {dataset['description']} ({dataset['id']})...")
132
+ result = analyze_dataset(dataset["id"])
133
  results.append(result)
134
+
135
+ if "error" in result:
136
  logger.error(f"❌ Error: {result['error']}")
137
  else:
138
  logger.info(f"✓ {result['total_entries']} entries")
139
  logger.info(f"✓ {result['file_count']} files")
140
  logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
141
  logger.info(f"✓ {result['downloads']} downloads")
142
+
143
+ if "pending_requests" in result:
144
  logger.info(f"✓ {result['pending_requests']} pending requests")
145
  logger.info(f"✓ {result['completed_requests']} completed requests")
146
+
147
+ if result["last_modified"]:
148
+ last_modified = datetime.fromisoformat(
149
+ result["last_modified"].replace("Z", "+00:00")
150
+ )
151
+ logger.info(
152
+ f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}"
153
+ )
154
+
155
  return results
156
+
157
  except Exception as e:
158
  logger.error(f"Global error: {str(e)}")
159
  return []
160
 
161
+
162
  if __name__ == "__main__":
163
+ main()
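
The request tally in `analyze_dataset` walks a downloaded JSONL file line by line, skipping malformed rows. The sketch below reproduces that loop against an in-memory payload; the status strings are hypothetical, since the actual comparison lines sit outside the changed hunks:

import json

payload = "\n".join(
    json.dumps({"status": s}) for s in ("PENDING", "FINISHED", "PENDING")
)

pending = completed = 0
for line in payload.splitlines():
    try:
        entry = json.loads(line)
    except json.JSONDecodeError:
        continue  # skip malformed rows, as the script does
    if entry.get("status") == "PENDING":
        pending += 1
    elif entry.get("status") == "FINISHED":
        completed += 1
print(pending, completed)  # 2 1
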
backend/utils/analyze_prod_models.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- import json
3
  import logging
4
  from datetime import datetime
5
  from pathlib import Path
@@ -15,10 +14,7 @@ ROOT_DIR = BACKEND_DIR.parent
15
  load_dotenv(ROOT_DIR / ".env")
16
 
17
  # Configure logging
18
- logging.basicConfig(
19
- level=logging.INFO,
20
- format='%(message)s'
21
- )
22
  logger = logging.getLogger(__name__)
23
 
24
  # Initialize Hugging Face API
@@ -27,80 +23,92 @@ if not HF_TOKEN:
27
  raise ValueError("HF_TOKEN not found in environment variables")
28
  api = HfApi(token=HF_TOKEN)
29
 
 
30
  def count_evaluated_models():
31
  """Count the number of evaluated models"""
32
  try:
33
  # Get dataset info
34
- dataset_info = api.dataset_info(repo_id=f"{HF_ORGANIZATION}/llm-security-leaderboard-contents", repo_type="dataset")
35
-
 
 
 
36
  # Get file list
37
- files = api.list_repo_files(f"{HF_ORGANIZATION}/llm-security-leaderboard-contents", repo_type="dataset")
38
-
 
 
39
  # Get last commit info
40
- commits = api.list_repo_commits(f"{HF_ORGANIZATION}/llm-security-leaderboard-contents", repo_type="dataset")
 
 
41
  last_commit = next(commits, None)
42
-
43
  # Count lines in jsonl files
44
  total_entries = 0
45
  for file in files:
46
- if file.endswith('.jsonl'):
47
  try:
48
  # Download file content
49
  content = api.hf_hub_download(
50
  repo_id=f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
51
  filename=file,
52
- repo_type="dataset"
53
  )
54
-
55
  # Count lines
56
- with open(content, 'r') as f:
57
  for _ in f:
58
  total_entries += 1
59
-
60
  except Exception as e:
61
  logger.error(f"Error processing file {file}: {str(e)}")
62
  continue
63
-
64
  # Build response
65
  response = {
66
  "total_models": total_entries,
67
  "last_modified": last_commit.created_at if last_commit else None,
68
  "file_count": len(files),
69
  "size_bytes": dataset_info.size_in_bytes,
70
- "downloads": dataset_info.downloads
71
  }
72
-
73
  return response
74
-
75
  except Exception as e:
76
  logger.error(f"Error counting evaluated models: {str(e)}")
77
- return {
78
- "error": str(e)
79
- }
80
 
81
  def main():
82
  """Main function to count evaluated models"""
83
  try:
84
  logger.info("\nAnalyzing evaluated models...")
85
  result = count_evaluated_models()
86
-
87
- if 'error' in result:
88
  logger.error(f"❌ Error: {result['error']}")
89
  else:
90
  logger.info(f"✓ {result['total_models']} models evaluated")
91
  logger.info(f"✓ {result['file_count']} files")
92
  logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
93
  logger.info(f"✓ {result['downloads']} downloads")
94
-
95
- if result['last_modified']:
96
- last_modified = datetime.fromisoformat(result['last_modified'].replace('Z', '+00:00'))
97
- logger.info(f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}")
98
-
99
  return result
100
-
101
  except Exception as e:
102
  logger.error(f"Global error: {str(e)}")
103
  return {"error": str(e)}
104
 
 
105
  if __name__ == "__main__":
106
- main()
 
1
  import os
 
2
  import logging
3
  from datetime import datetime
4
  from pathlib import Path
 
14
  load_dotenv(ROOT_DIR / ".env")
15
 
16
  # Configure logging
17
+ logging.basicConfig(level=logging.INFO, format="%(message)s")
18
  logger = logging.getLogger(__name__)
19
 
20
  # Initialize Hugging Face API
 
23
  raise ValueError("HF_TOKEN not found in environment variables")
24
  api = HfApi(token=HF_TOKEN)
25
 
26
+
27
  def count_evaluated_models():
28
  """Count the number of evaluated models"""
29
  try:
30
  # Get dataset info
31
+ dataset_info = api.dataset_info(
32
+ repo_id=f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
33
+ repo_type="dataset",
34
+ )
35
+
36
  # Get file list
37
+ files = api.list_repo_files(
38
+ f"{HF_ORGANIZATION}/llm-security-leaderboard-contents", repo_type="dataset"
39
+ )
40
+
41
  # Get last commit info
42
+ commits = api.list_repo_commits(
43
+ f"{HF_ORGANIZATION}/llm-security-leaderboard-contents", repo_type="dataset"
44
+ )
45
  last_commit = next(commits, None)
46
+
47
  # Count lines in jsonl files
48
  total_entries = 0
49
  for file in files:
50
+ if file.endswith(".jsonl"):
51
  try:
52
  # Download file content
53
  content = api.hf_hub_download(
54
  repo_id=f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
55
  filename=file,
56
+ repo_type="dataset",
57
  )
58
+
59
  # Count lines
60
+ with open(content, "r") as f:
61
  for _ in f:
62
  total_entries += 1
63
+
64
  except Exception as e:
65
  logger.error(f"Error processing file {file}: {str(e)}")
66
  continue
67
+
68
  # Build response
69
  response = {
70
  "total_models": total_entries,
71
  "last_modified": last_commit.created_at if last_commit else None,
72
  "file_count": len(files),
73
  "size_bytes": dataset_info.size_in_bytes,
74
+ "downloads": dataset_info.downloads,
75
  }
76
+
77
  return response
78
+
79
  except Exception as e:
80
  logger.error(f"Error counting evaluated models: {str(e)}")
81
+ return {"error": str(e)}
82
+
 
83
 
84
  def main():
85
  """Main function to count evaluated models"""
86
  try:
87
  logger.info("\nAnalyzing evaluated models...")
88
  result = count_evaluated_models()
89
+
90
+ if "error" in result:
91
  logger.error(f"❌ Error: {result['error']}")
92
  else:
93
  logger.info(f"✓ {result['total_models']} models evaluated")
94
  logger.info(f"✓ {result['file_count']} files")
95
  logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
96
  logger.info(f"✓ {result['downloads']} downloads")
97
+
98
+ if result["last_modified"]:
99
+ last_modified = datetime.fromisoformat(
100
+ result["last_modified"].replace("Z", "+00:00")
101
+ )
102
+ logger.info(
103
+ f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}"
104
+ )
105
+
106
  return result
107
+
108
  except Exception as e:
109
  logger.error(f"Global error: {str(e)}")
110
  return {"error": str(e)}
111
 
112
+
113
  if __name__ == "__main__":
114
+ main()
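
The counting loop in `count_evaluated_models` treats each line of a `.jsonl` file as one evaluated model. A self-contained sketch of the same idiom, using a throwaway temp file in place of the downloaded shard (the file contents here are invented):

    import json
    import tempfile

    # Build a tiny stand-in for a downloaded .jsonl shard: one record per line.
    records = [{"model": "org/model-a"}, {"model": "org/model-b"}]
    with tempfile.NamedTemporaryFile("w", suffix=".jsonl", delete=False) as f:
        for record in records:
            f.write(json.dumps(record) + "\n")
        path = f.name

    # One entry per line; a generator expression counts without loading
    # the whole file into memory, matching the script's for-loop counter.
    with open(path, "r") as fh:
        total_entries = sum(1 for _ in fh)
    print(total_entries)  # 2
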
backend/utils/fix_wrong_model_size.py CHANGED
@@ -1,15 +1,11 @@
1
- import os
2
  import json
3
- import pytz
4
  import logging
5
  import asyncio
6
  from datetime import datetime
7
- from pathlib import Path
8
  import huggingface_hub
9
  from huggingface_hub.errors import RepositoryNotFoundError, RevisionNotFoundError
10
  from dotenv import load_dotenv
11
  from git import Repo
12
- from datetime import datetime
13
  from tqdm.auto import tqdm
14
  from tqdm.contrib.logging import logging_redirect_tqdm
15
 
@@ -20,22 +16,22 @@ from app.utils.model_validation import ModelValidator
20
  huggingface_hub.logging.set_verbosity_error()
21
  huggingface_hub.utils.disable_progress_bars()
22
 
23
- logging.basicConfig(
24
- level=logging.ERROR,
25
- format='%(message)s'
26
- )
27
  logger = logging.getLogger(__name__)
28
  load_dotenv()
29
 
30
  validator = ModelValidator()
31
 
 
32
  def get_changed_files(repo_path, start_date, end_date):
33
  repo = Repo(repo_path)
34
- start = datetime.strptime(start_date, '%Y-%m-%d')
35
- end = datetime.strptime(end_date, '%Y-%m-%d')
36
-
37
  changed_files = set()
38
- pbar = tqdm(repo.iter_commits(), desc=f"Reading commits from {end_date} to {start_date}")
 
 
39
  for commit in pbar:
40
  commit_date = datetime.fromtimestamp(commit.committed_date)
41
  pbar.set_postfix_str(f"Commit date: {commit_date}")
@@ -62,44 +58,52 @@ def main():
62
  requests_path = "/requests"
63
  start_date = "2024-12-09"
64
  end_date = "2025-01-07"
65
-
66
  changed_files = get_changed_files(requests_path, start_date, end_date)
67
 
68
  for file in tqdm(changed_files):
69
  try:
70
  request_data = read_json(requests_path, file)
71
- except FileNotFoundError as e:
72
  tqdm.write(f"File {file} not found")
73
  continue
74
-
75
  try:
76
  model_info = API.model_info(
77
  repo_id=request_data["model"],
78
  revision=request_data["revision"],
79
- token=HF_TOKEN
80
  )
81
- except (RepositoryNotFoundError, RevisionNotFoundError) as e:
82
- tqdm.write(f"Model info for {request_data["model"]} not found")
83
  continue
84
-
85
  with logging_redirect_tqdm():
86
- new_model_size, error = asyncio.run(validator.get_model_size(
87
- model_info=model_info,
88
- precision=request_data["precision"],
89
- base_model=request_data["base_model"],
90
- revision=request_data["revision"]
91
- ))
 
 
92
 
93
  if error:
94
- tqdm.write(f"Error getting model size info for {request_data["model"]}, {error}")
 
 
95
  continue
96
-
97
  old_model_size = request_data["params"]
98
  if old_model_size != new_model_size:
99
  if new_model_size > 100:
100
- tqdm.write(f"Model: {request_data["model"]}, size is more 100B: {new_model_size}")
101
-
102
- tqdm.write(f"Model: {request_data["model"]}, old size: {request_data["params"]} new size: {new_model_size}")
103
  tqdm.write(f"Updating request file {file}")
104
 
105
  request_data["params"] = new_model_size
 
 
1
  import json
 
2
  import logging
3
  import asyncio
4
  from datetime import datetime
 
5
  import huggingface_hub
6
  from huggingface_hub.errors import RepositoryNotFoundError, RevisionNotFoundError
7
  from dotenv import load_dotenv
8
  from git import Repo
 
9
  from tqdm.auto import tqdm
10
  from tqdm.contrib.logging import logging_redirect_tqdm
11
 
 
16
  huggingface_hub.logging.set_verbosity_error()
17
  huggingface_hub.utils.disable_progress_bars()
18
 
19
+ logging.basicConfig(level=logging.ERROR, format="%(message)s")
20
  logger = logging.getLogger(__name__)
21
  load_dotenv()
22
 
23
  validator = ModelValidator()
24
 
25
+
26
  def get_changed_files(repo_path, start_date, end_date):
27
  repo = Repo(repo_path)
28
+ start = datetime.strptime(start_date, "%Y-%m-%d")
29
+ end = datetime.strptime(end_date, "%Y-%m-%d")
30
+
31
  changed_files = set()
32
+ pbar = tqdm(
33
+ repo.iter_commits(), desc=f"Reading commits from {end_date} to {start_date}"
34
+ )
35
  for commit in pbar:
36
  commit_date = datetime.fromtimestamp(commit.committed_date)
37
  pbar.set_postfix_str(f"Commit date: {commit_date}")
 
58
  requests_path = "/requests"
59
  start_date = "2024-12-09"
60
  end_date = "2025-01-07"
61
+
62
  changed_files = get_changed_files(requests_path, start_date, end_date)
63
 
64
  for file in tqdm(changed_files):
65
  try:
66
  request_data = read_json(requests_path, file)
67
+ except FileNotFoundError:
68
  tqdm.write(f"File {file} not found")
69
  continue
70
+
71
  try:
72
  model_info = API.model_info(
73
  repo_id=request_data["model"],
74
  revision=request_data["revision"],
75
+ token=HF_TOKEN,
76
  )
77
+ except (RepositoryNotFoundError, RevisionNotFoundError):
78
+ tqdm.write(f"Model info for {request_data['model']} not found")
79
  continue
80
+
81
  with logging_redirect_tqdm():
82
+ new_model_size, error = asyncio.run(
83
+ validator.get_model_size(
84
+ model_info=model_info,
85
+ precision=request_data["precision"],
86
+ base_model=request_data["base_model"],
87
+ revision=request_data["revision"],
88
+ )
89
+ )
90
 
91
  if error:
92
+ tqdm.write(
93
+ f"Error getting model size info for {request_data['model']}, {error}"
94
+ )
95
  continue
96
+
97
  old_model_size = request_data["params"]
98
  if old_model_size != new_model_size:
99
  if new_model_size > 100:
100
+ tqdm.write(
101
+ f"Model: {request_data['model']}, size is more than 100B: {new_model_size}"
102
+ )
103
+
104
+ tqdm.write(
105
+ f"Model: {request_data['model']}, old size: {request_data['params']}, new size: {new_model_size}"
106
+ )
107
  tqdm.write(f"Updating request file {file}")
108
 
109
  request_data["params"] = new_model_size
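
Beyond formatting, the quote changes in this file are load-bearing: before Python 3.12 (PEP 701), reusing the enclosing quote character inside an f-string replacement field is a SyntaxError, so the removed lines of the form f"... {request_data["model"]} ..." would not even parse on older interpreters. A quick illustration with a stand-in dict:

    request_data = {"model": "org/example-model"}  # invented record

    # SyntaxError before Python 3.12:
    #   f"Model info for {request_data["model"]} not found"
    # Single quotes inside the replacement field parse on all versions:
    print(f"Model info for {request_data['model']} not found")
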
backend/utils/last_activity.py CHANGED
@@ -1,9 +1,8 @@
1
  import os
2
  import json
3
  import logging
4
- from datetime import datetime
5
  from pathlib import Path
6
- from typing import Dict, Any, List, Tuple
7
  from huggingface_hub import HfApi
8
  from dotenv import load_dotenv
9
 
@@ -15,10 +14,7 @@ ROOT_DIR = BACKEND_DIR.parent
15
  load_dotenv(ROOT_DIR / ".env")
16
 
17
  # Configure logging
18
- logging.basicConfig(
19
- level=logging.INFO,
20
- format='%(message)s'
21
- )
22
  logger = logging.getLogger(__name__)
23
 
24
  # Initialize Hugging Face API
@@ -28,107 +24,117 @@ if not HF_TOKEN:
28
  api = HfApi(token=HF_TOKEN)
29
 
30
  # Default organization
31
- HF_ORGANIZATION = os.getenv('HF_ORGANIZATION', 'stacklok')
 
32
 
33
  def get_last_votes(limit: int = 5) -> List[Dict]:
34
  """Get the last votes from the votes dataset"""
35
  try:
36
  logger.info("\nFetching last votes...")
37
-
38
  # Download and read votes file
39
  logger.info("Downloading votes file...")
40
  votes_file = api.hf_hub_download(
41
  repo_id=f"{HF_ORGANIZATION}/votes",
42
  filename="votes_data.jsonl",
43
- repo_type="dataset"
44
  )
45
-
46
  logger.info("Reading votes file...")
47
  votes = []
48
- with open(votes_file, 'r') as f:
49
  for line in f:
50
  try:
51
  vote = json.loads(line)
52
  votes.append(vote)
53
  except json.JSONDecodeError:
54
  continue
55
-
56
  # Sort by timestamp and get last n votes
57
  logger.info("Sorting votes...")
58
- votes.sort(key=lambda x: x.get('timestamp', ''), reverse=True)
59
  last_votes = votes[:limit]
60
-
61
  logger.info(f"✓ Found {len(last_votes)} recent votes")
62
  return last_votes
63
-
64
  except Exception as e:
65
  logger.error(f"Error reading votes: {str(e)}")
66
  return []
67
 
 
68
  def get_last_models(limit: int = 5) -> List[Dict]:
69
  """Get the last models from the requests dataset using commit history"""
70
  try:
71
  logger.info("\nFetching last model submissions...")
72
-
73
  # Get commit history
74
  logger.info("Getting commit history...")
75
- commits = list(api.list_repo_commits(
76
- repo_id=f"{HF_ORGANIZATION}/requests",
77
- repo_type="dataset"
78
- ))
 
79
  logger.info(f"Found {len(commits)} commits")
80
-
81
  # Track processed files to avoid duplicates
82
  processed_files = set()
83
  models = []
84
-
85
  # Process commits until we have enough models
86
  for i, commit in enumerate(commits):
87
- logger.info(f"Processing commit {i+1}/{len(commits)} ({commit.created_at})")
88
-
 
 
89
  # Look at added/modified files in this commit
90
- files_to_process = [f for f in (commit.added + commit.modified) if f.endswith('.json')]
 
 
91
  if files_to_process:
92
  logger.info(f"Found {len(files_to_process)} JSON files in commit")
93
-
94
  for file in files_to_process:
95
  if file in processed_files:
96
  continue
97
-
98
  processed_files.add(file)
99
  logger.info(f"Downloading {file}...")
100
-
101
  try:
102
  # Download and read the file
103
  content = api.hf_hub_download(
104
  repo_id=f"{HF_ORGANIZATION}/requests",
105
  filename=file,
106
- repo_type="dataset"
107
  )
108
-
109
- with open(content, 'r') as f:
110
  model_data = json.load(f)
111
  models.append(model_data)
112
- logger.info(f"✓ Added model {model_data.get('model', 'Unknown')}")
113
-
 
 
114
  if len(models) >= limit:
115
  logger.info("Reached desired number of models")
116
  break
117
-
118
  except Exception as e:
119
  logger.error(f"Error reading file {file}: {str(e)}")
120
  continue
121
-
122
  if len(models) >= limit:
123
  break
124
-
125
  logger.info(f"✓ Found {len(models)} recent model submissions")
126
  return models
127
-
128
  except Exception as e:
129
  logger.error(f"Error reading models: {str(e)}")
130
  return []
131
 
 
132
  def main():
133
  """Display last activities from the leaderboard"""
134
  try:
@@ -142,7 +148,7 @@ def main():
142
  logger.info(f"Timestamp: {vote.get('timestamp')}")
143
  else:
144
  logger.info("No votes found")
145
-
146
  # Get last model submissions
147
  logger.info("\n=== Last Model Submissions ===")
148
  last_models = get_last_models()
@@ -151,14 +157,17 @@ def main():
151
  logger.info(f"\nModel: {model.get('model')}")
152
  logger.info(f"Submitter: {model.get('sender', 'Unknown')}")
153
  logger.info(f"Status: {model.get('status', 'Unknown')}")
154
- logger.info(f"Submission Time: {model.get('submitted_time', 'Unknown')}")
 
 
155
  logger.info(f"Precision: {model.get('precision', 'Unknown')}")
156
  logger.info(f"Weight Type: {model.get('weight_type', 'Unknown')}")
157
  else:
158
  logger.info("No models found")
159
-
160
  except Exception as e:
161
  logger.error(f"Global error: {str(e)}")
162
 
 
163
  if __name__ == "__main__":
164
- main()
 
1
  import os
2
  import json
3
  import logging
 
4
  from pathlib import Path
5
+ from typing import Dict, List
6
  from huggingface_hub import HfApi
7
  from dotenv import load_dotenv
8
 
 
14
  load_dotenv(ROOT_DIR / ".env")
15
 
16
  # Configure logging
17
+ logging.basicConfig(level=logging.INFO, format="%(message)s")
18
  logger = logging.getLogger(__name__)
19
 
20
  # Initialize Hugging Face API
 
24
  api = HfApi(token=HF_TOKEN)
25
 
26
  # Default organization
27
+ HF_ORGANIZATION = os.getenv("HF_ORGANIZATION", "stacklok")
28
+
29
 
30
  def get_last_votes(limit: int = 5) -> List[Dict]:
31
  """Get the last votes from the votes dataset"""
32
  try:
33
  logger.info("\nFetching last votes...")
34
+
35
  # Download and read votes file
36
  logger.info("Downloading votes file...")
37
  votes_file = api.hf_hub_download(
38
  repo_id=f"{HF_ORGANIZATION}/votes",
39
  filename="votes_data.jsonl",
40
+ repo_type="dataset",
41
  )
42
+
43
  logger.info("Reading votes file...")
44
  votes = []
45
+ with open(votes_file, "r") as f:
46
  for line in f:
47
  try:
48
  vote = json.loads(line)
49
  votes.append(vote)
50
  except json.JSONDecodeError:
51
  continue
52
+
53
  # Sort by timestamp and get last n votes
54
  logger.info("Sorting votes...")
55
+ votes.sort(key=lambda x: x.get("timestamp", ""), reverse=True)
56
  last_votes = votes[:limit]
57
+
58
  logger.info(f"✓ Found {len(last_votes)} recent votes")
59
  return last_votes
60
+
61
  except Exception as e:
62
  logger.error(f"Error reading votes: {str(e)}")
63
  return []
64
 
65
+
66
  def get_last_models(limit: int = 5) -> List[Dict]:
67
  """Get the last models from the requests dataset using commit history"""
68
  try:
69
  logger.info("\nFetching last model submissions...")
70
+
71
  # Get commit history
72
  logger.info("Getting commit history...")
73
+ commits = list(
74
+ api.list_repo_commits(
75
+ repo_id=f"{HF_ORGANIZATION}/requests", repo_type="dataset"
76
+ )
77
+ )
78
  logger.info(f"Found {len(commits)} commits")
79
+
80
  # Track processed files to avoid duplicates
81
  processed_files = set()
82
  models = []
83
+
84
  # Process commits until we have enough models
85
  for i, commit in enumerate(commits):
86
+ logger.info(
87
+ f"Processing commit {i + 1}/{len(commits)} ({commit.created_at})"
88
+ )
89
+
90
  # Look at added/modified files in this commit
91
+ files_to_process = [
92
+ f for f in (commit.added + commit.modified) if f.endswith(".json")
93
+ ]
94
  if files_to_process:
95
  logger.info(f"Found {len(files_to_process)} JSON files in commit")
96
+
97
  for file in files_to_process:
98
  if file in processed_files:
99
  continue
100
+
101
  processed_files.add(file)
102
  logger.info(f"Downloading {file}...")
103
+
104
  try:
105
  # Download and read the file
106
  content = api.hf_hub_download(
107
  repo_id=f"{HF_ORGANIZATION}/requests",
108
  filename=file,
109
+ repo_type="dataset",
110
  )
111
+
112
+ with open(content, "r") as f:
113
  model_data = json.load(f)
114
  models.append(model_data)
115
+ logger.info(
116
+ f"✓ Added model {model_data.get('model', 'Unknown')}"
117
+ )
118
+
119
  if len(models) >= limit:
120
  logger.info("Reached desired number of models")
121
  break
122
+
123
  except Exception as e:
124
  logger.error(f"Error reading file {file}: {str(e)}")
125
  continue
126
+
127
  if len(models) >= limit:
128
  break
129
+
130
  logger.info(f"✓ Found {len(models)} recent model submissions")
131
  return models
132
+
133
  except Exception as e:
134
  logger.error(f"Error reading models: {str(e)}")
135
  return []
136
 
137
+
138
  def main():
139
  """Display last activities from the leaderboard"""
140
  try:
 
148
  logger.info(f"Timestamp: {vote.get('timestamp')}")
149
  else:
150
  logger.info("No votes found")
151
+
152
  # Get last model submissions
153
  logger.info("\n=== Last Model Submissions ===")
154
  last_models = get_last_models()
 
157
  logger.info(f"\nModel: {model.get('model')}")
158
  logger.info(f"Submitter: {model.get('sender', 'Unknown')}")
159
  logger.info(f"Status: {model.get('status', 'Unknown')}")
160
+ logger.info(
161
+ f"Submission Time: {model.get('submitted_time', 'Unknown')}"
162
+ )
163
  logger.info(f"Precision: {model.get('precision', 'Unknown')}")
164
  logger.info(f"Weight Type: {model.get('weight_type', 'Unknown')}")
165
  else:
166
  logger.info("No models found")
167
+
168
  except Exception as e:
169
  logger.error(f"Global error: {str(e)}")
170
 
171
+
172
  if __name__ == "__main__":
173
+ main()
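
The sorting step in `get_last_votes` leans on two properties worth spelling out: ISO-8601 timestamps compare correctly as plain strings, and the "" default pushes records without a timestamp to the end of the newest-first ordering. A small sketch with made-up records:

    # Stand-in vote records; the real ones come from votes_data.jsonl.
    votes = [
        {"model": "org/a", "timestamp": "2025-01-05T10:00:00Z"},
        {"model": "org/b", "timestamp": "2025-01-07T09:30:00Z"},
        {"model": "org/c"},  # missing timestamp sorts last
    ]

    # Newest first: ISO-8601 strings sort lexicographically in time order,
    # so no datetime parsing is needed here.
    votes.sort(key=lambda v: v.get("timestamp", ""), reverse=True)
    print([v["model"] for v in votes[:2]])  # ['org/b', 'org/a']
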
backend/utils/sync_datasets_locally.py CHANGED
@@ -1,9 +1,8 @@
1
  import os
2
- import shutil
3
  import tempfile
4
  import logging
5
  from pathlib import Path
6
- from huggingface_hub import HfApi, snapshot_download, upload_folder, create_repo
7
  from dotenv import load_dotenv
8
 
9
  # Configure source and destination usernames
@@ -18,10 +17,7 @@ ROOT_DIR = BACKEND_DIR.parent
18
  load_dotenv(ROOT_DIR / ".env")
19
 
20
  # Configure logging
21
- logging.basicConfig(
22
- level=logging.INFO,
23
- format='%(message)s'
24
- )
25
  logger = logging.getLogger(__name__)
26
 
27
  # List of dataset names to sync
@@ -38,12 +34,17 @@ DATASETS = [
38
  (name, f"{SOURCE_USERNAME}/{name}", f"{DESTINATION_USERNAME}/{name}")
39
  for name in DATASET_NAMES
40
  ] + [
41
- (name, f"open-llm-leaderboard/official-providers", f"{DESTINATION_USERNAME}/official-providers")
42
- ]
43
 
44
  # Initialize Hugging Face API
45
  api = HfApi()
46
 
 
47
  def ensure_repo_exists(repo_id, token):
48
  """Ensure the repository exists, create it if it doesn't"""
49
  try:
@@ -51,23 +52,19 @@ def ensure_repo_exists(repo_id, token):
51
  logger.info(f"✓ Repository {repo_id} already exists")
52
  except Exception:
53
  logger.info(f"Creating repository {repo_id}...")
54
- create_repo(
55
- repo_id=repo_id,
56
- repo_type="dataset",
57
- token=token,
58
- private=True
59
- )
60
  logger.info(f"✓ Repository {repo_id} created")
61
 
 
62
  def process_dataset(dataset_info, token):
63
  """Process a single dataset"""
64
  name, source_dataset, destination_dataset = dataset_info
65
  try:
66
  logger.info(f"\n📥 Processing dataset: {name}")
67
-
68
  # Ensure destination repository exists
69
  ensure_repo_exists(destination_dataset, token)
70
-
71
  # Create a temporary directory for this dataset
72
  with tempfile.TemporaryDirectory() as temp_dir:
73
  try:
@@ -75,28 +72,28 @@ def process_dataset(dataset_info, token):
75
  logger.info(f"Listing files in {source_dataset}...")
76
  files = api.list_repo_files(source_dataset, repo_type="dataset")
77
  logger.info(f"Detected structure: {len(files)} files")
78
-
79
  # Download dataset
80
  logger.info(f"Downloading from {source_dataset}...")
81
  local_dir = snapshot_download(
82
  repo_id=source_dataset,
83
  repo_type="dataset",
84
  local_dir=temp_dir,
85
- token=token
86
  )
87
- logger.info(f"✓ Download complete")
88
-
89
  # Upload to destination while preserving structure
90
  logger.info(f"📤 Uploading to {destination_dataset}...")
91
  api.upload_folder(
92
  folder_path=local_dir,
93
  repo_id=destination_dataset,
94
  repo_type="dataset",
95
- token=token
96
  )
97
  logger.info(f"✅ {name} copied successfully!")
98
  return True
99
-
100
  except Exception as e:
101
  logger.error(f"❌ Error processing {name}: {str(e)}")
102
  return False
@@ -105,6 +102,7 @@ def process_dataset(dataset_info, token):
105
  logger.error(f"❌ Error for {name}: {str(e)}")
106
  return False
107
 
 
108
  def copy_datasets():
109
  try:
110
  logger.info("🔑 Checking authentication...")
@@ -112,21 +110,22 @@ def copy_datasets():
112
  token = os.getenv("HF_TOKEN")
113
  if not token:
114
  raise ValueError("HF_TOKEN not found in .env file")
115
-
116
  # Process datasets sequentially
117
  results = []
118
  for dataset_info in DATASETS:
119
  success = process_dataset(dataset_info, token)
120
  results.append((dataset_info[0], success))
121
-
122
  # Print final summary
123
  logger.info("\n📊 Final summary:")
124
  for dataset, success in results:
125
  status = "✅ Success" if success else "❌ Failure"
126
  logger.info(f"{dataset}: {status}")
127
-
128
  except Exception as e:
129
  logger.error(f"❌ Global error: {str(e)}")
130
 
 
131
  if __name__ == "__main__":
132
- copy_datasets()
 
1
  import os
 
2
  import tempfile
3
  import logging
4
  from pathlib import Path
5
+ from huggingface_hub import HfApi, snapshot_download, create_repo
6
  from dotenv import load_dotenv
7
 
8
  # Configure source and destination usernames
 
17
  load_dotenv(ROOT_DIR / ".env")
18
 
19
  # Configure logging
20
+ logging.basicConfig(level=logging.INFO, format="%(message)s")
 
  logger = logging.getLogger(__name__)
22
 
23
  # List of dataset names to sync
 
34
  (name, f"{SOURCE_USERNAME}/{name}", f"{DESTINATION_USERNAME}/{name}")
35
  for name in DATASET_NAMES
36
  ] + [
37
+ (
38
+ "official-providers",
39
+ "open-llm-leaderboard/official-providers",
40
+ f"{DESTINATION_USERNAME}/official-providers",
41
+ )
42
+ ]
43
 
44
  # Initialize Hugging Face API
45
  api = HfApi()
46
 
47
+
48
  def ensure_repo_exists(repo_id, token):
49
  """Ensure the repository exists, create it if it doesn't"""
50
  try:
 
52
  logger.info(f"✓ Repository {repo_id} already exists")
53
  except Exception:
54
  logger.info(f"Creating repository {repo_id}...")
55
+ create_repo(repo_id=repo_id, repo_type="dataset", token=token, private=True)
56
  logger.info(f"✓ Repository {repo_id} created")
57
 
58
+
59
  def process_dataset(dataset_info, token):
60
  """Process a single dataset"""
61
  name, source_dataset, destination_dataset = dataset_info
62
  try:
63
  logger.info(f"\n📥 Processing dataset: {name}")
64
+
65
  # Ensure destination repository exists
66
  ensure_repo_exists(destination_dataset, token)
67
+
68
  # Create a temporary directory for this dataset
69
  with tempfile.TemporaryDirectory() as temp_dir:
70
  try:
 
72
  logger.info(f"Listing files in {source_dataset}...")
73
  files = api.list_repo_files(source_dataset, repo_type="dataset")
74
  logger.info(f"Detected structure: {len(files)} files")
75
+
76
  # Download dataset
77
  logger.info(f"Downloading from {source_dataset}...")
78
  local_dir = snapshot_download(
79
  repo_id=source_dataset,
80
  repo_type="dataset",
81
  local_dir=temp_dir,
82
+ token=token,
83
  )
84
+ logger.info("✓ Download complete")
85
+
86
  # Upload to destination while preserving structure
87
  logger.info(f"📤 Uploading to {destination_dataset}...")
88
  api.upload_folder(
89
  folder_path=local_dir,
90
  repo_id=destination_dataset,
91
  repo_type="dataset",
92
+ token=token,
93
  )
94
  logger.info(f"✅ {name} copied successfully!")
95
  return True
96
+
97
  except Exception as e:
98
  logger.error(f"❌ Error processing {name}: {str(e)}")
99
  return False
 
102
  logger.error(f"❌ Error for {name}: {str(e)}")
103
  return False
104
 
105
+
106
  def copy_datasets():
107
  try:
108
  logger.info("🔑 Checking authentication...")
 
110
  token = os.getenv("HF_TOKEN")
111
  if not token:
112
  raise ValueError("HF_TOKEN not found in .env file")
113
+
114
  # Process datasets sequentially
115
  results = []
116
  for dataset_info in DATASETS:
117
  success = process_dataset(dataset_info, token)
118
  results.append((dataset_info[0], success))
119
+
120
  # Print final summary
121
  logger.info("\n📊 Final summary:")
122
  for dataset, success in results:
123
  status = "✅ Success" if success else "❌ Failure"
124
  logger.info(f"{dataset}: {status}")
125
+
126
  except Exception as e:
127
  logger.error(f"❌ Global error: {str(e)}")
128
 
129
+
130
  if __name__ == "__main__":
131
+ copy_datasets()
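
The rewritten DATASETS tail is a bug fix, not just formatting: in Python 3 a comprehension's loop variable is scoped to the comprehension itself, so the removed trailing entry that referenced `name` after the comprehension closed would raise a NameError at import time (unless an unrelated module-level `name` happened to exist). A minimal demonstration:

    names = ["requests", "votes"]
    pairs = [(n, f"org/{n}") for n in names]

    # The comprehension variable does not leak in Python 3:
    #   print(n)  # NameError: name 'n' is not defined
    # Hence the fix spells out the extra entry explicitly:
    pairs += [("official-providers", "open-llm-leaderboard/official-providers")]
    print(pairs)
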
docker-compose.yml CHANGED
@@ -30,4 +30,4 @@ services:
30
  - PORT=${FRONTEND_PORT:-7860}
31
  command: npm start
32
  stdin_open: true
33
- tty: true
 
30
  - PORT=${FRONTEND_PORT:-7860}
31
  command: npm start
32
  stdin_open: true
33
+ tty: true
frontend/Dockerfile.dev CHANGED
@@ -12,4 +12,4 @@ COPY package*.json ./
12
  RUN npm install
13
 
14
  # Volume will be mounted here, no need for COPY
15
- CMD ["npm", "start"]
 
12
  RUN npm install
13
 
14
  # Volume will be mounted here, no need for COPY
15
+ CMD ["npm", "start"]
frontend/src/components/Logo/HFLogo.js CHANGED
@@ -16,4 +16,4 @@ const HFLogo = () => (
16
  </svg>
17
  );
18
 
19
- export default HFLogo;
 
16
  </svg>
17
  );
18
 
19
+ export default HFLogo;
frontend/src/components/shared/CodeBlock.js CHANGED
@@ -34,4 +34,4 @@ const CodeBlock = ({ code }) => (
34
  </Box>
35
  );
36
 
37
- export default CodeBlock;
 
34
  </Box>
35
  );
36
 
37
+ export default CodeBlock;
frontend/src/config/auth.js CHANGED
@@ -4,4 +4,4 @@ export const HF_CONFIG = {
4
  SCOPE: "openid profile",
5
  PROD_URL: "https://open-llm-leaderboard-open-llm-leaderboard.hf.space",
6
  DEV_URL: "http://localhost:7860"
7
- };
 
4
  SCOPE: "openid profile",
5
  PROD_URL: "https://open-llm-leaderboard-open-llm-leaderboard.hf.space",
6
  DEV_URL: "http://localhost:7860"
7
+ };
frontend/src/hooks/useThemeMode.js CHANGED
@@ -15,7 +15,7 @@ export const useThemeMode = () => {
15
  const handleChange = (e) => {
16
  setMode(e.matches ? 'dark' : 'light');
17
  };
18
-
19
  mediaQuery.addEventListener('change', handleChange);
20
  return () => mediaQuery.removeEventListener('change', handleChange);
21
  }, []);
@@ -25,4 +25,4 @@ export const useThemeMode = () => {
25
  };
26
 
27
  return { mode, toggleTheme };
28
- };
 
15
  const handleChange = (e) => {
16
  setMode(e.matches ? 'dark' : 'light');
17
  };
18
+
19
  mediaQuery.addEventListener('change', handleChange);
20
  return () => mediaQuery.removeEventListener('change', handleChange);
21
  }, []);
 
25
  };
26
 
27
  return { mode, toggleTheme };
28
+ };
frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/modelTypes.js CHANGED
@@ -48,7 +48,7 @@ export const MODEL_TYPES = {
48
 
49
  export const getModelTypeIcon = (type) => {
50
  const cleanType = type.toLowerCase().trim();
51
- const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
52
  cleanType.includes(key)
53
  );
54
  return matchedType ? matchedType[1].icon : '❓';
@@ -56,7 +56,7 @@ export const getModelTypeIcon = (type) => {
56
 
57
  export const getModelTypeLabel = (type) => {
58
  const cleanType = type.toLowerCase().trim();
59
- const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
60
  cleanType.includes(key)
61
  );
62
  return matchedType ? matchedType[1].label : type;
@@ -64,7 +64,7 @@ export const getModelTypeLabel = (type) => {
64
 
65
  export const getModelTypeDescription = (type) => {
66
  const cleanType = type.toLowerCase().trim();
67
- const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
68
  cleanType.includes(key)
69
  );
70
  return matchedType ? matchedType[1].description : 'Unknown model type';
@@ -72,8 +72,8 @@ export const getModelTypeDescription = (type) => {
72
 
73
  export const getModelTypeOrder = (type) => {
74
  const cleanType = type.toLowerCase().trim();
75
- const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
76
  cleanType.includes(key)
77
  );
78
  return matchedType ? matchedType[1].order : Infinity;
79
- };
 
48
 
49
  export const getModelTypeIcon = (type) => {
50
  const cleanType = type.toLowerCase().trim();
51
+ const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
52
  cleanType.includes(key)
53
  );
54
  return matchedType ? matchedType[1].icon : '❓';
 
56
 
57
  export const getModelTypeLabel = (type) => {
58
  const cleanType = type.toLowerCase().trim();
59
+ const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
60
  cleanType.includes(key)
61
  );
62
  return matchedType ? matchedType[1].label : type;
 
64
 
65
  export const getModelTypeDescription = (type) => {
66
  const cleanType = type.toLowerCase().trim();
67
+ const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
68
  cleanType.includes(key)
69
  );
70
  return matchedType ? matchedType[1].description : 'Unknown model type';
 
72
 
73
  export const getModelTypeOrder = (type) => {
74
  const cleanType = type.toLowerCase().trim();
75
+ const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
76
  cleanType.includes(key)
77
  );
78
  return matchedType ? matchedType[1].order : Infinity;
79
+ };
frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/quickFilters.js CHANGED
@@ -48,4 +48,4 @@ export const QUICK_FILTER_PRESETS = [
48
  selectedBooleanFilters: ['is_official_provider']
49
  }
50
  }
51
- ];
 
48
  selectedBooleanFilters: ['is_official_provider']
49
  }
50
  }
51
+ ];
frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useBatchedState.js CHANGED
@@ -28,4 +28,4 @@ export const useBatchedState = (initialState, options = {}) => {
28
  }, [batchDelay, useTransitions]);
29
 
30
  return [state, setBatchedState, isPending];
31
- };
 
28
  }, [batchDelay, useTransitions]);
29
 
30
  return [state, setBatchedState, isPending];
31
+ };