Daniel Kantor committed
Commit: 58582d3
1 Parent(s): 39651ed

set up ruff for formatting
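The Ruff configuration itself is not part of this diff, so the exact settings and commands used are not visible; running `ruff format backend/` with a `[tool.ruff]` table in pyproject.toml is the usual arrangement and is assumed here. The changes below are dominated by what the formatter does mechanically: two blank lines before top-level definitions, double quotes, trailing commas in multi-line literals and parameter lists, re-wrapping long calls to fit the line length, and stripped trailing whitespace. The removed unused imports (uvicorn, sys, datetime, HfApi, Optional, Path) look like Ruff's lint autofix rather than the formatter, which is an inference, not something the commit states. A minimal sketch of the resulting style, using made-up code rather than code from this repository:

# Illustrative sketch only; not code from this repository.
# Roughly the shape `ruff format` settles on: double quotes, trailing commas
# in multi-line literals, two blank lines between top-level definitions,
# and calls wrapped to the configured line length.
from typing import Any, Dict


def build_submission(payload: Dict[str, Any], user: str = "anon") -> Dict[str, Any]:
    """Return a normalized submission record."""
    return {
        "user": user,
        "payload": payload,
        "source": "api",  # trailing comma kept by the formatter
    }


if __name__ == "__main__":
    print(build_submission({"model_id": "org/model-a"}, user="demo"))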
Files changed:
- .gitignore +0 -1
- Dockerfile +1 -1
- backend/Dockerfile.dev +1 -1
- backend/app/api/dependencies.py +4 -2
- backend/app/api/endpoints/leaderboard.py +7 -2
- backend/app/api/endpoints/models.py +27 -22
- backend/app/api/endpoints/votes.py +34 -34
- backend/app/api/router.py +1 -1
- backend/app/asgi.py +20 -11
- backend/app/config/base.py +4 -2
- backend/app/config/hf_config.py +4 -4
- backend/app/config/logging_config.py +11 -7
- backend/app/core/cache.py +23 -21
- backend/app/core/fastapi_cache.py +15 -13
- backend/app/core/formatting.py +22 -21
- backend/app/main.py +4 -3
- backend/app/services/hf_service.py +10 -4
- backend/app/services/leaderboard.py +73 -50
- backend/app/services/models.py +242 -166
- backend/app/services/rate_limiter.py +0 -72
- backend/app/services/votes.py +161 -102
- backend/app/utils/logging.py +1 -1
- backend/app/utils/model_validation.py +118 -71
- backend/utils/analyze_prod_datasets.py +52 -59
- backend/utils/analyze_prod_models.py +40 -32
- backend/utils/fix_wrong_model_size.py +34 -30
- backend/utils/last_activity.py +50 -41
- backend/utils/sync_datasets_locally.py +25 -26
- docker-compose.yml +1 -1
- frontend/Dockerfile.dev +1 -1
- frontend/src/components/Logo/HFLogo.js +1 -1
- frontend/src/components/shared/CodeBlock.js +1 -1
- frontend/src/config/auth.js +1 -1
- frontend/src/hooks/useThemeMode.js +2 -2
- frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/modelTypes.js +5 -5
- frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/quickFilters.js +1 -1
- frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useBatchedState.js +1 -1
.gitignore
CHANGED
@@ -42,4 +42,3 @@ package-lock.json
 .env
 .env.*
 !.env.example
-
Dockerfile
CHANGED
@@ -59,4 +59,4 @@ USER user
 EXPOSE 7860

 # Start both servers with wait-for
-CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! nc -z localhost 7861; do sleep 1; done && cd frontend && npm run serve"]
+CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! nc -z localhost 7861; do sleep 1; done && cd frontend && npm run serve"]
backend/Dockerfile.dev
CHANGED
@@ -22,4 +22,4 @@ ENV PYTHONUNBUFFERED=1
 ENV LOG_LEVEL=INFO

 # In dev, mount volume directly
-CMD ["uvicorn", "app.asgi:app", "--host", "0.0.0.0", "--port", "7860", "--reload", "--log-level", "warning", "--no-access-log"]
+CMD ["uvicorn", "app.asgi:app", "--host", "0.0.0.0", "--port", "7860", "--reload", "--log-level", "warning", "--no-access-log"]
backend/app/api/dependencies.py
CHANGED
@@ -1,4 +1,4 @@
-from fastapi import
+from fastapi import HTTPException
 import logging
 from app.services.models import ModelService
 from app.services.votes import VoteService
@@ -9,6 +9,7 @@ logger = logging.getLogger(__name__)
 model_service = ModelService()
 vote_service = VoteService()

+
 async def get_model_service() -> ModelService:
     """Dependency to get ModelService instance"""
     try:
@@ -21,6 +22,7 @@ async def get_model_service() -> ModelService:
         logger.error(LogFormatter.error(error_msg, e))
         raise HTTPException(status_code=500, detail=str(e))

+
 async def get_vote_service() -> VoteService:
     """Dependency to get VoteService instance"""
     try:
@@ -31,4 +33,4 @@ async def get_vote_service() -> VoteService:
     except Exception as e:
         error_msg = "Failed to initialize vote service"
         logger.error(LogFormatter.error(error_msg, e))
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail=str(e))
backend/app/api/endpoints/leaderboard.py
CHANGED
@@ -9,6 +9,7 @@ logger = logging.getLogger(__name__)
 router = APIRouter()
 leaderboard_service = LeaderboardService()

+
 def leaderboard_key_builder(func, namespace: str = "leaderboard", **kwargs):
     """Build cache key for leaderboard data"""
     key_type = "raw" if func.__name__ == "get_leaderboard" else "formatted"
@@ -16,6 +17,7 @@ def leaderboard_key_builder(func, namespace: str = "leaderboard", **kwargs):
     logger.debug(LogFormatter.info(f"Built leaderboard cache key: {key}"))
     return key

+
 @router.get("")
 @cached(expire=300, key_builder=leaderboard_key_builder)
 async def get_leaderboard() -> List[Dict[str, Any]]:
@@ -32,6 +34,7 @@ async def get_leaderboard() -> List[Dict[str, Any]]:
         logger.error(LogFormatter.error("Failed to fetch raw leaderboard data", e))
         raise

+
 @router.get("/formatted")
 @cached(expire=300, key_builder=leaderboard_key_builder)
 async def get_formatted_leaderboard() -> List[Dict[str, Any]]:
@@ -45,5 +48,7 @@ async def get_formatted_leaderboard() -> List[Dict[str, Any]]:
         logger.info(LogFormatter.success(f"Retrieved {len(data)} formatted entries"))
         return data
     except Exception as e:
-        logger.error(
-
+        logger.error(
+            LogFormatter.error("Failed to fetch formatted leaderboard data", e)
+        )
+        raise
backend/app/api/endpoints/models.py
CHANGED
@@ -9,18 +9,17 @@ from app.core.formatting import LogFormatter
 logger = logging.getLogger(__name__)
 router = APIRouter(tags=["models"])

+
 @router.get("/status")
 @cached(expire=300)
 async def get_models_status(
-    model_service: ModelService = Depends(get_model_service)
+    model_service: ModelService = Depends(get_model_service),
 ) -> Dict[str, List[Dict[str, Any]]]:
     """Get all models grouped by status"""
     try:
         logger.info(LogFormatter.info("Fetching status for all models"))
         result = await model_service.get_models()
-        stats = {
-            status: len(models) for status, models in result.items()
-        }
+        stats = {status: len(models) for status, models in result.items()}
         for line in LogFormatter.stats(stats, "Models by Status"):
             logger.info(line)
         return result
@@ -28,10 +27,11 @@ async def get_models_status(
         logger.error(LogFormatter.error("Failed to get models status", e))
         raise HTTPException(status_code=500, detail=str(e))

+
 @router.get("/pending")
 @cached(expire=60)
 async def get_pending_models(
-    model_service: ModelService = Depends(get_model_service)
+    model_service: ModelService = Depends(get_model_service),
 ) -> List[Dict[str, Any]]:
     """Get all models waiting for evaluation"""
     try:
@@ -44,35 +44,35 @@ async def get_pending_models(
         logger.error(LogFormatter.error("Failed to get pending models", e))
         raise HTTPException(status_code=500, detail=str(e))

+
 @router.post("/submit")
 async def submit_model(
-    model_data: Dict[str, Any],
-    model_service: ModelService = Depends(get_model_service)
+    model_data: Dict[str, Any], model_service: ModelService = Depends(get_model_service)
 ) -> Dict[str, Any]:
     try:
         logger.info(LogFormatter.section("MODEL SUBMISSION"))
-
-        user_id = model_data.pop(
+
+        user_id = model_data.pop("user_id", None)
         if not user_id:
             error_msg = "user_id is required"
             logger.error(LogFormatter.error("Validation failed", error_msg))
             raise ValueError(error_msg)
-
+
         # Log submission details
         submission_info = {
             "Model_ID": model_data.get("model_id"),
             "User": user_id,
             "Base_Model": model_data.get("base_model"),
             "Precision": model_data.get("precision"),
-            "Model_Type": model_data.get("model_type")
+            "Model_Type": model_data.get("model_type"),
         }
         for line in LogFormatter.tree(submission_info, "Submission Details"):
             logger.info(line)
-
+
         result = await model_service.submit_model(model_data, user_id)
         logger.info(LogFormatter.success("Model submitted successfully"))
         return result
-
+
     except ValueError as e:
         logger.error(LogFormatter.error("Invalid submission data", e))
         raise HTTPException(status_code=400, detail=str(e))
@@ -80,37 +80,42 @@ async def submit_model(
         logger.error(LogFormatter.error("Submission failed", e))
         raise HTTPException(status_code=500, detail=str(e))

+
 @router.get("/organization/{organization}/submissions")
 async def get_organization_submissions(
     organization: str,
     days: int = Query(default=7, ge=1, le=30),
-    model_service: ModelService = Depends(get_model_service)
+    model_service: ModelService = Depends(get_model_service),
 ) -> List[Dict[str, Any]]:
     """Get all submissions from an organization in the last n days"""
     try:
-        submissions = await model_service.get_organization_submissions(
+        submissions = await model_service.get_organization_submissions(
+            organization, days
+        )
         return submissions
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

+
 @router.get("/{model_id}/status")
 async def get_model_status(
-    model_id: str,
-    model_service: ModelService = Depends(get_model_service)
+    model_id: str, model_service: ModelService = Depends(get_model_service)
 ) -> Dict[str, Any]:
     try:
         logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
         status = await model_service.get_model_status(model_id)
-
+
         if status["status"] != "not_found":
             logger.info(LogFormatter.success("Status found"))
             for line in LogFormatter.tree(status, "Model Status"):
                 logger.info(line)
         else:
-            logger.warning(
-
+            logger.warning(
+                LogFormatter.warning(f"No status found for model: {model_id}")
+            )
+
         return status
-
+
     except Exception as e:
         logger.error(LogFormatter.error("Failed to get model status", e))
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail=str(e))
backend/app/api/endpoints/votes.py
CHANGED
@@ -1,10 +1,9 @@
-from fastapi import APIRouter, HTTPException, Query,
+from fastapi import APIRouter, HTTPException, Query, Response
 from typing import Dict, Any, List
 from app.services.votes import VoteService
 from app.core.fastapi_cache import cached, build_cache_key, invalidate_cache_key
 import logging
 from app.core.formatting import LogFormatter
-from datetime import datetime, timezone

 logger = logging.getLogger(__name__)
 router = APIRouter()
@@ -12,28 +11,31 @@ vote_service = VoteService()

 CACHE_TTL = 30  # 30 seconds cache

+
 def model_votes_key_builder(func, namespace: str = "model_votes", **kwargs):
     """Build cache key for model votes"""
-    provider = kwargs.get(
-    model = kwargs.get(
+    provider = kwargs.get("provider")
+    model = kwargs.get("model")
     key = build_cache_key(namespace, provider, model)
     logger.debug(LogFormatter.info(f"Built model votes cache key: {key}"))
     return key

+
 def user_votes_key_builder(func, namespace: str = "user_votes", **kwargs):
     """Build cache key for user votes"""
-    user_id = kwargs.get(
+    user_id = kwargs.get("user_id")
     key = build_cache_key(namespace, user_id)
     logger.debug(LogFormatter.info(f"Built user votes cache key: {key}"))
     return key

+
 @router.post("/{model_id:path}")
 async def add_vote(
     response: Response,
     model_id: str,
     vote_type: str = Query(..., description="Type of vote (up/down)"),
     user_id: str = Query(..., description="HuggingFace username"),
-    vote_data: Dict[str, Any] = None
+    vote_data: Dict[str, Any] = None,
 ) -> Dict[str, Any]:
     try:
         logger.info(LogFormatter.section("ADDING VOTE"))
@@ -41,50 +43,46 @@ async def add_vote(
             "Model": model_id,
             "User": user_id,
             "Type": vote_type,
-            "Config": vote_data or {}
+            "Config": vote_data or {},
         }
         for line in LogFormatter.tree(stats, "Vote Details"):
             logger.info(line)
-
+
         await vote_service.initialize()
         result = await vote_service.add_vote(model_id, user_id, vote_type, vote_data)
-
+
         # Invalidate affected caches
         try:
             logger.info(LogFormatter.subsection("CACHE INVALIDATION"))
-            provider, model = model_id.split(
-
+            provider, model = model_id.split("/", 1)
+
             # Build and invalidate cache keys
             model_cache_key = build_cache_key("model_votes", provider, model)
             user_cache_key = build_cache_key("user_votes", user_id)
-
+
             await invalidate_cache_key(model_cache_key)
             await invalidate_cache_key(user_cache_key)
-
-            cache_stats = {
-                "Model_Cache": model_cache_key,
-                "User_Cache": user_cache_key
-            }
+
+            cache_stats = {"Model_Cache": model_cache_key, "User_Cache": user_cache_key}
             for line in LogFormatter.tree(cache_stats, "Invalidated Caches"):
                 logger.info(line)
-
+
         except Exception as e:
             logger.error(LogFormatter.error("Failed to invalidate cache", e))
-
+
         # Add cache control headers
         response.headers["Cache-Control"] = "no-cache"
-
+
         return result
     except Exception as e:
         logger.error(LogFormatter.error("Failed to add vote", e))
         raise HTTPException(status_code=400, detail=str(e))

+
 @router.get("/model/{provider}/{model}")
 @cached(expire=CACHE_TTL, key_builder=model_votes_key_builder)
 async def get_model_votes(
-    response: Response,
-    provider: str,
-    model: str
+    response: Response, provider: str, model: str
 ) -> Dict[str, Any]:
     """Get all votes for a specific model"""
     try:
@@ -92,35 +90,37 @@ async def get_model_votes(
         await vote_service.initialize()
         model_id = f"{provider}/{model}"
         result = await vote_service.get_model_votes(model_id)
-
+
         # Add cache control headers
         response.headers["Cache-Control"] = f"max-age={CACHE_TTL}"
-        response.headers["Last-Modified"] = vote_service._last_sync.strftime(
-
+        response.headers["Last-Modified"] = vote_service._last_sync.strftime(
+            "%a, %d %b %Y %H:%M:%S GMT"
+        )
+
         logger.info(LogFormatter.success(f"Found {result.get('total_votes', 0)} votes"))
         return result
     except Exception as e:
         logger.error(LogFormatter.error("Failed to get model votes", e))
         raise HTTPException(status_code=400, detail=str(e))

+
 @router.get("/user/{user_id}")
 @cached(expire=CACHE_TTL, key_builder=user_votes_key_builder)
-async def get_user_votes(
-    response: Response,
-    user_id: str
-) -> List[Dict[str, Any]]:
+async def get_user_votes(response: Response, user_id: str) -> List[Dict[str, Any]]:
     """Get all votes from a specific user"""
     try:
         logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
         await vote_service.initialize()
         votes = await vote_service.get_user_votes(user_id)
-
+
         # Add cache control headers
         response.headers["Cache-Control"] = f"max-age={CACHE_TTL}"
-        response.headers["Last-Modified"] = vote_service._last_sync.strftime(
-
+        response.headers["Last-Modified"] = vote_service._last_sync.strftime(
+            "%a, %d %b %Y %H:%M:%S GMT"
+        )
+
         logger.info(LogFormatter.success(f"Found {len(votes)} votes"))
         return votes
     except Exception as e:
         logger.error(LogFormatter.error("Failed to get user votes", e))
-        raise HTTPException(status_code=400, detail=str(e))
+        raise HTTPException(status_code=400, detail=str(e))
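The invalidation in add_vote above only works because it rebuilds exactly the same strings that the @cached key builders produce for get_model_votes and get_user_votes. A small sketch of that invariant, reusing the build_cache_key logic defined in app/core/fastapi_cache.py (the provider and model values are made up):

# Same joining logic as app.core.fastapi_cache.build_cache_key.
def build_cache_key(*args) -> str:
    return ":".join(str(arg) for arg in args if arg is not None)


# Key under which a cached get_model_votes(provider="org", model="model-a") lives...
read_key = build_cache_key("model_votes", "org", "model-a")

# ...and the key add_vote rebuilds after splitting model_id="org/model-a" once on "/".
provider, model = "org/model-a".split("/", 1)
write_key = build_cache_key("model_votes", provider, model)

assert read_key == write_key == "model_votes:org:model-a"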
backend/app/api/router.py
CHANGED
@@ -6,4 +6,4 @@ router = APIRouter()

 router.include_router(leaderboard.router, prefix="/leaderboard", tags=["leaderboard"])
 router.include_router(votes.router, prefix="/votes", tags=["votes"])
-router.include_router(models.router, prefix="/models", tags=["models"])
+router.include_router(models.router, prefix="/models", tags=["models"])
backend/app/asgi.py
CHANGED
@@ -1,14 +1,12 @@
 """
 ASGI entry point for the Open LLM Leaderboard API.
 """
-
-import uvicorn
+
 import logging
 import logging.config
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.middleware.gzip import GZipMiddleware
-import sys

 from app.api.router import router
 from app.core.fastapi_cache import setup_cache
@@ -51,12 +49,12 @@ LOGGING_CONFIG = {
             "handlers": ["default"],
             "level": "WARNING",
             "propagate": False,
-        }
+        },
     },
     "root": {
         "handlers": ["default"],
         "level": "WARNING",
-    }
+    },
 }

 # Apply logging configuration
@@ -85,22 +83,33 @@ app.add_middleware(GZipMiddleware, minimum_size=500)
 # Include API router
 app.include_router(router, prefix="/api")

+
 @app.on_event("startup")
 async def startup_event():
     """Initialize services on startup"""
     logger.info("\n")
     logger.info(LogFormatter.section("APPLICATION STARTUP"))
-
+
     # Log HF configuration
     logger.info(LogFormatter.section("HUGGING FACE CONFIGURATION"))
     logger.info(LogFormatter.info(f"Organization: {hf_config.HF_ORGANIZATION}"))
-    logger.info(
-
+    logger.info(
+        LogFormatter.info(
+            f"Token Status: {'Present' if hf_config.HF_TOKEN else 'Missing'}"
+        )
+    )
+    logger.info(LogFormatter.info("Using repositories:"))
     logger.info(LogFormatter.info(f" - Queue: {hf_config.QUEUE_REPO}"))
     logger.info(LogFormatter.info(f" - Aggregated: {hf_config.AGGREGATED_REPO}"))
     logger.info(LogFormatter.info(f" - Votes: {hf_config.VOTES_REPO}"))
-    logger.info(
-
+    logger.info(
+        LogFormatter.info(
+            f" - Official Providers: {hf_config.OFFICIAL_PROVIDERS_REPO}"
+        )
+    )
+
     # Setup cache
     setup_cache()
-    logger.info(
+    logger.info(
+        LogFormatter.success("FastAPI Cache initialized with in-memory backend")
+    )
backend/app/config/base.py
CHANGED
@@ -8,7 +8,9 @@ WORKERS = 4
 RELOAD = True if os.environ.get("ENVIRONMENT") == "development" else False

 # CORS configuration
-ORIGINS =
+ORIGINS = (
+    ["http://localhost:3000"] if os.getenv("ENVIRONMENT") == "development" else ["*"]
+)

 # Cache configuration
 CACHE_TTL = int(os.environ.get("CACHE_TTL", 300))  # 5 minutes default
@@ -23,7 +25,7 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
 HF_ORGANIZATION = "stacklok"
 API = {
     "INFERENCE": "https://api-inference.huggingface.co/models",
-    "HUB": "https://huggingface.co"
+    "HUB": "https://huggingface.co",
 }

 # Cache paths
backend/app/config/hf_config.py
CHANGED
@@ -1,8 +1,6 @@
 import os
 import logging
-from typing import Optional
 from huggingface_hub import HfApi
-from pathlib import Path
 from app.core.cache import cache_config

 logger = logging.getLogger(__name__)
@@ -13,7 +11,9 @@ HF_ORGANIZATION = "stacklok"
 # Get HF token directly from environment
 HF_TOKEN = os.environ.get("HF_TOKEN")
 if not HF_TOKEN:
-    logger.warning(
+    logger.warning(
+        "HF_TOKEN not found in environment variables. Some features may be limited."
+    )

 # Initialize HF API
 API = HfApi(token=HF_TOKEN)
@@ -22,7 +22,7 @@ API = HfApi(token=HF_TOKEN)
 QUEUE_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-requests"
 AGGREGATED_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-contents"
 VOTES_REPO = f"{HF_ORGANIZATION}/llm-security-leaderboard-votes"
-OFFICIAL_PROVIDERS_REPO =
+OFFICIAL_PROVIDERS_REPO = "open-llm-leaderboard/official-providers"

 logger.info(f"QUEUE_REPO: {QUEUE_REPO}")
 logger.info(f"AGGREGATED_REPO: {AGGREGATED_REPO}")
backend/app/config/logging_config.py
CHANGED
@@ -1,11 +1,12 @@
 import logging
-import sys
 from tqdm import tqdm

+
 def get_tqdm_handler():
     """
     Creates a special handler for tqdm that doesn't interfere with other logs.
     """
+
     class TqdmLoggingHandler(logging.Handler):
         def emit(self, record):
             try:
@@ -17,22 +18,25 @@ def get_tqdm_handler():

     return TqdmLoggingHandler()

+
 def setup_service_logger(service_name: str) -> logging.Logger:
     """
     Configure a specific logger for a given service.
     """
     logger = logging.getLogger(f"app.services.{service_name}")
-
+
     # If the logger already has handlers, don't reconfigure it
     if logger.handlers:
         return logger
-
+
     # Add tqdm handler for this service
     tqdm_handler = get_tqdm_handler()
-    tqdm_handler.setFormatter(
+    tqdm_handler.setFormatter(
+        logging.Formatter("%(name)s - %(levelname)s - %(message)s")
+    )
     logger.addHandler(tqdm_handler)
-
+
     # Don't propagate logs to parent loggers
     logger.propagate = False
-
-    return logger
+
+    return logger
backend/app/core/cache.py
CHANGED
@@ -10,11 +10,12 @@ from app.config.base import (
     MODELS_CACHE,
     VOTES_CACHE,
     EVAL_CACHE,
-    CACHE_TTL
+    CACHE_TTL,
 )

 logger = logging.getLogger(__name__)

+
 class CacheConfig:
     def __init__(self):
         # Get cache paths from config
@@ -23,59 +24,60 @@ class CacheConfig:
         self.models_cache = MODELS_CACHE
         self.votes_cache = VOTES_CACHE
         self.eval_cache = EVAL_CACHE
-
+
         # Specific files
         self.votes_file = self.votes_cache / "votes_data.jsonl"
         self.eval_requests_file = self.eval_cache / "eval_requests.jsonl"
-
+
         # Cache TTL
         self.cache_ttl = timedelta(seconds=CACHE_TTL)
-
+
         self._initialize_cache_dirs()
         self._setup_environment()
-
+
     def _initialize_cache_dirs(self):
         """Initialize all necessary cache directories"""
         try:
             logger.info(LogFormatter.section("CACHE INITIALIZATION"))
-
+
             cache_dirs = {
                 "Root": self.cache_root,
                 "Datasets": self.datasets_cache,
                 "Models": self.models_cache,
                 "Votes": self.votes_cache,
-                "Eval": self.eval_cache
+                "Eval": self.eval_cache,
             }
-
+
             for name, cache_dir in cache_dirs.items():
                 cache_dir.mkdir(parents=True, exist_ok=True)
-                logger.info(
-
+                logger.info(
+                    LogFormatter.success(f"{name} cache directory: {cache_dir}")
+                )
+
         except Exception as e:
             logger.error(LogFormatter.error("Failed to create cache directories", e))
             raise
-
+
     def _setup_environment(self):
         """Configure HuggingFace environment variables"""
         logger.info(LogFormatter.subsection("ENVIRONMENT SETUP"))

         env_vars = {
             "HF_HOME": str(self.cache_root),
-            "HF_DATASETS_CACHE": str(self.datasets_cache)
+            "HF_DATASETS_CACHE": str(self.datasets_cache),
         }

         for var, value in env_vars.items():
             os.environ[var] = value
             logger.info(LogFormatter.info(f"Set {var}={value}"))

-
     def get_cache_path(self, cache_type: str) -> Path:
         """Returns the path for a specific cache type"""
         cache_paths = {
             "datasets": self.datasets_cache,
             "models": self.models_cache,
             "votes": self.votes_cache,
-            "eval": self.eval_cache
+            "eval": self.eval_cache,
         }
         return cache_paths.get(cache_type, self.cache_root)

@@ -83,13 +85,12 @@ class CacheConfig:
         """Flush specified cache or all caches if no type is specified"""
         try:
             if cache_type:
-                logger.info(
+                logger.info(
+                    LogFormatter.section(f"FLUSHING {cache_type.upper()} CACHE")
+                )
                 cache_dir = self.get_cache_path(cache_type)
                 if cache_dir.exists():
-                    stats = {
-                        "Cache_Type": cache_type,
-                        "Directory": str(cache_dir)
-                    }
+                    stats = {"Cache_Type": cache_type, "Directory": str(cache_dir)}
                     for line in LogFormatter.tree(stats, "Cache Details"):
                         logger.info(line)
                     shutil.rmtree(cache_dir)
@@ -100,10 +101,11 @@ class CacheConfig:
                 for cache_type in ["datasets", "models", "votes", "eval"]:
                     self.flush_cache(cache_type)
                 logger.info(LogFormatter.success("All caches cleared successfully"))
-
+
         except Exception as e:
             logger.error(LogFormatter.error("Failed to flush cache", e))
             raise

+
 # Singleton instance of cache configuration
-cache_config = CacheConfig()
+cache_config = CacheConfig()
backend/app/core/fastapi_cache.py
CHANGED
@@ -1,7 +1,6 @@
 from fastapi_cache import FastAPICache
 from fastapi_cache.backends.inmemory import InMemoryBackend
 from fastapi_cache.decorator import cache
-from datetime import timedelta
 from app.config import CACHE_TTL
 import logging
 from app.core.formatting import LogFormatter
@@ -9,6 +8,7 @@ from typing import Optional, Any

 logger = logging.getLogger(__name__)

+
 class CustomInMemoryBackend(InMemoryBackend):
     def __init__(self):
         """Initialize the cache backend"""
@@ -23,7 +23,9 @@ class CustomInMemoryBackend(InMemoryBackend):
                 return True
             return False
         except Exception as e:
-            logger.error(
+            logger.error(
+                LogFormatter.error(f"Failed to delete key {key} from cache", e)
+            )
             return False

     async def get(self, key: str) -> Any:
@@ -34,43 +36,43 @@ class CustomInMemoryBackend(InMemoryBackend):
         """Set a value in the cache"""
         self.cache[key] = value

+
 def setup_cache():
     """Initialize FastAPI Cache with in-memory backend"""
     try:
         logger.info(LogFormatter.section("CACHE INITIALIZATION"))
-        FastAPICache.init(
-            backend=CustomInMemoryBackend(),
-            prefix="fastapi-cache"
-        )
+        FastAPICache.init(backend=CustomInMemoryBackend(), prefix="fastapi-cache")
         logger.info(LogFormatter.success("Cache initialized successfully"))
     except Exception as e:
         logger.error(LogFormatter.error("Failed to initialize cache", e))
         raise

+
 async def invalidate_cache_key(key: str):
     """Invalidate a specific cache key"""
     try:
         backend = FastAPICache.get_backend()
-        if hasattr(backend,
+        if hasattr(backend, "delete"):
             await backend.delete(key)
             logger.info(LogFormatter.success(f"Cache invalidated for key: {key}"))
         else:
-            logger.warning(
+            logger.warning(
+                LogFormatter.warning("Cache backend does not support deletion")
+            )
     except Exception as e:
         logger.error(LogFormatter.error(f"Failed to invalidate cache key: {key}", e))

+
 def build_cache_key(*args) -> str:
     """Build a cache key from multiple arguments"""
     return ":".join(str(arg) for arg in args if arg is not None)

+
 def cached(expire: int = CACHE_TTL, key_builder=None):
     """Decorator for caching endpoint responses
-
+
     Args:
         expire (int): Cache TTL in seconds
         key_builder (callable, optional): Custom key builder function
     """
-    return cache(
-        expire=expire,
-        key_builder=key_builder
-    )
+    return cache(expire=expire, key_builder=key_builder)
backend/app/core/formatting.py
CHANGED
@@ -3,48 +3,49 @@ from typing import Dict, Any, List, Optional

 logger = logging.getLogger(__name__)

+
 class LogFormatter:
     """Utility class for consistent log formatting across the application"""
-
+
     @staticmethod
     def section(title: str) -> str:
         """Create a section header"""
-        return f"\n{'='*20} {title.upper()} {'='*20}"
-
+        return f"\n{'=' * 20} {title.upper()} {'=' * 20}"
+
     @staticmethod
     def subsection(title: str) -> str:
         """Create a subsection header"""
-        return f"\n{'─'*20} {title} {'─'*20}"
-
+        return f"\n{'─' * 20} {title} {'─' * 20}"
+
     @staticmethod
     def tree(items: Dict[str, Any], title: str = None) -> List[str]:
         """Create a tree view of dictionary data"""
         lines = []
         if title:
             lines.append(f"📊 {title}:")
-
+
         # Get the maximum length for alignment
         max_key_length = max(len(str(k)) for k in items.keys())
-
+
         # Format each item
         for i, (key, value) in enumerate(items.items()):
             prefix = "└──" if i == len(items) - 1 else "├──"
             if isinstance(value, (int, float)):
                 value = f"{value:,}"  # Add thousand separators
             lines.append(f"{prefix} {str(key):<{max_key_length}}: {value}")
-
+
         return lines
-
+
     @staticmethod
     def stats(stats: Dict[str, int], title: str = None) -> List[str]:
         """Format statistics with icons"""
         lines = []
         if title:
             lines.append(f"📊 {title}:")
-
+
         # Get the maximum length for alignment
         max_key_length = max(len(str(k)) for k in stats.keys())
-
+
         # Format each stat with an appropriate icon
         icons = {
             "total": "📌",
@@ -59,19 +60,19 @@ class LogFormatter:
             "cached": "💾",
             "size": "📏",
             "time": "⏱️",
-            "rate": "🚀"
+            "rate": "🚀",
         }
-
+
         # Format each item
         for i, (key, value) in enumerate(stats.items()):
             prefix = "└──" if i == len(stats) - 1 else "├──"
-            icon = icons.get(key.lower().split(
+            icon = icons.get(key.lower().split("_")[0], "•")
             if isinstance(value, (int, float)):
                 value = f"{value:,}"  # Add thousand separators
             lines.append(f"{prefix} {icon} {str(key):<{max_key_length}}: {value}")
-
+
         return lines
-
+
     @staticmethod
     def progress_bar(current: int, total: int, width: int = 20) -> str:
         """Create a progress bar"""
@@ -79,7 +80,7 @@ class LogFormatter:
         filled = "█" * (percentage * width // 100)
         empty = "░" * (width - len(filled))
         return f"{filled}{empty} {percentage:3d}%"
-
+
     @staticmethod
     def error(message: str, error: Optional[Exception] = None) -> str:
         """Format error message"""
@@ -87,18 +88,18 @@ class LogFormatter:
         if error:
             error_msg += f"\n └── Details: {str(error)}"
         return error_msg
-
+
     @staticmethod
     def success(message: str) -> str:
         """Format success message"""
         return f"✅ {message}"
-
+
     @staticmethod
     def warning(message: str) -> str:
         """Format warning message"""
         return f"⚠️ {message}"
-
+
     @staticmethod
     def info(message: str) -> str:
         """Format info message"""
-        return f"ℹ️ {message}"
+        return f"ℹ️ {message}"
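For a sense of what these helpers emit, here is a small usage sketch of LogFormatter.tree; the expected output in the comments is inferred from the implementation above, and the sample dict is made up:

from app.core.formatting import LogFormatter

stats = {"Model_ID": "org/model-a", "Precision": "float16"}
for line in LogFormatter.tree(stats, "Submission Details"):
    print(line)
# 📊 Submission Details:
# ├── Model_ID : org/model-a
# └── Precision: float16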
backend/app/main.py
CHANGED
@@ -1,5 +1,6 @@
 from fastapi import FastAPI
 from app.config.logging_config import setup_logging
+from app.api import models, votes
 import logging

 # Initialize logging configuration
@@ -8,11 +9,11 @@ logger = logging.getLogger(__name__)

 app = FastAPI(title="Open LLM Leaderboard API")

+
 @app.on_event("startup")
 async def startup_event():
     logger.info("Starting up the application...")

-
-from app.api import models, votes
+
 app.include_router(models.router, prefix="/api", tags=["models"])
-app.include_router(votes.router, prefix="/api", tags=["votes"])
+app.include_router(votes.router, prefix="/api", tags=["votes"])
backend/app/services/hf_service.py
CHANGED
@@ -1,5 +1,4 @@
 from typing import Optional
-from huggingface_hub import HfApi
 from app.config import HF_TOKEN, API
 from app.core.cache import cache_config
 from app.core.formatting import LogFormatter
@@ -7,6 +6,7 @@ import logging

 logger = logging.getLogger(__name__)

+
 class HuggingFaceService:
     def __init__(self):
         self.api = API
@@ -31,7 +31,11 @@ class HuggingFaceService:
         try:
             logger.info(LogFormatter.info("Fetching user information..."))
             info = self.api.get_token_permission()
-            logger.info(
+            logger.info(
+                LogFormatter.success(
+                    f"User info retrieved for: {info.get('user', 'Unknown')}"
+                )
+            )
             return info
         except Exception as e:
             logger.error(LogFormatter.error("Failed to get user info", e))
@@ -39,7 +43,9 @@ class HuggingFaceService:

     def _log_repo_operation(self, operation: str, repo: str, details: str = None):
         """Helper to log repository operations"""
-        logger.info(
+        logger.info(
+            LogFormatter.section(f"HF REPOSITORY OPERATION - {operation.upper()}")
+        )
         stats = {
             "Operation": operation,
             "Repository": repo,
@@ -47,4 +53,4 @@ class HuggingFaceService:
         if details:
             stats["Details"] = details
         for line in LogFormatter.tree(stats):
-            logger.info(line)
+            logger.info(line)
backend/app/services/leaderboard.py
CHANGED
@@ -1,5 +1,4 @@
 from app.core.cache import cache_config
-from datetime import datetime
 from typing import List, Dict, Any
 import datasets
 from fastapi import HTTPException
@@ -9,33 +8,38 @@ from app.core.formatting import LogFormatter

 logger = logging.getLogger(__name__)

 class LeaderboardService:
     def __init__(self):
         pass
-
     async def fetch_raw_data(self) -> List[Dict[str, Any]]:
         """Fetch raw leaderboard data from HuggingFace dataset"""
         try:
             logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
-            logger.info(
-
             dataset = datasets.load_dataset(
                 f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
-                cache_dir=cache_config.get_cache_path("datasets")
             )["train"]
-
             df = dataset.to_pandas()
-            data = df.to_dict(
-
             stats = {
                 "Total_Entries": len(data),
-                "Dataset_Size": f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.1f}MB"
             }
             for line in LogFormatter.stats(stats, "Dataset Statistics"):
                 logger.info(line)
-
             return data
-
         except Exception as e:
             logger.error(LogFormatter.error("Failed to fetch leaderboard data", e))
             raise HTTPException(status_code=500, detail=str(e))
@@ -44,53 +48,60 @@
         """Get formatted leaderboard data"""
         try:
             logger.info(LogFormatter.section("FORMATTING LEADERBOARD DATA"))
-
             raw_data = await self.fetch_raw_data()
             formatted_data = []
             type_counts = {}
             error_count = 0
-
             # Initialize progress tracking
             total_items = len(raw_data)
             logger.info(LogFormatter.info(f"Processing {total_items:,} entries..."))
-
             for i, item in enumerate(raw_data, 1):
                 try:
                     formatted_item = await self.transform_data(item)
                     formatted_data.append(formatted_item)
-
                     # Count model types
                     model_type = formatted_item["model"]["type"]
                     type_counts[model_type] = type_counts.get(model_type, 0) + 1
-
                 except Exception as e:
                     error_count += 1
-                    logger.error(
                     continue
-
                 # Log progress every 10%
                 if i % max(1, total_items // 10) == 0:
-
-
-
             # Log final statistics
             stats = {
                 "Total_Processed": total_items,
                 "Successful": len(formatted_data),
-                "Failed": error_count
             }
             logger.info(LogFormatter.section("PROCESSING SUMMARY"))
             for line in LogFormatter.stats(stats, "Processing Statistics"):
                 logger.info(line)
-
             # Log model type distribution
             type_stats = {f"Type_{k}": v for k, v in type_counts.items()}
             logger.info(LogFormatter.subsection("MODEL TYPE DISTRIBUTION"))
             for line in LogFormatter.stats(type_stats):
                 logger.info(line)
-
             return formatted_data
-
         except Exception as e:
             logger.error(LogFormatter.error("Failed to format leaderboard data", e))
             raise HTTPException(status_code=500, detail=str(e))
@@ -100,42 +111,44 @@
         try:
             # Extract model name for logging
             model_name = data.get("fullname", "Unknown")
-            logger.debug(
-
             # Create unique ID combining model name, precision, sha and chat template status
             unique_id = f"{data.get('fullname', 'Unknown')}_{data.get('Precision', 'Unknown')}_{data.get('Model sha', 'Unknown')}_{str(data.get('Chat Template', False))}"
-
             evaluations = {
                 "ifeval": {
                     "name": "IFEval",
                     "value": data.get("IFEval Raw", 0),
-                    "normalized_score": data.get("IFEval", 0)
                 },
                 "bbh": {
                     "name": "BBH",
                     "value": data.get("BBH Raw", 0),
-                    "normalized_score": data.get("BBH", 0)
                 },
                 "math": {
                     "name": "MATH Level 5",
|
121 |
"value": data.get("MATH Lvl 5 Raw", 0),
|
122 |
-
"normalized_score": data.get("MATH Lvl 5", 0)
|
123 |
},
|
124 |
"gpqa": {
|
125 |
"name": "GPQA",
|
126 |
"value": data.get("GPQA Raw", 0),
|
127 |
-
"normalized_score": data.get("GPQA", 0)
|
128 |
},
|
129 |
"musr": {
|
130 |
"name": "MUSR",
|
131 |
"value": data.get("MUSR Raw", 0),
|
132 |
-
"normalized_score": data.get("MUSR", 0)
|
133 |
},
|
134 |
"mmlu_pro": {
|
135 |
"name": "MMLU-PRO",
|
136 |
"value": data.get("MMLU-PRO Raw", 0),
|
137 |
-
"normalized_score": data.get("MMLU-PRO", 0)
|
138 |
-
}
|
139 |
}
|
140 |
|
141 |
features = {
|
@@ -143,7 +156,7 @@ class LeaderboardService:
|
|
143 |
"is_merged": data.get("Merged", False),
|
144 |
"is_moe": data.get("MoE", False),
|
145 |
"is_flagged": data.get("Flagged", False),
|
146 |
-
"is_official_provider": data.get("Official Providers", False)
|
147 |
}
|
148 |
|
149 |
metadata = {
|
@@ -154,18 +167,18 @@ class LeaderboardService:
|
|
154 |
"hub_license": data.get("Hub License"),
|
155 |
"hub_hearts": data.get("Hub ❤️"),
|
156 |
"params_billions": data.get("#Params (B)"),
|
157 |
-
"co2_cost": data.get("CO₂ cost (kg)", 0)
|
158 |
}
|
159 |
|
160 |
# Clean model type by removing emojis if present
|
161 |
original_type = data.get("Type", "")
|
162 |
model_type = original_type.lower().strip()
|
163 |
-
|
164 |
# Remove emojis and parentheses
|
165 |
if "(" in model_type:
|
166 |
model_type = model_type.split("(")[0].strip()
|
167 |
-
model_type =
|
168 |
-
|
169 |
# Map old model types to new ones
|
170 |
model_type_mapping = {
|
171 |
"fine-tuned": "fined-tuned-on-domain-specific-dataset",
|
@@ -175,14 +188,18 @@ class LeaderboardService:
|
|
175 |
"ft": "fined-tuned-on-domain-specific-dataset",
|
176 |
"finetuning": "fined-tuned-on-domain-specific-dataset",
|
177 |
"fine tuning": "fined-tuned-on-domain-specific-dataset",
|
178 |
-
"fine-tuning": "fined-tuned-on-domain-specific-dataset"
|
179 |
}
|
180 |
|
181 |
mapped_type = model_type_mapping.get(model_type.lower().strip(), model_type)
|
182 |
-
|
183 |
if mapped_type != model_type:
|
184 |
-
logger.debug(
|
185 |
-
|
|
|
|
|
|
|
|
|
186 |
transformed_data = {
|
187 |
"id": unique_id,
|
188 |
"model": {
|
@@ -193,16 +210,22 @@ class LeaderboardService:
|
|
193 |
"weight_type": data.get("Weight type"),
|
194 |
"architecture": data.get("Architecture"),
|
195 |
"average_score": data.get("Average ⬆️"),
|
196 |
-
"has_chat_template": data.get("Chat Template", False)
|
197 |
},
|
198 |
"evaluations": evaluations,
|
199 |
"features": features,
|
200 |
-
"metadata": metadata
|
201 |
}
|
202 |
-
|
203 |
-
logger.debug(
|
|
|
|
|
204 |
return transformed_data
|
205 |
-
|
206 |
except Exception as e:
|
207 |
-
logger.error(
|
|
|
|
|
|
|
|
|
208 |
raise
|
|
|
1 |
from app.core.cache import cache_config
|
|
|
2 |
from typing import List, Dict, Any
|
3 |
import datasets
|
4 |
from fastapi import HTTPException
|
|
|
8 |
|
9 |
logger = logging.getLogger(__name__)
|
10 |
|
11 |
+
|
12 |
class LeaderboardService:
|
13 |
def __init__(self):
|
14 |
pass
|
15 |
+
|
16 |
async def fetch_raw_data(self) -> List[Dict[str, Any]]:
|
17 |
"""Fetch raw leaderboard data from HuggingFace dataset"""
|
18 |
try:
|
19 |
logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
|
20 |
+
logger.info(
|
21 |
+
LogFormatter.info(
|
22 |
+
f"Loading dataset from {HF_ORGANIZATION}/llm-security-leaderboard-contents"
|
23 |
+
)
|
24 |
+
)
|
25 |
+
|
26 |
dataset = datasets.load_dataset(
|
27 |
f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
|
28 |
+
cache_dir=cache_config.get_cache_path("datasets"),
|
29 |
)["train"]
|
30 |
+
|
31 |
df = dataset.to_pandas()
|
32 |
+
data = df.to_dict("records")
|
33 |
+
|
34 |
stats = {
|
35 |
"Total_Entries": len(data),
|
36 |
+
"Dataset_Size": f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.1f}MB",
|
37 |
}
|
38 |
for line in LogFormatter.stats(stats, "Dataset Statistics"):
|
39 |
logger.info(line)
|
40 |
+
|
41 |
return data
|
42 |
+
|
43 |
except Exception as e:
|
44 |
logger.error(LogFormatter.error("Failed to fetch leaderboard data", e))
|
45 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
48 |
"""Get formatted leaderboard data"""
|
49 |
try:
|
50 |
logger.info(LogFormatter.section("FORMATTING LEADERBOARD DATA"))
|
51 |
+
|
52 |
raw_data = await self.fetch_raw_data()
|
53 |
formatted_data = []
|
54 |
type_counts = {}
|
55 |
error_count = 0
|
56 |
+
|
57 |
# Initialize progress tracking
|
58 |
total_items = len(raw_data)
|
59 |
logger.info(LogFormatter.info(f"Processing {total_items:,} entries..."))
|
60 |
+
|
61 |
for i, item in enumerate(raw_data, 1):
|
62 |
try:
|
63 |
formatted_item = await self.transform_data(item)
|
64 |
formatted_data.append(formatted_item)
|
65 |
+
|
66 |
# Count model types
|
67 |
model_type = formatted_item["model"]["type"]
|
68 |
type_counts[model_type] = type_counts.get(model_type, 0) + 1
|
69 |
+
|
70 |
except Exception as e:
|
71 |
error_count += 1
|
72 |
+
logger.error(
|
73 |
+
LogFormatter.error(
|
74 |
+
f"Failed to format entry {i}/{total_items}", e
|
75 |
+
)
|
76 |
+
)
|
77 |
continue
|
78 |
+
|
79 |
# Log progress every 10%
|
80 |
if i % max(1, total_items // 10) == 0:
|
81 |
+
logger.info(
|
82 |
+
LogFormatter.info(
|
83 |
+
f"Progress: {LogFormatter.progress_bar(i, total_items)}"
|
84 |
+
)
|
85 |
+
)
|
86 |
+
|
87 |
# Log final statistics
|
88 |
stats = {
|
89 |
"Total_Processed": total_items,
|
90 |
"Successful": len(formatted_data),
|
91 |
+
"Failed": error_count,
|
92 |
}
|
93 |
logger.info(LogFormatter.section("PROCESSING SUMMARY"))
|
94 |
for line in LogFormatter.stats(stats, "Processing Statistics"):
|
95 |
logger.info(line)
|
96 |
+
|
97 |
# Log model type distribution
|
98 |
type_stats = {f"Type_{k}": v for k, v in type_counts.items()}
|
99 |
logger.info(LogFormatter.subsection("MODEL TYPE DISTRIBUTION"))
|
100 |
for line in LogFormatter.stats(type_stats):
|
101 |
logger.info(line)
|
102 |
+
|
103 |
return formatted_data
|
104 |
+
|
105 |
except Exception as e:
|
106 |
logger.error(LogFormatter.error("Failed to format leaderboard data", e))
|
107 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
111 |
try:
|
112 |
# Extract model name for logging
|
113 |
model_name = data.get("fullname", "Unknown")
|
114 |
+
logger.debug(
|
115 |
+
LogFormatter.info(f"Transforming data for model: {model_name}")
|
116 |
+
)
|
117 |
+
|
118 |
# Create unique ID combining model name, precision, sha and chat template status
|
119 |
unique_id = f"{data.get('fullname', 'Unknown')}_{data.get('Precision', 'Unknown')}_{data.get('Model sha', 'Unknown')}_{str(data.get('Chat Template', False))}"
|
120 |
+
|
121 |
evaluations = {
|
122 |
"ifeval": {
|
123 |
"name": "IFEval",
|
124 |
"value": data.get("IFEval Raw", 0),
|
125 |
+
"normalized_score": data.get("IFEval", 0),
|
126 |
},
|
127 |
"bbh": {
|
128 |
"name": "BBH",
|
129 |
"value": data.get("BBH Raw", 0),
|
130 |
+
"normalized_score": data.get("BBH", 0),
|
131 |
},
|
132 |
"math": {
|
133 |
"name": "MATH Level 5",
|
134 |
"value": data.get("MATH Lvl 5 Raw", 0),
|
135 |
+
"normalized_score": data.get("MATH Lvl 5", 0),
|
136 |
},
|
137 |
"gpqa": {
|
138 |
"name": "GPQA",
|
139 |
"value": data.get("GPQA Raw", 0),
|
140 |
+
"normalized_score": data.get("GPQA", 0),
|
141 |
},
|
142 |
"musr": {
|
143 |
"name": "MUSR",
|
144 |
"value": data.get("MUSR Raw", 0),
|
145 |
+
"normalized_score": data.get("MUSR", 0),
|
146 |
},
|
147 |
"mmlu_pro": {
|
148 |
"name": "MMLU-PRO",
|
149 |
"value": data.get("MMLU-PRO Raw", 0),
|
150 |
+
"normalized_score": data.get("MMLU-PRO", 0),
|
151 |
+
},
|
152 |
}
|
153 |
|
154 |
features = {
|
|
|
156 |
"is_merged": data.get("Merged", False),
|
157 |
"is_moe": data.get("MoE", False),
|
158 |
"is_flagged": data.get("Flagged", False),
|
159 |
+
"is_official_provider": data.get("Official Providers", False),
|
160 |
}
|
161 |
|
162 |
metadata = {
|
|
|
167 |
"hub_license": data.get("Hub License"),
|
168 |
"hub_hearts": data.get("Hub ❤️"),
|
169 |
"params_billions": data.get("#Params (B)"),
|
170 |
+
"co2_cost": data.get("CO₂ cost (kg)", 0),
|
171 |
}
|
172 |
|
173 |
# Clean model type by removing emojis if present
|
174 |
original_type = data.get("Type", "")
|
175 |
model_type = original_type.lower().strip()
|
176 |
+
|
177 |
# Remove emojis and parentheses
|
178 |
if "(" in model_type:
|
179 |
model_type = model_type.split("(")[0].strip()
|
180 |
+
model_type = "".join(c for c in model_type if c not in "🔶🟢🟩💬🤝🌸 ")
|
181 |
+
|
182 |
# Map old model types to new ones
|
183 |
model_type_mapping = {
|
184 |
"fine-tuned": "fined-tuned-on-domain-specific-dataset",
|
|
|
188 |
"ft": "fined-tuned-on-domain-specific-dataset",
|
189 |
"finetuning": "fined-tuned-on-domain-specific-dataset",
|
190 |
"fine tuning": "fined-tuned-on-domain-specific-dataset",
|
191 |
+
"fine-tuning": "fined-tuned-on-domain-specific-dataset",
|
192 |
}
|
193 |
|
194 |
mapped_type = model_type_mapping.get(model_type.lower().strip(), model_type)
|
195 |
+
|
196 |
if mapped_type != model_type:
|
197 |
+
logger.debug(
|
198 |
+
LogFormatter.info(
|
199 |
+
f"Model type mapped: {original_type} -> {mapped_type}"
|
200 |
+
)
|
201 |
+
)
|
202 |
+
|
203 |
transformed_data = {
|
204 |
"id": unique_id,
|
205 |
"model": {
|
|
|
210 |
"weight_type": data.get("Weight type"),
|
211 |
"architecture": data.get("Architecture"),
|
212 |
"average_score": data.get("Average ⬆️"),
|
213 |
+
"has_chat_template": data.get("Chat Template", False),
|
214 |
},
|
215 |
"evaluations": evaluations,
|
216 |
"features": features,
|
217 |
+
"metadata": metadata,
|
218 |
}
|
219 |
+
|
220 |
+
logger.debug(
|
221 |
+
LogFormatter.success(f"Successfully transformed data for {model_name}")
|
222 |
+
)
|
223 |
return transformed_data
|
224 |
+
|
225 |
except Exception as e:
|
226 |
+
logger.error(
|
227 |
+
LogFormatter.error(
|
228 |
+
f"Failed to transform data for {data.get('fullname', 'Unknown')}", e
|
229 |
+
)
|
230 |
+
)
|
231 |
raise
|
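`transform_data` keeps the same model-type normalization: lowercase the raw label, drop any parenthesized suffix, strip emoji decorations, then map legacy names onto the new canonical ones. A standalone sketch of that logic, showing only a subset of the mapping table used above:

```python
# Sketch of the model-type normalization in transform_data (subset of the mapping).
MODEL_TYPE_MAPPING = {
    "fine-tuned": "fined-tuned-on-domain-specific-dataset",
    "fine-tuning": "fined-tuned-on-domain-specific-dataset",
}


def normalize_model_type(raw_type: str) -> str:
    model_type = raw_type.lower().strip()
    # Drop any parenthesized suffix, e.g. "fine-tuned (chat)".
    if "(" in model_type:
        model_type = model_type.split("(")[0].strip()
    # Strip emoji decorations and spaces, as in the service code.
    model_type = "".join(c for c in model_type if c not in "🔶🟢🟩💬🤝🌸 ")
    return MODEL_TYPE_MAPPING.get(model_type, model_type)


assert normalize_model_type("🔶 fine-tuned") == "fined-tuned-on-domain-specific-dataset"
assert normalize_model_type("pretrained 🟢") == "pretrained"
```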
backend/app/services/models.py
CHANGED
@@ -5,22 +5,16 @@ import os
|
|
5 |
from pathlib import Path
|
6 |
import logging
|
7 |
import aiohttp
|
8 |
-
import asyncio
|
9 |
import time
|
10 |
-
from huggingface_hub import HfApi
|
11 |
from huggingface_hub.utils import build_hf_headers
|
12 |
from datasets import disable_progress_bar
|
13 |
import sys
|
14 |
import contextlib
|
15 |
-
from concurrent.futures import ThreadPoolExecutor
|
16 |
import tempfile
|
17 |
|
18 |
-
from app.config import
|
19 |
-
|
20 |
-
HF_TOKEN,
|
21 |
-
EVAL_REQUESTS_PATH
|
22 |
-
)
|
23 |
-
from app.config.hf_config import HF_ORGANIZATION, QUEUE_REPO
|
24 |
from app.services.hf_service import HuggingFaceService
|
25 |
from app.utils.model_validation import ModelValidator
|
26 |
from app.services.votes import VoteService
|
@@ -32,12 +26,13 @@ disable_progress_bar()
|
|
32 |
|
33 |
logger = logging.getLogger(__name__)
|
34 |
|
|
|
35 |
# Context manager to temporarily disable stdout and stderr
|
36 |
@contextlib.contextmanager
|
37 |
def suppress_output():
|
38 |
stdout = sys.stdout
|
39 |
stderr = sys.stderr
|
40 |
-
devnull = open(os.devnull,
|
41 |
try:
|
42 |
sys.stdout = devnull
|
43 |
sys.stderr = devnull
|
@@ -47,6 +42,7 @@ def suppress_output():
|
|
47 |
sys.stderr = stderr
|
48 |
devnull.close()
|
49 |
|
|
|
50 |
class ProgressTracker:
|
51 |
def __init__(self, total: int, desc: str = "Progress", update_frequency: int = 10):
|
52 |
self.total = total
|
@@ -55,57 +51,63 @@ class ProgressTracker:
|
|
55 |
self.start_time = time.time()
|
56 |
self.update_frequency = update_frequency # Percentage steps
|
57 |
self.last_update = -1
|
58 |
-
|
59 |
# Initial log with fancy formatting
|
60 |
logger.info(LogFormatter.section(desc))
|
61 |
logger.info(LogFormatter.info(f"Starting processing of {total:,} items..."))
|
62 |
sys.stdout.flush()
|
63 |
-
|
64 |
def update(self, n: int = 1):
|
65 |
self.current += n
|
66 |
current_percentage = (self.current * 100) // self.total
|
67 |
-
|
68 |
# Only update on frequency steps (e.g., 0%, 10%, 20%, etc.)
|
69 |
-
if
|
|
|
|
|
|
|
70 |
elapsed = time.time() - self.start_time
|
71 |
rate = self.current / elapsed if elapsed > 0 else 0
|
72 |
remaining = (self.total - self.current) / rate if rate > 0 else 0
|
73 |
-
|
74 |
# Create progress stats
|
75 |
stats = {
|
76 |
"Progress": LogFormatter.progress_bar(self.current, self.total),
|
77 |
"Items": f"{self.current:,}/{self.total:,}",
|
78 |
"Time": f"⏱️ {elapsed:.1f}s elapsed, {remaining:.1f}s remaining",
|
79 |
-
"Rate": f"🚀 {rate:.1f} items/s"
|
80 |
}
|
81 |
-
|
82 |
# Log progress using tree format
|
83 |
for line in LogFormatter.tree(stats):
|
84 |
logger.info(line)
|
85 |
sys.stdout.flush()
|
86 |
-
|
87 |
-
self.last_update = (
|
88 |
-
|
|
|
|
|
89 |
def close(self):
|
90 |
elapsed = time.time() - self.start_time
|
91 |
rate = self.total / elapsed if elapsed > 0 else 0
|
92 |
-
|
93 |
# Final summary with fancy formatting
|
94 |
logger.info(LogFormatter.section("COMPLETED"))
|
95 |
stats = {
|
96 |
"Total": f"{self.total:,} items",
|
97 |
"Time": f"{elapsed:.1f}s",
|
98 |
-
"Rate": f"{rate:.1f} items/s"
|
99 |
}
|
100 |
for line in LogFormatter.stats(stats):
|
101 |
logger.info(line)
|
102 |
-
logger.info("="*50)
|
103 |
sys.stdout.flush()
|
104 |
|
|
|
105 |
class ModelService(HuggingFaceService):
|
106 |
-
_instance: Optional[
|
107 |
_initialized = False
|
108 |
-
|
109 |
def __new__(cls):
|
110 |
if cls._instance is None:
|
111 |
logger.info(LogFormatter.info("Creating new ModelService instance"))
|
@@ -113,14 +115,18 @@ class ModelService(HuggingFaceService):
|
|
113 |
return cls._instance
|
114 |
|
115 |
def __init__(self):
|
116 |
-
if not hasattr(self,
|
117 |
logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
|
118 |
super().__init__()
|
119 |
self.validator = ModelValidator()
|
120 |
self.vote_service = VoteService()
|
121 |
self.eval_requests_path = cache_config.eval_requests_file
|
122 |
-
logger.info(
|
123 |
-
|
|
|
|
|
|
|
|
|
124 |
self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
|
125 |
self.hf_api = HfApi(token=HF_TOKEN)
|
126 |
self.cached_models = None
|
@@ -129,56 +135,66 @@ class ModelService(HuggingFaceService):
|
|
129 |
self._init_done = True
|
130 |
logger.info(LogFormatter.success("Initialization complete"))
|
131 |
|
132 |
-
async def _download_and_process_file(
|
|
|
|
|
133 |
"""Download and process a file asynchronously"""
|
134 |
try:
|
135 |
# Build file URL
|
136 |
url = f"https://huggingface.co/datasets/{QUEUE_REPO}/resolve/main/{file}"
|
137 |
headers = build_hf_headers(token=self.token)
|
138 |
-
|
139 |
# Download file
|
140 |
async with session.get(url, headers=headers) as response:
|
141 |
if response.status != 200:
|
142 |
-
logger.error(
|
|
|
|
|
|
|
|
|
143 |
progress.update()
|
144 |
return None
|
145 |
-
|
146 |
try:
|
147 |
# First read content as text
|
148 |
text_content = await response.text()
|
149 |
# Then parse JSON
|
150 |
content = json.loads(text_content)
|
151 |
except json.JSONDecodeError as e:
|
152 |
-
logger.error(
|
|
|
|
|
153 |
progress.update()
|
154 |
return None
|
155 |
-
|
156 |
# Get status and determine target status
|
157 |
status = content.get("status", "PENDING").upper()
|
158 |
target_status = None
|
159 |
status_map = {
|
160 |
"PENDING": ["PENDING"],
|
161 |
"EVALUATING": ["RUNNING"],
|
162 |
-
"FINISHED": ["FINISHED"]
|
163 |
}
|
164 |
-
|
165 |
for target, source_statuses in status_map.items():
|
166 |
if status in source_statuses:
|
167 |
target_status = target
|
168 |
break
|
169 |
-
|
170 |
if not target_status:
|
171 |
progress.update()
|
172 |
return None
|
173 |
-
|
174 |
# Calculate wait time
|
175 |
try:
|
176 |
-
submit_time = datetime.fromisoformat(
|
|
|
|
|
177 |
if submit_time.tzinfo is None:
|
178 |
submit_time = submit_time.replace(tzinfo=timezone.utc)
|
179 |
current_time = datetime.now(timezone.utc)
|
180 |
wait_time = current_time - submit_time
|
181 |
-
|
182 |
model_info = {
|
183 |
"name": content["model"],
|
184 |
"submitter": content.get("sender", "Unknown"),
|
@@ -186,17 +202,17 @@ class ModelService(HuggingFaceService):
|
|
186 |
"wait_time": f"{wait_time.total_seconds():.1f}s",
|
187 |
"submission_time": content["submitted_time"],
|
188 |
"status": target_status,
|
189 |
-
"precision": content.get("precision", "Unknown")
|
190 |
}
|
191 |
-
|
192 |
progress.update()
|
193 |
return model_info
|
194 |
-
|
195 |
except (ValueError, TypeError) as e:
|
196 |
logger.error(LogFormatter.error(f"Failed to process {file}", e))
|
197 |
progress.update()
|
198 |
return None
|
199 |
-
|
200 |
except Exception as e:
|
201 |
logger.error(LogFormatter.error(f"Failed to load {file}", e))
|
202 |
progress.update()
|
@@ -207,31 +223,25 @@ class ModelService(HuggingFaceService):
|
|
207 |
try:
|
208 |
logger.info(LogFormatter.section("CACHE REFRESH"))
|
209 |
self._log_repo_operation("read", QUEUE_REPO, "Refreshing models cache")
|
210 |
-
|
211 |
# Initialize models dictionary
|
212 |
-
models = {
|
213 |
-
|
214 |
-
"evaluating": [],
|
215 |
-
"pending": []
|
216 |
-
}
|
217 |
-
|
218 |
try:
|
219 |
logger.info(LogFormatter.subsection("DATASET LOADING"))
|
220 |
logger.info(LogFormatter.info("Loading dataset..."))
|
221 |
-
|
222 |
# Download entire dataset snapshot
|
223 |
with suppress_output():
|
224 |
local_dir = self.hf_api.snapshot_download(
|
225 |
-
repo_id=QUEUE_REPO,
|
226 |
-
repo_type="dataset",
|
227 |
-
token=self.token
|
228 |
)
|
229 |
-
|
230 |
# List JSON files in local directory
|
231 |
local_path = Path(local_dir)
|
232 |
json_files = list(local_path.glob("**/*.json"))
|
233 |
total_files = len(json_files)
|
234 |
-
|
235 |
# Log repository stats
|
236 |
stats = {
|
237 |
"Total_Files": total_files,
|
@@ -239,46 +249,48 @@ class ModelService(HuggingFaceService):
|
|
239 |
}
|
240 |
for line in LogFormatter.stats(stats, "Repository Statistics"):
|
241 |
logger.info(line)
|
242 |
-
|
243 |
if not json_files:
|
244 |
raise Exception("No JSON files found in repository")
|
245 |
-
|
246 |
# Initialize progress tracker
|
247 |
progress = ProgressTracker(total_files, "PROCESSING FILES")
|
248 |
-
|
249 |
# Process local files
|
250 |
model_submissions = {} # Dict to track latest submission for each (model_id, revision, precision)
|
251 |
for file_path in json_files:
|
252 |
try:
|
253 |
-
with open(file_path,
|
254 |
content = json.load(f)
|
255 |
-
|
256 |
# Get status and determine target status
|
257 |
status = content.get("status", "PENDING").upper()
|
258 |
target_status = None
|
259 |
status_map = {
|
260 |
"PENDING": ["PENDING"],
|
261 |
"EVALUATING": ["RUNNING"],
|
262 |
-
"FINISHED": ["FINISHED"]
|
263 |
}
|
264 |
-
|
265 |
for target, source_statuses in status_map.items():
|
266 |
if status in source_statuses:
|
267 |
target_status = target
|
268 |
break
|
269 |
-
|
270 |
if not target_status:
|
271 |
progress.update()
|
272 |
continue
|
273 |
-
|
274 |
# Calculate wait time
|
275 |
try:
|
276 |
-
submit_time = datetime.fromisoformat(
|
|
|
|
|
277 |
if submit_time.tzinfo is None:
|
278 |
submit_time = submit_time.replace(tzinfo=timezone.utc)
|
279 |
current_time = datetime.now(timezone.utc)
|
280 |
wait_time = current_time - submit_time
|
281 |
-
|
282 |
model_info = {
|
283 |
"name": content["model"],
|
284 |
"submitter": content.get("sender", "Unknown"),
|
@@ -286,50 +298,68 @@ class ModelService(HuggingFaceService):
|
|
286 |
"wait_time": f"{wait_time.total_seconds():.1f}s",
|
287 |
"submission_time": content["submitted_time"],
|
288 |
"status": target_status,
|
289 |
-
"precision": content.get("precision", "Unknown")
|
290 |
}
|
291 |
-
|
292 |
# Use (model_id, revision, precision) as key to track latest submission
|
293 |
-
key = (
|
294 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
model_submissions[key] = model_info
|
296 |
-
|
297 |
except (ValueError, TypeError) as e:
|
298 |
-
logger.error(
|
299 |
-
|
|
|
|
|
|
|
|
|
300 |
except Exception as e:
|
301 |
-
logger.error(
|
|
|
|
|
302 |
finally:
|
303 |
progress.update()
|
304 |
-
|
305 |
# Populate models dict with deduplicated submissions
|
306 |
for model_info in model_submissions.values():
|
307 |
models[model_info["status"].lower()].append(model_info)
|
308 |
-
|
309 |
progress.close()
|
310 |
-
|
311 |
# Final summary with fancy formatting
|
312 |
logger.info(LogFormatter.section("CACHE SUMMARY"))
|
313 |
stats = {
|
314 |
"Finished": len(models["finished"]),
|
315 |
"Evaluating": len(models["evaluating"]),
|
316 |
-
"Pending": len(models["pending"])
|
317 |
}
|
318 |
for line in LogFormatter.stats(stats, "Models by Status"):
|
319 |
logger.info(line)
|
320 |
-
logger.info("="*50)
|
321 |
-
|
322 |
except Exception as e:
|
323 |
logger.error(LogFormatter.error("Error processing files", e))
|
324 |
raise
|
325 |
-
|
326 |
# Update cache
|
327 |
self.cached_models = models
|
328 |
self.last_cache_update = time.time()
|
329 |
logger.info(LogFormatter.success("Cache updated successfully"))
|
330 |
-
|
331 |
return models
|
332 |
-
|
333 |
except Exception as e:
|
334 |
logger.error(LogFormatter.error("Cache refresh failed", e))
|
335 |
raise
|
@@ -337,40 +367,48 @@ class ModelService(HuggingFaceService):
|
|
337 |
async def initialize(self):
|
338 |
"""Initialize the model service"""
|
339 |
if self._initialized:
|
340 |
-
logger.info(
|
|
|
|
|
341 |
return
|
342 |
-
|
343 |
try:
|
344 |
logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
|
345 |
-
|
346 |
# Check if cache already exists
|
347 |
cache_path = cache_config.get_cache_path("datasets")
|
348 |
if not cache_path.exists() or not any(cache_path.iterdir()):
|
349 |
-
logger.info(
|
|
|
|
|
|
|
|
|
350 |
cache_config.flush_cache("datasets")
|
351 |
else:
|
352 |
logger.info(LogFormatter.info("Using existing datasets cache"))
|
353 |
-
|
354 |
# Ensure eval requests directory exists
|
355 |
self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
|
356 |
-
logger.info(
|
357 |
-
|
|
|
|
|
358 |
# List existing files
|
359 |
if self.eval_requests_path.exists():
|
360 |
files = list(self.eval_requests_path.glob("**/*.json"))
|
361 |
stats = {
|
362 |
"Total_Files": len(files),
|
363 |
-
"Directory": str(self.eval_requests_path)
|
364 |
}
|
365 |
for line in LogFormatter.stats(stats, "Eval Requests"):
|
366 |
logger.info(line)
|
367 |
-
|
368 |
# Load initial cache
|
369 |
await self._refresh_models_cache()
|
370 |
-
|
371 |
self._initialized = True
|
372 |
logger.info(LogFormatter.success("Model service initialization complete"))
|
373 |
-
|
374 |
except Exception as e:
|
375 |
logger.error(LogFormatter.error("Initialization failed", e))
|
376 |
raise
|
@@ -378,44 +416,59 @@ class ModelService(HuggingFaceService):
|
|
378 |
async def get_models(self) -> Dict[str, List[Dict[str, Any]]]:
|
379 |
"""Get all models with their status"""
|
380 |
if not self._initialized:
|
381 |
-
logger.info(
|
|
|
|
|
382 |
await self.initialize()
|
383 |
-
|
384 |
current_time = time.time()
|
385 |
cache_age = current_time - self.last_cache_update
|
386 |
-
|
387 |
# Check if cache needs refresh
|
388 |
if not self.cached_models:
|
389 |
-
logger.info(
|
|
|
|
|
390 |
return await self._refresh_models_cache()
|
391 |
elif cache_age > self.cache_ttl:
|
392 |
-
logger.info(
|
|
|
|
|
|
|
|
|
393 |
return await self._refresh_models_cache()
|
394 |
else:
|
395 |
logger.info(LogFormatter.info(f"Using cached data ({cache_age:.1f}s old)"))
|
396 |
return self.cached_models
|
397 |
|
398 |
async def submit_model(
|
399 |
-
self,
|
400 |
-
model_data: Dict[str, Any],
|
401 |
-
user_id: str
|
402 |
) -> Dict[str, Any]:
|
403 |
logger.info(LogFormatter.section("MODEL SUBMISSION"))
|
404 |
-
self._log_repo_operation(
|
|
|
|
|
|
|
|
|
405 |
stats = {
|
406 |
"Model": model_data["model_id"],
|
407 |
"User": user_id,
|
408 |
"Revision": model_data["revision"],
|
409 |
"Precision": model_data["precision"],
|
410 |
-
"Type": model_data["model_type"]
|
411 |
}
|
412 |
for line in LogFormatter.tree(stats, "Submission Details"):
|
413 |
logger.info(line)
|
414 |
-
|
415 |
# Validate required fields
|
416 |
required_fields = [
|
417 |
-
"model_id",
|
418 |
-
"
|
|
|
|
|
|
|
|
|
|
|
419 |
]
|
420 |
for field in required_fields:
|
421 |
if field not in model_data:
|
@@ -424,23 +477,25 @@ class ModelService(HuggingFaceService):
|
|
424 |
# Get model info and validate it exists on HuggingFace
|
425 |
try:
|
426 |
logger.info(LogFormatter.subsection("MODEL VALIDATION"))
|
427 |
-
|
428 |
# Get the model info to check if it exists
|
429 |
model_info = self.hf_api.model_info(
|
430 |
model_data["model_id"],
|
431 |
revision=model_data["revision"],
|
432 |
-
token=self.token
|
433 |
)
|
434 |
-
|
435 |
if not model_info:
|
436 |
-
raise Exception(
|
437 |
-
|
|
|
|
|
438 |
logger.info(LogFormatter.success("Model exists on HuggingFace Hub"))
|
439 |
-
|
440 |
except Exception as e:
|
441 |
logger.error(LogFormatter.error("Model validation failed", e))
|
442 |
raise
|
443 |
-
|
444 |
# Update model revision with commit sha
|
445 |
model_data["revision"] = model_info.sha
|
446 |
|
@@ -448,11 +503,13 @@ class ModelService(HuggingFaceService):
|
|
448 |
try:
|
449 |
logger.info(LogFormatter.subsection("CHECKING EXISTING SUBMISSIONS"))
|
450 |
existing_models = await self.get_models()
|
451 |
-
|
452 |
# Call the official provider status check
|
453 |
-
|
454 |
-
|
455 |
-
|
|
|
|
|
456 |
)
|
457 |
if not is_valid:
|
458 |
raise ValueError(error_message)
|
@@ -460,11 +517,16 @@ class ModelService(HuggingFaceService):
|
|
460 |
# Check in all statuses (pending, evaluating, finished)
|
461 |
for status, models in existing_models.items():
|
462 |
for model in models:
|
463 |
-
if
|
|
|
|
|
|
|
464 |
error_msg = f"Model {model_data['model_id']} revision {model_data['revision']} is already in the system with status: {status}"
|
465 |
-
logger.error(
|
|
|
|
|
466 |
raise ValueError(error_msg)
|
467 |
-
|
468 |
logger.info(LogFormatter.success("No existing submission found"))
|
469 |
except ValueError:
|
470 |
raise
|
@@ -474,9 +536,7 @@ class ModelService(HuggingFaceService):
|
|
474 |
|
475 |
# Check that model on hub and valid
|
476 |
valid, error, model_config = await self.validator.is_model_on_hub(
|
477 |
-
model_data["model_id"],
|
478 |
-
model_data["revision"],
|
479 |
-
test_tokenizer=True
|
480 |
)
|
481 |
if not valid:
|
482 |
logger.error(LogFormatter.error("Model on hub validation failed", error))
|
@@ -497,12 +557,14 @@ class ModelService(HuggingFaceService):
|
|
497 |
model_info,
|
498 |
model_data["precision"],
|
499 |
model_data["base_model"],
|
500 |
-
revision=model_data["revision"]
|
501 |
)
|
502 |
if model_size is None:
|
503 |
logger.error(LogFormatter.error("Model size validation failed", error))
|
504 |
raise Exception(error)
|
505 |
-
logger.info(
|
|
|
|
|
506 |
|
507 |
# Size limits based on precision
|
508 |
if model_data["precision"] in ["float16", "bfloat16"] and model_size > 100:
|
@@ -513,16 +575,16 @@ class ModelService(HuggingFaceService):
|
|
513 |
# Chat template validation if requested
|
514 |
if model_data["use_chat_template"]:
|
515 |
valid, error = await self.validator.check_chat_template(
|
516 |
-
model_data["model_id"],
|
517 |
-
model_data["revision"]
|
518 |
)
|
519 |
if not valid:
|
520 |
-
logger.error(
|
|
|
|
|
521 |
raise Exception(error)
|
522 |
logger.info(LogFormatter.success("Chat template validation passed"))
|
523 |
|
524 |
-
|
525 |
-
architectures = model_info.config.get("architectures", "")
|
526 |
if architectures:
|
527 |
architectures = ";".join(architectures)
|
528 |
|
@@ -541,9 +603,9 @@ class ModelService(HuggingFaceService):
|
|
541 |
"job_id": -1,
|
542 |
"job_start_time": None,
|
543 |
"use_chat_template": model_data["use_chat_template"],
|
544 |
-
"sender": user_id
|
545 |
}
|
546 |
-
|
547 |
logger.info(LogFormatter.subsection("EVALUATION ENTRY"))
|
548 |
for line in LogFormatter.tree(eval_entry):
|
549 |
logger.info(line)
|
@@ -552,18 +614,24 @@ class ModelService(HuggingFaceService):
|
|
552 |
try:
|
553 |
logger.info(LogFormatter.subsection("UPLOADING TO HUGGINGFACE"))
|
554 |
logger.info(LogFormatter.info(f"Uploading to {QUEUE_REPO}..."))
|
555 |
-
|
556 |
# Construct the path in the dataset
|
557 |
-
org_or_user =
|
|
|
|
|
|
|
|
|
558 |
model_path = model_data["model_id"].split("/")[-1]
|
559 |
relative_path = f"{org_or_user}/{model_path}_eval_request_False_{model_data['precision']}_{model_data['weight_type']}.json"
|
560 |
-
|
561 |
# Create a temporary file with the request
|
562 |
-
with tempfile.NamedTemporaryFile(
|
|
|
|
|
563 |
json.dump(eval_entry, temp_file, indent=2)
|
564 |
temp_file.flush()
|
565 |
temp_path = temp_file.name
|
566 |
-
|
567 |
# Upload file directly
|
568 |
self.hf_api.upload_file(
|
569 |
path_or_fileobj=temp_path,
|
@@ -571,14 +639,14 @@ class ModelService(HuggingFaceService):
|
|
571 |
repo_id=QUEUE_REPO,
|
572 |
repo_type="dataset",
|
573 |
commit_message=f"Add {model_data['model_id']} to eval queue",
|
574 |
-
token=self.token
|
575 |
)
|
576 |
-
|
577 |
# Clean up temp file
|
578 |
os.unlink(temp_path)
|
579 |
-
|
580 |
logger.info(LogFormatter.success("Upload successful"))
|
581 |
-
|
582 |
except Exception as e:
|
583 |
logger.error(LogFormatter.error("Upload failed", e))
|
584 |
raise
|
@@ -586,15 +654,19 @@ class ModelService(HuggingFaceService):
|
|
586 |
# Add automatic vote
|
587 |
try:
|
588 |
logger.info(LogFormatter.subsection("AUTOMATIC VOTE"))
|
589 |
-
logger.info(
|
|
|
|
|
|
|
|
|
590 |
await self.vote_service.add_vote(
|
591 |
model_data["model_id"],
|
592 |
user_id,
|
593 |
"up",
|
594 |
{
|
595 |
"precision": model_data["precision"],
|
596 |
-
"revision": model_data["revision"]
|
597 |
-
}
|
598 |
)
|
599 |
logger.info(LogFormatter.success("Vote recorded successfully"))
|
600 |
except Exception as e:
|
@@ -603,14 +675,14 @@ class ModelService(HuggingFaceService):
|
|
603 |
|
604 |
return {
|
605 |
"status": "success",
|
606 |
-
"message": "The model was submitted successfully, and the vote has been recorded"
|
607 |
}
|
608 |
|
609 |
async def get_model_status(self, model_id: str) -> Dict[str, Any]:
|
610 |
"""Get evaluation status of a model"""
|
611 |
logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
|
612 |
eval_path = self.eval_requests_path
|
613 |
-
|
614 |
for user_folder in eval_path.iterdir():
|
615 |
if user_folder.is_dir():
|
616 |
for file in user_folder.glob("*.json"):
|
@@ -620,24 +692,26 @@ class ModelService(HuggingFaceService):
|
|
620 |
status = {
|
621 |
"status": data["status"],
|
622 |
"submitted_time": data["submitted_time"],
|
623 |
-
"job_id": data.get("job_id", -1)
|
624 |
}
|
625 |
logger.info(LogFormatter.success("Status found"))
|
626 |
for line in LogFormatter.tree(status, "Model Status"):
|
627 |
logger.info(line)
|
628 |
return status
|
629 |
-
|
630 |
logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
|
631 |
return {"status": "not_found"}
|
632 |
|
633 |
-
async def get_organization_submissions(
|
|
|
|
|
634 |
"""Get all submissions from a user in the last n days"""
|
635 |
try:
|
636 |
# Get all models
|
637 |
all_models = await self.get_models()
|
638 |
current_time = datetime.now(timezone.utc)
|
639 |
cutoff_time = current_time - timedelta(days=days)
|
640 |
-
|
641 |
# Filter models by submitter and submission time
|
642 |
user_submissions = []
|
643 |
for status, models in all_models.items():
|
@@ -650,19 +724,21 @@ class ModelService(HuggingFaceService):
|
|
650 |
)
|
651 |
# Check if within time window
|
652 |
if submit_time > cutoff_time:
|
653 |
-
user_submissions.append(
|
654 |
-
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
|
|
|
|
660 |
return sorted(
|
661 |
-
user_submissions,
|
662 |
-
key=lambda x: x["submission_time"],
|
663 |
-
reverse=True
|
664 |
)
|
665 |
-
|
666 |
except Exception as e:
|
667 |
-
logger.error(
|
668 |
-
|
|
|
|
|
|
5 |
from pathlib import Path
|
6 |
import logging
|
7 |
import aiohttp
|
|
|
8 |
import time
|
9 |
+
from huggingface_hub import HfApi
|
10 |
from huggingface_hub.utils import build_hf_headers
|
11 |
from datasets import disable_progress_bar
|
12 |
import sys
|
13 |
import contextlib
|
|
|
14 |
import tempfile
|
15 |
|
16 |
+
from app.config import HF_TOKEN
|
17 |
+
from app.config.hf_config import QUEUE_REPO
|
|
|
|
|
|
|
|
|
18 |
from app.services.hf_service import HuggingFaceService
|
19 |
from app.utils.model_validation import ModelValidator
|
20 |
from app.services.votes import VoteService
|
|
|
26 |
|
27 |
logger = logging.getLogger(__name__)
|
28 |
|
29 |
+
|
30 |
# Context manager to temporarily disable stdout and stderr
|
31 |
@contextlib.contextmanager
|
32 |
def suppress_output():
|
33 |
stdout = sys.stdout
|
34 |
stderr = sys.stderr
|
35 |
+
devnull = open(os.devnull, "w")
|
36 |
try:
|
37 |
sys.stdout = devnull
|
38 |
sys.stderr = devnull
|
|
|
42 |
sys.stderr = stderr
|
43 |
devnull.close()
|
44 |
|
45 |
+
|
46 |
class ProgressTracker:
|
47 |
def __init__(self, total: int, desc: str = "Progress", update_frequency: int = 10):
|
48 |
self.total = total
|
|
|
51 |
self.start_time = time.time()
|
52 |
self.update_frequency = update_frequency # Percentage steps
|
53 |
self.last_update = -1
|
54 |
+
|
55 |
# Initial log with fancy formatting
|
56 |
logger.info(LogFormatter.section(desc))
|
57 |
logger.info(LogFormatter.info(f"Starting processing of {total:,} items..."))
|
58 |
sys.stdout.flush()
|
59 |
+
|
60 |
def update(self, n: int = 1):
|
61 |
self.current += n
|
62 |
current_percentage = (self.current * 100) // self.total
|
63 |
+
|
64 |
# Only update on frequency steps (e.g., 0%, 10%, 20%, etc.)
|
65 |
+
if (
|
66 |
+
current_percentage >= self.last_update + self.update_frequency
|
67 |
+
or current_percentage == 100
|
68 |
+
):
|
69 |
elapsed = time.time() - self.start_time
|
70 |
rate = self.current / elapsed if elapsed > 0 else 0
|
71 |
remaining = (self.total - self.current) / rate if rate > 0 else 0
|
72 |
+
|
73 |
# Create progress stats
|
74 |
stats = {
|
75 |
"Progress": LogFormatter.progress_bar(self.current, self.total),
|
76 |
"Items": f"{self.current:,}/{self.total:,}",
|
77 |
"Time": f"⏱️ {elapsed:.1f}s elapsed, {remaining:.1f}s remaining",
|
78 |
+
"Rate": f"🚀 {rate:.1f} items/s",
|
79 |
}
|
80 |
+
|
81 |
# Log progress using tree format
|
82 |
for line in LogFormatter.tree(stats):
|
83 |
logger.info(line)
|
84 |
sys.stdout.flush()
|
85 |
+
|
86 |
+
self.last_update = (
|
87 |
+
current_percentage // self.update_frequency
|
88 |
+
) * self.update_frequency
|
89 |
+
|
90 |
def close(self):
|
91 |
elapsed = time.time() - self.start_time
|
92 |
rate = self.total / elapsed if elapsed > 0 else 0
|
93 |
+
|
94 |
# Final summary with fancy formatting
|
95 |
logger.info(LogFormatter.section("COMPLETED"))
|
96 |
stats = {
|
97 |
"Total": f"{self.total:,} items",
|
98 |
"Time": f"{elapsed:.1f}s",
|
99 |
+
"Rate": f"{rate:.1f} items/s",
|
100 |
}
|
101 |
for line in LogFormatter.stats(stats):
|
102 |
logger.info(line)
|
103 |
+
logger.info("=" * 50)
|
104 |
sys.stdout.flush()
|
105 |
|
106 |
+
|
107 |
class ModelService(HuggingFaceService):
|
108 |
+
_instance: Optional["ModelService"] = None
|
109 |
_initialized = False
|
110 |
+
|
111 |
def __new__(cls):
|
112 |
if cls._instance is None:
|
113 |
logger.info(LogFormatter.info("Creating new ModelService instance"))
|
|
|
115 |
return cls._instance
|
116 |
|
117 |
def __init__(self):
|
118 |
+
if not hasattr(self, "_init_done"):
|
119 |
logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
|
120 |
super().__init__()
|
121 |
self.validator = ModelValidator()
|
122 |
self.vote_service = VoteService()
|
123 |
self.eval_requests_path = cache_config.eval_requests_file
|
124 |
+
logger.info(
|
125 |
+
LogFormatter.info(
|
126 |
+
f"Using eval requests path: {self.eval_requests_path}"
|
127 |
+
)
|
128 |
+
)
|
129 |
+
|
130 |
self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
|
131 |
self.hf_api = HfApi(token=HF_TOKEN)
|
132 |
self.cached_models = None
|
|
|
135 |
self._init_done = True
|
136 |
logger.info(LogFormatter.success("Initialization complete"))
|
137 |
|
138 |
+
async def _download_and_process_file(
|
139 |
+
self, file: str, session: aiohttp.ClientSession, progress: ProgressTracker
|
140 |
+
) -> Optional[Dict]:
|
141 |
"""Download and process a file asynchronously"""
|
142 |
try:
|
143 |
# Build file URL
|
144 |
url = f"https://huggingface.co/datasets/{QUEUE_REPO}/resolve/main/{file}"
|
145 |
headers = build_hf_headers(token=self.token)
|
146 |
+
|
147 |
# Download file
|
148 |
async with session.get(url, headers=headers) as response:
|
149 |
if response.status != 200:
|
150 |
+
logger.error(
|
151 |
+
LogFormatter.error(
|
152 |
+
f"Failed to download {file}", f"HTTP {response.status}"
|
153 |
+
)
|
154 |
+
)
|
155 |
progress.update()
|
156 |
return None
|
157 |
+
|
158 |
try:
|
159 |
# First read content as text
|
160 |
text_content = await response.text()
|
161 |
# Then parse JSON
|
162 |
content = json.loads(text_content)
|
163 |
except json.JSONDecodeError as e:
|
164 |
+
logger.error(
|
165 |
+
LogFormatter.error(f"Failed to decode JSON from {file}", e)
|
166 |
+
)
|
167 |
progress.update()
|
168 |
return None
|
169 |
+
|
170 |
# Get status and determine target status
|
171 |
status = content.get("status", "PENDING").upper()
|
172 |
target_status = None
|
173 |
status_map = {
|
174 |
"PENDING": ["PENDING"],
|
175 |
"EVALUATING": ["RUNNING"],
|
176 |
+
"FINISHED": ["FINISHED"],
|
177 |
}
|
178 |
+
|
179 |
for target, source_statuses in status_map.items():
|
180 |
if status in source_statuses:
|
181 |
target_status = target
|
182 |
break
|
183 |
+
|
184 |
if not target_status:
|
185 |
progress.update()
|
186 |
return None
|
187 |
+
|
188 |
# Calculate wait time
|
189 |
try:
|
190 |
+
submit_time = datetime.fromisoformat(
|
191 |
+
content["submitted_time"].replace("Z", "+00:00")
|
192 |
+
)
|
193 |
if submit_time.tzinfo is None:
|
194 |
submit_time = submit_time.replace(tzinfo=timezone.utc)
|
195 |
current_time = datetime.now(timezone.utc)
|
196 |
wait_time = current_time - submit_time
|
197 |
+
|
198 |
model_info = {
|
199 |
"name": content["model"],
|
200 |
"submitter": content.get("sender", "Unknown"),
|
|
|
202 |
"wait_time": f"{wait_time.total_seconds():.1f}s",
|
203 |
"submission_time": content["submitted_time"],
|
204 |
"status": target_status,
|
205 |
+
"precision": content.get("precision", "Unknown"),
|
206 |
}
|
207 |
+
|
208 |
progress.update()
|
209 |
return model_info
|
210 |
+
|
211 |
except (ValueError, TypeError) as e:
|
212 |
logger.error(LogFormatter.error(f"Failed to process {file}", e))
|
213 |
progress.update()
|
214 |
return None
|
215 |
+
|
216 |
except Exception as e:
|
217 |
logger.error(LogFormatter.error(f"Failed to load {file}", e))
|
218 |
progress.update()
|
|
|
223 |
try:
|
224 |
logger.info(LogFormatter.section("CACHE REFRESH"))
|
225 |
self._log_repo_operation("read", QUEUE_REPO, "Refreshing models cache")
|
226 |
+
|
227 |
# Initialize models dictionary
|
228 |
+
models = {"finished": [], "evaluating": [], "pending": []}
|
229 |
+
|
|
|
|
|
|
|
|
|
230 |
try:
|
231 |
logger.info(LogFormatter.subsection("DATASET LOADING"))
|
232 |
logger.info(LogFormatter.info("Loading dataset..."))
|
233 |
+
|
234 |
# Download entire dataset snapshot
|
235 |
with suppress_output():
|
236 |
local_dir = self.hf_api.snapshot_download(
|
237 |
+
repo_id=QUEUE_REPO, repo_type="dataset", token=self.token
|
|
|
|
|
238 |
)
|
239 |
+
|
240 |
# List JSON files in local directory
|
241 |
local_path = Path(local_dir)
|
242 |
json_files = list(local_path.glob("**/*.json"))
|
243 |
total_files = len(json_files)
|
244 |
+
|
245 |
# Log repository stats
|
246 |
stats = {
|
247 |
"Total_Files": total_files,
|
|
|
249 |
}
|
250 |
for line in LogFormatter.stats(stats, "Repository Statistics"):
|
251 |
logger.info(line)
|
252 |
+
|
253 |
if not json_files:
|
254 |
raise Exception("No JSON files found in repository")
|
255 |
+
|
256 |
# Initialize progress tracker
|
257 |
progress = ProgressTracker(total_files, "PROCESSING FILES")
|
258 |
+
|
259 |
# Process local files
|
260 |
model_submissions = {} # Dict to track latest submission for each (model_id, revision, precision)
|
261 |
for file_path in json_files:
|
262 |
try:
|
263 |
+
with open(file_path, "r") as f:
|
264 |
content = json.load(f)
|
265 |
+
|
266 |
# Get status and determine target status
|
267 |
status = content.get("status", "PENDING").upper()
|
268 |
target_status = None
|
269 |
status_map = {
|
270 |
"PENDING": ["PENDING"],
|
271 |
"EVALUATING": ["RUNNING"],
|
272 |
+
"FINISHED": ["FINISHED"],
|
273 |
}
|
274 |
+
|
275 |
for target, source_statuses in status_map.items():
|
276 |
if status in source_statuses:
|
277 |
target_status = target
|
278 |
break
|
279 |
+
|
280 |
if not target_status:
|
281 |
progress.update()
|
282 |
continue
|
283 |
+
|
284 |
# Calculate wait time
|
285 |
try:
|
286 |
+
submit_time = datetime.fromisoformat(
|
287 |
+
content["submitted_time"].replace("Z", "+00:00")
|
288 |
+
)
|
289 |
if submit_time.tzinfo is None:
|
290 |
submit_time = submit_time.replace(tzinfo=timezone.utc)
|
291 |
current_time = datetime.now(timezone.utc)
|
292 |
wait_time = current_time - submit_time
|
293 |
+
|
294 |
model_info = {
|
295 |
"name": content["model"],
|
296 |
"submitter": content.get("sender", "Unknown"),
|
|
|
298 |
"wait_time": f"{wait_time.total_seconds():.1f}s",
|
299 |
"submission_time": content["submitted_time"],
|
300 |
"status": target_status,
|
301 |
+
"precision": content.get("precision", "Unknown"),
|
302 |
}
|
303 |
+
|
304 |
# Use (model_id, revision, precision) as key to track latest submission
|
305 |
+
key = (
|
306 |
+
content["model"],
|
307 |
+
content["revision"],
|
308 |
+
content.get("precision", "Unknown"),
|
309 |
+
)
|
310 |
+
if (
|
311 |
+
key not in model_submissions
|
312 |
+
or submit_time
|
313 |
+
> datetime.fromisoformat(
|
314 |
+
model_submissions[key]["submission_time"].replace(
|
315 |
+
"Z", "+00:00"
|
316 |
+
)
|
317 |
+
)
|
318 |
+
):
|
319 |
model_submissions[key] = model_info
|
320 |
+
|
321 |
except (ValueError, TypeError) as e:
|
322 |
+
logger.error(
|
323 |
+
LogFormatter.error(
|
324 |
+
f"Failed to process {file_path.name}", e
|
325 |
+
)
|
326 |
+
)
|
327 |
+
|
328 |
except Exception as e:
|
329 |
+
logger.error(
|
330 |
+
LogFormatter.error(f"Failed to load {file_path.name}", e)
|
331 |
+
)
|
332 |
finally:
|
333 |
progress.update()
|
334 |
+
|
335 |
# Populate models dict with deduplicated submissions
|
336 |
for model_info in model_submissions.values():
|
337 |
models[model_info["status"].lower()].append(model_info)
|
338 |
+
|
339 |
progress.close()
|
340 |
+
|
341 |
# Final summary with fancy formatting
|
342 |
logger.info(LogFormatter.section("CACHE SUMMARY"))
|
343 |
stats = {
|
344 |
"Finished": len(models["finished"]),
|
345 |
"Evaluating": len(models["evaluating"]),
|
346 |
+
"Pending": len(models["pending"]),
|
347 |
}
|
348 |
for line in LogFormatter.stats(stats, "Models by Status"):
|
349 |
logger.info(line)
|
350 |
+
logger.info("=" * 50)
|
351 |
+
|
352 |
except Exception as e:
|
353 |
logger.error(LogFormatter.error("Error processing files", e))
|
354 |
raise
|
355 |
+
|
356 |
# Update cache
|
357 |
self.cached_models = models
|
358 |
self.last_cache_update = time.time()
|
359 |
logger.info(LogFormatter.success("Cache updated successfully"))
|
360 |
+
|
361 |
return models
|
362 |
+
|
363 |
except Exception as e:
|
364 |
logger.error(LogFormatter.error("Cache refresh failed", e))
|
365 |
raise
|
|
|
367 |
async def initialize(self):
|
368 |
"""Initialize the model service"""
|
369 |
if self._initialized:
|
370 |
+
logger.info(
|
371 |
+
LogFormatter.info("Service already initialized, using cached data")
|
372 |
+
)
|
373 |
return
|
374 |
+
|
375 |
try:
|
376 |
logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
|
377 |
+
|
378 |
# Check if cache already exists
|
379 |
cache_path = cache_config.get_cache_path("datasets")
|
380 |
if not cache_path.exists() or not any(cache_path.iterdir()):
|
381 |
+
logger.info(
|
382 |
+
LogFormatter.info(
|
383 |
+
"No existing cache found, initializing datasets cache..."
|
384 |
+
)
|
385 |
+
)
|
386 |
cache_config.flush_cache("datasets")
|
387 |
else:
|
388 |
logger.info(LogFormatter.info("Using existing datasets cache"))
|
389 |
+
|
390 |
# Ensure eval requests directory exists
|
391 |
self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
|
392 |
+
logger.info(
|
393 |
+
LogFormatter.info(f"Eval requests directory: {self.eval_requests_path}")
|
394 |
+
)
|
395 |
+
|
396 |
# List existing files
|
397 |
if self.eval_requests_path.exists():
|
398 |
files = list(self.eval_requests_path.glob("**/*.json"))
|
399 |
stats = {
|
400 |
"Total_Files": len(files),
|
401 |
+
"Directory": str(self.eval_requests_path),
|
402 |
}
|
403 |
for line in LogFormatter.stats(stats, "Eval Requests"):
|
404 |
logger.info(line)
|
405 |
+
|
406 |
# Load initial cache
|
407 |
await self._refresh_models_cache()
|
408 |
+
|
409 |
self._initialized = True
|
410 |
logger.info(LogFormatter.success("Model service initialization complete"))
|
411 |
+
|
412 |
except Exception as e:
|
413 |
logger.error(LogFormatter.error("Initialization failed", e))
|
414 |
raise
|
|
|
416 |
async def get_models(self) -> Dict[str, List[Dict[str, Any]]]:
|
417 |
"""Get all models with their status"""
|
418 |
if not self._initialized:
|
419 |
+
logger.info(
|
420 |
+
LogFormatter.info("Service not initialized, initializing now...")
|
421 |
+
)
|
422 |
await self.initialize()
|
423 |
+
|
424 |
current_time = time.time()
|
425 |
cache_age = current_time - self.last_cache_update
|
426 |
+
|
427 |
# Check if cache needs refresh
|
428 |
if not self.cached_models:
|
429 |
+
logger.info(
|
430 |
+
LogFormatter.info("No cached data available, refreshing cache...")
|
431 |
+
)
|
432 |
return await self._refresh_models_cache()
|
433 |
elif cache_age > self.cache_ttl:
|
434 |
+
logger.info(
|
435 |
+
LogFormatter.info(
|
436 |
+
f"Cache expired ({cache_age:.1f}s old, TTL: {self.cache_ttl}s)"
|
437 |
+
)
|
438 |
+
)
|
439 |
return await self._refresh_models_cache()
|
440 |
else:
|
441 |
logger.info(LogFormatter.info(f"Using cached data ({cache_age:.1f}s old)"))
|
442 |
return self.cached_models
|
443 |
|
444 |
async def submit_model(
|
445 |
+
self, model_data: Dict[str, Any], user_id: str
|
|
|
|
|
446 |
) -> Dict[str, Any]:
|
447 |
logger.info(LogFormatter.section("MODEL SUBMISSION"))
|
448 |
+
self._log_repo_operation(
|
449 |
+
"write",
|
450 |
+
QUEUE_REPO,
|
451 |
+
f"Submitting model {model_data['model_id']} by {user_id}",
|
452 |
+
)
|
453 |
stats = {
|
454 |
"Model": model_data["model_id"],
|
455 |
"User": user_id,
|
456 |
"Revision": model_data["revision"],
|
457 |
"Precision": model_data["precision"],
|
458 |
+
"Type": model_data["model_type"],
|
459 |
}
|
460 |
for line in LogFormatter.tree(stats, "Submission Details"):
|
461 |
logger.info(line)
|
462 |
+
|
463 |
# Validate required fields
|
464 |
required_fields = [
|
465 |
+
"model_id",
|
466 |
+
"base_model",
|
467 |
+
"revision",
|
468 |
+
"precision",
|
469 |
+
"weight_type",
|
470 |
+
"model_type",
|
471 |
+
"use_chat_template",
|
472 |
]
|
473 |
for field in required_fields:
|
474 |
if field not in model_data:
|
|
|
477 |
# Get model info and validate it exists on HuggingFace
|
478 |
try:
|
479 |
logger.info(LogFormatter.subsection("MODEL VALIDATION"))
|
480 |
+
|
481 |
# Get the model info to check if it exists
|
482 |
model_info = self.hf_api.model_info(
|
483 |
model_data["model_id"],
|
484 |
revision=model_data["revision"],
|
485 |
+
token=self.token,
|
486 |
)
|
487 |
+
|
488 |
if not model_info:
|
489 |
+
raise Exception(
|
490 |
+
f"Model {model_data['model_id']} not found on HuggingFace Hub"
|
491 |
+
)
|
492 |
+
|
493 |
logger.info(LogFormatter.success("Model exists on HuggingFace Hub"))
|
494 |
+
|
495 |
except Exception as e:
|
496 |
logger.error(LogFormatter.error("Model validation failed", e))
|
497 |
raise
|
498 |
+
|
499 |
# Update model revision with commit sha
|
500 |
model_data["revision"] = model_info.sha
|
501 |
|
|
|
503 |
try:
|
504 |
logger.info(LogFormatter.subsection("CHECKING EXISTING SUBMISSIONS"))
|
505 |
existing_models = await self.get_models()
|
506 |
+
|
507 |
# Call the official provider status check
|
508 |
+
(
|
509 |
+
is_valid,
|
510 |
+
error_message,
|
511 |
+
) = await self.validator.check_official_provider_status(
|
512 |
+
model_data["model_id"], existing_models
|
513 |
)
|
514 |
if not is_valid:
|
515 |
raise ValueError(error_message)
|
|
|
517 |
# Check in all statuses (pending, evaluating, finished)
|
518 |
for status, models in existing_models.items():
|
519 |
for model in models:
|
520 |
+
if (
|
521 |
+
model["name"] == model_data["model_id"]
|
522 |
+
and model["revision"] == model_data["revision"]
|
523 |
+
):
|
524 |
error_msg = f"Model {model_data['model_id']} revision {model_data['revision']} is already in the system with status: {status}"
|
525 |
+
logger.error(
|
526 |
+
LogFormatter.error("Submission rejected", error_msg)
|
527 |
+
)
|
528 |
raise ValueError(error_msg)
|
529 |
+
|
530 |
logger.info(LogFormatter.success("No existing submission found"))
|
531 |
except ValueError:
|
532 |
raise
|
|
|
536 |
|
537 |
# Check that model on hub and valid
|
538 |
valid, error, model_config = await self.validator.is_model_on_hub(
|
539 |
+
model_data["model_id"], model_data["revision"], test_tokenizer=True
|
|
|
|
|
540 |
)
|
541 |
if not valid:
|
542 |
logger.error(LogFormatter.error("Model on hub validation failed", error))
|
|
|
557 |
model_info,
|
558 |
model_data["precision"],
|
559 |
model_data["base_model"],
|
560 |
+
revision=model_data["revision"],
|
561 |
)
|
562 |
if model_size is None:
|
563 |
logger.error(LogFormatter.error("Model size validation failed", error))
|
564 |
raise Exception(error)
565 +         logger.info(
566 +             LogFormatter.success(f"Model size validation passed: {model_size:.1f}B")
567 +         )
568
569           # Size limits based on precision
570           if model_data["precision"] in ["float16", "bfloat16"] and model_size > 100:

575           # Chat template validation if requested
576           if model_data["use_chat_template"]:
577               valid, error = await self.validator.check_chat_template(
578 +                 model_data["model_id"], model_data["revision"]
579               )
580               if not valid:
581 +                 logger.error(
582 +                     LogFormatter.error("Chat template validation failed", error)
583 +                 )
584                   raise Exception(error)
585               logger.info(LogFormatter.success("Chat template validation passed"))
586
587 +         architectures = model_info.config.get("architectures", "")
588           if architectures:
589               architectures = ";".join(architectures)
590

603               "job_id": -1,
604               "job_start_time": None,
605               "use_chat_template": model_data["use_chat_template"],
606 +             "sender": user_id,
607           }
608 +
609           logger.info(LogFormatter.subsection("EVALUATION ENTRY"))
610           for line in LogFormatter.tree(eval_entry):
611               logger.info(line)

614           try:
615               logger.info(LogFormatter.subsection("UPLOADING TO HUGGINGFACE"))
616               logger.info(LogFormatter.info(f"Uploading to {QUEUE_REPO}..."))
617 +
618               # Construct the path in the dataset
619 +             org_or_user = (
620 +                 model_data["model_id"].split("/")[0]
621 +                 if "/" in model_data["model_id"]
622 +                 else ""
623 +             )
624               model_path = model_data["model_id"].split("/")[-1]
625               relative_path = f"{org_or_user}/{model_path}_eval_request_False_{model_data['precision']}_{model_data['weight_type']}.json"
626 +
627               # Create a temporary file with the request
628 +             with tempfile.NamedTemporaryFile(
629 +                 mode="w", suffix=".json", delete=False
630 +             ) as temp_file:
631                   json.dump(eval_entry, temp_file, indent=2)
632                   temp_file.flush()
633                   temp_path = temp_file.name
634 +
635               # Upload file directly
636               self.hf_api.upload_file(
637                   path_or_fileobj=temp_path,

639                   repo_id=QUEUE_REPO,
640                   repo_type="dataset",
641                   commit_message=f"Add {model_data['model_id']} to eval queue",
642 +                 token=self.token,
643               )
644 +
645               # Clean up temp file
646               os.unlink(temp_path)
647 +
648               logger.info(LogFormatter.success("Upload successful"))
649 +
650           except Exception as e:
651               logger.error(LogFormatter.error("Upload failed", e))
652               raise

654           # Add automatic vote
655           try:
656               logger.info(LogFormatter.subsection("AUTOMATIC VOTE"))
657 +             logger.info(
658 +                 LogFormatter.info(
659 +                     f"Adding upvote for {model_data['model_id']} by {user_id}"
660 +                 )
661 +             )
662               await self.vote_service.add_vote(
663                   model_data["model_id"],
664                   user_id,
665                   "up",
666                   {
667                       "precision": model_data["precision"],
668 +                     "revision": model_data["revision"],
669 +                 },
670               )
671               logger.info(LogFormatter.success("Vote recorded successfully"))
672           except Exception as e:

675
676           return {
677               "status": "success",
678 +             "message": "The model was submitted successfully, and the vote has been recorded",
679           }
680
681       async def get_model_status(self, model_id: str) -> Dict[str, Any]:
682           """Get evaluation status of a model"""
683           logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
684           eval_path = self.eval_requests_path
685 +
686           for user_folder in eval_path.iterdir():
687               if user_folder.is_dir():
688                   for file in user_folder.glob("*.json"):

692                           status = {
693                               "status": data["status"],
694                               "submitted_time": data["submitted_time"],
695 +                             "job_id": data.get("job_id", -1),
696                           }
697                           logger.info(LogFormatter.success("Status found"))
698                           for line in LogFormatter.tree(status, "Model Status"):
699                               logger.info(line)
700                           return status
701 +
702           logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
703           return {"status": "not_found"}
704
705 +     async def get_organization_submissions(
706 +         self, organization: str, days: int = 7
707 +     ) -> List[Dict[str, Any]]:
708           """Get all submissions from a user in the last n days"""
709           try:
710               # Get all models
711               all_models = await self.get_models()
712               current_time = datetime.now(timezone.utc)
713               cutoff_time = current_time - timedelta(days=days)
714 +
715               # Filter models by submitter and submission time
716               user_submissions = []
717               for status, models in all_models.items():

724                       )
725                       # Check if within time window
726                       if submit_time > cutoff_time:
727 +                         user_submissions.append(
728 +                             {
729 +                                 "name": model["name"],
730 +                                 "status": status,
731 +                                 "submission_time": model["submission_time"],
732 +                                 "precision": model["precision"],
733 +                             }
734 +                         )
735 +
736               return sorted(
737 +                 user_submissions, key=lambda x: x["submission_time"], reverse=True
738               )
739 +
740           except Exception as e:
741 +             logger.error(
742 +                 LogFormatter.error(f"Failed to get submissions for {organization}", e)
743 +             )
744 +             raise
|
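For context, a minimal self-contained sketch of the eval-request path that lines 618-625 above assemble; the model data below is illustrative only.

model_data = {
    "model_id": "example-org/example-model",
    "precision": "float16",
    "weight_type": "Original",
}

# Same construction as in the diff: org prefix (if any), then the request filename.
org_or_user = (
    model_data["model_id"].split("/")[0]
    if "/" in model_data["model_id"]
    else ""
)
model_path = model_data["model_id"].split("/")[-1]
relative_path = (
    f"{org_or_user}/{model_path}_eval_request_False_"
    f"{model_data['precision']}_{model_data['weight_type']}.json"
)

print(relative_path)
# example-org/example-model_eval_request_False_float16_Original.json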
backend/app/services/rate_limiter.py
DELETED
@@ -1,72 +0,0 @@
 1 - """
 2 - import logging
 3 - from datetime import datetime, timedelta, timezone
 4 - from typing import Tuple, Dict, List
 5 -
 6 - logger = logging.getLogger(__name__)
 7 -
 8 - class RateLimiter:
 9 -     def __init__(self, period_days: int = 7, quota: int = 5):
10 -         self.period_days = period_days
11 -         self.quota = quota
12 -         self.submission_history: Dict[str, List[datetime]] = {}
13 -         self.higher_quota_users = set()  # Users with higher quotas
14 -         self.unlimited_users = set()  # Users with no quota limits
15 -
16 -     def add_unlimited_user(self, user_id: str):
17 -         """Add a user to the unlimited users list"""
18 -         self.unlimited_users.add(user_id)
19 -
20 -     def add_higher_quota_user(self, user_id: str):
21 -         """Add a user to the higher quota users list"""
22 -         self.higher_quota_users.add(user_id)
23 -
24 -     def record_submission(self, user_id: str):
25 -         """Record a new submission for a user"""
26 -         current_time = datetime.now(timezone.utc)
27 -         if user_id not in self.submission_history:
28 -             self.submission_history[user_id] = []
29 -         self.submission_history[user_id].append(current_time)
30 -
31 -     def clean_old_submissions(self, user_id: str):
32 -         """Remove submissions older than the period"""
33 -         if user_id not in self.submission_history:
34 -             return
35 -
36 -         current_time = datetime.now(timezone.utc)
37 -         cutoff_time = current_time - timedelta(days=self.period_days)
38 -
39 -         self.submission_history[user_id] = [
40 -             time for time in self.submission_history[user_id]
41 -             if time > cutoff_time
42 -         ]
43 -
44 -     async def check_rate_limit(self, user_id: str) -> Tuple[bool, str]:
45 -         """Check if a user has exceeded their rate limit
46 -
47 -         Returns:
48 -             Tuple[bool, str]: (is_allowed, error_message)
49 -         """
50 -         # Unlimited users bypass all checks
51 -         if user_id in self.unlimited_users:
52 -             return True, ""
53 -
54 -         # Clean old submissions
55 -         self.clean_old_submissions(user_id)
56 -
57 -         # Get current submission count
58 -         submission_count = len(self.submission_history.get(user_id, []))
59 -
60 -         # Calculate user's quota
61 -         user_quota = self.quota * 2 if user_id in self.higher_quota_users else self.quota
62 -
63 -         # Check if user has exceeded their quota
64 -         if submission_count >= user_quota:
65 -             error_msg = (
66 -                 f"User '{user_id}' has reached the limit of {user_quota} submissions "
67 -                 f"in the last {self.period_days} days. Please wait before submitting again."
68 -             )
69 -             return False, error_msg
70 -
71 -         return True, ""
72 - """
|
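For context, a self-contained sketch of the sliding-window check the deleted (and already commented-out) RateLimiter implemented; the 7-day window and 5-submission quota follow the removed file, while the user id below is illustrative.

from datetime import datetime, timedelta, timezone
from typing import Dict, List, Tuple

PERIOD_DAYS = 7  # window length used by the removed RateLimiter
QUOTA = 5        # default submissions allowed per window

submission_history: Dict[str, List[datetime]] = {}

def record_submission(user_id: str) -> None:
    submission_history.setdefault(user_id, []).append(datetime.now(timezone.utc))

def check_rate_limit(user_id: str) -> Tuple[bool, str]:
    # Drop submissions older than the window, then compare against the quota.
    cutoff = datetime.now(timezone.utc) - timedelta(days=PERIOD_DAYS)
    recent = [t for t in submission_history.get(user_id, []) if t > cutoff]
    submission_history[user_id] = recent
    if len(recent) >= QUOTA:
        return False, (
            f"User '{user_id}' has reached the limit of {QUOTA} submissions "
            f"in the last {PERIOD_DAYS} days. Please wait before submitting again."
        )
    return True, ""

# The sixth submission inside one window is rejected.
for _ in range(6):
    allowed, message = check_rate_limit("example-user")
    if allowed:
        record_submission("example-user")
print(message)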
backend/app/services/votes.py
CHANGED
@@ -3,7 +3,6 @@ from typing import Dict, Any, List, Set, Tuple, Optional
|
|
3 |
import json
|
4 |
import logging
|
5 |
import asyncio
|
6 |
-
from pathlib import Path
|
7 |
import aiohttp
|
8 |
from huggingface_hub import HfApi
|
9 |
import tempfile
|
@@ -11,23 +10,24 @@ import os
|
|
11 |
|
12 |
from app.services.hf_service import HuggingFaceService
|
13 |
from app.config import HF_TOKEN
|
14 |
-
from app.config.hf_config import
|
15 |
from app.core.cache import cache_config
|
16 |
from app.core.formatting import LogFormatter
|
17 |
|
18 |
logger = logging.getLogger(__name__)
|
19 |
|
|
|
20 |
class VoteService(HuggingFaceService):
|
21 |
-
_instance: Optional[
|
22 |
_initialized = False
|
23 |
-
|
24 |
def __new__(cls):
|
25 |
if cls._instance is None:
|
26 |
cls._instance = super(VoteService, cls).__new__(cls)
|
27 |
return cls._instance
|
28 |
|
29 |
def __init__(self):
|
30 |
-
if not hasattr(self,
|
31 |
super().__init__()
|
32 |
self.votes_file = cache_config.votes_file
|
33 |
self.votes_to_upload: List[Dict[str, Any]] = []
|
@@ -48,41 +48,43 @@ class VoteService(HuggingFaceService):
|
|
48 |
if self._initialized:
|
49 |
await self._check_for_new_votes()
|
50 |
return
|
51 |
-
|
52 |
try:
|
53 |
logger.info(LogFormatter.section("VOTE SERVICE INITIALIZATION"))
|
54 |
-
|
55 |
# Ensure votes directory exists
|
56 |
self.votes_file.parent.mkdir(parents=True, exist_ok=True)
|
57 |
-
|
58 |
# Load remote votes
|
59 |
remote_votes = await self._fetch_remote_votes()
|
60 |
if remote_votes:
|
61 |
-
logger.info(
|
62 |
-
|
|
|
|
|
63 |
# Save to local file
|
64 |
-
with open(self.votes_file,
|
65 |
for vote in remote_votes:
|
66 |
json.dump(vote, f)
|
67 |
-
f.write(
|
68 |
-
|
69 |
# Load into memory
|
70 |
await self._load_existing_votes()
|
71 |
else:
|
72 |
logger.warning(LogFormatter.warning("No votes found on hub"))
|
73 |
-
|
74 |
self._initialized = True
|
75 |
self._last_sync = datetime.now(timezone.utc)
|
76 |
-
|
77 |
# Final summary
|
78 |
stats = {
|
79 |
"Total_Votes": self._total_votes,
|
80 |
-
"Last_Sync": self._last_sync.strftime("%Y-%m-%d %H:%M:%S UTC")
|
81 |
}
|
82 |
logger.info(LogFormatter.section("INITIALIZATION COMPLETE"))
|
83 |
for line in LogFormatter.stats(stats):
|
84 |
logger.info(line)
|
85 |
-
|
86 |
except Exception as e:
|
87 |
logger.error(LogFormatter.error("Initialization failed", e))
|
88 |
raise
|
@@ -91,7 +93,7 @@ class VoteService(HuggingFaceService):
|
|
91 |
"""Fetch votes from HF hub"""
|
92 |
url = f"https://huggingface.co/datasets/{VOTES_REPO}/raw/main/votes_data.jsonl"
|
93 |
headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
|
94 |
-
|
95 |
try:
|
96 |
async with aiohttp.ClientSession() as session:
|
97 |
async with session.get(url, headers=headers) as response:
|
@@ -106,7 +108,9 @@ class VoteService(HuggingFaceService):
|
|
106 |
continue
|
107 |
return votes
|
108 |
else:
|
109 |
-
logger.error(
|
|
|
|
|
110 |
return []
|
111 |
except Exception as e:
|
112 |
logger.error(f"Error fetching remote votes: {str(e)}")
|
@@ -117,18 +121,20 @@ class VoteService(HuggingFaceService):
|
|
117 |
try:
|
118 |
remote_votes = await self._fetch_remote_votes()
|
119 |
if len(remote_votes) != self._total_votes:
|
120 |
-
logger.info(
|
|
|
|
|
121 |
# Save to local file
|
122 |
-
with open(self.votes_file,
|
123 |
for vote in remote_votes:
|
124 |
json.dump(vote, f)
|
125 |
-
f.write(
|
126 |
-
|
127 |
# Reload into memory
|
128 |
await self._load_existing_votes()
|
129 |
else:
|
130 |
logger.info("Votes are in sync")
|
131 |
-
|
132 |
except Exception as e:
|
133 |
logger.error(f"Error checking for new votes: {str(e)}")
|
134 |
|
@@ -136,25 +142,31 @@ class VoteService(HuggingFaceService):
|
|
136 |
"""Sync votes with HuggingFace hub"""
|
137 |
try:
|
138 |
logger.info(LogFormatter.section("VOTE SYNC"))
|
139 |
-
|
140 |
# Get current remote votes
|
141 |
remote_votes = await self._fetch_remote_votes()
|
142 |
logger.info(LogFormatter.info(f"Loaded {len(remote_votes)} votes from hub"))
|
143 |
-
|
144 |
# If we have pending votes to upload
|
145 |
if self.votes_to_upload:
|
146 |
-
logger.info(
|
147 |
-
|
|
|
|
|
|
|
|
|
148 |
# Add new votes to remote votes
|
149 |
remote_votes.extend(self.votes_to_upload)
|
150 |
-
|
151 |
# Create temporary file with all votes
|
152 |
-
with tempfile.NamedTemporaryFile(
|
|
|
|
|
153 |
for vote in remote_votes:
|
154 |
json.dump(vote, temp_file)
|
155 |
-
temp_file.write(
|
156 |
temp_path = temp_file.name
|
157 |
-
|
158 |
try:
|
159 |
# Upload JSONL file directly
|
160 |
self.hf_api.upload_file(
|
@@ -163,32 +175,34 @@ class VoteService(HuggingFaceService):
|
|
163 |
repo_id=VOTES_REPO,
|
164 |
repo_type="dataset",
|
165 |
commit_message=f"Update votes: +{len(self.votes_to_upload)} new votes",
|
166 |
-
token=self.token
|
167 |
)
|
168 |
-
|
169 |
# Clear pending votes only if upload succeeded
|
170 |
self.votes_to_upload.clear()
|
171 |
-
logger.info(
|
172 |
-
|
|
|
|
|
173 |
except Exception as e:
|
174 |
logger.error(LogFormatter.error("Failed to upload votes to hub", e))
|
175 |
raise
|
176 |
finally:
|
177 |
# Clean up temp file
|
178 |
os.unlink(temp_path)
|
179 |
-
|
180 |
# Update local state
|
181 |
-
with open(self.votes_file,
|
182 |
for vote in remote_votes:
|
183 |
json.dump(vote, f)
|
184 |
-
f.write(
|
185 |
-
|
186 |
# Reload votes in memory
|
187 |
await self._load_existing_votes()
|
188 |
logger.info(LogFormatter.success("Sync completed successfully"))
|
189 |
|
190 |
self._last_sync = datetime.now(timezone.utc)
|
191 |
-
|
192 |
except Exception as e:
|
193 |
logger.error(LogFormatter.error("Sync failed", e))
|
194 |
raise
|
@@ -201,58 +215,73 @@ class VoteService(HuggingFaceService):
|
|
201 |
|
202 |
try:
|
203 |
logger.info(LogFormatter.section("LOADING VOTES"))
|
204 |
-
|
205 |
# Clear existing data structures
|
206 |
self.vote_check_set.clear()
|
207 |
self._votes_by_model.clear()
|
208 |
self._votes_by_user.clear()
|
209 |
-
|
210 |
vote_count = 0
|
211 |
latest_timestamp = None
|
212 |
-
|
213 |
with open(self.votes_file, "r") as f:
|
214 |
for line in f:
|
215 |
try:
|
216 |
vote = json.loads(line.strip())
|
217 |
vote_count += 1
|
218 |
-
|
219 |
# Track latest timestamp
|
220 |
try:
|
221 |
-
vote_timestamp = datetime.fromisoformat(
|
222 |
-
|
|
|
|
|
|
|
|
|
|
|
223 |
latest_timestamp = vote_timestamp
|
224 |
-
vote["timestamp"] = vote_timestamp.strftime(
|
|
|
|
|
225 |
except (KeyError, ValueError) as e:
|
226 |
-
logger.warning(
|
|
|
|
|
|
|
|
|
227 |
continue
|
228 |
-
|
229 |
if vote_count % 1000 == 0:
|
230 |
-
logger.info(
|
231 |
-
|
|
|
|
|
232 |
self._add_vote_to_memory(vote)
|
233 |
-
|
234 |
except json.JSONDecodeError as e:
|
235 |
logger.error(LogFormatter.error("Vote parsing failed", e))
|
236 |
continue
|
237 |
except Exception as e:
|
238 |
logger.error(LogFormatter.error("Vote processing failed", e))
|
239 |
continue
|
240 |
-
|
241 |
self._total_votes = vote_count
|
242 |
self._last_vote_timestamp = latest_timestamp
|
243 |
-
|
244 |
# Final summary
|
245 |
stats = {
|
246 |
"Total_Votes": vote_count,
|
247 |
-
"Latest_Vote": latest_timestamp.strftime("%Y-%m-%d %H:%M:%S UTC")
|
|
|
|
|
248 |
"Unique_Models": len(self._votes_by_model),
|
249 |
-
"Unique_Users": len(self._votes_by_user)
|
250 |
}
|
251 |
-
|
252 |
logger.info(LogFormatter.section("VOTE SUMMARY"))
|
253 |
for line in LogFormatter.stats(stats):
|
254 |
logger.info(line)
|
255 |
-
|
256 |
except Exception as e:
|
257 |
logger.error(LogFormatter.error("Failed to load votes", e))
|
258 |
raise
|
@@ -265,25 +294,25 @@ class VoteService(HuggingFaceService):
|
|
265 |
vote["model"],
|
266 |
vote.get("revision", "main"),
|
267 |
vote["username"],
|
268 |
-
vote.get("precision", "unknown")
|
269 |
)
|
270 |
-
|
271 |
# Skip if we already have this vote
|
272 |
if check_tuple in self.vote_check_set:
|
273 |
return
|
274 |
-
|
275 |
self.vote_check_set.add(check_tuple)
|
276 |
-
|
277 |
# Update model votes
|
278 |
if vote["model"] not in self._votes_by_model:
|
279 |
self._votes_by_model[vote["model"]] = []
|
280 |
self._votes_by_model[vote["model"]].append(vote)
|
281 |
-
|
282 |
# Update user votes
|
283 |
if vote["username"] not in self._votes_by_user:
|
284 |
self._votes_by_user[vote["username"]] = []
|
285 |
self._votes_by_user[vote["username"]].append(vote)
|
286 |
-
|
287 |
except KeyError as e:
|
288 |
logger.error(LogFormatter.error("Malformed vote data, missing key", str(e)))
|
289 |
except Exception as e:
|
@@ -292,12 +321,14 @@ class VoteService(HuggingFaceService):
|
|
292 |
async def get_user_votes(self, user_id: str) -> List[Dict[str, Any]]:
|
293 |
"""Get all votes from a specific user"""
|
294 |
logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
|
295 |
-
|
296 |
# Check if we need to refresh votes
|
297 |
-
if (
|
|
|
|
|
298 |
logger.info(LogFormatter.info("Cache expired, refreshing votes..."))
|
299 |
await self._check_for_new_votes()
|
300 |
-
|
301 |
votes = self._votes_by_user.get(user_id, [])
|
302 |
logger.info(LogFormatter.success(f"Found {len(votes):,} votes"))
|
303 |
return votes
|
@@ -305,14 +336,16 @@ class VoteService(HuggingFaceService):
|
|
305 |
async def get_model_votes(self, model_id: str) -> Dict[str, Any]:
|
306 |
"""Get all votes for a specific model"""
|
307 |
logger.info(LogFormatter.info(f"Fetching votes for model: {model_id}"))
|
308 |
-
|
309 |
# Check if we need to refresh votes
|
310 |
-
if (
|
|
|
|
|
311 |
logger.info(LogFormatter.info("Cache expired, refreshing votes..."))
|
312 |
await self._check_for_new_votes()
|
313 |
-
|
314 |
votes = self._votes_by_model.get(model_id, [])
|
315 |
-
|
316 |
# Group votes by revision and precision
|
317 |
votes_by_config = {}
|
318 |
for vote in votes:
|
@@ -323,23 +356,23 @@ class VoteService(HuggingFaceService):
|
|
323 |
votes_by_config[config_key] = {
|
324 |
"revision": revision,
|
325 |
"precision": precision,
|
326 |
-
"count": 0
|
327 |
}
|
328 |
votes_by_config[config_key]["count"] += 1
|
329 |
-
|
330 |
stats = {
|
331 |
"Total_Votes": len(votes),
|
332 |
-
**{f"Config_{k}": v["count"] for k, v in votes_by_config.items()}
|
333 |
}
|
334 |
-
|
335 |
logger.info(LogFormatter.section("VOTE STATISTICS"))
|
336 |
for line in LogFormatter.stats(stats):
|
337 |
logger.info(line)
|
338 |
-
|
339 |
return {
|
340 |
"total_votes": len(votes),
|
341 |
"votes_by_config": votes_by_config,
|
342 |
-
"votes": votes
|
343 |
}
|
344 |
|
345 |
async def _get_model_revision(self, model_id: str) -> str:
|
@@ -348,60 +381,86 @@ class VoteService(HuggingFaceService):
|
|
348 |
for attempt in range(self._max_retries):
|
349 |
try:
|
350 |
model_info = await asyncio.to_thread(self.hf_api.model_info, model_id)
|
351 |
-
logger.info(
|
|
|
|
|
352 |
return model_info.sha
|
353 |
except Exception as e:
|
354 |
-
logger.error(
|
|
|
|
|
355 |
if attempt < self._max_retries - 1:
|
356 |
retry_delay = self._retry_delay * (attempt + 1)
|
357 |
logger.info(f"Retrying in {retry_delay} seconds...")
|
358 |
await asyncio.sleep(retry_delay)
|
359 |
else:
|
360 |
-
logger.warning(
|
|
|
|
|
361 |
return "main"
|
362 |
|
363 |
-
async def add_vote(
|
|
|
|
|
|
|
|
|
|
|
|
|
364 |
"""Add a vote for a model"""
|
365 |
try:
|
366 |
-
self._log_repo_operation(
|
|
|
|
|
|
|
|
|
367 |
logger.info(LogFormatter.section("NEW VOTE"))
|
368 |
stats = {
|
369 |
"Model": model_id,
|
370 |
"User": user_id,
|
371 |
"Type": vote_type,
|
372 |
-
"Config": vote_data or {}
|
373 |
}
|
374 |
for line in LogFormatter.tree(stats, "Vote Details"):
|
375 |
logger.info(line)
|
376 |
-
|
377 |
# Use provided configuration or fallback to model info
|
378 |
precision = None
|
379 |
revision = None
|
380 |
-
|
381 |
if vote_data:
|
382 |
precision = vote_data.get("precision")
|
383 |
revision = vote_data.get("revision")
|
384 |
-
|
385 |
# If any info is missing, try to get it from model info
|
386 |
if not all([precision, revision]):
|
387 |
try:
|
388 |
-
model_info = await asyncio.to_thread(
|
389 |
-
|
390 |
-
|
|
|
|
|
|
|
|
|
391 |
if not precision:
|
392 |
precision = model_card_data.get("precision", "unknown")
|
393 |
if not revision:
|
394 |
revision = model_info.sha
|
395 |
except Exception as e:
|
396 |
-
logger.warning(
|
|
|
|
|
|
|
|
|
397 |
precision = precision or "unknown"
|
398 |
revision = revision or "main"
|
399 |
-
|
400 |
# Check if vote already exists with this configuration
|
401 |
check_tuple = (model_id, revision, user_id, precision)
|
402 |
-
|
403 |
if check_tuple in self.vote_check_set:
|
404 |
-
raise ValueError(
|
|
|
|
|
405 |
|
406 |
vote = {
|
407 |
"model": model_id,
|
@@ -409,33 +468,33 @@ class VoteService(HuggingFaceService):
|
|
409 |
"username": user_id,
|
410 |
"timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
411 |
"vote_type": vote_type,
|
412 |
-
"precision": precision
|
413 |
}
|
414 |
|
415 |
# Update local storage
|
416 |
with open(self.votes_file, "a") as f:
|
417 |
f.write(json.dumps(vote) + "\n")
|
418 |
-
|
419 |
self._add_vote_to_memory(vote)
|
420 |
self.votes_to_upload.append(vote)
|
421 |
-
|
422 |
stats = {
|
423 |
"Status": "Success",
|
424 |
"Queue_Size": len(self.votes_to_upload),
|
425 |
"Model_Config": {
|
426 |
"Precision": precision,
|
427 |
-
"Revision": revision[:7] if revision else "unknown"
|
428 |
-
}
|
429 |
}
|
430 |
for line in LogFormatter.stats(stats):
|
431 |
logger.info(line)
|
432 |
-
|
433 |
# Force immediate sync
|
434 |
logger.info(LogFormatter.info("Forcing immediate sync with hub"))
|
435 |
await self._sync_with_hub()
|
436 |
-
|
437 |
return {"status": "success", "message": "Vote added successfully"}
|
438 |
-
|
439 |
except Exception as e:
|
440 |
logger.error(LogFormatter.error("Failed to add vote", e))
|
441 |
raise
|
|
|
3 |
import json
|
4 |
import logging
|
5 |
import asyncio
|
|
|
6 |
import aiohttp
|
7 |
from huggingface_hub import HfApi
|
8 |
import tempfile
|
|
|
10 |
|
11 |
from app.services.hf_service import HuggingFaceService
|
12 |
from app.config import HF_TOKEN
|
13 |
+
from app.config.hf_config import VOTES_REPO
|
14 |
from app.core.cache import cache_config
|
15 |
from app.core.formatting import LogFormatter
|
16 |
|
17 |
logger = logging.getLogger(__name__)
|
18 |
|
19 |
+
|
20 |
class VoteService(HuggingFaceService):
|
21 |
+
_instance: Optional["VoteService"] = None
|
22 |
_initialized = False
|
23 |
+
|
24 |
def __new__(cls):
|
25 |
if cls._instance is None:
|
26 |
cls._instance = super(VoteService, cls).__new__(cls)
|
27 |
return cls._instance
|
28 |
|
29 |
def __init__(self):
|
30 |
+
if not hasattr(self, "_init_done"):
|
31 |
super().__init__()
|
32 |
self.votes_file = cache_config.votes_file
|
33 |
self.votes_to_upload: List[Dict[str, Any]] = []
|
|
|
48 |
if self._initialized:
|
49 |
await self._check_for_new_votes()
|
50 |
return
|
51 |
+
|
52 |
try:
|
53 |
logger.info(LogFormatter.section("VOTE SERVICE INITIALIZATION"))
|
54 |
+
|
55 |
# Ensure votes directory exists
|
56 |
self.votes_file.parent.mkdir(parents=True, exist_ok=True)
|
57 |
+
|
58 |
# Load remote votes
|
59 |
remote_votes = await self._fetch_remote_votes()
|
60 |
if remote_votes:
|
61 |
+
logger.info(
|
62 |
+
LogFormatter.info(f"Loaded {len(remote_votes)} votes from hub")
|
63 |
+
)
|
64 |
+
|
65 |
# Save to local file
|
66 |
+
with open(self.votes_file, "w") as f:
|
67 |
for vote in remote_votes:
|
68 |
json.dump(vote, f)
|
69 |
+
f.write("\n")
|
70 |
+
|
71 |
# Load into memory
|
72 |
await self._load_existing_votes()
|
73 |
else:
|
74 |
logger.warning(LogFormatter.warning("No votes found on hub"))
|
75 |
+
|
76 |
self._initialized = True
|
77 |
self._last_sync = datetime.now(timezone.utc)
|
78 |
+
|
79 |
# Final summary
|
80 |
stats = {
|
81 |
"Total_Votes": self._total_votes,
|
82 |
+
"Last_Sync": self._last_sync.strftime("%Y-%m-%d %H:%M:%S UTC"),
|
83 |
}
|
84 |
logger.info(LogFormatter.section("INITIALIZATION COMPLETE"))
|
85 |
for line in LogFormatter.stats(stats):
|
86 |
logger.info(line)
|
87 |
+
|
88 |
except Exception as e:
|
89 |
logger.error(LogFormatter.error("Initialization failed", e))
|
90 |
raise
|
|
|
93 |
"""Fetch votes from HF hub"""
|
94 |
url = f"https://huggingface.co/datasets/{VOTES_REPO}/raw/main/votes_data.jsonl"
|
95 |
headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
|
96 |
+
|
97 |
try:
|
98 |
async with aiohttp.ClientSession() as session:
|
99 |
async with session.get(url, headers=headers) as response:
|
|
|
108 |
continue
|
109 |
return votes
|
110 |
else:
|
111 |
+
logger.error(
|
112 |
+
f"Failed to get remote votes: HTTP {response.status}"
|
113 |
+
)
|
114 |
return []
|
115 |
except Exception as e:
|
116 |
logger.error(f"Error fetching remote votes: {str(e)}")
|
|
|
121 |
try:
|
122 |
remote_votes = await self._fetch_remote_votes()
|
123 |
if len(remote_votes) != self._total_votes:
|
124 |
+
logger.info(
|
125 |
+
f"Vote count changed: Local ({self._total_votes}) ≠ Remote ({len(remote_votes)})"
|
126 |
+
)
|
127 |
# Save to local file
|
128 |
+
with open(self.votes_file, "w") as f:
|
129 |
for vote in remote_votes:
|
130 |
json.dump(vote, f)
|
131 |
+
f.write("\n")
|
132 |
+
|
133 |
# Reload into memory
|
134 |
await self._load_existing_votes()
|
135 |
else:
|
136 |
logger.info("Votes are in sync")
|
137 |
+
|
138 |
except Exception as e:
|
139 |
logger.error(f"Error checking for new votes: {str(e)}")
|
140 |
|
|
|
142 |
"""Sync votes with HuggingFace hub"""
|
143 |
try:
|
144 |
logger.info(LogFormatter.section("VOTE SYNC"))
|
145 |
+
|
146 |
# Get current remote votes
|
147 |
remote_votes = await self._fetch_remote_votes()
|
148 |
logger.info(LogFormatter.info(f"Loaded {len(remote_votes)} votes from hub"))
|
149 |
+
|
150 |
# If we have pending votes to upload
|
151 |
if self.votes_to_upload:
|
152 |
+
logger.info(
|
153 |
+
LogFormatter.info(
|
154 |
+
f"Adding {len(self.votes_to_upload)} pending votes..."
|
155 |
+
)
|
156 |
+
)
|
157 |
+
|
158 |
# Add new votes to remote votes
|
159 |
remote_votes.extend(self.votes_to_upload)
|
160 |
+
|
161 |
# Create temporary file with all votes
|
162 |
+
with tempfile.NamedTemporaryFile(
|
163 |
+
mode="w", suffix=".jsonl", delete=False
|
164 |
+
) as temp_file:
|
165 |
for vote in remote_votes:
|
166 |
json.dump(vote, temp_file)
|
167 |
+
temp_file.write("\n")
|
168 |
temp_path = temp_file.name
|
169 |
+
|
170 |
try:
|
171 |
# Upload JSONL file directly
|
172 |
self.hf_api.upload_file(
|
|
|
175 |
repo_id=VOTES_REPO,
|
176 |
repo_type="dataset",
|
177 |
commit_message=f"Update votes: +{len(self.votes_to_upload)} new votes",
|
178 |
+
token=self.token,
|
179 |
)
|
180 |
+
|
181 |
# Clear pending votes only if upload succeeded
|
182 |
self.votes_to_upload.clear()
|
183 |
+
logger.info(
|
184 |
+
LogFormatter.success("Pending votes uploaded successfully")
|
185 |
+
)
|
186 |
+
|
187 |
except Exception as e:
|
188 |
logger.error(LogFormatter.error("Failed to upload votes to hub", e))
|
189 |
raise
|
190 |
finally:
|
191 |
# Clean up temp file
|
192 |
os.unlink(temp_path)
|
193 |
+
|
194 |
# Update local state
|
195 |
+
with open(self.votes_file, "w") as f:
|
196 |
for vote in remote_votes:
|
197 |
json.dump(vote, f)
|
198 |
+
f.write("\n")
|
199 |
+
|
200 |
# Reload votes in memory
|
201 |
await self._load_existing_votes()
|
202 |
logger.info(LogFormatter.success("Sync completed successfully"))
|
203 |
|
204 |
self._last_sync = datetime.now(timezone.utc)
|
205 |
+
|
206 |
except Exception as e:
|
207 |
logger.error(LogFormatter.error("Sync failed", e))
|
208 |
raise
|
|
|
215 |
|
216 |
try:
|
217 |
logger.info(LogFormatter.section("LOADING VOTES"))
|
218 |
+
|
219 |
# Clear existing data structures
|
220 |
self.vote_check_set.clear()
|
221 |
self._votes_by_model.clear()
|
222 |
self._votes_by_user.clear()
|
223 |
+
|
224 |
vote_count = 0
|
225 |
latest_timestamp = None
|
226 |
+
|
227 |
with open(self.votes_file, "r") as f:
|
228 |
for line in f:
|
229 |
try:
|
230 |
vote = json.loads(line.strip())
|
231 |
vote_count += 1
|
232 |
+
|
233 |
# Track latest timestamp
|
234 |
try:
|
235 |
+
vote_timestamp = datetime.fromisoformat(
|
236 |
+
vote["timestamp"].replace("Z", "+00:00")
|
237 |
+
)
|
238 |
+
if (
|
239 |
+
not latest_timestamp
|
240 |
+
or vote_timestamp > latest_timestamp
|
241 |
+
):
|
242 |
latest_timestamp = vote_timestamp
|
243 |
+
vote["timestamp"] = vote_timestamp.strftime(
|
244 |
+
"%Y-%m-%dT%H:%M:%SZ"
|
245 |
+
)
|
246 |
except (KeyError, ValueError) as e:
|
247 |
+
logger.warning(
|
248 |
+
LogFormatter.warning(
|
249 |
+
f"Invalid timestamp in vote: {str(e)}"
|
250 |
+
)
|
251 |
+
)
|
252 |
continue
|
253 |
+
|
254 |
if vote_count % 1000 == 0:
|
255 |
+
logger.info(
|
256 |
+
LogFormatter.info(f"Processed {vote_count:,} votes...")
|
257 |
+
)
|
258 |
+
|
259 |
self._add_vote_to_memory(vote)
|
260 |
+
|
261 |
except json.JSONDecodeError as e:
|
262 |
logger.error(LogFormatter.error("Vote parsing failed", e))
|
263 |
continue
|
264 |
except Exception as e:
|
265 |
logger.error(LogFormatter.error("Vote processing failed", e))
|
266 |
continue
|
267 |
+
|
268 |
self._total_votes = vote_count
|
269 |
self._last_vote_timestamp = latest_timestamp
|
270 |
+
|
271 |
# Final summary
|
272 |
stats = {
|
273 |
"Total_Votes": vote_count,
|
274 |
+
"Latest_Vote": latest_timestamp.strftime("%Y-%m-%d %H:%M:%S UTC")
|
275 |
+
if latest_timestamp
|
276 |
+
else "None",
|
277 |
"Unique_Models": len(self._votes_by_model),
|
278 |
+
"Unique_Users": len(self._votes_by_user),
|
279 |
}
|
280 |
+
|
281 |
logger.info(LogFormatter.section("VOTE SUMMARY"))
|
282 |
for line in LogFormatter.stats(stats):
|
283 |
logger.info(line)
|
284 |
+
|
285 |
except Exception as e:
|
286 |
logger.error(LogFormatter.error("Failed to load votes", e))
|
287 |
raise
|
|
|
294 |
vote["model"],
|
295 |
vote.get("revision", "main"),
|
296 |
vote["username"],
|
297 |
+
vote.get("precision", "unknown"),
|
298 |
)
|
299 |
+
|
300 |
# Skip if we already have this vote
|
301 |
if check_tuple in self.vote_check_set:
|
302 |
return
|
303 |
+
|
304 |
self.vote_check_set.add(check_tuple)
|
305 |
+
|
306 |
# Update model votes
|
307 |
if vote["model"] not in self._votes_by_model:
|
308 |
self._votes_by_model[vote["model"]] = []
|
309 |
self._votes_by_model[vote["model"]].append(vote)
|
310 |
+
|
311 |
# Update user votes
|
312 |
if vote["username"] not in self._votes_by_user:
|
313 |
self._votes_by_user[vote["username"]] = []
|
314 |
self._votes_by_user[vote["username"]].append(vote)
|
315 |
+
|
316 |
except KeyError as e:
|
317 |
logger.error(LogFormatter.error("Malformed vote data, missing key", str(e)))
|
318 |
except Exception as e:
|
|
|
321 |
async def get_user_votes(self, user_id: str) -> List[Dict[str, Any]]:
|
322 |
"""Get all votes from a specific user"""
|
323 |
logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
|
324 |
+
|
325 |
# Check if we need to refresh votes
|
326 |
+
if (
|
327 |
+
datetime.now(timezone.utc) - self._last_sync
|
328 |
+
).total_seconds() > self._sync_interval:
|
329 |
logger.info(LogFormatter.info("Cache expired, refreshing votes..."))
|
330 |
await self._check_for_new_votes()
|
331 |
+
|
332 |
votes = self._votes_by_user.get(user_id, [])
|
333 |
logger.info(LogFormatter.success(f"Found {len(votes):,} votes"))
|
334 |
return votes
|
|
|
336 |
async def get_model_votes(self, model_id: str) -> Dict[str, Any]:
|
337 |
"""Get all votes for a specific model"""
|
338 |
logger.info(LogFormatter.info(f"Fetching votes for model: {model_id}"))
|
339 |
+
|
340 |
# Check if we need to refresh votes
|
341 |
+
if (
|
342 |
+
datetime.now(timezone.utc) - self._last_sync
|
343 |
+
).total_seconds() > self._sync_interval:
|
344 |
logger.info(LogFormatter.info("Cache expired, refreshing votes..."))
|
345 |
await self._check_for_new_votes()
|
346 |
+
|
347 |
votes = self._votes_by_model.get(model_id, [])
|
348 |
+
|
349 |
# Group votes by revision and precision
|
350 |
votes_by_config = {}
|
351 |
for vote in votes:
|
|
|
356 |
votes_by_config[config_key] = {
|
357 |
"revision": revision,
|
358 |
"precision": precision,
|
359 |
+
"count": 0,
|
360 |
}
|
361 |
votes_by_config[config_key]["count"] += 1
|
362 |
+
|
363 |
stats = {
|
364 |
"Total_Votes": len(votes),
|
365 |
+
**{f"Config_{k}": v["count"] for k, v in votes_by_config.items()},
|
366 |
}
|
367 |
+
|
368 |
logger.info(LogFormatter.section("VOTE STATISTICS"))
|
369 |
for line in LogFormatter.stats(stats):
|
370 |
logger.info(line)
|
371 |
+
|
372 |
return {
|
373 |
"total_votes": len(votes),
|
374 |
"votes_by_config": votes_by_config,
|
375 |
+
"votes": votes,
|
376 |
}
|
377 |
|
378 |
async def _get_model_revision(self, model_id: str) -> str:
|
|
|
381 |
for attempt in range(self._max_retries):
|
382 |
try:
|
383 |
model_info = await asyncio.to_thread(self.hf_api.model_info, model_id)
|
384 |
+
logger.info(
|
385 |
+
f"Successfully got revision {model_info.sha} for model {model_id}"
|
386 |
+
)
|
387 |
return model_info.sha
|
388 |
except Exception as e:
|
389 |
+
logger.error(
|
390 |
+
f"Error getting model revision for {model_id} (attempt {attempt + 1}): {str(e)}"
|
391 |
+
)
|
392 |
if attempt < self._max_retries - 1:
|
393 |
retry_delay = self._retry_delay * (attempt + 1)
|
394 |
logger.info(f"Retrying in {retry_delay} seconds...")
|
395 |
await asyncio.sleep(retry_delay)
|
396 |
else:
|
397 |
+
logger.warning(
|
398 |
+
f"Using 'main' as fallback revision for {model_id} after {self._max_retries} failed attempts"
|
399 |
+
)
|
400 |
return "main"
|
401 |
|
402 |
+
async def add_vote(
|
403 |
+
self,
|
404 |
+
model_id: str,
|
405 |
+
user_id: str,
|
406 |
+
vote_type: str,
|
407 |
+
vote_data: Dict[str, Any] = None,
|
408 |
+
) -> Dict[str, Any]:
|
409 |
"""Add a vote for a model"""
|
410 |
try:
|
411 |
+
self._log_repo_operation(
|
412 |
+
"add",
|
413 |
+
VOTES_REPO,
|
414 |
+
f"Adding {vote_type} vote for {model_id} by {user_id}",
|
415 |
+
)
|
416 |
logger.info(LogFormatter.section("NEW VOTE"))
|
417 |
stats = {
|
418 |
"Model": model_id,
|
419 |
"User": user_id,
|
420 |
"Type": vote_type,
|
421 |
+
"Config": vote_data or {},
|
422 |
}
|
423 |
for line in LogFormatter.tree(stats, "Vote Details"):
|
424 |
logger.info(line)
|
425 |
+
|
426 |
# Use provided configuration or fallback to model info
|
427 |
precision = None
|
428 |
revision = None
|
429 |
+
|
430 |
if vote_data:
|
431 |
precision = vote_data.get("precision")
|
432 |
revision = vote_data.get("revision")
|
433 |
+
|
434 |
# If any info is missing, try to get it from model info
|
435 |
if not all([precision, revision]):
|
436 |
try:
|
437 |
+
model_info = await asyncio.to_thread(
|
438 |
+
self.hf_api.model_info, model_id
|
439 |
+
)
|
440 |
+
model_card_data = (
|
441 |
+
model_info.cardData if hasattr(model_info, "cardData") else {}
|
442 |
+
)
|
443 |
+
|
444 |
if not precision:
|
445 |
precision = model_card_data.get("precision", "unknown")
|
446 |
if not revision:
|
447 |
revision = model_info.sha
|
448 |
except Exception as e:
|
449 |
+
logger.warning(
|
450 |
+
LogFormatter.warning(
|
451 |
+
f"Failed to get model info: {str(e)}. Using default values."
|
452 |
+
)
|
453 |
+
)
|
454 |
precision = precision or "unknown"
|
455 |
revision = revision or "main"
|
456 |
+
|
457 |
# Check if vote already exists with this configuration
|
458 |
check_tuple = (model_id, revision, user_id, precision)
|
459 |
+
|
460 |
if check_tuple in self.vote_check_set:
|
461 |
+
raise ValueError(
|
462 |
+
f"Vote already recorded for this model configuration (precision: {precision}, revision: {revision[:7] if revision else 'unknown'})"
|
463 |
+
)
|
464 |
|
465 |
vote = {
|
466 |
"model": model_id,
|
|
|
468 |
"username": user_id,
|
469 |
"timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
470 |
"vote_type": vote_type,
|
471 |
+
"precision": precision,
|
472 |
}
|
473 |
|
474 |
# Update local storage
|
475 |
with open(self.votes_file, "a") as f:
|
476 |
f.write(json.dumps(vote) + "\n")
|
477 |
+
|
478 |
self._add_vote_to_memory(vote)
|
479 |
self.votes_to_upload.append(vote)
|
480 |
+
|
481 |
stats = {
|
482 |
"Status": "Success",
|
483 |
"Queue_Size": len(self.votes_to_upload),
|
484 |
"Model_Config": {
|
485 |
"Precision": precision,
|
486 |
+
"Revision": revision[:7] if revision else "unknown",
|
487 |
+
},
|
488 |
}
|
489 |
for line in LogFormatter.stats(stats):
|
490 |
logger.info(line)
|
491 |
+
|
492 |
# Force immediate sync
|
493 |
logger.info(LogFormatter.info("Forcing immediate sync with hub"))
|
494 |
await self._sync_with_hub()
|
495 |
+
|
496 |
return {"status": "success", "message": "Vote added successfully"}
|
497 |
+
|
498 |
except Exception as e:
|
499 |
logger.error(LogFormatter.error("Failed to add vote", e))
|
500 |
raise
|
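For context, a small self-contained sketch of the JSONL round trip that _sync_with_hub and _load_existing_votes rely on: one JSON object per line on write, line-by-line parsing with malformed lines skipped on read. The vote fields mirror the diff above; the concrete values are illustrative.

import json
import tempfile
from datetime import datetime, timezone

# Illustrative vote record, shaped like the ones add_vote() builds.
votes = [
    {
        "model": "example-org/example-model",
        "revision": "main",
        "username": "example-user",
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "vote_type": "up",
        "precision": "float16",
    }
]

# Write: one JSON object per line, as the sync code does before uploading.
with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as temp_file:
    for vote in votes:
        json.dump(vote, temp_file)
        temp_file.write("\n")
    temp_path = temp_file.name

# Read back: parse line by line and skip anything malformed.
loaded = []
with open(temp_path, "r") as f:
    for line in f:
        try:
            loaded.append(json.loads(line.strip()))
        except json.JSONDecodeError:
            continue

print(f"Loaded {len(loaded)} vote(s)")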
backend/app/utils/logging.py
CHANGED
@@ -1,3 +1,3 @@
1   from app.core.formatting import LogFormatter
2
3 - __all__ = [
3 + __all__ = ["LogFormatter"]
backend/app/utils/model_validation.py
CHANGED
@@ -12,50 +12,56 @@ from app.core.formatting import LogFormatter
|
|
12 |
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
|
|
15 |
class ModelValidator:
|
16 |
def __init__(self):
|
17 |
self.token = HF_TOKEN
|
18 |
self.api = HfApi(token=self.token)
|
19 |
self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
|
20 |
-
|
21 |
-
async def check_model_card(
|
|
|
|
|
22 |
"""Check if model has a valid model card"""
|
23 |
try:
|
24 |
logger.info(LogFormatter.info(f"Checking model card for {model_id}"))
|
25 |
-
|
26 |
# Get model card content using ModelCard.load
|
27 |
try:
|
28 |
-
model_card = await asyncio.to_thread(
|
29 |
-
ModelCard.load,
|
30 |
-
model_id
|
31 |
-
)
|
32 |
logger.info(LogFormatter.success("Model card found"))
|
33 |
except Exception as e:
|
34 |
error_msg = "Please add a model card to your model to explain how you trained/fine-tuned it."
|
35 |
logger.error(LogFormatter.error(error_msg, e))
|
36 |
return False, error_msg, None
|
37 |
-
|
38 |
# Check license in model card data
|
39 |
-
if model_card.data.license is None and not (
|
|
|
|
|
40 |
error_msg = "License not found. Please add a license to your model card using the `license` metadata or a `license_name`/`license_link` pair."
|
41 |
logger.warning(LogFormatter.warning(error_msg))
|
42 |
return False, error_msg, None
|
43 |
|
44 |
# Enforce card content length
|
45 |
if len(model_card.text) < 200:
|
46 |
-
error_msg =
|
|
|
|
|
47 |
logger.warning(LogFormatter.warning(error_msg))
|
48 |
return False, error_msg, None
|
49 |
-
|
50 |
logger.info(LogFormatter.success("Model card validation passed"))
|
51 |
return True, "", model_card
|
52 |
-
|
53 |
except Exception as e:
|
54 |
error_msg = "Failed to validate model card"
|
55 |
logger.error(LogFormatter.error(error_msg, e))
|
56 |
return False, str(e), None
|
57 |
-
|
58 |
-
async def get_safetensors_metadata(
|
|
|
|
|
59 |
"""Get metadata from a safetensors file"""
|
60 |
try:
|
61 |
if is_adapter:
|
@@ -80,26 +86,32 @@ class ModelValidator:
|
|
80 |
return None
|
81 |
|
82 |
async def get_model_size(
|
83 |
-
self,
|
84 |
-
model_info: Any,
|
85 |
-
precision: str,
|
86 |
-
base_model: str,
|
87 |
-
revision: str
|
88 |
) -> Tuple[Optional[float], Optional[str]]:
|
89 |
"""Get model size in billions of parameters"""
|
90 |
try:
|
91 |
-
logger.info(
|
|
|
|
|
92 |
|
93 |
# Check if model is adapter
|
94 |
-
is_adapter = any(
|
|
|
|
|
|
|
|
|
95 |
|
96 |
# Try to get size from safetensors first
|
97 |
model_size = None
|
98 |
|
99 |
if is_adapter and base_model:
|
100 |
# For adapters, we need both adapter and base model sizes
|
101 |
-
adapter_meta = await self.get_safetensors_metadata(
|
102 |
-
|
|
|
|
|
|
|
|
|
103 |
|
104 |
if adapter_meta and base_meta:
|
105 |
adapter_size = sum(adapter_meta.parameter_count.values())
|
@@ -107,16 +119,20 @@ class ModelValidator:
|
|
107 |
model_size = adapter_size + base_size
|
108 |
else:
|
109 |
# For regular models, just get the model size
|
110 |
-
meta = await self.get_safetensors_metadata(
|
|
|
|
|
111 |
if meta:
|
112 |
-
model_size = sum(meta.parameter_count.values())
|
113 |
|
114 |
if model_size is None:
|
115 |
# If model size could not be determined, return an error
|
116 |
return None, "Model size could not be determined"
|
117 |
|
118 |
# Adjust size for GPTQ models
|
119 |
-
size_factor =
|
|
|
|
|
120 |
model_size = model_size / 1e9 # Convert to billions, assuming float16
|
121 |
model_size = round(size_factor * model_size, 3)
|
122 |
|
@@ -127,52 +143,49 @@ class ModelValidator:
|
|
127 |
logger.error(LogFormatter.error(f"Error while determining model size: {e}"))
|
128 |
return None, str(e)
|
129 |
|
130 |
-
|
131 |
async def check_chat_template(
|
132 |
-
self,
|
133 |
-
model_id: str,
|
134 |
-
revision: str
|
135 |
) -> Tuple[bool, Optional[str]]:
|
136 |
"""Check if model has a valid chat template"""
|
137 |
try:
|
138 |
logger.info(LogFormatter.info(f"Checking chat template for {model_id}"))
|
139 |
-
|
140 |
try:
|
141 |
config_file = await asyncio.to_thread(
|
142 |
hf_hub_download,
|
143 |
repo_id=model_id,
|
144 |
filename="tokenizer_config.json",
|
145 |
revision=revision,
|
146 |
-
repo_type="model"
|
147 |
)
|
148 |
-
|
149 |
-
with open(config_file,
|
150 |
tokenizer_config = json.load(f)
|
151 |
-
|
152 |
-
if
|
153 |
error_msg = f"The model {model_id} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting or submit without it."
|
154 |
logger.error(LogFormatter.error(error_msg))
|
155 |
return False, error_msg
|
156 |
-
|
157 |
logger.info(LogFormatter.success("Valid chat template found"))
|
158 |
return True, None
|
159 |
-
|
160 |
except Exception as e:
|
161 |
error_msg = f"Error checking chat_template: {str(e)}"
|
162 |
logger.error(LogFormatter.error(error_msg))
|
163 |
return False, error_msg
|
164 |
-
|
165 |
except Exception as e:
|
166 |
error_msg = "Failed to check chat template"
|
167 |
logger.error(LogFormatter.error(error_msg, e))
|
168 |
return False, str(e)
|
169 |
-
|
170 |
async def is_model_on_hub(
|
171 |
self,
|
172 |
model_name: str,
|
173 |
revision: str,
|
174 |
test_tokenizer: bool = False,
|
175 |
-
trust_remote_code: bool = False
|
176 |
) -> Tuple[bool, Optional[str], Optional[Any]]:
|
177 |
"""Check if model exists and is properly configured on the Hub"""
|
178 |
try:
|
@@ -182,9 +195,9 @@ class ModelValidator:
|
|
182 |
revision=revision,
|
183 |
trust_remote_code=trust_remote_code,
|
184 |
token=self.token,
|
185 |
-
force_download=True
|
186 |
)
|
187 |
-
|
188 |
if test_tokenizer:
|
189 |
try:
|
190 |
await asyncio.to_thread(
|
@@ -192,56 +205,80 @@ class ModelValidator:
|
|
192 |
model_name,
|
193 |
revision=revision,
|
194 |
trust_remote_code=trust_remote_code,
|
195 |
-
token=self.token
|
196 |
)
|
197 |
except ValueError as e:
|
198 |
-
return
|
|
|
|
|
|
|
|
|
199 |
except Exception:
|
200 |
-
return
|
201 |
-
|
|
|
|
|
|
|
|
|
202 |
return True, None, config
|
203 |
-
|
204 |
except ValueError:
|
205 |
-
return
|
|
|
|
|
|
|
|
|
206 |
except Exception as e:
|
207 |
if "You are trying to access a gated repo." in str(e):
|
208 |
-
return
|
209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
async def check_official_provider_status(
|
212 |
-
self,
|
213 |
-
model_id: str,
|
214 |
-
existing_models: Dict[str, list]
|
215 |
) -> Tuple[bool, Optional[str]]:
|
216 |
"""
|
217 |
Check if model is from official provider and has finished submission.
|
218 |
-
|
219 |
Args:
|
220 |
model_id: The model identifier (org/model-name)
|
221 |
existing_models: Dictionary of models by status from get_models()
|
222 |
-
|
223 |
Returns:
|
224 |
Tuple[bool, Optional[str]]: (is_valid, error_message)
|
225 |
"""
|
226 |
try:
|
227 |
-
logger.info(
|
228 |
-
|
|
|
|
|
229 |
# Get model organization
|
230 |
-
model_org = model_id.split(
|
231 |
-
|
232 |
if not model_org:
|
233 |
return True, None
|
234 |
-
|
235 |
# Load official providers dataset
|
236 |
dataset = load_dataset(OFFICIAL_PROVIDERS_REPO)
|
237 |
official_providers = dataset["train"][0]["CURATED_SET"]
|
238 |
-
|
239 |
# Check if model org is in official providers
|
240 |
is_official = model_org in official_providers
|
241 |
-
|
242 |
if is_official:
|
243 |
-
logger.info(
|
244 |
-
|
|
|
|
|
|
|
|
|
245 |
# Check for finished submissions
|
246 |
if "finished" in existing_models:
|
247 |
for model in existing_models["finished"]:
|
@@ -251,15 +288,25 @@ class ModelValidator:
|
|
251 |
f"with a completed evaluation. "
|
252 |
f"To re-evaluate, please open a discussion."
|
253 |
)
|
254 |
-
logger.error(
|
|
|
|
|
255 |
return False, error_msg
|
256 |
-
|
257 |
-
logger.info(
|
|
|
|
|
|
|
|
|
258 |
else:
|
259 |
-
logger.info(
|
260 |
-
|
|
|
|
|
|
|
|
|
261 |
return True, None
|
262 |
-
|
263 |
except Exception as e:
|
264 |
error_msg = f"Failed to check official provider status: {str(e)}"
|
265 |
logger.error(LogFormatter.error(error_msg))
|
|
|
12 |
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
15 |
+
|
16 |
class ModelValidator:
|
17 |
def __init__(self):
|
18 |
self.token = HF_TOKEN
|
19 |
self.api = HfApi(token=self.token)
|
20 |
self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
|
21 |
+
|
22 |
+
async def check_model_card(
|
23 |
+
self, model_id: str
|
24 |
+
) -> Tuple[bool, str, Optional[Dict[str, Any]]]:
|
25 |
"""Check if model has a valid model card"""
|
26 |
try:
|
27 |
logger.info(LogFormatter.info(f"Checking model card for {model_id}"))
|
28 |
+
|
29 |
# Get model card content using ModelCard.load
|
30 |
try:
|
31 |
+
model_card = await asyncio.to_thread(ModelCard.load, model_id)
|
|
|
|
|
|
|
32 |
logger.info(LogFormatter.success("Model card found"))
|
33 |
except Exception as e:
|
34 |
error_msg = "Please add a model card to your model to explain how you trained/fine-tuned it."
|
35 |
logger.error(LogFormatter.error(error_msg, e))
|
36 |
return False, error_msg, None
|
37 |
+
|
38 |
# Check license in model card data
|
39 |
+
if model_card.data.license is None and not (
|
40 |
+
"license_name" in model_card.data and "license_link" in model_card.data
|
41 |
+
):
|
42 |
error_msg = "License not found. Please add a license to your model card using the `license` metadata or a `license_name`/`license_link` pair."
|
43 |
logger.warning(LogFormatter.warning(error_msg))
|
44 |
return False, error_msg, None
|
45 |
|
46 |
# Enforce card content length
|
47 |
if len(model_card.text) < 200:
|
48 |
+
error_msg = (
|
49 |
+
"Please add a description to your model card, it is too short."
|
50 |
+
)
|
51 |
logger.warning(LogFormatter.warning(error_msg))
|
52 |
return False, error_msg, None
|
53 |
+
|
54 |
logger.info(LogFormatter.success("Model card validation passed"))
|
55 |
return True, "", model_card
|
56 |
+
|
57 |
except Exception as e:
|
58 |
error_msg = "Failed to validate model card"
|
59 |
logger.error(LogFormatter.error(error_msg, e))
|
60 |
return False, str(e), None
|
61 |
+
|
62 |
+
async def get_safetensors_metadata(
|
63 |
+
self, model_id: str, is_adapter: bool = False, revision: str = "main"
|
64 |
+
) -> Optional[Dict]:
|
65 |
"""Get metadata from a safetensors file"""
|
66 |
try:
|
67 |
if is_adapter:
|
|
|
86 |
return None
|
87 |
|
88 |
async def get_model_size(
|
89 |
+
self, model_info: Any, precision: str, base_model: str, revision: str
|
|
|
|
|
|
|
|
|
90 |
) -> Tuple[Optional[float], Optional[str]]:
|
91 |
"""Get model size in billions of parameters"""
|
92 |
try:
|
93 |
+
logger.info(
|
94 |
+
LogFormatter.info(f"Checking model size for {model_info.modelId}")
|
95 |
+
)
|
96 |
|
97 |
# Check if model is adapter
|
98 |
+
is_adapter = any(
|
99 |
+
s.rfilename == "adapter_config.json"
|
100 |
+
for s in model_info.siblings
|
101 |
+
if hasattr(s, "rfilename")
|
102 |
+
)
|
103 |
|
104 |
# Try to get size from safetensors first
|
105 |
model_size = None
|
106 |
|
107 |
if is_adapter and base_model:
|
108 |
# For adapters, we need both adapter and base model sizes
|
109 |
+
adapter_meta = await self.get_safetensors_metadata(
|
110 |
+
model_info.id, is_adapter=True, revision=revision
|
111 |
+
)
|
112 |
+
base_meta = await self.get_safetensors_metadata(
|
113 |
+
base_model, revision="main"
|
114 |
+
)
|
115 |
|
116 |
if adapter_meta and base_meta:
|
117 |
adapter_size = sum(adapter_meta.parameter_count.values())
|
|
|
119 |
model_size = adapter_size + base_size
|
120 |
else:
|
121 |
# For regular models, just get the model size
|
122 |
+
meta = await self.get_safetensors_metadata(
|
123 |
+
model_info.id, revision=revision
|
124 |
+
)
|
125 |
if meta:
|
126 |
+
model_size = sum(meta.parameter_count.values()) # total params
|
127 |
|
128 |
if model_size is None:
|
129 |
# If model size could not be determined, return an error
|
130 |
return None, "Model size could not be determined"
|
131 |
|
132 |
# Adjust size for GPTQ models
|
133 |
+
size_factor = (
|
134 |
+
8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
|
135 |
+
)
|
136 |
model_size = model_size / 1e9 # Convert to billions, assuming float16
|
137 |
model_size = round(size_factor * model_size, 3)
|
138 |
|
|
|
143 |
logger.error(LogFormatter.error(f"Error while determining model size: {e}"))
|
144 |
return None, str(e)
|
145 |
|
|
|
146 |
async def check_chat_template(
|
147 |
+
self, model_id: str, revision: str
|
|
|
|
|
148 |
) -> Tuple[bool, Optional[str]]:
|
149 |
"""Check if model has a valid chat template"""
|
150 |
try:
|
151 |
logger.info(LogFormatter.info(f"Checking chat template for {model_id}"))
|
152 |
+
|
153 |
try:
|
154 |
config_file = await asyncio.to_thread(
|
155 |
hf_hub_download,
|
156 |
repo_id=model_id,
|
157 |
filename="tokenizer_config.json",
|
158 |
revision=revision,
|
159 |
+
repo_type="model",
|
160 |
)
|
161 |
+
|
162 |
+
with open(config_file, "r") as f:
|
163 |
tokenizer_config = json.load(f)
|
164 |
+
|
165 |
+
if "chat_template" not in tokenizer_config:
|
166 |
error_msg = f"The model {model_id} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting or submit without it."
|
167 |
logger.error(LogFormatter.error(error_msg))
|
168 |
return False, error_msg
|
169 |
+
|
170 |
logger.info(LogFormatter.success("Valid chat template found"))
|
171 |
return True, None
|
172 |
+
|
173 |
except Exception as e:
|
174 |
error_msg = f"Error checking chat_template: {str(e)}"
|
175 |
logger.error(LogFormatter.error(error_msg))
|
176 |
return False, error_msg
|
177 |
+
|
178 |
except Exception as e:
|
179 |
error_msg = "Failed to check chat template"
|
180 |
logger.error(LogFormatter.error(error_msg, e))
|
181 |
return False, str(e)
|
182 |
+
|
183 |
async def is_model_on_hub(
|
184 |
self,
|
185 |
model_name: str,
|
186 |
revision: str,
|
187 |
test_tokenizer: bool = False,
|
188 |
+
trust_remote_code: bool = False,
|
189 |
) -> Tuple[bool, Optional[str], Optional[Any]]:
|
190 |
"""Check if model exists and is properly configured on the Hub"""
|
191 |
try:
|
|
|
195 |
revision=revision,
|
196 |
trust_remote_code=trust_remote_code,
|
197 |
token=self.token,
|
198 |
+
force_download=True,
|
199 |
)
|
200 |
+
|
201 |
if test_tokenizer:
|
202 |
try:
|
203 |
await asyncio.to_thread(
|
|
|
205 |
model_name,
|
206 |
revision=revision,
|
207 |
trust_remote_code=trust_remote_code,
|
208 |
+
token=self.token,
|
209 |
)
|
210 |
except ValueError as e:
|
211 |
+
return (
|
212 |
+
False,
|
213 |
+
f"The tokenizer is not available in an official Transformers release: {e}",
|
214 |
+
None,
|
215 |
+
)
|
216 |
except Exception:
|
217 |
+
return (
|
218 |
+
False,
|
219 |
+
"The tokenizer cannot be loaded. Ensure the tokenizer class is part of a stable Transformers release and correctly configured.",
|
220 |
+
None,
|
221 |
+
)
|
222 |
+
|
223 |
return True, None, config
|
224 |
+
|
225 |
except ValueError:
|
226 |
+
return (
|
227 |
+
False,
|
228 |
+
"The model requires `trust_remote_code=True` to launch, and for safety reasons, we don't accept such models automatically.",
|
229 |
+
None,
|
230 |
+
)
|
231 |
except Exception as e:
|
232 |
if "You are trying to access a gated repo." in str(e):
|
233 |
+
return (
|
234 |
+
True,
|
235 |
+
"The model is gated and requires special access permissions.",
|
236 |
+
None,
|
237 |
+
)
|
238 |
+
return (
|
239 |
+
False,
|
240 |
+
f"The model was not found or is misconfigured on the Hub. Error: {e.args[0]}",
|
241 |
+
None,
|
242 |
+
)
|
243 |
|
244 |
async def check_official_provider_status(
|
245 |
+
self, model_id: str, existing_models: Dict[str, list]
|
|
|
|
|
246 |
) -> Tuple[bool, Optional[str]]:
|
247 |
"""
|
248 |
Check if model is from official provider and has finished submission.
|
249 |
+
|
250 |
Args:
|
251 |
model_id: The model identifier (org/model-name)
|
252 |
existing_models: Dictionary of models by status from get_models()
|
253 |
+
|
254 |
Returns:
|
255 |
Tuple[bool, Optional[str]]: (is_valid, error_message)
|
256 |
"""
|
257 |
try:
|
258 |
+
logger.info(
|
259 |
+
LogFormatter.info(f"Checking official provider status for {model_id}")
|
260 |
+
)
|
261 |
+
|
262 |
# Get model organization
|
263 |
+
model_org = model_id.split("/")[0] if "/" in model_id else None
|
264 |
+
|
265 |
if not model_org:
|
266 |
return True, None
|
267 |
+
|
268 |
# Load official providers dataset
|
269 |
dataset = load_dataset(OFFICIAL_PROVIDERS_REPO)
|
270 |
official_providers = dataset["train"][0]["CURATED_SET"]
|
271 |
+
|
272 |
# Check if model org is in official providers
|
273 |
is_official = model_org in official_providers
|
274 |
+
|
275 |
if is_official:
|
276 |
+
logger.info(
|
277 |
+
LogFormatter.info(
|
278 |
+
f"Model organization '{model_org}' is an official provider"
|
279 |
+
)
|
280 |
+
)
|
281 |
+
|
282 |
# Check for finished submissions
|
283 |
if "finished" in existing_models:
|
284 |
for model in existing_models["finished"]:
|
|
|
288 |
f"with a completed evaluation. "
|
289 |
f"To re-evaluate, please open a discussion."
|
290 |
)
|
291 |
+
logger.error(
|
292 |
+
LogFormatter.error("Validation failed", error_msg)
|
293 |
+
)
|
294 |
return False, error_msg
|
295 |
+
|
296 |
+
logger.info(
|
297 |
+
LogFormatter.success(
|
298 |
+
"No finished submission found for this official provider model"
|
299 |
+
)
|
300 |
+
)
|
301 |
else:
|
302 |
+
logger.info(
|
303 |
+
LogFormatter.info(
|
304 |
+
f"Model organization '{model_org}' is not an official provider"
|
305 |
+
)
|
306 |
+
)
|
307 |
+
|
308 |
return True, None
|
309 |
+
|
310 |
except Exception as e:
|
311 |
error_msg = f"Failed to check official provider status: {str(e)}"
|
312 |
logger.error(LogFormatter.error(error_msg))
|
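For context, a self-contained sketch of the size computation that get_model_size performs on the safetensors parameter counts, including the 8x factor it applies to GPTQ checkpoints; the parameter counts and model ids below are made up.

from typing import Dict

def size_in_billions(parameter_count: Dict[str, int], precision: str, model_id: str) -> float:
    # Sum all parameter buckets reported by the safetensors metadata.
    total_params = sum(parameter_count.values())
    # 8x factor for GPTQ checkpoints, as in the validator above.
    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_id.lower()) else 1
    size_b = total_params / 1e9  # convert to billions, assuming float16 weights
    return round(size_factor * size_b, 3)

# A hypothetical 7B model in float16 vs. the same checkpoint flagged as GPTQ.
print(size_in_billions({"F16": 7_000_000_000}, "float16", "example-org/model-7b"))    # 7.0
print(size_in_billions({"I32": 7_000_000_000}, "GPTQ", "example-org/model-7b-gptq"))  # 56.0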
backend/utils/analyze_prod_datasets.py
CHANGED
@@ -3,7 +3,7 @@ import json
|
|
3 |
import logging
|
4 |
from datetime import datetime
|
5 |
from pathlib import Path
|
6 |
-
from typing import Dict, Any
|
7 |
from huggingface_hub import HfApi
|
8 |
from dotenv import load_dotenv
|
9 |
from app.config.hf_config import HF_ORGANIZATION
|
@@ -16,10 +16,7 @@ ROOT_DIR = BACKEND_DIR.parent
|
|
16 |
load_dotenv(ROOT_DIR / ".env")
|
17 |
|
18 |
# Configure logging
|
19 |
-
logging.basicConfig(
|
20 |
-
level=logging.INFO,
|
21 |
-
format='%(message)s'
|
22 |
-
)
|
23 |
logger = logging.getLogger(__name__)
|
24 |
|
25 |
# Initialize Hugging Face API
|
@@ -28,53 +25,50 @@ if not HF_TOKEN:
|
|
28 |
raise ValueError("HF_TOKEN not found in environment variables")
|
29 |
api = HfApi(token=HF_TOKEN)
|
30 |
|
|
|
31 |
def analyze_dataset(repo_id: str) -> Dict[str, Any]:
|
32 |
"""Analyze a dataset and return statistics"""
|
33 |
try:
|
34 |
# Get dataset info
|
35 |
dataset_info = api.dataset_info(repo_id=repo_id)
|
36 |
-
|
37 |
# Get file list
|
38 |
files = api.list_repo_files(repo_id, repo_type="dataset")
|
39 |
-
|
40 |
# Get last commit info
|
41 |
commits = api.list_repo_commits(repo_id, repo_type="dataset")
|
42 |
last_commit = next(commits, None)
|
43 |
-
|
44 |
# Count lines in jsonl files
|
45 |
total_entries = 0
|
46 |
for file in files:
|
47 |
-
if file.endswith(
|
48 |
try:
|
49 |
# Download file content
|
50 |
content = api.hf_hub_download(
|
51 |
-
repo_id=repo_id,
|
52 |
-
filename=file,
|
53 |
-
repo_type="dataset"
|
54 |
)
|
55 |
-
|
56 |
# Count lines
|
57 |
-
with open(content,
|
58 |
for _ in f:
|
59 |
total_entries += 1
|
60 |
-
|
61 |
except Exception as e:
|
62 |
logger.error(f"Error processing file {file}: {str(e)}")
|
63 |
continue
|
64 |
-
|
65 |
         # Special handling for requests dataset
         if repo_id == f"{HF_ORGANIZATION}/requests":
             pending_count = 0
             completed_count = 0
-
             try:
                 content = api.hf_hub_download(
-                    repo_id=repo_id,
-                    filename="eval_requests.jsonl",
-                    repo_type="dataset"
                 )
-
-                with open(content, 'r') as f:
                     for line in f:
                         try:
                             entry = json.loads(line)
@@ -84,10 +78,10 @@ def analyze_dataset(repo_id: str) -> Dict[str, Any]:
                                 completed_count += 1
                         except json.JSONDecodeError:
                             continue
-
             except Exception as e:
                 logger.error(f"Error analyzing requests: {str(e)}")
-
         # Build response
         response = {
             "id": repo_id,
@@ -97,22 +91,22 @@ def analyze_dataset(repo_id: str) -> Dict[str, Any]:
             "size_bytes": dataset_info.size_in_bytes,
             "downloads": dataset_info.downloads,
         }
-
         # Add request-specific info if applicable
         if repo_id == f"{HF_ORGANIZATION}/requests":
-            response.update(
-
-
-
-
         return response
-
     except Exception as e:
         logger.error(f"Error analyzing dataset {repo_id}: {str(e)}")
-        return {
-            "id": repo_id,
-            "error": str(e)
-        }

 def main():
     """Main function to analyze all datasets"""
@@ -121,50 +115,49 @@ def main():
     datasets = [
         {
             "id": f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
-            "description": "Aggregated results"
-        },
-        {
-            "id": f"{HF_ORGANIZATION}/requests",
-            "description": "Evaluation requests"
         },
         {
-            "id": f"{HF_ORGANIZATION}/votes",
-            "description": "User votes"
         },
-        {
-            "id": f"open-llm-leaderboard/official-providers",
-            "description": "Highlighted models"
-        }
     ]
-
     # Analyze each dataset
     results = []
     for dataset in datasets:
         logger.info(f"\nAnalyzing {dataset['description']} ({dataset['id']})...")
-        result = analyze_dataset(dataset['id'])
         results.append(result)
-
-        if 'error' in result:
             logger.error(f"❌ Error: {result['error']}")
         else:
             logger.info(f"✓ {result['total_entries']} entries")
             logger.info(f"✓ {result['file_count']} files")
             logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
             logger.info(f"✓ {result['downloads']} downloads")
-
-            if 'pending_requests' in result:
                 logger.info(f"✓ {result['pending_requests']} pending requests")
                 logger.info(f"✓ {result['completed_requests']} completed requests")
-
-            if result['last_modified']:
-                last_modified = datetime.fromisoformat(
-
-
         return results
-
     except Exception as e:
         logger.error(f"Global error: {str(e)}")
         return []

 if __name__ == "__main__":
-    main()

 import logging
 from datetime import datetime
 from pathlib import Path
+from typing import Dict, Any
 from huggingface_hub import HfApi
 from dotenv import load_dotenv
 from app.config.hf_config import HF_ORGANIZATION

 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
+logging.basicConfig(level=logging.INFO, format="%(message)s")
 logger = logging.getLogger(__name__)

 # Initialize Hugging Face API

     raise ValueError("HF_TOKEN not found in environment variables")
 api = HfApi(token=HF_TOKEN)

+

 def analyze_dataset(repo_id: str) -> Dict[str, Any]:
     """Analyze a dataset and return statistics"""
     try:
         # Get dataset info
         dataset_info = api.dataset_info(repo_id=repo_id)
+
         # Get file list
         files = api.list_repo_files(repo_id, repo_type="dataset")
+
         # Get last commit info
         commits = api.list_repo_commits(repo_id, repo_type="dataset")
         last_commit = next(commits, None)
+
         # Count lines in jsonl files
         total_entries = 0
         for file in files:
+            if file.endswith(".jsonl"):
                 try:
                     # Download file content
                     content = api.hf_hub_download(
+                        repo_id=repo_id, filename=file, repo_type="dataset"
                     )
+
                     # Count lines
+                    with open(content, "r") as f:
                         for _ in f:
                             total_entries += 1
+
                 except Exception as e:
                     logger.error(f"Error processing file {file}: {str(e)}")
                     continue
+
         # Special handling for requests dataset
         if repo_id == f"{HF_ORGANIZATION}/requests":
             pending_count = 0
             completed_count = 0
+
             try:
                 content = api.hf_hub_download(
+                    repo_id=repo_id, filename="eval_requests.jsonl", repo_type="dataset"
                 )
+
+                with open(content, "r") as f:
                     for line in f:
                         try:
                             entry = json.loads(line)

                                 completed_count += 1
                         except json.JSONDecodeError:
                             continue
+
             except Exception as e:
                 logger.error(f"Error analyzing requests: {str(e)}")
+
         # Build response
         response = {
             "id": repo_id,

             "size_bytes": dataset_info.size_in_bytes,
             "downloads": dataset_info.downloads,
         }
+
         # Add request-specific info if applicable
         if repo_id == f"{HF_ORGANIZATION}/requests":
+            response.update(
+                {
+                    "pending_requests": pending_count,
+                    "completed_requests": completed_count,
+                }
+            )
+
         return response
+
     except Exception as e:
         logger.error(f"Error analyzing dataset {repo_id}: {str(e)}")
+        return {"id": repo_id, "error": str(e)}
+

 def main():
     """Main function to analyze all datasets"""

     datasets = [
         {
             "id": f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
+            "description": "Aggregated results",
         },
+        {"id": f"{HF_ORGANIZATION}/requests", "description": "Evaluation requests"},
+        {"id": f"{HF_ORGANIZATION}/votes", "description": "User votes"},
         {
+            "id": "open-llm-leaderboard/official-providers",
+            "description": "Highlighted models",
         },
     ]
+
     # Analyze each dataset
     results = []
     for dataset in datasets:
         logger.info(f"\nAnalyzing {dataset['description']} ({dataset['id']})...")
+        result = analyze_dataset(dataset["id"])
         results.append(result)
+
+        if "error" in result:
             logger.error(f"❌ Error: {result['error']}")
         else:
             logger.info(f"✓ {result['total_entries']} entries")
             logger.info(f"✓ {result['file_count']} files")
             logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
             logger.info(f"✓ {result['downloads']} downloads")
+
+            if "pending_requests" in result:
                 logger.info(f"✓ {result['pending_requests']} pending requests")
                 logger.info(f"✓ {result['completed_requests']} completed requests")
+
+            if result["last_modified"]:
+                last_modified = datetime.fromisoformat(
+                    result["last_modified"].replace("Z", "+00:00")
+                )
+                logger.info(
+                    f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}"
+                )
+
         return results
+
     except Exception as e:
         logger.error(f"Global error: {str(e)}")
         return []

+
 if __name__ == "__main__":
+    main()

backend/utils/analyze_prod_models.py
CHANGED
@@ -1,5 +1,4 @@
 import os
-import json
 import logging
 from datetime import datetime
 from pathlib import Path
@@ -15,10 +14,7 @@ ROOT_DIR = BACKEND_DIR.parent
 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(message)s'
-)
 logger = logging.getLogger(__name__)

 # Initialize Hugging Face API
@@ -27,80 +23,92 @@ if not HF_TOKEN:
     raise ValueError("HF_TOKEN not found in environment variables")
 api = HfApi(token=HF_TOKEN)

 def count_evaluated_models():
     """Count the number of evaluated models"""
     try:
         # Get dataset info
-        dataset_info = api.dataset_info(
-
         # Get file list
-        files = api.list_repo_files(
-
         # Get last commit info
-        commits = api.list_repo_commits(
         last_commit = next(commits, None)
-
         # Count lines in jsonl files
         total_entries = 0
         for file in files:
-            if file.endswith('.jsonl'):
                 try:
                     # Download file content
                     content = api.hf_hub_download(
                         repo_id=f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
                         filename=file,
-                        repo_type="dataset"
                     )
-
                     # Count lines
-                    with open(content, 'r') as f:
                         for _ in f:
                             total_entries += 1
-
                 except Exception as e:
                     logger.error(f"Error processing file {file}: {str(e)}")
                     continue
-
         # Build response
         response = {
             "total_models": total_entries,
             "last_modified": last_commit.created_at if last_commit else None,
             "file_count": len(files),
             "size_bytes": dataset_info.size_in_bytes,
-            "downloads": dataset_info.downloads
         }
-
         return response
-
     except Exception as e:
         logger.error(f"Error counting evaluated models: {str(e)}")
-        return {
-            "error": str(e)
-        }

 def main():
     """Main function to count evaluated models"""
     try:
         logger.info("\nAnalyzing evaluated models...")
         result = count_evaluated_models()
-
-        if 'error' in result:
             logger.error(f"❌ Error: {result['error']}")
         else:
             logger.info(f"✓ {result['total_models']} models evaluated")
             logger.info(f"✓ {result['file_count']} files")
             logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
             logger.info(f"✓ {result['downloads']} downloads")
-
-            if result['last_modified']:
-                last_modified = datetime.fromisoformat(
-
-
         return result
-
     except Exception as e:
         logger.error(f"Global error: {str(e)}")
         return {"error": str(e)}

 if __name__ == "__main__":
-    main()

 import os
 import logging
 from datetime import datetime
 from pathlib import Path

 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
+logging.basicConfig(level=logging.INFO, format="%(message)s")
 logger = logging.getLogger(__name__)

 # Initialize Hugging Face API

     raise ValueError("HF_TOKEN not found in environment variables")
 api = HfApi(token=HF_TOKEN)

+

 def count_evaluated_models():
     """Count the number of evaluated models"""
     try:
         # Get dataset info
+        dataset_info = api.dataset_info(
+            repo_id=f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
+            repo_type="dataset",
+        )
+
         # Get file list
+        files = api.list_repo_files(
+            f"{HF_ORGANIZATION}/llm-security-leaderboard-contents", repo_type="dataset"
+        )
+
         # Get last commit info
+        commits = api.list_repo_commits(
+            f"{HF_ORGANIZATION}/llm-security-leaderboard-contents", repo_type="dataset"
+        )
         last_commit = next(commits, None)
+
         # Count lines in jsonl files
         total_entries = 0
         for file in files:
+            if file.endswith(".jsonl"):
                 try:
                     # Download file content
                     content = api.hf_hub_download(
                         repo_id=f"{HF_ORGANIZATION}/llm-security-leaderboard-contents",
                         filename=file,
+                        repo_type="dataset",
                     )
+
                     # Count lines
+                    with open(content, "r") as f:
                         for _ in f:
                             total_entries += 1
+
                 except Exception as e:
                     logger.error(f"Error processing file {file}: {str(e)}")
                     continue
+
         # Build response
         response = {
             "total_models": total_entries,
             "last_modified": last_commit.created_at if last_commit else None,
             "file_count": len(files),
             "size_bytes": dataset_info.size_in_bytes,
+            "downloads": dataset_info.downloads,
         }
+
         return response
+
     except Exception as e:
         logger.error(f"Error counting evaluated models: {str(e)}")
+        return {"error": str(e)}
+

 def main():
     """Main function to count evaluated models"""
     try:
         logger.info("\nAnalyzing evaluated models...")
         result = count_evaluated_models()
+
+        if "error" in result:
             logger.error(f"❌ Error: {result['error']}")
         else:
             logger.info(f"✓ {result['total_models']} models evaluated")
             logger.info(f"✓ {result['file_count']} files")
             logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
             logger.info(f"✓ {result['downloads']} downloads")
+
+            if result["last_modified"]:
+                last_modified = datetime.fromisoformat(
+                    result["last_modified"].replace("Z", "+00:00")
+                )
+                logger.info(
+                    f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}"
+                )
+
         return result
+
     except Exception as e:
         logger.error(f"Global error: {str(e)}")
         return {"error": str(e)}

+
 if __name__ == "__main__":
+    main()

backend/utils/fix_wrong_model_size.py
CHANGED
@@ -1,15 +1,11 @@
-import os
 import json
-import pytz
 import logging
 import asyncio
 from datetime import datetime
-from pathlib import Path
 import huggingface_hub
 from huggingface_hub.errors import RepositoryNotFoundError, RevisionNotFoundError
 from dotenv import load_dotenv
 from git import Repo
-from datetime import datetime
 from tqdm.auto import tqdm
 from tqdm.contrib.logging import logging_redirect_tqdm

@@ -20,22 +16,22 @@ from app.utils.model_validation import ModelValidator
 huggingface_hub.logging.set_verbosity_error()
 huggingface_hub.utils.disable_progress_bars()

-logging.basicConfig(
-    level=logging.ERROR,
-    format='%(message)s'
-)
 logger = logging.getLogger(__name__)
 load_dotenv()

 validator = ModelValidator()

 def get_changed_files(repo_path, start_date, end_date):
     repo = Repo(repo_path)
-    start = datetime.strptime(start_date, '%Y-%m-%d')
-    end = datetime.strptime(end_date, '%Y-%m-%d')
-
     changed_files = set()
-    pbar = tqdm(
     for commit in pbar:
         commit_date = datetime.fromtimestamp(commit.committed_date)
         pbar.set_postfix_str(f"Commit date: {commit_date}")
@@ -62,44 +58,52 @@ def main():
     requests_path = "/requests"
     start_date = "2024-12-09"
     end_date = "2025-01-07"
-
     changed_files = get_changed_files(requests_path, start_date, end_date)

     for file in tqdm(changed_files):
         try:
             request_data = read_json(requests_path, file)
-        except FileNotFoundError
             tqdm.write(f"File {file} not found")
             continue
-
         try:
             model_info = API.model_info(
                 repo_id=request_data["model"],
                 revision=request_data["revision"],
-                token=HF_TOKEN
             )
-        except (RepositoryNotFoundError, RevisionNotFoundError)
-            tqdm.write(f"Model info for {request_data['model']} not found")
             continue
-
         with logging_redirect_tqdm():
-            new_model_size, error = asyncio.run(
-
-
-
-
-

         if error:
-            tqdm.write(f"Error getting model size info for {request_data['model']}, {error}")
             continue
-
         old_model_size = request_data["params"]
         if old_model_size != new_model_size:
             if new_model_size > 100:
-                tqdm.write(f"Model: {request_data['model']}, size is more 100B: {new_model_size}")
-
-
             tqdm.write(f"Updating request file {file}")

             request_data["params"] = new_model_size

 import json
 import logging
 import asyncio
 from datetime import datetime
 import huggingface_hub
 from huggingface_hub.errors import RepositoryNotFoundError, RevisionNotFoundError
 from dotenv import load_dotenv
 from git import Repo
 from tqdm.auto import tqdm
 from tqdm.contrib.logging import logging_redirect_tqdm

 huggingface_hub.logging.set_verbosity_error()
 huggingface_hub.utils.disable_progress_bars()

+logging.basicConfig(level=logging.ERROR, format="%(message)s")
 logger = logging.getLogger(__name__)
 load_dotenv()

 validator = ModelValidator()

+
 def get_changed_files(repo_path, start_date, end_date):
     repo = Repo(repo_path)
+    start = datetime.strptime(start_date, "%Y-%m-%d")
+    end = datetime.strptime(end_date, "%Y-%m-%d")
+
     changed_files = set()
+    pbar = tqdm(
+        repo.iter_commits(), desc=f"Reading commits from {end_date} to {start_date}"
+    )
     for commit in pbar:
         commit_date = datetime.fromtimestamp(commit.committed_date)
         pbar.set_postfix_str(f"Commit date: {commit_date}")

     requests_path = "/requests"
     start_date = "2024-12-09"
     end_date = "2025-01-07"
+
     changed_files = get_changed_files(requests_path, start_date, end_date)

     for file in tqdm(changed_files):
         try:
             request_data = read_json(requests_path, file)
+        except FileNotFoundError:
             tqdm.write(f"File {file} not found")
             continue
+
         try:
             model_info = API.model_info(
                 repo_id=request_data["model"],
                 revision=request_data["revision"],
+                token=HF_TOKEN,
             )
+        except (RepositoryNotFoundError, RevisionNotFoundError):
+            tqdm.write(f"Model info for {request_data['model']} not found")
             continue
+
         with logging_redirect_tqdm():
+            new_model_size, error = asyncio.run(
+                validator.get_model_size(
+                    model_info=model_info,
+                    precision=request_data["precision"],
+                    base_model=request_data["base_model"],
+                    revision=request_data["revision"],
+                )
+            )

         if error:
+            tqdm.write(
+                f"Error getting model size info for {request_data['model']}, {error}"
+            )
             continue
+
         old_model_size = request_data["params"]
         if old_model_size != new_model_size:
             if new_model_size > 100:
+                tqdm.write(
+                    f"Model: {request_data['model']}, size is more 100B: {new_model_size}"
+                )
+
+            tqdm.write(
+                f"Model: {request_data['model']}, old size: {request_data['params']} new size: {new_model_size}"
+            )
             tqdm.write(f"Updating request file {file}")

             request_data["params"] = new_model_size

backend/utils/last_activity.py
CHANGED
@@ -1,9 +1,8 @@
 import os
 import json
 import logging
-from datetime import datetime
 from pathlib import Path
-from typing import Dict,
 from huggingface_hub import HfApi
 from dotenv import load_dotenv

@@ -15,10 +14,7 @@ ROOT_DIR = BACKEND_DIR.parent
 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(message)s'
-)
 logger = logging.getLogger(__name__)

 # Initialize Hugging Face API
@@ -28,107 +24,117 @@ if not HF_TOKEN:
 api = HfApi(token=HF_TOKEN)

 # Default organization
-HF_ORGANIZATION = os.getenv('HF_ORGANIZATION', 'stacklok')

 def get_last_votes(limit: int = 5) -> List[Dict]:
     """Get the last votes from the votes dataset"""
     try:
         logger.info("\nFetching last votes...")
-
         # Download and read votes file
         logger.info("Downloading votes file...")
         votes_file = api.hf_hub_download(
             repo_id=f"{HF_ORGANIZATION}/votes",
             filename="votes_data.jsonl",
-            repo_type="dataset"
         )
-
         logger.info("Reading votes file...")
         votes = []
-        with open(votes_file, 'r') as f:
             for line in f:
                 try:
                     vote = json.loads(line)
                     votes.append(vote)
                 except json.JSONDecodeError:
                     continue
-
         # Sort by timestamp and get last n votes
         logger.info("Sorting votes...")
-        votes.sort(key=lambda x: x.get('timestamp', ''), reverse=True)
         last_votes = votes[:limit]
-
         logger.info(f"✓ Found {len(last_votes)} recent votes")
         return last_votes
-
     except Exception as e:
         logger.error(f"Error reading votes: {str(e)}")
         return []

 def get_last_models(limit: int = 5) -> List[Dict]:
     """Get the last models from the requests dataset using commit history"""
     try:
         logger.info("\nFetching last model submissions...")
-
         # Get commit history
         logger.info("Getting commit history...")
-        commits = list(
-
-
-
         logger.info(f"Found {len(commits)} commits")
-
         # Track processed files to avoid duplicates
         processed_files = set()
         models = []
-
         # Process commits until we have enough models
         for i, commit in enumerate(commits):
-            logger.info(
-
             # Look at added/modified files in this commit
-            files_to_process = [
             if files_to_process:
                 logger.info(f"Found {len(files_to_process)} JSON files in commit")
-
             for file in files_to_process:
                 if file in processed_files:
                     continue
-
                 processed_files.add(file)
                 logger.info(f"Downloading {file}...")
-
                 try:
                     # Download and read the file
                     content = api.hf_hub_download(
                         repo_id=f"{HF_ORGANIZATION}/requests",
                         filename=file,
-                        repo_type="dataset"
                     )
-
-                    with open(content, 'r') as f:
                         model_data = json.load(f)
                         models.append(model_data)
-                    logger.info(
-
                     if len(models) >= limit:
                         logger.info("Reached desired number of models")
                         break
-
                 except Exception as e:
                     logger.error(f"Error reading file {file}: {str(e)}")
                     continue
-
             if len(models) >= limit:
                 break
-
         logger.info(f"✓ Found {len(models)} recent model submissions")
         return models
-
     except Exception as e:
         logger.error(f"Error reading models: {str(e)}")
         return []

 def main():
     """Display last activities from the leaderboard"""
     try:
@@ -142,7 +148,7 @@ def main():
             logger.info(f"Timestamp: {vote.get('timestamp')}")
         else:
             logger.info("No votes found")
-
         # Get last model submissions
         logger.info("\n=== Last Model Submissions ===")
         last_models = get_last_models()
@@ -151,14 +157,17 @@ def main():
                 logger.info(f"\nModel: {model.get('model')}")
                 logger.info(f"Submitter: {model.get('sender', 'Unknown')}")
                 logger.info(f"Status: {model.get('status', 'Unknown')}")
-                logger.info(
                 logger.info(f"Precision: {model.get('precision', 'Unknown')}")
                 logger.info(f"Weight Type: {model.get('weight_type', 'Unknown')}")
             else:
                 logger.info("No models found")
-
     except Exception as e:
         logger.error(f"Global error: {str(e)}")

 if __name__ == "__main__":
-    main()

 import os
 import json
 import logging
 from pathlib import Path
+from typing import Dict, List
 from huggingface_hub import HfApi
 from dotenv import load_dotenv

 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
+logging.basicConfig(level=logging.INFO, format="%(message)s")
 logger = logging.getLogger(__name__)

 # Initialize Hugging Face API

 api = HfApi(token=HF_TOKEN)

 # Default organization
+HF_ORGANIZATION = os.getenv("HF_ORGANIZATION", "stacklok")
+

 def get_last_votes(limit: int = 5) -> List[Dict]:
     """Get the last votes from the votes dataset"""
     try:
         logger.info("\nFetching last votes...")
+
         # Download and read votes file
         logger.info("Downloading votes file...")
         votes_file = api.hf_hub_download(
             repo_id=f"{HF_ORGANIZATION}/votes",
             filename="votes_data.jsonl",
+            repo_type="dataset",
         )
+
         logger.info("Reading votes file...")
         votes = []
+        with open(votes_file, "r") as f:
             for line in f:
                 try:
                     vote = json.loads(line)
                     votes.append(vote)
                 except json.JSONDecodeError:
                     continue
+
         # Sort by timestamp and get last n votes
         logger.info("Sorting votes...")
+        votes.sort(key=lambda x: x.get("timestamp", ""), reverse=True)
         last_votes = votes[:limit]
+
         logger.info(f"✓ Found {len(last_votes)} recent votes")
         return last_votes
+
     except Exception as e:
         logger.error(f"Error reading votes: {str(e)}")
         return []

+
 def get_last_models(limit: int = 5) -> List[Dict]:
     """Get the last models from the requests dataset using commit history"""
     try:
         logger.info("\nFetching last model submissions...")
+
         # Get commit history
         logger.info("Getting commit history...")
+        commits = list(
+            api.list_repo_commits(
+                repo_id=f"{HF_ORGANIZATION}/requests", repo_type="dataset"
+            )
+        )
         logger.info(f"Found {len(commits)} commits")
+
         # Track processed files to avoid duplicates
         processed_files = set()
         models = []
+
         # Process commits until we have enough models
         for i, commit in enumerate(commits):
+            logger.info(
+                f"Processing commit {i + 1}/{len(commits)} ({commit.created_at})"
+            )
+
             # Look at added/modified files in this commit
+            files_to_process = [
+                f for f in (commit.added + commit.modified) if f.endswith(".json")
+            ]
             if files_to_process:
                 logger.info(f"Found {len(files_to_process)} JSON files in commit")
+
             for file in files_to_process:
                 if file in processed_files:
                     continue
+
                 processed_files.add(file)
                 logger.info(f"Downloading {file}...")
+
                 try:
                     # Download and read the file
                     content = api.hf_hub_download(
                         repo_id=f"{HF_ORGANIZATION}/requests",
                         filename=file,
+                        repo_type="dataset",
                     )
+
+                    with open(content, "r") as f:
                         model_data = json.load(f)
                         models.append(model_data)
+                    logger.info(
+                        f"✓ Added model {model_data.get('model', 'Unknown')}"
+                    )
+
                     if len(models) >= limit:
                         logger.info("Reached desired number of models")
                         break
+
                 except Exception as e:
                     logger.error(f"Error reading file {file}: {str(e)}")
                     continue
+
             if len(models) >= limit:
                 break
+
         logger.info(f"✓ Found {len(models)} recent model submissions")
         return models
+
     except Exception as e:
         logger.error(f"Error reading models: {str(e)}")
         return []

+
 def main():
     """Display last activities from the leaderboard"""
     try:

             logger.info(f"Timestamp: {vote.get('timestamp')}")
         else:
             logger.info("No votes found")
+
         # Get last model submissions
         logger.info("\n=== Last Model Submissions ===")
         last_models = get_last_models()

                 logger.info(f"\nModel: {model.get('model')}")
                 logger.info(f"Submitter: {model.get('sender', 'Unknown')}")
                 logger.info(f"Status: {model.get('status', 'Unknown')}")
+                logger.info(
+                    f"Submission Time: {model.get('submitted_time', 'Unknown')}"
+                )
                 logger.info(f"Precision: {model.get('precision', 'Unknown')}")
                 logger.info(f"Weight Type: {model.get('weight_type', 'Unknown')}")
             else:
                 logger.info("No models found")
+
     except Exception as e:
         logger.error(f"Global error: {str(e)}")

+
 if __name__ == "__main__":
+    main()

backend/utils/sync_datasets_locally.py
CHANGED
@@ -1,9 +1,8 @@
 import os
-import shutil
 import tempfile
 import logging
 from pathlib import Path
-from huggingface_hub import HfApi, snapshot_download,
 from dotenv import load_dotenv

 # Configure source and destination usernames
@@ -18,10 +17,7 @@ ROOT_DIR = BACKEND_DIR.parent
 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(message)s'
-)
 logger = logging.getLogger(__name__)

 # List of dataset names to sync
@@ -38,12 +34,17 @@ DATASETS = [
     (name, f"{SOURCE_USERNAME}/{name}", f"{DESTINATION_USERNAME}/{name}")
     for name in DATASET_NAMES
 ] + [
-    (
-

 # Initialize Hugging Face API
 api = HfApi()

 def ensure_repo_exists(repo_id, token):
     """Ensure the repository exists, create it if it doesn't"""
     try:
@@ -51,23 +52,19 @@ def ensure_repo_exists(repo_id, token):
         logger.info(f"✓ Repository {repo_id} already exists")
     except Exception:
         logger.info(f"Creating repository {repo_id}...")
-        create_repo(
-            repo_id=repo_id,
-            repo_type="dataset",
-            token=token,
-            private=True
-        )
         logger.info(f"✓ Repository {repo_id} created")

 def process_dataset(dataset_info, token):
     """Process a single dataset"""
     name, source_dataset, destination_dataset = dataset_info
     try:
         logger.info(f"\n📥 Processing dataset: {name}")
-
         # Ensure destination repository exists
         ensure_repo_exists(destination_dataset, token)
-
         # Create a temporary directory for this dataset
         with tempfile.TemporaryDirectory() as temp_dir:
             try:
@@ -75,28 +72,28 @@ def process_dataset(dataset_info, token):
                 logger.info(f"Listing files in {source_dataset}...")
                 files = api.list_repo_files(source_dataset, repo_type="dataset")
                 logger.info(f"Detected structure: {len(files)} files")
-
                 # Download dataset
                 logger.info(f"Downloading from {source_dataset}...")
                 local_dir = snapshot_download(
                     repo_id=source_dataset,
                     repo_type="dataset",
                     local_dir=temp_dir,
-                    token=token
                 )
-                logger.info(
-
                 # Upload to destination while preserving structure
                 logger.info(f"📤 Uploading to {destination_dataset}...")
                 api.upload_folder(
                     folder_path=local_dir,
                     repo_id=destination_dataset,
                     repo_type="dataset",
-                    token=token
                 )
                 logger.info(f"✅ {name} copied successfully!")
                 return True
-
             except Exception as e:
                 logger.error(f"❌ Error processing {name}: {str(e)}")
                 return False
@@ -105,6 +102,7 @@ def process_dataset(dataset_info, token):
         logger.error(f"❌ Error for {name}: {str(e)}")
         return False

 def copy_datasets():
     try:
         logger.info("🔑 Checking authentication...")
@@ -112,21 +110,22 @@ def copy_datasets():
         token = os.getenv("HF_TOKEN")
         if not token:
             raise ValueError("HF_TOKEN not found in .env file")
-
         # Process datasets sequentially
         results = []
         for dataset_info in DATASETS:
             success = process_dataset(dataset_info, token)
             results.append((dataset_info[0], success))
-
         # Print final summary
         logger.info("\n📊 Final summary:")
         for dataset, success in results:
             status = "✅ Success" if success else "❌ Failure"
             logger.info(f"{dataset}: {status}")
-
     except Exception as e:
         logger.error(f"❌ Global error: {str(e)}")

 if __name__ == "__main__":
-    copy_datasets()

 import os
 import tempfile
 import logging
 from pathlib import Path
+from huggingface_hub import HfApi, snapshot_download, create_repo
 from dotenv import load_dotenv

 # Configure source and destination usernames

 load_dotenv(ROOT_DIR / ".env")

 # Configure logging
+logging.basicConfig(level=logging.INFO, format="%(message)s")
 logger = logging.getLogger(__name__)

 # List of dataset names to sync

     (name, f"{SOURCE_USERNAME}/{name}", f"{DESTINATION_USERNAME}/{name}")
     for name in DATASET_NAMES
 ] + [
+    (
+        "official-providers",
+        "open-llm-leaderboard/official-providers",
+        f"{DESTINATION_USERNAME}/official-providers",
+    )
+]

 # Initialize Hugging Face API
 api = HfApi()

+
 def ensure_repo_exists(repo_id, token):
     """Ensure the repository exists, create it if it doesn't"""
     try:

         logger.info(f"✓ Repository {repo_id} already exists")
     except Exception:
         logger.info(f"Creating repository {repo_id}...")
+        create_repo(repo_id=repo_id, repo_type="dataset", token=token, private=True)
         logger.info(f"✓ Repository {repo_id} created")

+
 def process_dataset(dataset_info, token):
     """Process a single dataset"""
     name, source_dataset, destination_dataset = dataset_info
     try:
         logger.info(f"\n📥 Processing dataset: {name}")
+
         # Ensure destination repository exists
         ensure_repo_exists(destination_dataset, token)
+
         # Create a temporary directory for this dataset
         with tempfile.TemporaryDirectory() as temp_dir:
             try:

                 logger.info(f"Listing files in {source_dataset}...")
                 files = api.list_repo_files(source_dataset, repo_type="dataset")
                 logger.info(f"Detected structure: {len(files)} files")
+
                 # Download dataset
                 logger.info(f"Downloading from {source_dataset}...")
                 local_dir = snapshot_download(
                     repo_id=source_dataset,
                     repo_type="dataset",
                     local_dir=temp_dir,
+                    token=token,
                 )
+                logger.info("✓ Download complete")
+
                 # Upload to destination while preserving structure
                 logger.info(f"📤 Uploading to {destination_dataset}...")
                 api.upload_folder(
                     folder_path=local_dir,
                     repo_id=destination_dataset,
                     repo_type="dataset",
+                    token=token,
                 )
                 logger.info(f"✅ {name} copied successfully!")
                 return True
+
             except Exception as e:
                 logger.error(f"❌ Error processing {name}: {str(e)}")
                 return False

         logger.error(f"❌ Error for {name}: {str(e)}")
         return False

+
 def copy_datasets():
     try:
         logger.info("🔑 Checking authentication...")

         token = os.getenv("HF_TOKEN")
         if not token:
             raise ValueError("HF_TOKEN not found in .env file")
+
         # Process datasets sequentially
         results = []
         for dataset_info in DATASETS:
             success = process_dataset(dataset_info, token)
             results.append((dataset_info[0], success))
+
         # Print final summary
         logger.info("\n📊 Final summary:")
         for dataset, success in results:
             status = "✅ Success" if success else "❌ Failure"
             logger.info(f"{dataset}: {status}")
+
     except Exception as e:
         logger.error(f"❌ Global error: {str(e)}")

+
 if __name__ == "__main__":
+    copy_datasets()

docker-compose.yml
CHANGED
@@ -30,4 +30,4 @@ services:
       - PORT=${FRONTEND_PORT:-7860}
     command: npm start
     stdin_open: true
-    tty: true
+    tty: true

frontend/Dockerfile.dev
CHANGED
@@ -12,4 +12,4 @@ COPY package*.json ./
 RUN npm install

 # Volume will be mounted here, no need for COPY
-CMD ["npm", "start"]
+CMD ["npm", "start"]

frontend/src/components/Logo/HFLogo.js
CHANGED
@@ -16,4 +16,4 @@ const HFLogo = () => (
   </svg>
 );

-export default HFLogo;
+export default HFLogo;

frontend/src/components/shared/CodeBlock.js
CHANGED
@@ -34,4 +34,4 @@ const CodeBlock = ({ code }) => (
   </Box>
 );

-export default CodeBlock;
+export default CodeBlock;

frontend/src/config/auth.js
CHANGED
@@ -4,4 +4,4 @@ export const HF_CONFIG = {
   SCOPE: "openid profile",
   PROD_URL: "https://open-llm-leaderboard-open-llm-leaderboard.hf.space",
   DEV_URL: "http://localhost:7860"
-};
+};

frontend/src/hooks/useThemeMode.js
CHANGED
@@ -15,7 +15,7 @@ export const useThemeMode = () => {
     const handleChange = (e) => {
       setMode(e.matches ? 'dark' : 'light');
     };
-
+
     mediaQuery.addEventListener('change', handleChange);
     return () => mediaQuery.removeEventListener('change', handleChange);
   }, []);
@@ -25,4 +25,4 @@ export const useThemeMode = () => {
   };

   return { mode, toggleTheme };
-};
+};

frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/modelTypes.js
CHANGED
@@ -48,7 +48,7 @@ export const MODEL_TYPES = {

 export const getModelTypeIcon = (type) => {
   const cleanType = type.toLowerCase().trim();
-  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
+  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
     cleanType.includes(key)
   );
   return matchedType ? matchedType[1].icon : '❓';
@@ -56,7 +56,7 @@ export const getModelTypeIcon = (type) => {

 export const getModelTypeLabel = (type) => {
   const cleanType = type.toLowerCase().trim();
-  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
+  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
     cleanType.includes(key)
   );
   return matchedType ? matchedType[1].label : type;
@@ -64,7 +64,7 @@ export const getModelTypeLabel = (type) => {

 export const getModelTypeDescription = (type) => {
   const cleanType = type.toLowerCase().trim();
-  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
+  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
     cleanType.includes(key)
   );
   return matchedType ? matchedType[1].description : 'Unknown model type';
@@ -72,8 +72,8 @@ export const getModelTypeDescription = (type) => {

 export const getModelTypeOrder = (type) => {
   const cleanType = type.toLowerCase().trim();
-  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
+  const matchedType = Object.entries(MODEL_TYPES).find(([key]) =>
     cleanType.includes(key)
   );
   return matchedType ? matchedType[1].order : Infinity;
-};
+};

frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/quickFilters.js
CHANGED
@@ -48,4 +48,4 @@ export const QUICK_FILTER_PRESETS = [
       selectedBooleanFilters: ['is_official_provider']
     }
   }
-];
+];

frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useBatchedState.js
CHANGED
@@ -28,4 +28,4 @@ export const useBatchedState = (initialState, options = {}) => {
   }, [batchDelay, useTransitions]);

   return [state, setBatchedState, isPending];
-};
+};