Daniel Kantor
committed on
Commit
·
f9e5c6c
1
Parent(s):
aa23200
show failed models on dashboard
Browse files
backend/app/services/models.py
CHANGED
@@ -4,10 +4,8 @@ import json
|
|
4 |
import os
|
5 |
from pathlib import Path
|
6 |
import logging
|
7 |
-
import aiohttp
|
8 |
import time
|
9 |
from huggingface_hub import HfApi
|
10 |
-
from huggingface_hub.utils import build_hf_headers
|
11 |
from datasets import disable_progress_bar
|
12 |
import sys
|
13 |
import contextlib
|
@@ -142,7 +140,7 @@ class ModelService(HuggingFaceService):
|
|
142 |
self._log_repo_operation("read", QUEUE_REPO, "Refreshing models cache")
|
143 |
|
144 |
# Initialize models dictionary
|
145 |
-
models = {"finished": [], "evaluating": [], "pending": []}
|
146 |
|
147 |
try:
|
148 |
logger.info(LogFormatter.subsection("DATASET LOADING"))
|
@@ -184,6 +182,7 @@ class ModelService(HuggingFaceService):
|
|
184 |
"PENDING": ["PENDING"],
|
185 |
"EVALUATING": ["RUNNING"],
|
186 |
"FINISHED": ["FINISHED"],
|
|
|
187 |
}
|
188 |
|
189 |
for target, source_statuses in status_map.items():
|
@@ -254,6 +253,7 @@ class ModelService(HuggingFaceService):
|
|
254 |
"Finished": len(models["finished"]),
|
255 |
"Evaluating": len(models["evaluating"]),
|
256 |
"Pending": len(models["pending"]),
|
|
|
257 |
}
|
258 |
for line in LogFormatter.stats(stats, "Models by Status"):
|
259 |
logger.info(line)
|
@@ -425,7 +425,6 @@ class ModelService(HuggingFaceService):
|
|
425 |
# Check in all statuses (pending, evaluating, finished)
|
426 |
for status, models in existing_models.items():
|
427 |
for model in models:
|
428 |
-
print(model)
|
429 |
if (
|
430 |
model["name"] == model_data["model_id"]
|
431 |
and model["revision"] == model_data["revision"]
|
@@ -480,7 +479,7 @@ class ModelService(HuggingFaceService):
|
|
480 |
|
481 |
# Size limits based on precision
|
482 |
if model_size > 15:
|
483 |
-
error_msg =
|
484 |
logger.error(LogFormatter.error("Size limit exceeded", error_msg))
|
485 |
raise Exception(error_msg)
|
486 |
|
@@ -488,9 +487,7 @@ class ModelService(HuggingFaceService):
|
|
488 |
model_data["model_id"], model_data["revision"]
|
489 |
)
|
490 |
if not valid:
|
491 |
-
logger.error(
|
492 |
-
LogFormatter.error("Chat template validation failed", error)
|
493 |
-
)
|
494 |
raise Exception(error)
|
495 |
logger.info(LogFormatter.success("Chat template validation passed"))
|
496 |
|
|
|
4 |
import os
|
5 |
from pathlib import Path
|
6 |
import logging
|
|
|
7 |
import time
|
8 |
from huggingface_hub import HfApi
|
|
|
9 |
from datasets import disable_progress_bar
|
10 |
import sys
|
11 |
import contextlib
|
|
|
140 |
self._log_repo_operation("read", QUEUE_REPO, "Refreshing models cache")
|
141 |
|
142 |
# Initialize models dictionary
|
143 |
+
models = {"finished": [], "evaluating": [], "pending": [], "failed": []}
|
144 |
|
145 |
try:
|
146 |
logger.info(LogFormatter.subsection("DATASET LOADING"))
|
|
|
182 |
"PENDING": ["PENDING"],
|
183 |
"EVALUATING": ["RUNNING"],
|
184 |
"FINISHED": ["FINISHED"],
|
185 |
+
"FAILED": ["FAILED"],
|
186 |
}
|
187 |
|
188 |
for target, source_statuses in status_map.items():
|
|
|
253 |
"Finished": len(models["finished"]),
|
254 |
"Evaluating": len(models["evaluating"]),
|
255 |
"Pending": len(models["pending"]),
|
256 |
+
"Failed": len(models["failed"]),
|
257 |
}
|
258 |
for line in LogFormatter.stats(stats, "Models by Status"):
|
259 |
logger.info(line)
|
|
|
425 |
# Check in all statuses (pending, evaluating, finished)
|
426 |
for status, models in existing_models.items():
|
427 |
for model in models:
|
|
|
428 |
if (
|
429 |
model["name"] == model_data["model_id"]
|
430 |
and model["revision"] == model_data["revision"]
|
|
|
479 |
|
480 |
# Size limits based on precision
|
481 |
if model_size > 15:
|
482 |
+
error_msg = "Model too large (limit: 15B)"
|
483 |
logger.error(LogFormatter.error("Size limit exceeded", error_msg))
|
484 |
raise Exception(error_msg)
|
485 |
|
|
|
487 |
model_data["model_id"], model_data["revision"]
|
488 |
)
|
489 |
if not valid:
|
490 |
+
logger.error(LogFormatter.error("Chat template validation failed", error))
|
|
|
|
|
491 |
raise Exception(error)
|
492 |
logger.info(LogFormatter.success("Chat template validation passed"))
|
493 |
|
frontend/src/pages/AddModelPage/components/EvaluationQueues/EvaluationQueues.js
CHANGED
@@ -414,11 +414,12 @@ const QueueAccordion = ({
|
|
414 |
label={models.length}
|
415 |
size={isMobile ? "small" : "medium"}
|
416 |
color={
|
417 |
-
|
418 |
-
|
419 |
-
:
|
420 |
-
|
421 |
-
: "
|
|
|
422 |
}
|
423 |
variant="outlined"
|
424 |
sx={(theme) => ({
|
@@ -431,20 +432,20 @@ const QueueAccordion = ({
|
|
431 |
status === "finished"
|
432 |
? theme.palette.success[100]
|
433 |
: status === "evaluating"
|
434 |
-
|
435 |
-
|
436 |
borderColor:
|
437 |
status === "finished"
|
438 |
? theme.palette.success[400]
|
439 |
: status === "evaluating"
|
440 |
-
|
441 |
-
|
442 |
color:
|
443 |
status === "finished"
|
444 |
? theme.palette.success[700]
|
445 |
: status === "evaluating"
|
446 |
-
|
447 |
-
|
448 |
"& .MuiChip-label": {
|
449 |
px: { xs: 1, sm: 1.2 },
|
450 |
width: "100%",
|
@@ -454,8 +455,8 @@ const QueueAccordion = ({
|
|
454 |
status === "finished"
|
455 |
? theme.palette.success[200]
|
456 |
: status === "evaluating"
|
457 |
-
|
458 |
-
|
459 |
},
|
460 |
})}
|
461 |
/>
|
@@ -496,6 +497,7 @@ const EvaluationQueues = ({ defaultExpanded = true }) => {
|
|
496 |
pending: [],
|
497 |
evaluating: [],
|
498 |
finished: [],
|
|
|
499 |
});
|
500 |
const [loading, setLoading] = useState(true);
|
501 |
const [error, setError] = useState(null);
|
@@ -524,6 +526,7 @@ const EvaluationQueues = ({ defaultExpanded = true }) => {
|
|
524 |
finished: sortByDate(data.finished),
|
525 |
evaluating: sortByDate(data.evaluating),
|
526 |
pending: sortByDate(data.pending),
|
|
|
527 |
});
|
528 |
} catch (err) {
|
529 |
setError(err.message);
|
@@ -537,6 +540,7 @@ const EvaluationQueues = ({ defaultExpanded = true }) => {
|
|
537 |
return () => clearInterval(interval);
|
538 |
}, []);
|
539 |
|
|
|
540 |
const handleMainAccordionChange = (panel) => (event, isExpanded) => {
|
541 |
setExpanded(isExpanded ? panel : false);
|
542 |
};
|
@@ -721,6 +725,31 @@ const EvaluationQueues = ({ defaultExpanded = true }) => {
|
|
721 |
},
|
722 |
}}
|
723 |
/>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
724 |
</Stack>
|
725 |
)}
|
726 |
{loading && (
|
@@ -777,6 +806,16 @@ const EvaluationQueues = ({ defaultExpanded = true }) => {
|
|
777 |
onChange={handleQueueAccordionChange("finished")}
|
778 |
loading={loading}
|
779 |
/>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
780 |
</>
|
781 |
)}
|
782 |
</AccordionDetails>
|
|
|
414 |
label={models.length}
|
415 |
size={isMobile ? "small" : "medium"}
|
416 |
color={
|
417 |
+
{
|
418 |
+
"finished": "success",
|
419 |
+
"pending": "info",
|
420 |
+
"evaluating": "warning",
|
421 |
+
"failed": "error",
|
422 |
+
}[status]
|
423 |
}
|
424 |
variant="outlined"
|
425 |
sx={(theme) => ({
|
|
|
432 |
status === "finished"
|
433 |
? theme.palette.success[100]
|
434 |
: status === "evaluating"
|
435 |
+
? theme.palette.warning[100]
|
436 |
+
: theme.palette.info[100],
|
437 |
borderColor:
|
438 |
status === "finished"
|
439 |
? theme.palette.success[400]
|
440 |
: status === "evaluating"
|
441 |
+
? theme.palette.warning[400]
|
442 |
+
: theme.palette.info[400],
|
443 |
color:
|
444 |
status === "finished"
|
445 |
? theme.palette.success[700]
|
446 |
: status === "evaluating"
|
447 |
+
? theme.palette.warning[700]
|
448 |
+
: theme.palette.info[700],
|
449 |
"& .MuiChip-label": {
|
450 |
px: { xs: 1, sm: 1.2 },
|
451 |
width: "100%",
|
|
|
455 |
status === "finished"
|
456 |
? theme.palette.success[200]
|
457 |
: status === "evaluating"
|
458 |
+
? theme.palette.warning[200]
|
459 |
+
: theme.palette.info[200],
|
460 |
},
|
461 |
})}
|
462 |
/>
|
|
|
497 |
pending: [],
|
498 |
evaluating: [],
|
499 |
finished: [],
|
500 |
+
failed: [],
|
501 |
});
|
502 |
const [loading, setLoading] = useState(true);
|
503 |
const [error, setError] = useState(null);
|
|
|
526 |
finished: sortByDate(data.finished),
|
527 |
evaluating: sortByDate(data.evaluating),
|
528 |
pending: sortByDate(data.pending),
|
529 |
+
failed: sortByDate(data.failed),
|
530 |
});
|
531 |
} catch (err) {
|
532 |
setError(err.message);
|
|
|
540 |
return () => clearInterval(interval);
|
541 |
}, []);
|
542 |
|
543 |
+
|
544 |
const handleMainAccordionChange = (panel) => (event, isExpanded) => {
|
545 |
setExpanded(isExpanded ? panel : false);
|
546 |
};
|
|
|
725 |
},
|
726 |
}}
|
727 |
/>
|
728 |
+
<Chip
|
729 |
+
label={`${models.failed.length} Failed`}
|
730 |
+
size={isMobile ? "small" : "medium"}
|
731 |
+
color="error"
|
732 |
+
variant="outlined"
|
733 |
+
sx={{
|
734 |
+
borderWidth: 2,
|
735 |
+
fontWeight: 600,
|
736 |
+
fontSize: { xs: "0.75rem", sm: "0.875rem" },
|
737 |
+
height: { xs: "24px", sm: "32px" },
|
738 |
+
bgcolor: "success.100",
|
739 |
+
borderColor: "success.400",
|
740 |
+
color: "success.700",
|
741 |
+
width: { xs: "100%", sm: "auto" },
|
742 |
+
"& .MuiChip-label": {
|
743 |
+
px: { xs: 1, sm: 1.2 },
|
744 |
+
width: "100%",
|
745 |
+
display: "flex",
|
746 |
+
justifyContent: "center",
|
747 |
+
},
|
748 |
+
"&:hover": {
|
749 |
+
bgcolor: "success.200",
|
750 |
+
},
|
751 |
+
}}
|
752 |
+
/>
|
753 |
</Stack>
|
754 |
)}
|
755 |
{loading && (
|
|
|
806 |
onChange={handleQueueAccordionChange("finished")}
|
807 |
loading={loading}
|
808 |
/>
|
809 |
+
|
810 |
+
<QueueAccordion
|
811 |
+
title="Failed evaluations"
|
812 |
+
models={models.failed}
|
813 |
+
status="failed"
|
814 |
+
emptyMessage="No failed evaluations"
|
815 |
+
expanded={expandedQueues.has("failed")}
|
816 |
+
onChange={handleQueueAccordionChange("failed")}
|
817 |
+
loading={loading}
|
818 |
+
/>
|
819 |
</>
|
820 |
)}
|
821 |
</AccordionDetails>
|