Daniel Kantor committed on
Commit
f9e5c6c
·
1 Parent(s): aa23200

show failed models on dashboard

Browse files
backend/app/services/models.py CHANGED
@@ -4,10 +4,8 @@ import json
4
  import os
5
  from pathlib import Path
6
  import logging
7
- import aiohttp
8
  import time
9
  from huggingface_hub import HfApi
10
- from huggingface_hub.utils import build_hf_headers
11
  from datasets import disable_progress_bar
12
  import sys
13
  import contextlib
@@ -142,7 +140,7 @@ class ModelService(HuggingFaceService):
142
  self._log_repo_operation("read", QUEUE_REPO, "Refreshing models cache")
143
 
144
  # Initialize models dictionary
145
- models = {"finished": [], "evaluating": [], "pending": []}
146
 
147
  try:
148
  logger.info(LogFormatter.subsection("DATASET LOADING"))
@@ -184,6 +182,7 @@ class ModelService(HuggingFaceService):
184
  "PENDING": ["PENDING"],
185
  "EVALUATING": ["RUNNING"],
186
  "FINISHED": ["FINISHED"],
 
187
  }
188
 
189
  for target, source_statuses in status_map.items():
@@ -254,6 +253,7 @@ class ModelService(HuggingFaceService):
254
  "Finished": len(models["finished"]),
255
  "Evaluating": len(models["evaluating"]),
256
  "Pending": len(models["pending"]),
 
257
  }
258
  for line in LogFormatter.stats(stats, "Models by Status"):
259
  logger.info(line)
@@ -425,7 +425,6 @@ class ModelService(HuggingFaceService):
425
  # Check in all statuses (pending, evaluating, finished)
426
  for status, models in existing_models.items():
427
  for model in models:
428
- print(model)
429
  if (
430
  model["name"] == model_data["model_id"]
431
  and model["revision"] == model_data["revision"]
@@ -480,7 +479,7 @@ class ModelService(HuggingFaceService):
480
 
481
  # Size limits based on precision
482
  if model_size > 15:
483
- error_msg = f"Model too large (limit: 15B)"
484
  logger.error(LogFormatter.error("Size limit exceeded", error_msg))
485
  raise Exception(error_msg)
486
 
@@ -488,9 +487,7 @@ class ModelService(HuggingFaceService):
488
  model_data["model_id"], model_data["revision"]
489
  )
490
  if not valid:
491
- logger.error(
492
- LogFormatter.error("Chat template validation failed", error)
493
- )
494
  raise Exception(error)
495
  logger.info(LogFormatter.success("Chat template validation passed"))
496
 
 
4
  import os
5
  from pathlib import Path
6
  import logging
 
7
  import time
8
  from huggingface_hub import HfApi
 
9
  from datasets import disable_progress_bar
10
  import sys
11
  import contextlib
 
140
  self._log_repo_operation("read", QUEUE_REPO, "Refreshing models cache")
141
 
142
  # Initialize models dictionary
143
+ models = {"finished": [], "evaluating": [], "pending": [], "failed": []}
144
 
145
  try:
146
  logger.info(LogFormatter.subsection("DATASET LOADING"))
 
182
  "PENDING": ["PENDING"],
183
  "EVALUATING": ["RUNNING"],
184
  "FINISHED": ["FINISHED"],
185
+ "FAILED": ["FAILED"],
186
  }
187
 
188
  for target, source_statuses in status_map.items():
 
253
  "Finished": len(models["finished"]),
254
  "Evaluating": len(models["evaluating"]),
255
  "Pending": len(models["pending"]),
256
+ "Failed": len(models["failed"]),
257
  }
258
  for line in LogFormatter.stats(stats, "Models by Status"):
259
  logger.info(line)
 
425
  # Check in all statuses (pending, evaluating, finished)
426
  for status, models in existing_models.items():
427
  for model in models:
 
428
  if (
429
  model["name"] == model_data["model_id"]
430
  and model["revision"] == model_data["revision"]
 
479
 
480
  # Size limits based on precision
481
  if model_size > 15:
482
+ error_msg = "Model too large (limit: 15B)"
483
  logger.error(LogFormatter.error("Size limit exceeded", error_msg))
484
  raise Exception(error_msg)
485
 
 
487
  model_data["model_id"], model_data["revision"]
488
  )
489
  if not valid:
490
+ logger.error(LogFormatter.error("Chat template validation failed", error))
 
 
491
  raise Exception(error)
492
  logger.info(LogFormatter.success("Chat template validation passed"))
493
 
frontend/src/pages/AddModelPage/components/EvaluationQueues/EvaluationQueues.js CHANGED
@@ -414,11 +414,12 @@ const QueueAccordion = ({
414
  label={models.length}
415
  size={isMobile ? "small" : "medium"}
416
  color={
417
- status === "finished"
418
- ? "success"
419
- : status === "evaluating"
420
- ? "warning"
421
- : "info"
 
422
  }
423
  variant="outlined"
424
  sx={(theme) => ({
@@ -431,20 +432,20 @@ const QueueAccordion = ({
431
  status === "finished"
432
  ? theme.palette.success[100]
433
  : status === "evaluating"
434
- ? theme.palette.warning[100]
435
- : theme.palette.info[100],
436
  borderColor:
437
  status === "finished"
438
  ? theme.palette.success[400]
439
  : status === "evaluating"
440
- ? theme.palette.warning[400]
441
- : theme.palette.info[400],
442
  color:
443
  status === "finished"
444
  ? theme.palette.success[700]
445
  : status === "evaluating"
446
- ? theme.palette.warning[700]
447
- : theme.palette.info[700],
448
  "& .MuiChip-label": {
449
  px: { xs: 1, sm: 1.2 },
450
  width: "100%",
@@ -454,8 +455,8 @@ const QueueAccordion = ({
454
  status === "finished"
455
  ? theme.palette.success[200]
456
  : status === "evaluating"
457
- ? theme.palette.warning[200]
458
- : theme.palette.info[200],
459
  },
460
  })}
461
  />
@@ -496,6 +497,7 @@ const EvaluationQueues = ({ defaultExpanded = true }) => {
496
  pending: [],
497
  evaluating: [],
498
  finished: [],
 
499
  });
500
  const [loading, setLoading] = useState(true);
501
  const [error, setError] = useState(null);
@@ -524,6 +526,7 @@ const EvaluationQueues = ({ defaultExpanded = true }) => {
524
  finished: sortByDate(data.finished),
525
  evaluating: sortByDate(data.evaluating),
526
  pending: sortByDate(data.pending),
 
527
  });
528
  } catch (err) {
529
  setError(err.message);
@@ -537,6 +540,7 @@ const EvaluationQueues = ({ defaultExpanded = true }) => {
537
  return () => clearInterval(interval);
538
  }, []);
539
 
 
540
  const handleMainAccordionChange = (panel) => (event, isExpanded) => {
541
  setExpanded(isExpanded ? panel : false);
542
  };
@@ -721,6 +725,31 @@ const EvaluationQueues = ({ defaultExpanded = true }) => {
721
  },
722
  }}
723
  />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
724
  </Stack>
725
  )}
726
  {loading && (
@@ -777,6 +806,16 @@ const EvaluationQueues = ({ defaultExpanded = true }) => {
777
  onChange={handleQueueAccordionChange("finished")}
778
  loading={loading}
779
  />
 
 
 
 
 
 
 
 
 
 
780
  </>
781
  )}
782
  </AccordionDetails>
 
414
  label={models.length}
415
  size={isMobile ? "small" : "medium"}
416
  color={
417
+ {
418
+ "finished": "success",
419
+ "pending": "info",
420
+ "evaluating": "warning",
421
+ "failed": "error",
422
+ }[status]
423
  }
424
  variant="outlined"
425
  sx={(theme) => ({
 
432
  status === "finished"
433
  ? theme.palette.success[100]
434
  : status === "evaluating"
435
+ ? theme.palette.warning[100]
436
+ : theme.palette.info[100],
437
  borderColor:
438
  status === "finished"
439
  ? theme.palette.success[400]
440
  : status === "evaluating"
441
+ ? theme.palette.warning[400]
442
+ : theme.palette.info[400],
443
  color:
444
  status === "finished"
445
  ? theme.palette.success[700]
446
  : status === "evaluating"
447
+ ? theme.palette.warning[700]
448
+ : theme.palette.info[700],
449
  "& .MuiChip-label": {
450
  px: { xs: 1, sm: 1.2 },
451
  width: "100%",
 
455
  status === "finished"
456
  ? theme.palette.success[200]
457
  : status === "evaluating"
458
+ ? theme.palette.warning[200]
459
+ : theme.palette.info[200],
460
  },
461
  })}
462
  />
 
497
  pending: [],
498
  evaluating: [],
499
  finished: [],
500
+ failed: [],
501
  });
502
  const [loading, setLoading] = useState(true);
503
  const [error, setError] = useState(null);
 
526
  finished: sortByDate(data.finished),
527
  evaluating: sortByDate(data.evaluating),
528
  pending: sortByDate(data.pending),
529
+ failed: sortByDate(data.failed),
530
  });
531
  } catch (err) {
532
  setError(err.message);
 
540
  return () => clearInterval(interval);
541
  }, []);
542
 
543
+
544
  const handleMainAccordionChange = (panel) => (event, isExpanded) => {
545
  setExpanded(isExpanded ? panel : false);
546
  };
 
725
  },
726
  }}
727
  />
728
+ <Chip
729
+ label={`${models.failed.length} Failed`}
730
+ size={isMobile ? "small" : "medium"}
731
+ color="error"
732
+ variant="outlined"
733
+ sx={{
734
+ borderWidth: 2,
735
+ fontWeight: 600,
736
+ fontSize: { xs: "0.75rem", sm: "0.875rem" },
737
+ height: { xs: "24px", sm: "32px" },
738
+ bgcolor: "success.100",
739
+ borderColor: "success.400",
740
+ color: "success.700",
741
+ width: { xs: "100%", sm: "auto" },
742
+ "& .MuiChip-label": {
743
+ px: { xs: 1, sm: 1.2 },
744
+ width: "100%",
745
+ display: "flex",
746
+ justifyContent: "center",
747
+ },
748
+ "&:hover": {
749
+ bgcolor: "success.200",
750
+ },
751
+ }}
752
+ />
753
  </Stack>
754
  )}
755
  {loading && (
 
806
  onChange={handleQueueAccordionChange("finished")}
807
  loading={loading}
808
  />
809
+
810
+ <QueueAccordion
811
+ title="Failed evaluations"
812
+ models={models.failed}
813
+ status="failed"
814
+ emptyMessage="No failed evaluations"
815
+ expanded={expandedQueues.has("failed")}
816
+ onChange={handleQueueAccordionChange("failed")}
817
+ loading={loading}
818
+ />
819
  </>
820
  )}
821
  </AccordionDetails>