Spaces:

fioriclass
/

emotion_classifier

Sleeping

App Files Community

fioriclass committed 13 days ago

Commit

65e5e42

1 Parent(s): 89f0e63

correction bug

Browse files

Files changed (3) hide show

src/cuml_trainer.py +10 -2
src/interfaces/metrics_calculator.py +9 -30
src/trainers/huggingface/huggingface_transformer_trainer.py +11 -32

src/cuml_trainer.py CHANGED Viewed

@@ -71,11 +71,19 @@ class CuMLTrainer(BaseTrainer, ABC):
         data = cudf.read_csv(self.data_path)
-        # Identification et concaténation des features
         feature_columns = [col for col in data.columns if col != self.target_column]
         if not feature_columns:
             raise ValueError("Aucune colonne de feature trouvée.")
-        texts_concatenated = data[feature_columns].astype(str).agg(' '.join, axis=1)
         labels = data[self.target_column].astype(self._get_label_dtype()).values
         # Premier split: 80% train, 20% temp (pour val+test)

         data = cudf.read_csv(self.data_path)
+        # Identification des features
         feature_columns = [col for col in data.columns if col != self.target_column]
         if not feature_columns:
             raise ValueError("Aucune colonne de feature trouvée.")
+        # Concaténation manuelle des features (agg n'est pas supporté pour les colonnes string dans cuDF)
+        # Commencer avec la première colonne
+        texts_concatenated = data[feature_columns[0]].astype(str)
+        # Ajouter les autres colonnes avec un espace comme séparateur
+        for col in feature_columns[1:]:
+            texts_concatenated = texts_concatenated.str.cat(data[col].astype(str), sep=' ')
         labels = data[self.target_column].astype(self._get_label_dtype()).values
         # Premier split: 80% train, 20% temp (pour val+test)

src/interfaces/metrics_calculator.py CHANGED Viewed

@@ -95,40 +95,19 @@ class DefaultMetricsCalculator(MetricsCalculator):
             Tuple contenant (précision optimale, rappel optimal, F1 score optimal, seuil optimal)
         """
         # Ajouter le seuil 1.0 à thresholds (qui n'est pas inclus par défaut dans precision_recall_curve)
-        if len(thresholds) > 0:
-            thresholds_with_one = cp.append(thresholds, cp.array([1.0]))
-        else:
-            thresholds_with_one = cp.array([1.0])
         # Calculer le F1 score pour chaque point de la courbe
         # F1 = 2 * (precision * recall) / (precision + recall)
-        # Éviter la division par zéro
-        denominator = precision + recall
-        # Créer un masque pour éviter la division par zéro
-        mask = denominator > 0
-        # Initialiser le F1 score avec des zéros
-        f1_scores = cp.zeros_like(precision)
-        # Calculer le F1 score uniquement où le dénominateur n'est pas zéro
-        f1_scores[mask] = 2 * (precision[mask] * recall[mask]) / denominator[mask]
         # Trouver l'indice du F1 score maximal
-        if len(f1_scores) > 0:
-            best_idx = cp.argmax(f1_scores)
-            best_precision = float(precision[best_idx])
-            best_recall = float(recall[best_idx])
-            best_f1 = float(f1_scores[best_idx])
-            # Obtenir le seuil optimal
-            if best_idx < len(thresholds_with_one):
-                best_threshold = float(thresholds_with_one[best_idx])
-            else:
-                best_threshold = 0.5  # Valeur par défaut si l'indice est hors limites
-        else:
-            # Valeurs par défaut si les tableaux sont vides
-            best_precision = 0.0
-            best_recall = 0.0
-            best_f1 = 0.0
-            best_threshold = 0.5
         return best_precision, best_recall, best_f1, best_threshold

             Tuple contenant (précision optimale, rappel optimal, F1 score optimal, seuil optimal)
         """
         # Ajouter le seuil 1.0 à thresholds (qui n'est pas inclus par défaut dans precision_recall_curve)
+        thresholds_with_one = cp.append(thresholds, cp.array([1.0]))
         # Calculer le F1 score pour chaque point de la courbe
         # F1 = 2 * (precision * recall) / (precision + recall)
+        f1_scores = 2 * (precision * recall) / (precision + recall)
         # Trouver l'indice du F1 score maximal
+        best_idx = cp.argmax(f1_scores)
+        best_precision = float(precision[best_idx])
+        best_recall = float(recall[best_idx])
+        best_f1 = float(f1_scores[best_idx])
+        # Obtenir le seuil optimal
+        best_threshold = float(thresholds_with_one[best_idx])
         return best_precision, best_recall, best_f1, best_threshold

src/trainers/huggingface/huggingface_transformer_trainer.py CHANGED Viewed

@@ -79,42 +79,21 @@ def calculate_optimal_f1(precision: cp.ndarray, recall: cp.ndarray, thresholds:
         Tuple contenant (précision optimale, rappel optimal, F1 score optimal, seuil optimal)
     """
     # Ajouter le seuil 1.0 à thresholds (qui n'est pas inclus par défaut dans precision_recall_curve)
-    if len(thresholds) > 0:
-        thresholds_with_one = cp.append(thresholds, cp.array([1.0]))
-    else:
-        thresholds_with_one = cp.array([1.0])
     # Calculer le F1 score pour chaque point de la courbe
     # F1 = 2 * (precision * recall) / (precision + recall)
-    # Éviter la division par zéro
-    denominator = precision + recall
-    # Créer un masque pour éviter la division par zéro
-    mask = denominator > 0
-    # Initialiser le F1 score avec des zéros
-    f1_scores = cp.zeros_like(precision)
-    # Calculer le F1 score uniquement où le dénominateur n'est pas zéro
-    f1_scores[mask] = 2 * (precision[mask] * recall[mask]) / denominator[mask]
     # Trouver l'indice du F1 score maximal
-    if len(f1_scores) > 0:
-        best_idx = cp.argmax(f1_scores)
-        best_precision = float(precision[best_idx])
-        best_recall = float(recall[best_idx])
-        best_f1 = float(f1_scores[best_idx])
-        # Obtenir le seuil optimal
-        if best_idx < len(thresholds_with_one):
-            best_threshold = float(thresholds_with_one[best_idx])
-        else:
-            best_threshold = 0.5  # Valeur par défaut si l'indice est hors limites
-    else:
-        # Valeurs par défaut si les tableaux sont vides
-        best_precision = 0.0
-        best_recall = 0.0
-        best_f1 = 0.0
-        best_threshold = 0.5
     return best_precision, best_recall, best_f1, best_threshold
 class HuggingFaceTransformerTrainer(BaseTrainer):

         Tuple contenant (précision optimale, rappel optimal, F1 score optimal, seuil optimal)
     """
     # Ajouter le seuil 1.0 à thresholds (qui n'est pas inclus par défaut dans precision_recall_curve)
+    thresholds_with_one = cp.append(thresholds, cp.array([1.0]))
     # Calculer le F1 score pour chaque point de la courbe
     # F1 = 2 * (precision * recall) / (precision + recall)
+    f1_scores = 2 * (precision * recall) / (precision + recall)
     # Trouver l'indice du F1 score maximal
+    best_idx = cp.argmax(f1_scores)
+    best_precision = float(precision[best_idx])
+    best_recall = float(recall[best_idx])
+    best_f1 = float(f1_scores[best_idx])
+    # Obtenir le seuil optimal
+    best_threshold = float(thresholds_with_one[best_idx])
     return best_precision, best_recall, best_f1, best_threshold
 class HuggingFaceTransformerTrainer(BaseTrainer):