Spaces:

simondh
/

classifieur

Sleeping

simondh committed 26 days ago

Commit

dc38c9a

1 Parent(s): a241f5a

fix mismatch due to parallelization

Files changed (2) hide show

app.py CHANGED Viewed

@@ -366,10 +366,6 @@ with gr.Blocks(title="Text Classification System") as demo:
             if df is None:
                 return gr.Row(visible=False), gr.File(visible=False), gr.File(visible=False), gr.Dataframe(visible=False)
-            # Sort by category if it exists
-            if "Category" in df.columns:
-                df = df.sort_values("Category")
             # Export to both formats
             csv_path = export_results(df, "csv")
             excel_path = export_results(df, "excel")

             if df is None:
                 return gr.Row(visible=False), gr.File(visible=False), gr.File(visible=False), gr.Dataframe(visible=False)
             # Export to both formats
             csv_path = export_results(df, "csv")
             excel_path = export_results(df, "excel")

classifiers.py CHANGED Viewed

@@ -154,25 +154,28 @@ class LLMClassifier(BaseClassifier):
         # Process texts in parallel
         with ThreadPoolExecutor(max_workers=10) as executor:
-            # Submit all tasks
-            future_to_text = {
-                executor.submit(self._classify_text, text, categories): text
-                for text in texts
             }
             # Collect results as they complete
-            results = []
-            for future in as_completed(future_to_text):
                 try:
                     result = future.result()
-                    results.append(result)
                 except Exception as e:
                     print(f"Error processing text: {str(e)}")
-                    results.append({
                         "category": categories[0],
                         "confidence": 50,
                         "explanation": f"Error during classification: {str(e)}"
-                    })
         return results

         # Process texts in parallel
         with ThreadPoolExecutor(max_workers=10) as executor:
+            # Submit all tasks with their original indices
+            future_to_index = {
+                executor.submit(self._classify_text, text, categories): idx
+                for idx, text in enumerate(texts)
             }
+            # Initialize results list with None values
+            results = [None] * len(texts)
             # Collect results as they complete
+            for future in as_completed(future_to_index):
+                original_idx = future_to_index[future]
                 try:
                     result = future.result()
+                    results[original_idx] = result
                 except Exception as e:
                     print(f"Error processing text: {str(e)}")
+                    results[original_idx] = {
                         "category": categories[0],
                         "confidence": 50,
                         "explanation": f"Error during classification: {str(e)}"
+                    }
         return results