simondh committed on
Commit
dc38c9a
·
1 Parent(s): a241f5a

fix mismatch due to parallelization

Browse files
Files changed (2) hide show
  1. app.py +0 -4
  2. classifiers.py +12 -9
app.py CHANGED
@@ -366,10 +366,6 @@ with gr.Blocks(title="Text Classification System") as demo:
366
  if df is None:
367
  return gr.Row(visible=False), gr.File(visible=False), gr.File(visible=False), gr.Dataframe(visible=False)
368
 
369
- # Sort by category if it exists
370
- if "Category" in df.columns:
371
- df = df.sort_values("Category")
372
-
373
  # Export to both formats
374
  csv_path = export_results(df, "csv")
375
  excel_path = export_results(df, "excel")
 
366
  if df is None:
367
  return gr.Row(visible=False), gr.File(visible=False), gr.File(visible=False), gr.Dataframe(visible=False)
368
 
 
 
 
 
369
  # Export to both formats
370
  csv_path = export_results(df, "csv")
371
  excel_path = export_results(df, "excel")
classifiers.py CHANGED
@@ -154,25 +154,28 @@ class LLMClassifier(BaseClassifier):
154
 
155
  # Process texts in parallel
156
  with ThreadPoolExecutor(max_workers=10) as executor:
157
- # Submit all tasks
158
- future_to_text = {
159
- executor.submit(self._classify_text, text, categories): text
160
- for text in texts
161
  }
162
 
 
 
 
163
  # Collect results as they complete
164
- results = []
165
- for future in as_completed(future_to_text):
166
  try:
167
  result = future.result()
168
- results.append(result)
169
  except Exception as e:
170
  print(f"Error processing text: {str(e)}")
171
- results.append({
172
  "category": categories[0],
173
  "confidence": 50,
174
  "explanation": f"Error during classification: {str(e)}"
175
- })
176
 
177
  return results
178
 
 
154
 
155
  # Process texts in parallel
156
  with ThreadPoolExecutor(max_workers=10) as executor:
157
+ # Submit all tasks with their original indices
158
+ future_to_index = {
159
+ executor.submit(self._classify_text, text, categories): idx
160
+ for idx, text in enumerate(texts)
161
  }
162
 
163
+ # Initialize results list with None values
164
+ results = [None] * len(texts)
165
+
166
  # Collect results as they complete
167
+ for future in as_completed(future_to_index):
168
+ original_idx = future_to_index[future]
169
  try:
170
  result = future.result()
171
+ results[original_idx] = result
172
  except Exception as e:
173
  print(f"Error processing text: {str(e)}")
174
+ results[original_idx] = {
175
  "category": categories[0],
176
  "confidence": 50,
177
  "explanation": f"Error during classification: {str(e)}"
178
+ }
179
 
180
  return results
181