pentarosarium committed on
Commit
680c2d5
·
1 Parent(s): feb6866
Files changed (1) hide show
  1. app.py +71 -58
app.py CHANGED
@@ -399,7 +399,7 @@ def create_interface():
399
  control = ProcessControl()
400
 
401
  with gr.Blocks(theme=gr.themes.Soft()) as app:
402
- gr.Markdown("# AI-анализ мониторинга новостей v.1.17")
403
 
404
  with gr.Row():
405
  file_input = gr.File(
@@ -445,7 +445,8 @@ def create_interface():
445
  def stop_processing():
446
  control.request_stop()
447
  return "Остановка обработки..."
448
-
 
449
  def analyze(file_bytes):
450
  if file_bytes is None:
451
  gr.Warning("Пожалуйста, загрузите файл")
@@ -458,75 +459,87 @@ def create_interface():
458
  file_obj = io.BytesIO(file_bytes)
459
  logger.info("File loaded into BytesIO successfully")
460
 
461
- progress_status = "Начинаем обработку файла..."
462
- yield None, None, None, progress_status
463
 
464
- # Process file
465
- df = pd.read_excel(file_obj, sheet_name='Публикации')
466
- logger.info(f"Successfully read Excel file. Shape: {df.shape}")
 
 
 
 
 
 
 
 
 
 
 
 
467
 
468
- # Deduplication
 
469
  original_count = len(df)
470
  df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
471
  logger.info(f"Removed {original_count - len(df)} duplicate entries")
472
 
473
- detector = EventDetector()
474
- detector.control = control # Pass control object
475
  processed_rows = []
476
  total = len(df)
 
477
 
478
- # Initialize models
479
- if not detector.initialize_models():
480
- raise Exception("Failed to initialize models")
481
-
482
- for idx, row in df.iterrows():
483
  if control.should_stop():
484
- yield (
485
- pd.DataFrame(processed_rows) if processed_rows else None,
486
- None, None,
487
- f"Обработка остановлена. Обработано {idx} из {total} строк"
488
- )
489
- return
490
 
491
- try:
492
- text = str(row.get('Выдержки из текста', ''))
493
- if not text.strip():
494
- continue
495
 
496
- entity = str(row.get('Объект', ''))
497
- if not entity.strip():
498
- continue
499
-
500
- event_type, event_summary = detector.detect_events(text, entity)
501
- sentiment = detector.analyze_sentiment(text)
502
-
503
- processed_rows.append({
504
- 'Объект': entity,
505
- 'Заголовок': str(row.get('Заголовок', '')),
506
- 'Sentiment': sentiment,
507
- 'Event_Type': event_type,
508
- 'Event_Summary': event_summary,
509
- 'Текст': text[:1000]
510
- })
511
-
512
- if idx % 5 == 0:
513
- progress_status = f"Обработано {idx + 1}/{total} строк"
514
- yield None, None, None, progress_status
515
 
516
- except Exception as e:
517
- logger.error(f"Error processing row {idx}: {str(e)}")
518
- continue
519
-
520
- result_df = pd.DataFrame(processed_rows)
521
- fig_sentiment, fig_events = create_visualizations(result_df)
522
-
523
- return (
524
- result_df,
525
- fig_sentiment,
526
- fig_events,
527
- f"Обработка завершена успешно! Обработано {len(result_df)} строк"
528
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529
 
 
 
 
 
 
 
 
530
  except Exception as e:
531
  error_msg = f"Ошибка анализа: {str(e)}"
532
  logger.error(error_msg)
@@ -544,4 +557,4 @@ def create_interface():
544
 
545
  if __name__ == "__main__":
546
  app = create_interface()
547
- app.launch(share=True)
 
399
  control = ProcessControl()
400
 
401
  with gr.Blocks(theme=gr.themes.Soft()) as app:
402
+ gr.Markdown("# AI-анализ мониторинга новостей v.1.20")
403
 
404
  with gr.Row():
405
  file_input = gr.File(
 
445
  def stop_processing():
446
  control.request_stop()
447
  return "Остановка обработки..."
448
+
449
+ @spaces.GPU(duration=300) # 5 minutes duration for the entire analysis
450
  def analyze(file_bytes):
451
  if file_bytes is None:
452
  gr.Warning("Пожалуйста, загрузите файл")
 
459
  file_obj = io.BytesIO(file_bytes)
460
  logger.info("File loaded into BytesIO successfully")
461
 
462
+ detector = EventDetector()
 
463
 
464
+ # Initialize models with GPU
465
+ @spaces.GPU(duration=30)
466
+ def init_models():
467
+ return detector.initialize_models()
468
+
469
+ if not init_models():
470
+ raise Exception("Failed to initialize models")
471
+
472
+ # Process in batches with GPU allocation
473
+ @spaces.GPU(duration=20)
474
+ def process_batch(batch, entity):
475
+ event_type, event_summary = detector.detect_events(batch, entity)
476
+ time.sleep(1) # Wait between GPU operations
477
+ sentiment = detector.analyze_sentiment(batch)
478
+ return event_type, event_summary, sentiment
479
 
480
+ # Read and deduplicate data
481
+ df = pd.read_excel(file_obj, sheet_name='Публикации')
482
  original_count = len(df)
483
  df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
484
  logger.info(f"Removed {original_count - len(df)} duplicate entries")
485
 
 
 
486
  processed_rows = []
487
  total = len(df)
488
+ batch_size = 3
489
 
490
+ for batch_start in range(0, total, batch_size):
 
 
 
 
491
  if control.should_stop():
492
+ break
493
+
494
+ batch_end = min(batch_start + batch_size, total)
495
+ batch = df.iloc[batch_start:batch_end]
 
 
496
 
497
+ for idx, row in batch.iterrows():
498
+ try:
499
+ text = str(row.get('Выдержки из текста', '')).strip()
500
+ entity = str(row.get('Объект', '')).strip()
501
 
502
+ if not text or not entity:
503
+ continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
 
505
+ # Process with GPU
506
+ event_type, event_summary, sentiment = process_batch(text, entity)
507
+
508
+ processed_rows.append({
509
+ 'Объект': entity,
510
+ 'Заголовок': str(row.get('Заголовок', '')),
511
+ 'Sentiment': sentiment,
512
+ 'Event_Type': event_type,
513
+ 'Event_Summary': event_summary,
514
+ 'Текст': text[:1000]
515
+ })
516
+
517
+ except Exception as e:
518
+ logger.error(f"Error processing row {idx}: {str(e)}")
519
+ continue
520
+
521
+ # Create intermediate results
522
+ if processed_rows:
523
+ result_df = pd.DataFrame(processed_rows)
524
+ fig_sentiment, fig_events = create_visualizations(result_df)
525
+ yield (
526
+ result_df,
527
+ fig_sentiment,
528
+ fig_events,
529
+ f"Обработано {len(processed_rows)}/{total} строк"
530
+ )
531
+
532
+ # Cleanup GPU resources after batch
533
+ torch.cuda.empty_cache()
534
+ time.sleep(2)
535
 
536
+ if processed_rows:
537
+ final_df = pd.DataFrame(processed_rows)
538
+ fig_sentiment, fig_events = create_visualizations(final_df)
539
+ return final_df, fig_sentiment, fig_events, "Обработка завершена!"
540
+ else:
541
+ return None, None, None, "Нет обработанных данных"
542
+
543
  except Exception as e:
544
  error_msg = f"Ошибка анализа: {str(e)}"
545
  logger.error(error_msg)
 
557
 
558
  if __name__ == "__main__":
559
  app = create_interface()
560
+ app.launch(share=True)