Spaces:
Sleeping
Sleeping
Commit
·
051d547
1
Parent(s):
e6534df
v.1.33
Browse files
app.py
CHANGED
@@ -645,7 +645,7 @@ def create_interface():
|
|
645 |
control = ProcessControl()
|
646 |
|
647 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
648 |
-
gr.Markdown("# AI-анализ мониторинга новостей v.1.
|
649 |
|
650 |
with gr.Row():
|
651 |
file_input = gr.File(
|
@@ -704,128 +704,130 @@ def create_interface():
|
|
704 |
control.request_stop()
|
705 |
return "Остановка обработки..."
|
706 |
|
707 |
-
|
708 |
-
|
709 |
-
|
710 |
-
|
711 |
-
|
712 |
-
|
713 |
-
|
714 |
-
|
715 |
-
|
716 |
-
|
717 |
-
|
718 |
-
|
719 |
-
|
720 |
-
|
721 |
-
|
722 |
-
|
723 |
-
|
724 |
-
|
725 |
-
|
726 |
-
|
727 |
-
|
728 |
-
|
729 |
-
|
730 |
-
|
731 |
-
|
732 |
-
|
733 |
-
|
734 |
-
|
735 |
-
|
736 |
-
# Create partial results if stopped
|
737 |
-
if processed_rows:
|
738 |
-
result_df = pd.DataFrame(processed_rows)
|
739 |
-
output = create_output_file(result_df, file_obj)
|
740 |
-
if output:
|
741 |
-
fig_sentiment, fig_events = create_visualizations(result_df)
|
742 |
-
return (
|
743 |
-
result_df,
|
744 |
-
fig_sentiment,
|
745 |
-
fig_events,
|
746 |
-
True, # Show download button
|
747 |
-
output, # Raw bytes
|
748 |
-
f"Обработка остановлена. Обработано {len(processed_rows)}/{total} строк",
|
749 |
-
dedup_message
|
750 |
-
)
|
751 |
-
break
|
752 |
-
|
753 |
-
batch_end = min(batch_start + batch_size, total)
|
754 |
-
batch = df.iloc[batch_start:batch_end]
|
755 |
-
|
756 |
-
for idx, row in batch.iterrows():
|
757 |
-
try:
|
758 |
-
text = str(row.get('Выдержки из текста', '')).strip()
|
759 |
-
entity = str(row.get('Объект', '')).strip()
|
760 |
-
|
761 |
-
if not text or not entity:
|
762 |
-
continue
|
763 |
-
|
764 |
-
# Process with GPU
|
765 |
-
results = detector.process_text(text, entity)
|
766 |
-
|
767 |
-
processed_rows.append({
|
768 |
-
'Объект': entity,
|
769 |
-
'Заголовок': str(row.get('Заголовок', '')),
|
770 |
-
'Translated': results['translated_text'],
|
771 |
-
'Sentiment': results['sentiment'],
|
772 |
-
'Impact': results['impact'],
|
773 |
-
'Reasoning': results['reasoning'],
|
774 |
-
'Event_Type': results['event_type'],
|
775 |
-
'Event_Summary': results['event_summary'],
|
776 |
-
'Выдержки из текста': text[:1000]
|
777 |
-
})
|
778 |
-
|
779 |
-
except Exception as e:
|
780 |
-
logger.error(f"Error processing row {idx}: {str(e)}")
|
781 |
-
continue
|
782 |
-
|
783 |
-
# Create intermediate results
|
784 |
-
if processed_rows:
|
785 |
-
result_df = pd.DataFrame(processed_rows)
|
786 |
-
output = create_output_file(result_df, file_obj)
|
787 |
-
if output:
|
788 |
-
fig_sentiment, fig_events = create_visualizations(result_df)
|
789 |
-
yield (
|
790 |
-
result_df,
|
791 |
-
fig_sentiment,
|
792 |
-
fig_events,
|
793 |
-
True, # Show download button
|
794 |
-
output, # Raw bytes
|
795 |
-
f"Обработано {len(processed_rows)}/{total} строк",
|
796 |
-
dedup_message
|
797 |
-
)
|
798 |
-
|
799 |
-
# Cleanup GPU resources after batch
|
800 |
-
torch.cuda.empty_cache()
|
801 |
-
time.sleep(2)
|
802 |
-
|
803 |
-
# Create final results
|
804 |
if processed_rows:
|
805 |
-
|
806 |
-
|
807 |
-
if
|
808 |
-
|
|
|
809 |
return (
|
810 |
-
|
811 |
fig_sentiment,
|
812 |
fig_events,
|
813 |
-
True,
|
814 |
-
|
815 |
-
"Обработка
|
816 |
dedup_message
|
817 |
)
|
818 |
-
|
819 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
820 |
|
821 |
-
|
822 |
-
|
823 |
-
|
824 |
-
|
825 |
-
|
826 |
-
|
827 |
-
|
828 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
829 |
|
830 |
def trigger_download(show_button, file_content):
|
831 |
"""Handle download button visibility and file content"""
|
|
|
645 |
control = ProcessControl()
|
646 |
|
647 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
648 |
+
gr.Markdown("# AI-анализ мониторинга новостей v.1.33")
|
649 |
|
650 |
with gr.Row():
|
651 |
file_input = gr.File(
|
|
|
704 |
control.request_stop()
|
705 |
return "Остановка обработки..."
|
706 |
|
707 |
+
@spaces.GPU(duration=300)
def analyze(file_bytes):
    """Analyze an uploaded Excel workbook and stream results to the UI.

    The workbook's 'Публикации' sheet is read, de-duplicated on the
    'Выдержки из текста' column, and processed in small batches. After each
    batch that has produced rows, an updated result tuple is yielded so the
    interface can refresh while work is still in progress.

    This function is a generator: every UI update — including the early
    "no file", "stopped", "finished", "no data" and "error" states — is
    delivered with ``yield``. A bare ``return`` only ends the stream; a
    ``return <value>`` inside a generator would be stored on StopIteration
    and never reach a consumer that iterates over the results.

    Args:
        file_bytes: Raw bytes of the uploaded workbook, or None when nothing
            was uploaded.

    Yields:
        A 7-tuple ``(result_df, fig_sentiment, fig_events, has_file,
        output_bytes, status_message, dedup_message)``. ``has_file`` is True
        when ``output_bytes`` holds a generated output file (presumably it
        drives the download button's visibility — confirm against the
        event wiring, which is outside this block).
    """
    if file_bytes is None:
        gr.Warning("Пожалуйста, загрузите файл")
        yield None, None, None, False, None, "Ожидание файла...", ""
        return

    # Bound before the try so the finally clause can test it safely even when
    # an exception is raised before the detector is constructed.
    detector = None
    try:
        # Reset stop flag
        control.reset()

        file_obj = io.BytesIO(file_bytes)
        logger.info("File loaded into BytesIO successfully")

        detector = EventDetector()

        # Read and deduplicate data
        df = pd.read_excel(file_obj, sheet_name='Публикации')
        original_count = len(df)
        df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
        removed_count = original_count - len(df)
        dedup_message = f"Удалено {removed_count} дубликатов из {original_count} записей"
        logger.info(f"Removed {removed_count} duplicate entries")

        processed_rows = []
        total = len(df)
        batch_size = 3

        def build_outputs(status):
            """Return the UI tuple for the rows processed so far, or None if no file was produced."""
            frame = pd.DataFrame(processed_rows)
            workbook = create_output_file(frame, file_obj)
            if not workbook:
                return None
            payload = workbook.getvalue()  # Convert BytesIO to bytes
            fig_sentiment, fig_events = create_visualizations(frame)
            return (
                frame,
                fig_sentiment,
                fig_events,
                True,
                payload,
                status,
                dedup_message,
            )

        for batch_start in range(0, total, batch_size):
            if control.should_stop():
                # Emit partial results, if any, then end the stream.
                if processed_rows:
                    stopped = build_outputs(
                        f"Обработка остановлена. Обработано {len(processed_rows)}/{total} строк"
                    )
                    if stopped is not None:
                        yield stopped
                        return
                break

            batch_end = min(batch_start + batch_size, total)
            batch = df.iloc[batch_start:batch_end]

            for idx, row in batch.iterrows():
                try:
                    text = str(row.get('Выдержки из текста', '')).strip()
                    entity = str(row.get('Объект', '')).strip()

                    if not text or not entity:
                        continue

                    # Process with GPU
                    results = detector.process_text(text, entity)

                    processed_rows.append({
                        'Объект': entity,
                        'Заголовок': str(row.get('Заголовок', '')),
                        'Translated': results['translated_text'],
                        'Sentiment': results['sentiment'],
                        'Impact': results['impact'],
                        'Reasoning': results['reasoning'],
                        'Event_Type': results['event_type'],
                        'Event_Summary': results['event_summary'],
                        'Выдержки из текста': text[:1000]
                    })

                except Exception as e:
                    # Best effort: one bad row must not abort the whole run.
                    logger.error(f"Error processing row {idx}: {str(e)}")
                    continue

            # Create intermediate results
            if processed_rows:
                progress = build_outputs(
                    f"Обработано {len(processed_rows)}/{total} строк"
                )
                if progress is not None:
                    yield progress

            # Cleanup GPU resources after batch
            torch.cuda.empty_cache()
            time.sleep(2)

        # Create final results
        if processed_rows:
            finished = build_outputs("Обработка завершена!")
            if finished is not None:
                yield finished
        else:
            yield None, None, None, False, None, "Нет обработанных данных", ""

    except Exception as e:
        error_msg = f"Ошибка анализа: {str(e)}"
        logger.exception(error_msg)
        # gr.Error only takes effect when raised, which would also discard
        # the status tuple below; a Warning toast is shown by a plain call.
        gr.Warning(error_msg)
        yield None, None, None, False, None, error_msg, ""
    finally:
        if detector is not None:
            detector.cleanup()
|
831 |
|
832 |
def trigger_download(show_button, file_content):
|
833 |
"""Handle download button visibility and file content"""
|