pentarosarium committed on
Commit
051d547
·
1 Parent(s): e6534df
Files changed (1) hide show
  1. app.py +118 -116
app.py CHANGED
@@ -645,7 +645,7 @@ def create_interface():
645
  control = ProcessControl()
646
 
647
  with gr.Blocks(theme=gr.themes.Soft()) as app:
648
- gr.Markdown("# AI-анализ мониторинга новостей v.1.32")
649
 
650
  with gr.Row():
651
  file_input = gr.File(
@@ -704,128 +704,130 @@ def create_interface():
704
  control.request_stop()
705
  return "Остановка обработки..."
706
 
707
- @spaces.GPU(duration=300)
708
- def analyze(file_bytes):
709
- if file_bytes is None:
710
- gr.Warning("Пожалуйста, загрузите файл")
711
- return None, None, None, False, None, "Ожидание файла...", ""
712
-
713
- try:
714
- # Reset stop flag
715
- control.reset()
716
-
717
- file_obj = io.BytesIO(file_bytes)
718
- logger.info("File loaded into BytesIO successfully")
719
-
720
- detector = EventDetector()
721
-
722
- # Read and deduplicate data
723
- df = pd.read_excel(file_obj, sheet_name='Публикации')
724
- original_count = len(df)
725
- df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
726
- removed_count = original_count - len(df)
727
- dedup_message = f"Удалено {removed_count} дубликатов из {original_count} записей"
728
- logger.info(f"Removed {removed_count} duplicate entries")
729
-
730
- processed_rows = []
731
- total = len(df)
732
- batch_size = 3
733
-
734
- for batch_start in range(0, total, batch_size):
735
- if control.should_stop():
736
- # Create partial results if stopped
737
- if processed_rows:
738
- result_df = pd.DataFrame(processed_rows)
739
- output = create_output_file(result_df, file_obj)
740
- if output:
741
- fig_sentiment, fig_events = create_visualizations(result_df)
742
- return (
743
- result_df,
744
- fig_sentiment,
745
- fig_events,
746
- True, # Show download button
747
- output, # Raw bytes
748
- f"Обработка остановлена. Обработано {len(processed_rows)}/{total} строк",
749
- dedup_message
750
- )
751
- break
752
-
753
- batch_end = min(batch_start + batch_size, total)
754
- batch = df.iloc[batch_start:batch_end]
755
-
756
- for idx, row in batch.iterrows():
757
- try:
758
- text = str(row.get('Выдержки из текста', '')).strip()
759
- entity = str(row.get('Объект', '')).strip()
760
-
761
- if not text or not entity:
762
- continue
763
-
764
- # Process with GPU
765
- results = detector.process_text(text, entity)
766
-
767
- processed_rows.append({
768
- 'Объект': entity,
769
- 'Заголовок': str(row.get('Заголовок', '')),
770
- 'Translated': results['translated_text'],
771
- 'Sentiment': results['sentiment'],
772
- 'Impact': results['impact'],
773
- 'Reasoning': results['reasoning'],
774
- 'Event_Type': results['event_type'],
775
- 'Event_Summary': results['event_summary'],
776
- 'Выдержки из текста': text[:1000]
777
- })
778
-
779
- except Exception as e:
780
- logger.error(f"Error processing row {idx}: {str(e)}")
781
- continue
782
-
783
- # Create intermediate results
784
- if processed_rows:
785
- result_df = pd.DataFrame(processed_rows)
786
- output = create_output_file(result_df, file_obj)
787
- if output:
788
- fig_sentiment, fig_events = create_visualizations(result_df)
789
- yield (
790
- result_df,
791
- fig_sentiment,
792
- fig_events,
793
- True, # Show download button
794
- output, # Raw bytes
795
- f"Обработано {len(processed_rows)}/{total} строк",
796
- dedup_message
797
- )
798
-
799
- # Cleanup GPU resources after batch
800
- torch.cuda.empty_cache()
801
- time.sleep(2)
802
-
803
- # Create final results
804
  if processed_rows:
805
- final_df = pd.DataFrame(processed_rows)
806
- output = create_output_file(final_df, file_obj)
807
- if output:
808
- fig_sentiment, fig_events = create_visualizations(final_df)
 
809
  return (
810
- final_df,
811
  fig_sentiment,
812
  fig_events,
813
- True, # Show download button
814
- output, # Raw bytes
815
- "Обработка завершена!",
816
  dedup_message
817
  )
818
- else:
819
- return None, None, None, False, None, "Нет обработанных данных", ""
 
 
 
 
 
 
 
820
 
821
- except Exception as e:
822
- error_msg = f"Ошибка анализа: {str(e)}"
823
- logger.error(error_msg)
824
- gr.Error(error_msg)
825
- return None, None, None, False, None, error_msg, ""
826
- finally:
827
- if detector:
828
- detector.cleanup()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
829
 
830
  def trigger_download(show_button, file_content):
831
  """Handle download button visibility and file content"""
 
645
  control = ProcessControl()
646
 
647
  with gr.Blocks(theme=gr.themes.Soft()) as app:
648
+ gr.Markdown("# AI-анализ мониторинга новостей v.1.33")
649
 
650
  with gr.Row():
651
  file_input = gr.File(
 
704
  control.request_stop()
705
  return "Остановка обработки..."
706
 
707
+ @spaces.GPU(duration=300)
708
+ def analyze(file_bytes):
709
+ if file_bytes is None:
710
+ gr.Warning("Пожалуйста, загрузите файл")
711
+ return None, None, None, False, None, "Ожидание файла...", ""
712
+
713
+ try:
714
+ # Reset stop flag
715
+ control.reset()
716
+
717
+ file_obj = io.BytesIO(file_bytes)
718
+ logger.info("File loaded into BytesIO successfully")
719
+
720
+ detector = EventDetector()
721
+
722
+ # Read and deduplicate data
723
+ df = pd.read_excel(file_obj, sheet_name='Публикации')
724
+ original_count = len(df)
725
+ df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
726
+ removed_count = original_count - len(df)
727
+ dedup_message = f"Удалено {removed_count} дубликатов из {original_count} записей"
728
+ logger.info(f"Removed {removed_count} duplicate entries")
729
+
730
+ processed_rows = []
731
+ total = len(df)
732
+ batch_size = 3
733
+
734
+ for batch_start in range(0, total, batch_size):
735
+ if control.should_stop():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
736
  if processed_rows:
737
+ result_df = pd.DataFrame(processed_rows)
738
+ output_bytes_io = create_output_file(result_df, file_obj)
739
+ if output_bytes_io:
740
+ output_bytes = output_bytes_io.getvalue() # Convert BytesIO to bytes
741
+ fig_sentiment, fig_events = create_visualizations(result_df)
742
  return (
743
+ result_df,
744
  fig_sentiment,
745
  fig_events,
746
+ True,
747
+ output_bytes, # Return bytes instead of BytesIO
748
+ f"Обработка остановлена. Обработано {len(processed_rows)}/{total} строк",
749
  dedup_message
750
  )
751
+ break
752
+
753
+ batch_end = min(batch_start + batch_size, total)
754
+ batch = df.iloc[batch_start:batch_end]
755
+
756
+ for idx, row in batch.iterrows():
757
+ try:
758
+ text = str(row.get('Выдержки из текста', '')).strip()
759
+ entity = str(row.get('Объект', '')).strip()
760
 
761
+ if not text or not entity:
762
+ continue
763
+
764
+ # Process with GPU
765
+ results = detector.process_text(text, entity)
766
+
767
+ processed_rows.append({
768
+ 'Объект': entity,
769
+ 'Заголовок': str(row.get('Заголовок', '')),
770
+ 'Translated': results['translated_text'],
771
+ 'Sentiment': results['sentiment'],
772
+ 'Impact': results['impact'],
773
+ 'Reasoning': results['reasoning'],
774
+ 'Event_Type': results['event_type'],
775
+ 'Event_Summary': results['event_summary'],
776
+ 'Выдержки из текста': text[:1000]
777
+ })
778
+
779
+ except Exception as e:
780
+ logger.error(f"Error processing row {idx}: {str(e)}")
781
+ continue
782
+
783
+ # Create intermediate results
784
+ if processed_rows:
785
+ result_df = pd.DataFrame(processed_rows)
786
+ output_bytes_io = create_output_file(result_df, file_obj)
787
+ if output_bytes_io:
788
+ output_bytes = output_bytes_io.getvalue() # Convert BytesIO to bytes
789
+ fig_sentiment, fig_events = create_visualizations(result_df)
790
+ yield (
791
+ result_df,
792
+ fig_sentiment,
793
+ fig_events,
794
+ True,
795
+ output_bytes, # Return bytes instead of BytesIO
796
+ f"Обработано {len(processed_rows)}/{total} строк",
797
+ dedup_message
798
+ )
799
+
800
+ # Cleanup GPU resources after batch
801
+ torch.cuda.empty_cache()
802
+ time.sleep(2)
803
+
804
+ # Create final results
805
+ if processed_rows:
806
+ final_df = pd.DataFrame(processed_rows)
807
+ output_bytes_io = create_output_file(final_df, file_obj)
808
+ if output_bytes_io:
809
+ output_bytes = output_bytes_io.getvalue() # Convert BytesIO to bytes
810
+ fig_sentiment, fig_events = create_visualizations(final_df)
811
+ return (
812
+ final_df,
813
+ fig_sentiment,
814
+ fig_events,
815
+ True,
816
+ output_bytes, # Return bytes instead of BytesIO
817
+ "Обработка завершена!",
818
+ dedup_message
819
+ )
820
+ else:
821
+ return None, None, None, False, None, "Нет обработанных данных", ""
822
+
823
+ except Exception as e:
824
+ error_msg = f"Ошибка анализа: {str(e)}"
825
+ logger.error(error_msg)
826
+ gr.Error(error_msg)
827
+ return None, None, None, False, None, error_msg, ""
828
+ finally:
829
+ if detector:
830
+ detector.cleanup()
831
 
832
  def trigger_download(show_button, file_content):
833
  """Handle download button visibility and file content"""