Spaces:
Sleeping
Sleeping
Commit
·
680c2d5
1
Parent(s):
feb6866
v.1.20
Browse files
app.py
CHANGED
@@ -399,7 +399,7 @@ def create_interface():
|
|
399 |
control = ProcessControl()
|
400 |
|
401 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
402 |
-
gr.Markdown("# AI-анализ мониторинга новостей v.1.
|
403 |
|
404 |
with gr.Row():
|
405 |
file_input = gr.File(
|
@@ -445,7 +445,8 @@ def create_interface():
|
|
445 |
def stop_processing():
|
446 |
control.request_stop()
|
447 |
return "Остановка обработки..."
|
448 |
-
|
|
|
449 |
def analyze(file_bytes):
|
450 |
if file_bytes is None:
|
451 |
gr.Warning("Пожалуйста, загрузите файл")
|
@@ -458,75 +459,87 @@ def create_interface():
|
|
458 |
file_obj = io.BytesIO(file_bytes)
|
459 |
logger.info("File loaded into BytesIO successfully")
|
460 |
|
461 |
-
|
462 |
-
yield None, None, None, progress_status
|
463 |
|
464 |
-
#
|
465 |
-
|
466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
467 |
|
468 |
-
#
|
|
|
469 |
original_count = len(df)
|
470 |
df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
|
471 |
logger.info(f"Removed {original_count - len(df)} duplicate entries")
|
472 |
|
473 |
-
detector = EventDetector()
|
474 |
-
detector.control = control # Pass control object
|
475 |
processed_rows = []
|
476 |
total = len(df)
|
|
|
477 |
|
478 |
-
|
479 |
-
if not detector.initialize_models():
|
480 |
-
raise Exception("Failed to initialize models")
|
481 |
-
|
482 |
-
for idx, row in df.iterrows():
|
483 |
if control.should_stop():
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
)
|
489 |
-
return
|
490 |
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
|
496 |
-
|
497 |
-
|
498 |
-
continue
|
499 |
-
|
500 |
-
event_type, event_summary = detector.detect_events(text, entity)
|
501 |
-
sentiment = detector.analyze_sentiment(text)
|
502 |
-
|
503 |
-
processed_rows.append({
|
504 |
-
'Объект': entity,
|
505 |
-
'Заголовок': str(row.get('Заголовок', '')),
|
506 |
-
'Sentiment': sentiment,
|
507 |
-
'Event_Type': event_type,
|
508 |
-
'Event_Summary': event_summary,
|
509 |
-
'Текст': text[:1000]
|
510 |
-
})
|
511 |
-
|
512 |
-
if idx % 5 == 0:
|
513 |
-
progress_status = f"Обработано {idx + 1}/{total} строк"
|
514 |
-
yield None, None, None, progress_status
|
515 |
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
529 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
530 |
except Exception as e:
|
531 |
error_msg = f"Ошибка анализа: {str(e)}"
|
532 |
logger.error(error_msg)
|
@@ -544,4 +557,4 @@ def create_interface():
|
|
544 |
|
545 |
if __name__ == "__main__":
|
546 |
app = create_interface()
|
547 |
-
app.launch(share=True)
|
|
|
399 |
control = ProcessControl()
|
400 |
|
401 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
402 |
+
gr.Markdown("# AI-анализ мониторинга новостей v.1.20")
|
403 |
|
404 |
with gr.Row():
|
405 |
file_input = gr.File(
|
|
|
445 |
def stop_processing():
|
446 |
control.request_stop()
|
447 |
return "Остановка обработки..."
|
448 |
+
|
449 |
+
@spaces.GPU(duration=300) # 5 minutes duration for the entire analysis
|
450 |
def analyze(file_bytes):
|
451 |
if file_bytes is None:
|
452 |
gr.Warning("Пожалуйста, загрузите файл")
|
|
|
459 |
file_obj = io.BytesIO(file_bytes)
|
460 |
logger.info("File loaded into BytesIO successfully")
|
461 |
|
462 |
+
detector = EventDetector()
|
|
|
463 |
|
464 |
+
# Initialize models with GPU
|
465 |
+
@spaces.GPU(duration=30)
|
466 |
+
def init_models():
|
467 |
+
return detector.initialize_models()
|
468 |
+
|
469 |
+
if not init_models():
|
470 |
+
raise Exception("Failed to initialize models")
|
471 |
+
|
472 |
+
# Process in batches with GPU allocation
|
473 |
+
@spaces.GPU(duration=20)
|
474 |
+
def process_batch(batch, entity):
|
475 |
+
event_type, event_summary = detector.detect_events(batch, entity)
|
476 |
+
time.sleep(1) # Wait between GPU operations
|
477 |
+
sentiment = detector.analyze_sentiment(batch)
|
478 |
+
return event_type, event_summary, sentiment
|
479 |
|
480 |
+
# Read and deduplicate data
|
481 |
+
df = pd.read_excel(file_obj, sheet_name='Публикации')
|
482 |
original_count = len(df)
|
483 |
df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
|
484 |
logger.info(f"Removed {original_count - len(df)} duplicate entries")
|
485 |
|
|
|
|
|
486 |
processed_rows = []
|
487 |
total = len(df)
|
488 |
+
batch_size = 3
|
489 |
|
490 |
+
for batch_start in range(0, total, batch_size):
|
|
|
|
|
|
|
|
|
491 |
if control.should_stop():
|
492 |
+
break
|
493 |
+
|
494 |
+
batch_end = min(batch_start + batch_size, total)
|
495 |
+
batch = df.iloc[batch_start:batch_end]
|
|
|
|
|
496 |
|
497 |
+
for idx, row in batch.iterrows():
|
498 |
+
try:
|
499 |
+
text = str(row.get('Выдержки из текста', '')).strip()
|
500 |
+
entity = str(row.get('Объект', '')).strip()
|
501 |
|
502 |
+
if not text or not entity:
|
503 |
+
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
504 |
|
505 |
+
# Process with GPU
|
506 |
+
event_type, event_summary, sentiment = process_batch(text, entity)
|
507 |
+
|
508 |
+
processed_rows.append({
|
509 |
+
'Объект': entity,
|
510 |
+
'Заголовок': str(row.get('Заголовок', '')),
|
511 |
+
'Sentiment': sentiment,
|
512 |
+
'Event_Type': event_type,
|
513 |
+
'Event_Summary': event_summary,
|
514 |
+
'Текст': text[:1000]
|
515 |
+
})
|
516 |
+
|
517 |
+
except Exception as e:
|
518 |
+
logger.error(f"Error processing row {idx}: {str(e)}")
|
519 |
+
continue
|
520 |
+
|
521 |
+
# Create intermediate results
|
522 |
+
if processed_rows:
|
523 |
+
result_df = pd.DataFrame(processed_rows)
|
524 |
+
fig_sentiment, fig_events = create_visualizations(result_df)
|
525 |
+
yield (
|
526 |
+
result_df,
|
527 |
+
fig_sentiment,
|
528 |
+
fig_events,
|
529 |
+
f"Обработано {len(processed_rows)}/{total} строк"
|
530 |
+
)
|
531 |
+
|
532 |
+
# Cleanup GPU resources after batch
|
533 |
+
torch.cuda.empty_cache()
|
534 |
+
time.sleep(2)
|
535 |
|
536 |
+
if processed_rows:
|
537 |
+
final_df = pd.DataFrame(processed_rows)
|
538 |
+
fig_sentiment, fig_events = create_visualizations(final_df)
|
539 |
+
return final_df, fig_sentiment, fig_events, "Обработка завершена!"
|
540 |
+
else:
|
541 |
+
return None, None, None, "Нет обработанных данных"
|
542 |
+
|
543 |
except Exception as e:
|
544 |
error_msg = f"Ошибка анализа: {str(e)}"
|
545 |
logger.error(error_msg)
|
|
|
557 |
|
558 |
if __name__ == "__main__":
|
559 |
app = create_interface()
|
560 |
+
app.launch(share=True)
|