Spaces:
Running
Running
Commit
·
59bee7f
1
Parent(s):
78ed556
3.56
Browse files
app.py
CHANGED
@@ -19,7 +19,6 @@ import time
|
|
19 |
from tenacity import retry, stop_after_attempt, wait_exponential
|
20 |
from typing import Optional
|
21 |
import torch
|
22 |
-
|
23 |
from transformers import (
|
24 |
pipeline,
|
25 |
AutoModelForSeq2SeqLM,
|
@@ -293,14 +292,14 @@ class ProcessingUI:
|
|
293 |
# Create control buttons
|
294 |
col1, col2 = st.columns(2)
|
295 |
with col1:
|
296 |
-
if st.button("⏸️
|
297 |
if st.session_state.control.is_paused():
|
298 |
st.session_state.control.resume()
|
299 |
else:
|
300 |
st.session_state.control.pause()
|
301 |
|
302 |
with col2:
|
303 |
-
if st.button("⏹️
|
304 |
st.session_state.control.stop()
|
305 |
|
306 |
self.progress_bar = st.progress(0)
|
@@ -309,7 +308,7 @@ class ProcessingUI:
|
|
309 |
def update_progress(self, current, total):
|
310 |
progress = current / total
|
311 |
self.progress_bar.progress(progress)
|
312 |
-
self.status.text(f"
|
313 |
|
314 |
def show_negative(self, entity, headline, analysis, impact=None):
|
315 |
with st.session_state.negative_container:
|
@@ -447,6 +446,29 @@ class TranslationSystem:
|
|
447 |
st.warning(f"Translation error: {str(e)}")
|
448 |
return text
|
449 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
450 |
|
451 |
|
452 |
def process_file(uploaded_file, model_choice, translation_method=None):
|
@@ -484,7 +506,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
484 |
df = df.groupby('Объект', group_keys=False).apply(
|
485 |
lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
|
486 |
).reset_index(drop=True)
|
487 |
-
st.write(f"
|
488 |
|
489 |
# Process rows
|
490 |
total_rows = len(df)
|
@@ -493,12 +515,12 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
493 |
for idx, row in df.iterrows():
|
494 |
# Check for stop/pause
|
495 |
if st.session_state.control.is_stopped():
|
496 |
-
st.warning("
|
497 |
break
|
498 |
|
499 |
st.session_state.control.wait_if_paused()
|
500 |
if st.session_state.control.is_paused():
|
501 |
-
st.info("
|
502 |
continue
|
503 |
|
504 |
try:
|
@@ -538,7 +560,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
538 |
impact = "Неопределенный эффект"
|
539 |
reasoning = "Error in impact estimation"
|
540 |
if 'rate limit' in str(e).lower():
|
541 |
-
st.warning("
|
542 |
|
543 |
df.at[idx, 'Impact'] = impact
|
544 |
df.at[idx, 'Reasoning'] = reasoning
|
@@ -556,18 +578,18 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
556 |
ui.update_progress(processed_rows, total_rows)
|
557 |
|
558 |
except Exception as e:
|
559 |
-
st.warning(f"
|
560 |
continue
|
561 |
|
562 |
time.sleep(0.1)
|
563 |
|
564 |
# Handle stopped processing
|
565 |
if st.session_state.control.is_stopped() and len(df) > 0:
|
566 |
-
st.warning("
|
567 |
-
if st.button("
|
568 |
output = create_output_file(df, uploaded_file, llm)
|
569 |
st.download_button(
|
570 |
-
label="📊
|
571 |
data=output,
|
572 |
file_name="partial_analysis.xlsx",
|
573 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
@@ -576,7 +598,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
576 |
return df
|
577 |
|
578 |
except Exception as e:
|
579 |
-
st.error(f"
|
580 |
return None
|
581 |
|
582 |
def translate_reasoning_to_russian(llm, text):
|
@@ -940,12 +962,12 @@ def main():
|
|
940 |
st.set_page_config(layout="wide")
|
941 |
|
942 |
with st.sidebar:
|
943 |
-
st.title("::: AI-анализ мониторинга новостей (v.3.
|
944 |
st.subheader("по материалам СКАН-ИНТЕРФАКС")
|
945 |
|
946 |
model_choice = st.radio(
|
947 |
"Выберите модель для анализа:",
|
948 |
-
["Qwen2.5-Coder", "Groq (llama-3.1-70b)", "ChatGPT-4-mini"
|
949 |
key="model_selector",
|
950 |
help="Выберите модель для анализа новостей"
|
951 |
)
|
@@ -978,7 +1000,7 @@ def main():
|
|
978 |
|
979 |
with col1:
|
980 |
# Area for real-time updates
|
981 |
-
st.subheader("
|
982 |
st.markdown("""
|
983 |
<style>
|
984 |
.stProgress .st-bo {
|
@@ -1001,15 +1023,15 @@ def main():
|
|
1001 |
|
1002 |
with col2:
|
1003 |
# Area for statistics
|
1004 |
-
st.subheader("
|
1005 |
if st.session_state.processed_df is not None:
|
1006 |
-
st.metric("
|
1007 |
-
st.metric("
|
1008 |
len(st.session_state.processed_df[
|
1009 |
st.session_state.processed_df['Sentiment'] == 'Negative'
|
1010 |
])
|
1011 |
)
|
1012 |
-
st.metric("
|
1013 |
len(st.session_state.processed_df[
|
1014 |
st.session_state.processed_df['Event_Type'] != 'Нет'
|
1015 |
])
|
@@ -1030,29 +1052,29 @@ def main():
|
|
1030 |
elapsed_time = format_elapsed_time(end_time - start_time)
|
1031 |
|
1032 |
# Show results
|
1033 |
-
st.subheader("
|
1034 |
|
1035 |
# Display statistics
|
1036 |
stats_cols = st.columns(4)
|
1037 |
with stats_cols[0]:
|
1038 |
-
st.metric("
|
1039 |
with stats_cols[1]:
|
1040 |
-
st.metric("
|
1041 |
len(st.session_state.processed_df[
|
1042 |
st.session_state.processed_df['Sentiment'] == 'Negative'
|
1043 |
])
|
1044 |
)
|
1045 |
with stats_cols[2]:
|
1046 |
-
st.metric("
|
1047 |
len(st.session_state.processed_df[
|
1048 |
st.session_state.processed_df['Event_Type'] != 'Нет'
|
1049 |
])
|
1050 |
)
|
1051 |
with stats_cols[3]:
|
1052 |
-
st.metric("
|
1053 |
|
1054 |
# Show data previews
|
1055 |
-
with st.expander("📊
|
1056 |
preview_cols = ['Объект', 'Заголовок', 'Sentiment', 'Event_Type']
|
1057 |
st.dataframe(
|
1058 |
st.session_state.processed_df[preview_cols],
|
@@ -1067,15 +1089,15 @@ def main():
|
|
1067 |
)
|
1068 |
|
1069 |
st.download_button(
|
1070 |
-
label="📥
|
1071 |
data=output,
|
1072 |
-
file_name="
|
1073 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
1074 |
key='download_button'
|
1075 |
)
|
1076 |
|
1077 |
except Exception as e:
|
1078 |
-
st.error(f"
|
1079 |
st.session_state.processed_df = None
|
1080 |
|
1081 |
|
|
|
19 |
from tenacity import retry, stop_after_attempt, wait_exponential
|
20 |
from typing import Optional
|
21 |
import torch
|
|
|
22 |
from transformers import (
|
23 |
pipeline,
|
24 |
AutoModelForSeq2SeqLM,
|
|
|
292 |
# Create control buttons
|
293 |
col1, col2 = st.columns(2)
|
294 |
with col1:
|
295 |
+
if st.button("⏸️ Пауза/Возобновить" if not st.session_state.control.is_paused() else "▶️ Возобновить", key="pause_button"):
|
296 |
if st.session_state.control.is_paused():
|
297 |
st.session_state.control.resume()
|
298 |
else:
|
299 |
st.session_state.control.pause()
|
300 |
|
301 |
with col2:
|
302 |
+
if st.button("⏹️ Стоп и всё", key="stop_button"):
|
303 |
st.session_state.control.stop()
|
304 |
|
305 |
self.progress_bar = st.progress(0)
|
|
|
308 |
def update_progress(self, current, total):
    """Refresh the progress bar and the status line.

    Args:
        current: Number of messages handled so far.
        total: Number of messages expected overall.

    A missing or non-positive ``total`` is displayed as 0% instead of
    raising ``ZeroDivisionError``. The computed fraction is clamped to
    ``[0.0, 1.0]`` because the progress element rejects floats outside
    that range.
    """
    if total and total > 0:
        fraction = min(max(current / total, 0.0), 1.0)
    else:
        # Nothing to process (or unknown size): show an empty bar.
        fraction = 0.0
    self.progress_bar.progress(fraction)
    self.status.text(f"Обрабатываем {current} из {total} сообщений...")
|
312 |
|
313 |
def show_negative(self, entity, headline, analysis, impact=None):
|
314 |
with st.session_state.negative_container:
|
|
|
446 |
st.warning(f"Translation error: {str(e)}")
|
447 |
return text
|
448 |
|
449 |
+
def _split_into_chunks(self, text, max_length):
|
450 |
+
sentences = []
|
451 |
+
for s in text.replace('!', '.').replace('?', '.').split('.'):
|
452 |
+
s = s.strip()
|
453 |
+
if s:
|
454 |
+
if len(s) > max_length:
|
455 |
+
# Split long sentences into smaller chunks
|
456 |
+
words = s.split()
|
457 |
+
current_chunk = []
|
458 |
+
current_length = 0
|
459 |
+
for word in words:
|
460 |
+
if current_length + len(word) > max_length:
|
461 |
+
sentences.append(' '.join(current_chunk))
|
462 |
+
current_chunk = [word]
|
463 |
+
current_length = len(word)
|
464 |
+
else:
|
465 |
+
current_chunk.append(word)
|
466 |
+
current_length += len(word) + 1
|
467 |
+
if current_chunk:
|
468 |
+
sentences.append(' '.join(current_chunk))
|
469 |
+
else:
|
470 |
+
sentences.append(s)
|
471 |
+
|
472 |
|
473 |
|
474 |
def process_file(uploaded_file, model_choice, translation_method=None):
|
|
|
506 |
df = df.groupby('Объект', group_keys=False).apply(
|
507 |
lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
|
508 |
).reset_index(drop=True)
|
509 |
+
st.write(f"Из {original_count} сообщений удалено {original_count - len(df)} дубликатов.")
|
510 |
|
511 |
# Process rows
|
512 |
total_rows = len(df)
|
|
|
515 |
for idx, row in df.iterrows():
|
516 |
# Check for stop/pause
|
517 |
if st.session_state.control.is_stopped():
|
518 |
+
st.warning("Обработку остановили")
|
519 |
break
|
520 |
|
521 |
st.session_state.control.wait_if_paused()
|
522 |
if st.session_state.control.is_paused():
|
523 |
+
st.info("Обработка на паузе. Можно возобновить.")
|
524 |
continue
|
525 |
|
526 |
try:
|
|
|
560 |
impact = "Неопределенный эффект"
|
561 |
reasoning = "Error in impact estimation"
|
562 |
if 'rate limit' in str(e).lower():
|
563 |
+
st.warning("Лимит запросов исчерпался. Иду на fallback.")
|
564 |
|
565 |
df.at[idx, 'Impact'] = impact
|
566 |
df.at[idx, 'Reasoning'] = reasoning
|
|
|
578 |
ui.update_progress(processed_rows, total_rows)
|
579 |
|
580 |
except Exception as e:
|
581 |
+
st.warning(f"Ошибка в обработке ряда {idx + 1}: {str(e)}")
|
582 |
continue
|
583 |
|
584 |
time.sleep(0.1)
|
585 |
|
586 |
# Handle stopped processing
|
587 |
if st.session_state.control.is_stopped() and len(df) > 0:
|
588 |
+
st.warning("Обработку остановили. Показываю частичные результаты.")
|
589 |
+
if st.button("Скачать частичный результат"):
|
590 |
output = create_output_file(df, uploaded_file, llm)
|
591 |
st.download_button(
|
592 |
+
label="📊 Скачать частичный результат",
|
593 |
data=output,
|
594 |
file_name="partial_analysis.xlsx",
|
595 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
|
598 |
return df
|
599 |
|
600 |
except Exception as e:
|
601 |
+
st.error(f"Ошибка в обработке файла: {str(e)}")
|
602 |
return None
|
603 |
|
604 |
def translate_reasoning_to_russian(llm, text):
|
|
|
962 |
st.set_page_config(layout="wide")
|
963 |
|
964 |
with st.sidebar:
|
965 |
+
st.title("::: AI-анализ мониторинга новостей (v.3.56):::")
|
966 |
st.subheader("по материалам СКАН-ИНТЕРФАКС")
|
967 |
|
968 |
model_choice = st.radio(
|
969 |
"Выберите модель для анализа:",
|
970 |
+
["Local-MT5", "Qwen2.5-Coder", "Groq (llama-3.1-70b)", "ChatGPT-4-mini"],
|
971 |
key="model_selector",
|
972 |
help="Выберите модель для анализа новостей"
|
973 |
)
|
|
|
1000 |
|
1001 |
with col1:
|
1002 |
# Area for real-time updates
|
1003 |
+
st.subheader("Что найдено, сообщаю:")
|
1004 |
st.markdown("""
|
1005 |
<style>
|
1006 |
.stProgress .st-bo {
|
|
|
1023 |
|
1024 |
with col2:
|
1025 |
# Area for statistics
|
1026 |
+
st.subheader("Статистика")
|
1027 |
if st.session_state.processed_df is not None:
|
1028 |
+
st.metric("Всего статей", len(st.session_state.processed_df))
|
1029 |
+
st.metric("Из них негативных",
|
1030 |
len(st.session_state.processed_df[
|
1031 |
st.session_state.processed_df['Sentiment'] == 'Negative'
|
1032 |
])
|
1033 |
)
|
1034 |
+
st.metric("Событий обнаружено",
|
1035 |
len(st.session_state.processed_df[
|
1036 |
st.session_state.processed_df['Event_Type'] != 'Нет'
|
1037 |
])
|
|
|
1052 |
elapsed_time = format_elapsed_time(end_time - start_time)
|
1053 |
|
1054 |
# Show results
|
1055 |
+
st.subheader("Итого по результатам")
|
1056 |
|
1057 |
# Display statistics
|
1058 |
stats_cols = st.columns(4)
|
1059 |
with stats_cols[0]:
|
1060 |
+
st.metric("Всего обработано", len(st.session_state.processed_df))
|
1061 |
with stats_cols[1]:
|
1062 |
+
st.metric("Негативных",
|
1063 |
len(st.session_state.processed_df[
|
1064 |
st.session_state.processed_df['Sentiment'] == 'Negative'
|
1065 |
])
|
1066 |
)
|
1067 |
with stats_cols[2]:
|
1068 |
+
st.metric("Событий обнаружено",
|
1069 |
len(st.session_state.processed_df[
|
1070 |
st.session_state.processed_df['Event_Type'] != 'Нет'
|
1071 |
])
|
1072 |
)
|
1073 |
with stats_cols[3]:
|
1074 |
+
st.metric("Время обработки составило", elapsed_time)
|
1075 |
|
1076 |
# Show data previews
|
1077 |
+
with st.expander("📊 Предпросмотр данных", expanded=True):
|
1078 |
preview_cols = ['Объект', 'Заголовок', 'Sentiment', 'Event_Type']
|
1079 |
st.dataframe(
|
1080 |
st.session_state.processed_df[preview_cols],
|
|
|
1089 |
)
|
1090 |
|
1091 |
st.download_button(
|
1092 |
+
label="📥 Полный отчет - загрузить",
|
1093 |
data=output,
|
1094 |
+
file_name="результаты_анализа.xlsx",
|
1095 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
1096 |
key='download_button'
|
1097 |
)
|
1098 |
|
1099 |
except Exception as e:
|
1100 |
+
st.error(f"Ошибочка в обработке файла: {str(e)}")
|
1101 |
st.session_state.processed_df = None
|
1102 |
|
1103 |
|