pentarosarium committed on
Commit
59bee7f
·
1 Parent(s): 78ed556
Files changed (1) hide show
  1. app.py +51 -29
app.py CHANGED
@@ -19,7 +19,6 @@ import time
19
  from tenacity import retry, stop_after_attempt, wait_exponential
20
  from typing import Optional
21
  import torch
22
-
23
  from transformers import (
24
  pipeline,
25
  AutoModelForSeq2SeqLM,
@@ -293,14 +292,14 @@ class ProcessingUI:
293
  # Create control buttons
294
  col1, col2 = st.columns(2)
295
  with col1:
296
- if st.button("⏸️ Pause/Resume" if not st.session_state.control.is_paused() else "▶️ Resume", key="pause_button"):
297
  if st.session_state.control.is_paused():
298
  st.session_state.control.resume()
299
  else:
300
  st.session_state.control.pause()
301
 
302
  with col2:
303
- if st.button("⏹️ Stop", key="stop_button"):
304
  st.session_state.control.stop()
305
 
306
  self.progress_bar = st.progress(0)
@@ -309,7 +308,7 @@ class ProcessingUI:
309
def update_progress(self, current, total):
    """Refresh the progress bar and the status line.

    Args:
        current: Number of items processed so far.
        total: Total number of items to process.
    """
    # Guard against an empty workload (total == 0) and clamp the ratio:
    # the Streamlit progress element only accepts floats in [0.0, 1.0].
    fraction = current / total if total else 0.0
    fraction = min(max(fraction, 0.0), 1.0)
    self.progress_bar.progress(fraction)
    self.status.text(f"Processing {current} of {total} items...")
313
 
314
  def show_negative(self, entity, headline, analysis, impact=None):
315
  with st.session_state.negative_container:
@@ -447,6 +446,29 @@ class TranslationSystem:
447
  st.warning(f"Translation error: {str(e)}")
448
  return text
449
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
 
451
 
452
  def process_file(uploaded_file, model_choice, translation_method=None):
@@ -484,7 +506,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
484
  df = df.groupby('Объект', group_keys=False).apply(
485
  lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
486
  ).reset_index(drop=True)
487
- st.write(f"Removed {original_count - len(df)} duplicates.")
488
 
489
  # Process rows
490
  total_rows = len(df)
@@ -493,12 +515,12 @@ def process_file(uploaded_file, model_choice, translation_method=None):
493
  for idx, row in df.iterrows():
494
  # Check for stop/pause
495
  if st.session_state.control.is_stopped():
496
- st.warning("Processing stopped by user")
497
  break
498
 
499
  st.session_state.control.wait_if_paused()
500
  if st.session_state.control.is_paused():
501
- st.info("Processing paused... Click Resume to continue")
502
  continue
503
 
504
  try:
@@ -538,7 +560,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
538
  impact = "Неопределенный эффект"
539
  reasoning = "Error in impact estimation"
540
  if 'rate limit' in str(e).lower():
541
- st.warning("Rate limit reached. Using fallback values.")
542
 
543
  df.at[idx, 'Impact'] = impact
544
  df.at[idx, 'Reasoning'] = reasoning
@@ -556,18 +578,18 @@ def process_file(uploaded_file, model_choice, translation_method=None):
556
  ui.update_progress(processed_rows, total_rows)
557
 
558
  except Exception as e:
559
- st.warning(f"Error processing row {idx + 1}: {str(e)}")
560
  continue
561
 
562
  time.sleep(0.1)
563
 
564
  # Handle stopped processing
565
  if st.session_state.control.is_stopped() and len(df) > 0:
566
- st.warning("Processing was stopped. Showing partial results.")
567
- if st.button("Download Partial Results"):
568
  output = create_output_file(df, uploaded_file, llm)
569
  st.download_button(
570
- label="📊 Download Partial Results",
571
  data=output,
572
  file_name="partial_analysis.xlsx",
573
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
@@ -576,7 +598,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
576
  return df
577
 
578
  except Exception as e:
579
- st.error(f"Error processing file: {str(e)}")
580
  return None
581
 
582
  def translate_reasoning_to_russian(llm, text):
@@ -940,12 +962,12 @@ def main():
940
  st.set_page_config(layout="wide")
941
 
942
  with st.sidebar:
943
- st.title("::: AI-анализ мониторинга новостей (v.3.54):::")
944
  st.subheader("по материалам СКАН-ИНТЕРФАКС")
945
 
946
  model_choice = st.radio(
947
  "Выберите модель для анализа:",
948
- ["Qwen2.5-Coder", "Groq (llama-3.1-70b)", "ChatGPT-4-mini", "Local-MT5"],
949
  key="model_selector",
950
  help="Выберите модель для анализа новостей"
951
  )
@@ -978,7 +1000,7 @@ def main():
978
 
979
  with col1:
980
  # Area for real-time updates
981
- st.subheader("Live Updates")
982
  st.markdown("""
983
  <style>
984
  .stProgress .st-bo {
@@ -1001,15 +1023,15 @@ def main():
1001
 
1002
  with col2:
1003
  # Area for statistics
1004
- st.subheader("Statistics")
1005
  if st.session_state.processed_df is not None:
1006
- st.metric("Total Items", len(st.session_state.processed_df))
1007
- st.metric("Negative Items",
1008
  len(st.session_state.processed_df[
1009
  st.session_state.processed_df['Sentiment'] == 'Negative'
1010
  ])
1011
  )
1012
- st.metric("Events Detected",
1013
  len(st.session_state.processed_df[
1014
  st.session_state.processed_df['Event_Type'] != 'Нет'
1015
  ])
@@ -1030,29 +1052,29 @@ def main():
1030
  elapsed_time = format_elapsed_time(end_time - start_time)
1031
 
1032
  # Show results
1033
- st.subheader("Results Summary")
1034
 
1035
  # Display statistics
1036
  stats_cols = st.columns(4)
1037
  with stats_cols[0]:
1038
- st.metric("Total Processed", len(st.session_state.processed_df))
1039
  with stats_cols[1]:
1040
- st.metric("Negative Items",
1041
  len(st.session_state.processed_df[
1042
  st.session_state.processed_df['Sentiment'] == 'Negative'
1043
  ])
1044
  )
1045
  with stats_cols[2]:
1046
- st.metric("Events Detected",
1047
  len(st.session_state.processed_df[
1048
  st.session_state.processed_df['Event_Type'] != 'Нет'
1049
  ])
1050
  )
1051
  with stats_cols[3]:
1052
- st.metric("Processing Time", elapsed_time)
1053
 
1054
  # Show data previews
1055
- with st.expander("📊 Data Preview", expanded=True):
1056
  preview_cols = ['Объект', 'Заголовок', 'Sentiment', 'Event_Type']
1057
  st.dataframe(
1058
  st.session_state.processed_df[preview_cols],
@@ -1067,15 +1089,15 @@ def main():
1067
  )
1068
 
1069
  st.download_button(
1070
- label="📥 Download Full Report",
1071
  data=output,
1072
- file_name="analysis_report.xlsx",
1073
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1074
  key='download_button'
1075
  )
1076
 
1077
  except Exception as e:
1078
- st.error(f"Error processing file: {str(e)}")
1079
  st.session_state.processed_df = None
1080
 
1081
 
 
19
  from tenacity import retry, stop_after_attempt, wait_exponential
20
  from typing import Optional
21
  import torch
 
22
  from transformers import (
23
  pipeline,
24
  AutoModelForSeq2SeqLM,
 
292
  # Create control buttons
293
  col1, col2 = st.columns(2)
294
  with col1:
295
+ if st.button("⏸️ Пауза/Возобновить" if not st.session_state.control.is_paused() else "▶️ Возобновить", key="pause_button"):
296
  if st.session_state.control.is_paused():
297
  st.session_state.control.resume()
298
  else:
299
  st.session_state.control.pause()
300
 
301
  with col2:
302
+ if st.button("⏹️ Стоп и всё", key="stop_button"):
303
  st.session_state.control.stop()
304
 
305
  self.progress_bar = st.progress(0)
 
308
def update_progress(self, current, total):
    """Refresh the progress bar and the status line.

    Args:
        current: Number of items processed so far.
        total: Total number of items to process.
    """
    # Guard against an empty workload (total == 0) and clamp the ratio:
    # the Streamlit progress element only accepts floats in [0.0, 1.0].
    fraction = current / total if total else 0.0
    fraction = min(max(fraction, 0.0), 1.0)
    self.progress_bar.progress(fraction)
    self.status.text(f"Обрабатываем {current} из {total} сообщений...")
312
 
313
  def show_negative(self, entity, headline, analysis, impact=None):
314
  with st.session_state.negative_container:
 
446
  st.warning(f"Translation error: {str(e)}")
447
  return text
448
 
449
def _split_into_chunks(self, text, max_length):
    """Split text into sentence-based pieces of at most ``max_length`` chars.

    The text is cut on '.', '!' and '?' (the terminators themselves are
    dropped). A sentence longer than ``max_length`` is further split on
    whitespace into word groups whose joined length stays within the limit.
    A single word that is itself longer than ``max_length`` is emitted whole
    rather than being cut mid-word.

    Args:
        text: Input text; empty or None yields an empty list.
        max_length: Maximum number of characters per returned piece.

    Returns:
        List of non-empty, stripped text pieces in original order.
    """
    if not text:
        return []

    chunks = []
    # Treat '!' and '?' as sentence terminators as well as '.'.
    normalized = text.replace('!', '.').replace('?', '.')
    for sentence in normalized.split('.'):
        sentence = sentence.strip()
        if not sentence:
            continue
        if len(sentence) <= max_length:
            chunks.append(sentence)
            continue

        # Sentence is too long: pack whole words greedily.
        current_words = []
        current_length = 0
        for word in sentence.split():
            # Account for the joining space whenever the group is non-empty.
            projected = current_length + len(word) + (1 if current_words else 0)
            if current_words and projected > max_length:
                chunks.append(' '.join(current_words))
                current_words = [word]
                current_length = len(word)
            else:
                # Also taken for an oversized first word, so an empty
                # group is never flushed as ''.
                current_words.append(word)
                current_length = projected
        if current_words:
            chunks.append(' '.join(current_words))

    return chunks
471
+
472
 
473
 
474
  def process_file(uploaded_file, model_choice, translation_method=None):
 
506
  df = df.groupby('Объект', group_keys=False).apply(
507
  lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
508
  ).reset_index(drop=True)
509
+ st.write(f"Из {original_count} сообщений удалено {original_count - len(df)} дубликатов.")
510
 
511
  # Process rows
512
  total_rows = len(df)
 
515
  for idx, row in df.iterrows():
516
  # Check for stop/pause
517
  if st.session_state.control.is_stopped():
518
+ st.warning("Обработку остановили")
519
  break
520
 
521
  st.session_state.control.wait_if_paused()
522
  if st.session_state.control.is_paused():
523
+ st.info("Обработка на паузе. Можно возобновить.")
524
  continue
525
 
526
  try:
 
560
  impact = "Неопределенный эффект"
561
  reasoning = "Error in impact estimation"
562
  if 'rate limit' in str(e).lower():
563
+ st.warning("Лимит запросов исчерпался. Иду на fallback.")
564
 
565
  df.at[idx, 'Impact'] = impact
566
  df.at[idx, 'Reasoning'] = reasoning
 
578
  ui.update_progress(processed_rows, total_rows)
579
 
580
  except Exception as e:
581
+ st.warning(f"Ошибка в обработке ряда {idx + 1}: {str(e)}")
582
  continue
583
 
584
  time.sleep(0.1)
585
 
586
  # Handle stopped processing
587
  if st.session_state.control.is_stopped() and len(df) > 0:
588
+ st.warning("Обработку остановили. Показываю частичные результаты.")
589
+ if st.button("Скачать частичный результат"):
590
  output = create_output_file(df, uploaded_file, llm)
591
  st.download_button(
592
+ label="📊 Скачать частичный результат",
593
  data=output,
594
  file_name="partial_analysis.xlsx",
595
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
 
598
  return df
599
 
600
  except Exception as e:
601
+ st.error(f"Ошибка в обработке файла: {str(e)}")
602
  return None
603
 
604
  def translate_reasoning_to_russian(llm, text):
 
962
  st.set_page_config(layout="wide")
963
 
964
  with st.sidebar:
965
+ st.title("::: AI-анализ мониторинга новостей (v.3.56):::")
966
  st.subheader("по материалам СКАН-ИНТЕРФАКС")
967
 
968
  model_choice = st.radio(
969
  "Выберите модель для анализа:",
970
+ ["Local-MT5", "Qwen2.5-Coder", "Groq (llama-3.1-70b)", "ChatGPT-4-mini"],
971
  key="model_selector",
972
  help="Выберите модель для анализа новостей"
973
  )
 
1000
 
1001
  with col1:
1002
  # Area for real-time updates
1003
+ st.subheader("Что найдено, сообщаю:")
1004
  st.markdown("""
1005
  <style>
1006
  .stProgress .st-bo {
 
1023
 
1024
  with col2:
1025
  # Area for statistics
1026
+ st.subheader("Статистика")
1027
  if st.session_state.processed_df is not None:
1028
+ st.metric("Всего статей", len(st.session_state.processed_df))
1029
+ st.metric("Из них негативных",
1030
  len(st.session_state.processed_df[
1031
  st.session_state.processed_df['Sentiment'] == 'Negative'
1032
  ])
1033
  )
1034
+ st.metric("Событий обнаружено",
1035
  len(st.session_state.processed_df[
1036
  st.session_state.processed_df['Event_Type'] != 'Нет'
1037
  ])
 
1052
  elapsed_time = format_elapsed_time(end_time - start_time)
1053
 
1054
  # Show results
1055
+ st.subheader("Итого по результатам")
1056
 
1057
  # Display statistics
1058
  stats_cols = st.columns(4)
1059
  with stats_cols[0]:
1060
+ st.metric("Всего обработано", len(st.session_state.processed_df))
1061
  with stats_cols[1]:
1062
+ st.metric("Негативных",
1063
  len(st.session_state.processed_df[
1064
  st.session_state.processed_df['Sentiment'] == 'Negative'
1065
  ])
1066
  )
1067
  with stats_cols[2]:
1068
+ st.metric("Событий обнаружено",
1069
  len(st.session_state.processed_df[
1070
  st.session_state.processed_df['Event_Type'] != 'Нет'
1071
  ])
1072
  )
1073
  with stats_cols[3]:
1074
+ st.metric("Время обработки составило", elapsed_time)
1075
 
1076
  # Show data previews
1077
+ with st.expander("📊 Предпросмотр данных", expanded=True):
1078
  preview_cols = ['Объект', 'Заголовок', 'Sentiment', 'Event_Type']
1079
  st.dataframe(
1080
  st.session_state.processed_df[preview_cols],
 
1089
  )
1090
 
1091
  st.download_button(
1092
+ label="📥 Полный отчет - загрузить",
1093
  data=output,
1094
+ file_name="результаты_анализа.xlsx",
1095
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1096
  key='download_button'
1097
  )
1098
 
1099
  except Exception as e:
1100
+ st.error(f"Ошибочка в обработке файла: {str(e)}")
1101
  st.session_state.processed_df = None
1102
 
1103