pentarosarium committed on
Commit
7b14387
·
1 Parent(s): a25369d
Files changed (1) hide show
  1. app.py +50 -34
app.py CHANGED
@@ -20,20 +20,19 @@ from tenacity import retry, stop_after_attempt, wait_exponential
20
  from typing import Optional
21
  from deep_translator import GoogleTranslator
22
  from googletrans import Translator as LegacyTranslator
 
23
  from transformers import (
24
- pipeline,
25
- AutoModelForSeq2SeqLM,
26
- AutoTokenizer,
27
- AutoModelForCausalLM # Added as alternative
28
  )
29
 
30
-
31
  class FallbackLLMSystem:
32
  def __init__(self):
33
  """Initialize fallback models for event detection and reasoning"""
34
  try:
35
  # Initialize MT5 model (multilingual T5)
36
- self.model_name = "google/mt5-small" # Smaller, efficient multilingual model
37
  self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
38
  self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
39
 
@@ -217,7 +216,6 @@ def process_file(uploaded_file, model_choice, translation_method=None):
217
  try:
218
  df = pd.read_excel(uploaded_file, sheet_name='Публикации')
219
  llm = init_langchain_llm(model_choice)
220
- fallback_llm = FallbackLLMSystem() # Initialize fallback system
221
  translator = TranslationSystem(batch_size=5)
222
 
223
  # Initialize all required columns first
@@ -326,7 +324,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
326
 
327
  except Exception as e:
328
  st.error(f"❌ Ошибка при обработке файла: {str(e)}")
329
- return df if df is not None else None
330
 
331
  def translate_reasoning_to_russian(llm, text):
332
  template = """
@@ -725,7 +723,7 @@ def create_output_file(df, uploaded_file, llm):
725
  return output
726
  def main():
727
  with st.sidebar:
728
- st.title("::: AI-анализ мониторинга новостей (v.3.45):::")
729
  st.subheader("по материалам СКАН-ИНТЕРФАКС ")
730
 
731
 
@@ -783,32 +781,50 @@ def main():
783
  uploaded_file = st.sidebar.file_uploader("Выбирайте Excel-файл", type="xlsx", key="unique_file_uploader")
784
 
785
  if uploaded_file is not None and st.session_state.processed_df is None:
786
- st.session_state.processed_df = process_file(
787
- uploaded_file,
788
- model_choice,
789
- translation_method = 'auto' # This parameter won't affect the translation method but keeps the interface consistent
790
- )
791
-
792
- st.subheader("Предпросмотр данных")
793
- preview_df = st.session_state.processed_df[['Объект', 'Заголовок', 'Sentiment', 'Impact']].head()
794
- st.dataframe(preview_df)
795
-
796
- # Add preview of Monitoring results
797
- st.subheader("Предпросмотр мониторинга событий и риск-факторов эмитентов")
798
- monitoring_df = st.session_state.processed_df[
799
- (st.session_state.processed_df['Event_Type'] != 'Нет') &
800
- (st.session_state.processed_df['Event_Type'].notna())
801
- ][['Объект', 'Заголовок', 'Event_Type', 'Event_Summary']].head()
802
-
803
- if len(monitoring_df) > 0:
804
- st.dataframe(monitoring_df)
805
- else:
806
- st.info("Не обнаружено значимых событий для мониторинга")
807
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
808
 
809
- analysis_df = create_analysis_data(st.session_state.processed_df)
810
- st.subheader("Анализ")
811
- st.dataframe(analysis_df)
812
 
813
 
814
  output = create_output_file(st.session_state.processed_df, uploaded_file, llm)
 
20
  from typing import Optional
21
  from deep_translator import GoogleTranslator
22
  from googletrans import Translator as LegacyTranslator
23
+ import torch
24
  from transformers import (
25
+ pipeline,
26
+ AutoModelForSeq2SeqLM,
27
+ AutoTokenizer
 
28
  )
29
 
 
30
  class FallbackLLMSystem:
31
  def __init__(self):
32
  """Initialize fallback models for event detection and reasoning"""
33
  try:
34
  # Initialize MT5 model (multilingual T5)
35
+ self.model_name = "google/mt5-small"
36
  self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
37
  self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
38
 
 
216
  try:
217
  df = pd.read_excel(uploaded_file, sheet_name='Публикации')
218
  llm = init_langchain_llm(model_choice)
 
219
  translator = TranslationSystem(batch_size=5)
220
 
221
  # Initialize all required columns first
 
324
 
325
  except Exception as e:
326
  st.error(f"❌ Ошибка при обработке файла: {str(e)}")
327
+ return None
328
 
329
  def translate_reasoning_to_russian(llm, text):
330
  template = """
 
723
  return output
724
  def main():
725
  with st.sidebar:
726
+ st.title("::: AI-анализ мониторинга новостей (v.3.46):::")
727
  st.subheader("по материалам СКАН-ИНТЕРФАКС ")
728
 
729
 
 
781
  uploaded_file = st.sidebar.file_uploader("Выбирайте Excel-файл", type="xlsx", key="unique_file_uploader")
782
 
783
  if uploaded_file is not None and st.session_state.processed_df is None:
784
+ try:
785
+ st.session_state.processed_df = process_file(
786
+ uploaded_file,
787
+ model_choice,
788
+ translation_method='auto'
789
+ )
790
+
791
+ if st.session_state.processed_df is not None:
792
+ # Show preview with safe column access
793
+ st.subheader("Предпросмотр данных")
794
+ preview_columns = ['Объект', 'Заголовок']
795
+ if 'Sentiment' in st.session_state.processed_df.columns:
796
+ preview_columns.append('Sentiment')
797
+ if 'Impact' in st.session_state.processed_df.columns:
798
+ preview_columns.append('Impact')
799
+
800
+ preview_df = st.session_state.processed_df[preview_columns].head()
801
+ st.dataframe(preview_df)
802
+
803
+ # Show monitoring results
804
+ st.subheader("Предпросмотр мониторинга событий и риск-факторов эмитентов")
805
+ if 'Event_Type' in st.session_state.processed_df.columns:
806
+ monitoring_df = st.session_state.processed_df[
807
+ (st.session_state.processed_df['Event_Type'] != 'Нет') &
808
+ (st.session_state.processed_df['Event_Type'].notna())
809
+ ][['Объект', 'Заголовок', 'Event_Type', 'Event_Summary']].head()
810
+
811
+ if len(monitoring_df) > 0:
812
+ st.dataframe(monitoring_df)
813
+ else:
814
+ st.info("Не обнаружено значимых событий для мониторинга")
815
+
816
+ # Create analysis data
817
+ analysis_df = create_analysis_data(st.session_state.processed_df)
818
+ st.subheader("Анализ")
819
+ st.dataframe(analysis_df)
820
+
821
+ else:
822
+ st.error("Ошибка при обработке файла")
823
+
824
+ except Exception as e:
825
+ st.error(f"Ошибка при обработке файла: {str(e)}")
826
+ st.session_state.processed_df = None
827
 
 
 
 
828
 
829
 
830
  output = create_output_file(st.session_state.processed_df, uploaded_file, llm)