pentarosarium committed on
Commit
29f8d5d
·
1 Parent(s): d88103f

3.43 +bloomz

Browse files
Files changed (1) hide show
  1. app.py +151 -17
app.py CHANGED
@@ -9,7 +9,7 @@ import os
9
  from openpyxl import load_workbook
10
  from langchain.prompts import PromptTemplate
11
  from langchain_core.runnables import RunnablePassthrough
12
- from transformers import pipeline
13
  from io import StringIO, BytesIO
14
  import sys
15
  import contextlib
@@ -23,6 +23,115 @@ from deep_translator import GoogleTranslator
23
  from googletrans import Translator as LegacyTranslator
24
 
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  class TranslationSystem:
28
  def __init__(self, batch_size=5):
@@ -106,6 +215,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
106
  try:
107
  df = pd.read_excel(uploaded_file, sheet_name='Публикации')
108
  llm = init_langchain_llm(model_choice)
 
109
  translator = TranslationSystem(batch_size=5)
110
 
111
  # Initialize all required columns first
@@ -152,21 +262,41 @@ def process_file(uploaded_file, model_choice, translation_method=None):
152
  sentiment = analyze_sentiment(translated_text)
153
  df.at[idx, 'Sentiment'] = sentiment
154
 
155
- # Event detection
156
- event_type, event_summary = detect_events(
157
- llm,
158
- row['Выдержки из текста'],
159
- row['Объект']
160
- )
 
 
 
 
 
 
 
 
 
161
  df.at[idx, 'Event_Type'] = event_type
162
  df.at[idx, 'Event_Summary'] = event_summary
163
 
 
 
164
  if sentiment == "Negative":
165
- impact, reasoning = estimate_impact(
166
- llm,
167
- translated_text,
168
- row['Объект']
169
- )
 
 
 
 
 
 
 
 
 
170
  df.at[idx, 'Impact'] = impact
171
  df.at[idx, 'Reasoning'] = reasoning
172
 
@@ -385,12 +515,14 @@ def init_langchain_llm(model_choice):
385
  temperature=0.0
386
  )
387
 
 
 
 
388
  else: # Qwen API
389
  if 'ali_key' not in st.secrets:
390
  st.error("DashScope API key not found in secrets. Please add it with the key 'dashscope_api_key'.")
391
  st.stop()
392
 
393
- # Using Qwen's API through DashScope
394
  return ChatOpenAI(
395
  base_url="https://dashscope.aliyuncs.com/api/v1",
396
  model="qwen-max",
@@ -402,6 +534,7 @@ def init_langchain_llm(model_choice):
402
  st.error(f"Error initializing the LLM: {str(e)}")
403
  st.stop()
404
 
 
405
  def estimate_impact(llm, news_text, entity):
406
  template = """
407
  Analyze the following news piece about the entity "{entity}" and estimate its monetary impact in Russian rubles for this entity in the next 6 months.
@@ -590,16 +723,17 @@ def create_output_file(df, uploaded_file, llm):
590
  return output
591
  def main():
592
  with st.sidebar:
593
- st.title("::: AI-анализ мониторинга новостей (v.3.42 ):::")
594
  st.subheader("по материалам СКАН-ИНТЕРФАКС ")
595
 
 
 
596
  model_choice = st.radio(
597
  "Выберите модель для анализа:",
598
- ["Groq (llama-3.1-70b)", "ChatGPT-4-mini", "Qwen-Max"],
599
  key="model_selector"
600
  )
601
-
602
-
603
  st.markdown(
604
  """
605
  Использованы технологии:
 
9
  from openpyxl import load_workbook
10
  from langchain.prompts import PromptTemplate
11
  from langchain_core.runnables import RunnablePassthrough
12
+ import torch
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
13
  from io import StringIO, BytesIO
14
  import sys
15
  import contextlib
 
23
  from googletrans import Translator as LegacyTranslator
24
 
25
 
26
class FallbackLLMSystem:
    """Local BLOOMZ-based fallback LLM.

    Used when the primary API-backed LLM is unavailable (e.g. rate
    limited).  Provides event detection and impact estimation for
    Russian-language news items via prompt-and-parse over a small
    locally loaded model.
    """

    def __init__(self):
        """Initialize fallback models for event detection and reasoning.

        Raises:
            Exception: re-raised (after reporting via st.error) when the
                model or tokenizer cannot be loaded.
        """
        try:
            # BLOOMZ is a decoder-only (causal) LM: it must be loaded with
            # AutoModelForCausalLM and run through the "text-generation"
            # task.  (AutoModelForSeq2SeqGeneration does not exist in
            # transformers and would fail at import time; BLOOMZ is not an
            # encoder-decoder model.)
            self.model_name = "bigscience/bloomz-560m"  # Smaller version for efficiency
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForCausalLM.from_pretrained(self.model_name)

            # Set device
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            self.model = self.model.to(self.device)

            # Initialize pipeline
            self.pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                device=0 if self.device == "cuda" else -1,
            )

        except Exception as e:
            st.error(f"Error initializing fallback LLM system: {str(e)}")
            raise

    def _generate(self, prompt):
        """Run one deterministic generation and return only the completion.

        return_full_text=False strips the echoed prompt from the output;
        without it the prompts below (which literally contain the markers
        "Тип:", "Impact:", etc.) would always satisfy the parsing checks.
        max_new_tokens caps the completion length; max_length would count
        prompt+completion and the prompts alone exceed 200 tokens.
        """
        return self.pipeline(
            prompt,
            max_new_tokens=200,
            num_return_sequences=1,
            do_sample=False,
            return_full_text=False,
        )[0]['generated_text']

    def detect_events(self, text, entity):
        """Detect events using the fallback model.

        Args:
            text: Russian-language news excerpt.
            entity: company name the news is about.

        Returns:
            (event_type, summary) tuple; ("Нет", "") when no event is
            detected or generation fails.
        """
        prompt = f"""Задача: Проанализируйте новость о компании и определите тип события.

Компания: {entity}
Новость: {text}

Возможные типы событий:
- Отчетность (публикация финансовых результатов)
- РЦБ (события с облигациями или акциями)
- Суд (судебные иски)
- Нет (нет значимых событий)

Формат ответа:
Тип: [тип события]
Краткое описание: [описание в двух предложениях]

Ответ:"""

        try:
            response = self._generate(prompt)

            # Parse response; defaults apply when markers are absent.
            event_type = "Нет"
            summary = ""

            if "Тип:" in response and "Краткое описание:" in response:
                # maxsplit=1 keeps the unpack safe if a marker repeats
                # in the generated text.
                type_part, summary_part = response.split("Краткое описание:", 1)
                event_type = type_part.split("Тип:", 1)[1].strip()
                summary = summary_part.strip()

            return event_type, summary

        except Exception as e:
            st.warning(f"Error in fallback event detection: {str(e)}")
            return "Нет", ""

    def estimate_impact(self, text, entity):
        """Estimate impact using the fallback model.

        Args:
            text: Russian-language (translated) news text.
            entity: company name the news is about.

        Returns:
            (impact, reasoning) tuple; falls back to
            ("Неопределенный эффект", ...) when parsing or generation fails.
        """
        prompt = f"""Задача: Оцените влияние новости на компанию.

Компания: {entity}
Новость: {text}

Возможные категории влияния:
- Значительный риск убытков
- Умеренный риск убытков
- Незначительный риск убытков
- Вероятность прибыли
- Неопределенный эффект

Формат ответа:
Impact: [категория]
Reasoning: [объяснение в двух предложениях]

Ответ:"""

        try:
            response = self._generate(prompt)

            impact = "Неопределенный эффект"
            reasoning = "Не удалось определить влияние"

            if "Impact:" in response and "Reasoning:" in response:
                # maxsplit=1 keeps the unpack safe if a marker repeats.
                impact_part, reasoning_part = response.split("Reasoning:", 1)
                impact = impact_part.split("Impact:", 1)[1].strip()
                reasoning = reasoning_part.strip()

            return impact, reasoning

        except Exception as e:
            st.warning(f"Error in fallback impact estimation: {str(e)}")
            return "Неопределенный эффект", "Ошибка анализа"
134
+
135
 
136
  class TranslationSystem:
137
  def __init__(self, batch_size=5):
 
215
  try:
216
  df = pd.read_excel(uploaded_file, sheet_name='Публикации')
217
  llm = init_langchain_llm(model_choice)
218
+ fallback_llm = FallbackLLMSystem() # Initialize fallback system
219
  translator = TranslationSystem(batch_size=5)
220
 
221
  # Initialize all required columns first
 
262
  sentiment = analyze_sentiment(translated_text)
263
  df.at[idx, 'Sentiment'] = sentiment
264
 
265
+ try:
266
+ # Try with primary LLM
267
+ event_type, event_summary = detect_events(
268
+ llm,
269
+ row['Выдержки из текста'],
270
+ row['Объект']
271
+ )
272
+ except Exception as e:
273
+ if 'rate limit' in str(e).lower():
274
+ st.warning("Rate limit reached. Using fallback model for event detection.")
275
+ event_type, event_summary = fallback_llm.detect_events(
276
+ row['Выдержки из текста'],
277
+ row['Объект']
278
+ )
279
+
280
  df.at[idx, 'Event_Type'] = event_type
281
  df.at[idx, 'Event_Summary'] = event_summary
282
 
283
+
284
+ # Similar for impact estimation
285
  if sentiment == "Negative":
286
+ try:
287
+ impact, reasoning = estimate_impact(
288
+ llm,
289
+ translated_text,
290
+ row['Объект']
291
+ )
292
+ except Exception as e:
293
+ if 'rate limit' in str(e).lower():
294
+ st.warning("Rate limit reached. Using fallback model for impact estimation.")
295
+ impact, reasoning = fallback_llm.estimate_impact(
296
+ translated_text,
297
+ row['Объект']
298
+ )
299
+
300
  df.at[idx, 'Impact'] = impact
301
  df.at[idx, 'Reasoning'] = reasoning
302
 
 
515
  temperature=0.0
516
  )
517
 
518
+ elif model_choice == "Local-BLOOMZ": # Added new option
519
+ return FallbackLLMSystem()
520
+
521
  else: # Qwen API
522
  if 'ali_key' not in st.secrets:
523
  st.error("DashScope API key not found in secrets. Please add it with the key 'dashscope_api_key'.")
524
  st.stop()
525
 
 
526
  return ChatOpenAI(
527
  base_url="https://dashscope.aliyuncs.com/api/v1",
528
  model="qwen-max",
 
534
  st.error(f"Error initializing the LLM: {str(e)}")
535
  st.stop()
536
 
537
+
538
  def estimate_impact(llm, news_text, entity):
539
  template = """
540
  Analyze the following news piece about the entity "{entity}" and estimate its monetary impact in Russian rubles for this entity in the next 6 months.
 
723
  return output
724
  def main():
725
  with st.sidebar:
726
+ st.title("::: AI-анализ мониторинга новостей (v.3.43 ):::")
727
  st.subheader("по материалам СКАН-ИНТЕРФАКС ")
728
 
729
+
730
+
731
  model_choice = st.radio(
732
  "Выберите модель для анализа:",
733
+ ["Groq (llama-3.1-70b)", "ChatGPT-4-mini", "Qwen-Max", "Local-BLOOMZ"],
734
  key="model_selector"
735
  )
736
+
 
737
  st.markdown(
738
  """
739
  Использованы технологии: