Spaces:
Running
Running
Commit
·
29f8d5d
1
Parent(s):
d88103f
3.43 +bloomz
Browse files
app.py
CHANGED
@@ -9,7 +9,7 @@ import os
|
|
9 |
from openpyxl import load_workbook
|
10 |
from langchain.prompts import PromptTemplate
|
11 |
from langchain_core.runnables import RunnablePassthrough
|
12 |
-
from transformers import pipeline
|
13 |
from io import StringIO, BytesIO
|
14 |
import sys
|
15 |
import contextlib
|
@@ -23,6 +23,115 @@ from deep_translator import GoogleTranslator
|
|
23 |
from googletrans import Translator as LegacyTranslator
|
24 |
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
class TranslationSystem:
|
28 |
def __init__(self, batch_size=5):
|
@@ -106,6 +215,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
106 |
try:
|
107 |
df = pd.read_excel(uploaded_file, sheet_name='Публикации')
|
108 |
llm = init_langchain_llm(model_choice)
|
|
|
109 |
translator = TranslationSystem(batch_size=5)
|
110 |
|
111 |
# Initialize all required columns first
|
@@ -152,21 +262,41 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
152 |
sentiment = analyze_sentiment(translated_text)
|
153 |
df.at[idx, 'Sentiment'] = sentiment
|
154 |
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
df.at[idx, 'Event_Type'] = event_type
|
162 |
df.at[idx, 'Event_Summary'] = event_summary
|
163 |
|
|
|
|
|
164 |
if sentiment == "Negative":
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
df.at[idx, 'Impact'] = impact
|
171 |
df.at[idx, 'Reasoning'] = reasoning
|
172 |
|
@@ -385,12 +515,14 @@ def init_langchain_llm(model_choice):
|
|
385 |
temperature=0.0
|
386 |
)
|
387 |
|
|
|
|
|
|
|
388 |
else: # Qwen API
|
389 |
if 'ali_key' not in st.secrets:
|
390 |
st.error("DashScope API key not found in secrets. Please add it with the key 'dashscope_api_key'.")
|
391 |
st.stop()
|
392 |
|
393 |
-
# Using Qwen's API through DashScope
|
394 |
return ChatOpenAI(
|
395 |
base_url="https://dashscope.aliyuncs.com/api/v1",
|
396 |
model="qwen-max",
|
@@ -402,6 +534,7 @@ def init_langchain_llm(model_choice):
|
|
402 |
st.error(f"Error initializing the LLM: {str(e)}")
|
403 |
st.stop()
|
404 |
|
|
|
405 |
def estimate_impact(llm, news_text, entity):
|
406 |
template = """
|
407 |
Analyze the following news piece about the entity "{entity}" and estimate its monetary impact in Russian rubles for this entity in the next 6 months.
|
@@ -590,16 +723,17 @@ def create_output_file(df, uploaded_file, llm):
|
|
590 |
return output
|
591 |
def main():
|
592 |
with st.sidebar:
|
593 |
-
st.title("::: AI-анализ мониторинга новостей (v.3.
|
594 |
st.subheader("по материалам СКАН-ИНТЕРФАКС ")
|
595 |
|
|
|
|
|
596 |
model_choice = st.radio(
|
597 |
"Выберите модель для анализа:",
|
598 |
-
["Groq (llama-3.1-70b)", "ChatGPT-4-mini", "Qwen-Max"],
|
599 |
key="model_selector"
|
600 |
)
|
601 |
-
|
602 |
-
|
603 |
st.markdown(
|
604 |
"""
|
605 |
Использованы технологии:
|
|
|
9 |
from openpyxl import load_workbook
|
10 |
from langchain.prompts import PromptTemplate
|
11 |
from langchain_core.runnables import RunnablePassthrough
|
12 |
+
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoTokenizer
|
13 |
from io import StringIO, BytesIO
|
14 |
import sys
|
15 |
import contextlib
|
|
|
23 |
from googletrans import Translator as LegacyTranslator
|
24 |
|
25 |
|
26 |
+
class FallbackLLMSystem:
    """Local fallback LLM (BLOOMZ-560m) for event detection and impact estimation.

    Used when the primary API-based models fail (e.g. on rate limits).
    Prompts and parsed labels are in Russian to match the news data
    processed elsewhere in this app.
    """

    def __init__(self):
        """Load BLOOMZ-560m and build a local text-generation pipeline.

        Raises:
            Exception: re-raised (after reporting via Streamlit) when model
                loading fails, e.g. no access to the model hub.
        """
        # Local import: BLOOMZ is a decoder-only (causal) model, so it must
        # be loaded with AutoModelForCausalLM.  The previously referenced
        # AutoModelForSeq2SeqGeneration does not exist in transformers (the
        # encoder-decoder class is AutoModelForSeq2SeqLM), so initialization
        # failed before any inference could run.
        from transformers import AutoModelForCausalLM

        try:
            # Smaller BLOOMZ variant for efficiency.
            self.model_name = "bigscience/bloomz-560m"
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForCausalLM.from_pretrained(self.model_name)

            # Run on GPU when available.
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            self.model = self.model.to(self.device)

            # "text-generation" is the correct pipeline task for a causal LM
            # ("text2text-generation" is only for encoder-decoder models).
            self.pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                device=0 if self.device == "cuda" else -1,
            )

        except Exception as e:
            st.error(f"Error initializing fallback LLM system: {str(e)}")
            raise

    def _generate(self, prompt, max_new_tokens=200):
        """Run greedy generation and return only the model's continuation.

        ``max_new_tokens`` bounds the answer length independently of prompt
        size (``max_length`` would count prompt tokens too and could be
        exceeded by the prompt alone), and ``return_full_text=False`` strips
        the prompt from the output so the marker parsing below sees only the
        generated answer, not the prompt's own "Формат ответа" section.
        """
        return self.pipeline(
            prompt,
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            do_sample=False,
            return_full_text=False,
        )[0]['generated_text']

    def detect_events(self, text, entity):
        """Detect the event type of a news item about *entity*.

        Args:
            text: news text (Russian).
            entity: company name the news item refers to.

        Returns:
            ``(event_type, summary)``; defaults to ``("Нет", "")`` when the
            response lacks the expected markers or generation fails.
        """
        prompt = f"""Задача: Проанализируйте новость о компании и определите тип события.

Компания: {entity}
Новость: {text}

Возможные типы событий:
- Отчетность (публикация финансовых результатов)
- РЦБ (события с облигациями или акциями)
- Суд (судебные иски)
- Нет (нет значимых событий)

Формат ответа:
Тип: [тип события]
Краткое описание: [описание в двух предложениях]

Ответ:"""

        try:
            response = self._generate(prompt)

            # Defaults used when the model does not follow the answer format.
            event_type = "Нет"
            summary = ""

            if "Тип:" in response and "Краткое описание:" in response:
                # maxsplit=1 keeps parsing stable even if the model repeats
                # a marker inside the free-text summary.
                type_part, summary_part = response.split("Краткое описание:", 1)
                event_type = type_part.split("Тип:", 1)[1].strip()
                summary = summary_part.strip()

            return event_type, summary

        except Exception as e:
            st.warning(f"Error in fallback event detection: {str(e)}")
            return "Нет", ""

    def estimate_impact(self, text, entity):
        """Estimate the impact category of a news item for *entity*.

        Args:
            text: news text (Russian).
            entity: company name the news item refers to.

        Returns:
            ``(impact, reasoning)``; falls back to the "Неопределенный эффект"
            category when the markers are absent or generation fails.
        """
        prompt = f"""Задача: Оцените влияние новости на компанию.

Компания: {entity}
Новость: {text}

Возможные категории влияния:
- Значительный риск убытков
- Умеренный риск убытков
- Незначительный риск убытков
- Вероятность прибыли
- Неопределенный эффект

Формат ответа:
Impact: [категория]
Reasoning: [объяснение в двух предложениях]

Ответ:"""

        try:
            response = self._generate(prompt)

            impact = "Неопределенный эффект"
            reasoning = "Не удалось определить влияние"

            if "Impact:" in response and "Reasoning:" in response:
                impact_part, reasoning_part = response.split("Reasoning:", 1)
                impact = impact_part.split("Impact:", 1)[1].strip()
                reasoning = reasoning_part.strip()

            return impact, reasoning

        except Exception as e:
            st.warning(f"Error in fallback impact estimation: {str(e)}")
            return "Неопределенный эффект", "Ошибка анализа"
|
134 |
+
|
135 |
|
136 |
class TranslationSystem:
|
137 |
def __init__(self, batch_size=5):
|
|
|
215 |
try:
|
216 |
df = pd.read_excel(uploaded_file, sheet_name='Публикации')
|
217 |
llm = init_langchain_llm(model_choice)
|
218 |
+
fallback_llm = FallbackLLMSystem() # Initialize fallback system
|
219 |
translator = TranslationSystem(batch_size=5)
|
220 |
|
221 |
# Initialize all required columns first
|
|
|
262 |
sentiment = analyze_sentiment(translated_text)
|
263 |
df.at[idx, 'Sentiment'] = sentiment
|
264 |
|
265 |
+
try:
|
266 |
+
# Try with primary LLM
|
267 |
+
event_type, event_summary = detect_events(
|
268 |
+
llm,
|
269 |
+
row['Выдержки из текста'],
|
270 |
+
row['Объект']
|
271 |
+
)
|
272 |
+
except Exception as e:
|
273 |
+
if 'rate limit' in str(e).lower():
|
274 |
+
st.warning("Rate limit reached. Using fallback model for event detection.")
|
275 |
+
event_type, event_summary = fallback_llm.detect_events(
|
276 |
+
row['Выдержки из текста'],
|
277 |
+
row['Объект']
|
278 |
+
)
|
279 |
+
|
280 |
df.at[idx, 'Event_Type'] = event_type
|
281 |
df.at[idx, 'Event_Summary'] = event_summary
|
282 |
|
283 |
+
|
284 |
+
# Similar for impact estimation
|
285 |
if sentiment == "Negative":
|
286 |
+
try:
|
287 |
+
impact, reasoning = estimate_impact(
|
288 |
+
llm,
|
289 |
+
translated_text,
|
290 |
+
row['Объект']
|
291 |
+
)
|
292 |
+
except Exception as e:
|
293 |
+
if 'rate limit' in str(e).lower():
|
294 |
+
st.warning("Rate limit reached. Using fallback model for impact estimation.")
|
295 |
+
impact, reasoning = fallback_llm.estimate_impact(
|
296 |
+
translated_text,
|
297 |
+
row['Объект']
|
298 |
+
)
|
299 |
+
|
300 |
df.at[idx, 'Impact'] = impact
|
301 |
df.at[idx, 'Reasoning'] = reasoning
|
302 |
|
|
|
515 |
temperature=0.0
|
516 |
)
|
517 |
|
518 |
+
elif model_choice == "Local-BLOOMZ": # Added new option
|
519 |
+
return FallbackLLMSystem()
|
520 |
+
|
521 |
else: # Qwen API
|
522 |
if 'ali_key' not in st.secrets:
|
523 |
st.error("DashScope API key not found in secrets. Please add it with the key 'dashscope_api_key'.")
|
524 |
st.stop()
|
525 |
|
|
|
526 |
return ChatOpenAI(
|
527 |
base_url="https://dashscope.aliyuncs.com/api/v1",
|
528 |
model="qwen-max",
|
|
|
534 |
st.error(f"Error initializing the LLM: {str(e)}")
|
535 |
st.stop()
|
536 |
|
537 |
+
|
538 |
def estimate_impact(llm, news_text, entity):
|
539 |
template = """
|
540 |
Analyze the following news piece about the entity "{entity}" and estimate its monetary impact in Russian rubles for this entity in the next 6 months.
|
|
|
723 |
return output
|
724 |
def main():
|
725 |
with st.sidebar:
|
726 |
+
st.title("::: AI-анализ мониторинга новостей (v.3.43 ):::")
|
727 |
st.subheader("по материалам СКАН-ИНТЕРФАКС ")
|
728 |
|
729 |
+
|
730 |
+
|
731 |
model_choice = st.radio(
|
732 |
"Выберите модель для анализа:",
|
733 |
+
["Groq (llama-3.1-70b)", "ChatGPT-4-mini", "Qwen-Max", "Local-BLOOMZ"],
|
734 |
key="model_selector"
|
735 |
)
|
736 |
+
|
|
|
737 |
st.markdown(
|
738 |
"""
|
739 |
Использованы технологии:
|