Commit cd6115e: v.1.23
Parent(s): 2bf1f83

app.py (CHANGED)
@@ -64,22 +64,29 @@ class ProcessControl:
 class EventDetector:
     def __init__(self):
         try:
+            # Initialize sentiment models with GPU support
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            logger.info(f"Initializing models on device: {device}")
+
             # Initialize sentiment models
             self.finbert = pipeline(
                 "sentiment-analysis",
                 model="ProsusAI/finbert",
+                device=device,
                 truncation=True,
                 max_length=512
             )
             self.roberta = pipeline(
                 "sentiment-analysis",
                 model="cardiffnlp/twitter-roberta-base-sentiment",
+                device=device,
                 truncation=True,
                 max_length=512
             )
             self.finbert_tone = pipeline(
                 "sentiment-analysis",
                 model="yiyanghkust/finbert-tone",
+                device=device,
                 truncation=True,
                 max_length=512
             )
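The added device= argument places each sentiment pipeline on the GPU when one is available. A minimal standalone sketch of the same pattern, assuming a recent transformers release that accepts a device string (older releases expect an integer index instead):

import torch
from transformers import pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load one of the sentiment models from the diff on the selected device.
finbert = pipeline(
    "sentiment-analysis",
    model="ProsusAI/finbert",
    device=device,       # string device; use 0 / -1 on older transformers
    truncation=True,
    max_length=512,
)
print(finbert("Quarterly revenue and EBITDA came in above guidance.")[0])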
@@ -90,16 +97,14 @@ class EventDetector:
                 self.model_name,
                 legacy=True
             )
-            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
 
-
-            self.device = "cuda" if torch.cuda.is_available() else "cpu"
-            self.model = self.model.to(self.device)
+            self.device = device
             self.initialized = True
-
+            logger.info(f"All models initialized successfully on {device}")
 
         except Exception as e:
-
+            logger.error(f"Error in EventDetector initialization: {str(e)}")
             raise
 
     def analyze_sentiment(self, text):
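The seq2seq model is now moved to the already-selected device right after loading, and self.device simply records that choice. A sketch of the load-then-move pattern; the Space's actual self.model_name is not visible in this diff, so "google/flan-t5-small" below is only a placeholder:

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "google/flan-t5-small"  # placeholder id, not the Space's actual model

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)  # load once, then move once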
@@ -140,7 +145,7 @@ class EventDetector:
                 return "Neutral"
 
         except Exception as e:
-
+            logger.error(f"Sentiment analysis error: {str(e)}")
             return "Neutral"
 
     def detect_events(self, text, entity):
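The new logger.info and logger.error calls assume a module-level logger that is configured elsewhere in app.py and is outside this diff. A typical arrangement, stated here only as an assumption, would be:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)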
@@ -157,59 +162,73 @@ class EventDetector:
             # First check for keyword matches
             text_lower = text.lower()
             keywords = {
-                'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат'],
-                'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги'],
-                'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств']
+                'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат', 'показател'],
+                'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги', 'долг'],
+                'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств', 'банкрот']
             }
 
             # Check keywords first
+            detected_event = None
             for event_type, terms in keywords.items():
                 if any(term in text_lower for term in terms):
-
-
+                    detected_event = event_type
+                    break
+
+            if detected_event:
+                # Prepare prompt for summary
+                prompt = f"""<s>Summarize this {detected_event} news about {entity}:
 
 Text: {text}
 
-
+Create a brief, factual summary focusing on the main points.
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+Format:
+Summary: [2-3 sentence summary]</s>"""
+
+                # Generate summary
+                inputs = self.tokenizer(
+                    prompt,
+                    return_tensors="pt",
+                    padding=True,
+                    truncation=True,
+                    max_length=512
+                ).to(self.device)
+
+                outputs = self.model.generate(
+                    **inputs,
+                    max_length=200,
+                    num_return_sequences=1,
+                    do_sample=False,
+                    pad_token_id=self.tokenizer.pad_token_id,
+                    eos_token_id=self.tokenizer.eos_token_id,
+                    no_repeat_ngram_size=3  # Prevent repetition
+                )
+
+                response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+                # Extract summary
+                if "Summary:" in response:
+                    summary = response.split("Summary:")[1].strip()
+                    # Clean up any remaining prompt artifacts
+                    summary = summary.replace('<s>', '').replace('</s>', '').strip()
+                else:
+                    # Create a structured summary based on event type
+                    if detected_event == 'Отчетность':
+                        summary = f"Компания {entity} опубликовала финансовые показатели."
+                    elif detected_event == 'РЦБ':
+                        summary = f"Обнаружена информация о ценных бумагах компании {entity}."
+                    elif detected_event == 'Суд':
+                        summary = f"Компания {entity} участвует в судебном разбирательстве."
+
+                return detected_event, summary
 
             # If no keywords matched
             return "Нет", "No significant event detected"
 
         except Exception as e:
-
+            logger.error(f"Event detection error: {str(e)}")
             return "Нет", f"Error in event detection: {str(e)}"
+
     def cleanup(self):
         """Clean up GPU resources"""
         try:
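detect_events now returns a (event_type, summary) tuple: the keyword scan picks the event class, the seq2seq model drafts the summary, and a templated Russian fallback is used when no "Summary:" marker comes back. A hypothetical usage sketch; EventDetector is the class defined in app.py above, and the sample text and entity name are made up:

detector = EventDetector()

event_type, summary = detector.detect_events(
    "Компания опубликовала отчет: выручка выросла, EBITDA снизилась.",
    "Пример-Холдинг",
)
print(event_type)  # expected "Отчетность" via the keyword match on "отчет"/"выручка"/"ebitda"
print(summary)     # model-generated summary, or the templated fallback

detector.cleanup()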
@@ -219,8 +238,9 @@ Summary: [your summary here]</s>"""
             self.finbert_tone = None
             torch.cuda.empty_cache()
             self.initialized = False
+            logger.info("Cleaned up GPU resources")
         except Exception as e:
-            logger.error(f"Error in cleanup: {e}")
+            logger.error(f"Error in cleanup: {str(e)}")
 
 def create_visualizations(df):
     if df is None or df.empty:
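cleanup() drops the model references before calling torch.cuda.empty_cache(), which only returns cached blocks that are no longer referenced. The same pattern in isolation, as a sketch rather than the Space's code:

import torch

class ModelHolder:
    def __init__(self):
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = torch.nn.Linear(8, 8).to(device)

    def cleanup(self):
        self.model = None              # drop the reference first
        if torch.cuda.is_available():
            torch.cuda.empty_cache()   # then release cached GPU blocks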
@@ -360,7 +380,7 @@ def create_interface():
     control = ProcessControl()
 
     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        gr.Markdown("# AI-анализ мониторинга новостей v.1.
+        gr.Markdown("# AI-анализ мониторинга новостей v.1.23")
 
         with gr.Row():
             file_input = gr.File(