Spaces:
Sleeping
Sleeping
Commit
·
0a6a508
1
Parent(s):
fad358d
v.1.53
Browse files
app.py
CHANGED
@@ -174,7 +174,7 @@ class EventDetector:
|
|
174 |
|
175 |
@spaces.GPU(duration=20)
|
176 |
def analyze_sentiment(self, text):
|
177 |
-
"""
|
178 |
try:
|
179 |
if not text or not isinstance(text, str):
|
180 |
return "Neutral"
|
@@ -183,32 +183,46 @@ class EventDetector:
|
|
183 |
if not text:
|
184 |
return "Neutral"
|
185 |
|
186 |
-
# Get predictions
|
187 |
finbert_result = self.finbert(text)[0]
|
188 |
roberta_result = self.roberta(text)[0]
|
189 |
finbert_tone_result = self.finbert_tone(text)[0]
|
190 |
|
191 |
-
#
|
192 |
def map_sentiment(result):
|
193 |
label = result['label'].lower()
|
194 |
-
|
|
|
|
|
|
|
195 |
return "Positive"
|
196 |
-
|
|
|
197 |
return "Negative"
|
198 |
-
|
|
|
|
|
|
|
|
|
|
|
199 |
|
200 |
-
# Get mapped sentiments
|
201 |
sentiments = [
|
202 |
map_sentiment(finbert_result),
|
203 |
map_sentiment(roberta_result),
|
204 |
map_sentiment(finbert_tone_result)
|
205 |
]
|
206 |
|
207 |
-
#
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
|
|
|
|
|
|
|
|
|
|
212 |
return "Neutral"
|
213 |
|
214 |
except Exception as e:
|
@@ -313,79 +327,60 @@ class EventDetector:
|
|
313 |
|
314 |
@spaces.GPU(duration=20)
|
315 |
def detect_events(self, text, entity):
|
316 |
-
"""Rest of the detect_events method remains the same"""
|
317 |
if not text or not entity:
|
318 |
return "Нет", "Invalid input"
|
319 |
|
320 |
try:
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
if not text or not entity:
|
325 |
-
return "Нет", "Empty input"
|
326 |
-
|
327 |
-
# First check for keyword matches
|
328 |
-
text_lower = text.lower()
|
329 |
-
keywords = {
|
330 |
-
'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат', 'показател'],
|
331 |
-
'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги', 'долг'],
|
332 |
-
'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств', 'банкрот']
|
333 |
-
}
|
334 |
-
|
335 |
-
# Check keywords first
|
336 |
-
detected_event = None
|
337 |
-
for event_type, terms in keywords.items():
|
338 |
-
if any(term in text_lower for term in terms):
|
339 |
-
detected_event = event_type
|
340 |
-
break
|
341 |
-
|
342 |
-
if detected_event:
|
343 |
-
# Prepare prompt for summary
|
344 |
-
prompt = f"""<s>Summarize this {detected_event} news about {entity}:
|
345 |
|
346 |
-
Text: {text}
|
347 |
|
348 |
-
|
|
|
|
|
|
|
|
|
349 |
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
371 |
|
372 |
-
|
|
|
|
|
373 |
|
374 |
-
|
375 |
-
if "Summary:" in response:
|
376 |
-
summary = response.split("Summary:")[1].strip()
|
377 |
-
summary = summary.replace('<s>', '').replace('</s>', '').strip()
|
378 |
-
else:
|
379 |
-
if detected_event == 'Отчетность':
|
380 |
-
summary = f"Компания {entity} опубликовала финансовые показатели."
|
381 |
-
elif detected_event == 'РЦБ':
|
382 |
-
summary = f"Обнаружена информация о ценных бумагах компании {entity}."
|
383 |
-
elif detected_event == 'Суд':
|
384 |
-
summary = f"Компания {entity} участвует в судебном разбирательстве."
|
385 |
|
386 |
-
|
387 |
-
|
388 |
-
return "Нет", "No significant event detected"
|
389 |
|
390 |
except Exception as e:
|
391 |
logger.error(f"Event detection error: {str(e)}")
|
@@ -658,7 +653,7 @@ def create_interface():
|
|
658 |
# Create state for file data
|
659 |
current_file = gr.State(None)
|
660 |
|
661 |
-
gr.Markdown("# AI-анализ мониторинга новостей v.1.
|
662 |
|
663 |
with gr.Row():
|
664 |
file_input = gr.File(
|
|
|
174 |
|
175 |
@spaces.GPU(duration=20)
|
176 |
def analyze_sentiment(self, text):
|
177 |
+
"""Enhanced sentiment analysis with better negative detection"""
|
178 |
try:
|
179 |
if not text or not isinstance(text, str):
|
180 |
return "Neutral"
|
|
|
183 |
if not text:
|
184 |
return "Neutral"
|
185 |
|
186 |
+
# Get predictions with confidence scores
|
187 |
finbert_result = self.finbert(text)[0]
|
188 |
roberta_result = self.roberta(text)[0]
|
189 |
finbert_tone_result = self.finbert_tone(text)[0]
|
190 |
|
191 |
+
# Enhanced sentiment mapping with confidence thresholds
|
192 |
def map_sentiment(result):
|
193 |
label = result['label'].lower()
|
194 |
+
score = result['score']
|
195 |
+
|
196 |
+
# Higher threshold for positive to reduce false positives
|
197 |
+
if label in ['positive', 'pos', 'positive tone'] and score > 0.75:
|
198 |
return "Positive"
|
199 |
+
# Lower threshold for negative to catch more cases
|
200 |
+
elif label in ['negative', 'neg', 'negative tone'] and score > 0.6:
|
201 |
return "Negative"
|
202 |
+
# Consider high-confidence neutral predictions
|
203 |
+
elif label == 'neutral' and score > 0.8:
|
204 |
+
return "Neutral"
|
205 |
+
# Default to negative for uncertain cases in financial context
|
206 |
+
else:
|
207 |
+
return "Negative" if score > 0.4 else "Neutral"
|
208 |
|
209 |
+
# Get mapped sentiments with confidence-based logic
|
210 |
sentiments = [
|
211 |
map_sentiment(finbert_result),
|
212 |
map_sentiment(roberta_result),
|
213 |
map_sentiment(finbert_tone_result)
|
214 |
]
|
215 |
|
216 |
+
# Weighted voting - prioritize negative signals
|
217 |
+
if "Negative" in sentiments:
|
218 |
+
neg_count = sentiments.count("Negative")
|
219 |
+
if neg_count >= 1: # More sensitive to negative sentiment
|
220 |
+
return "Negative"
|
221 |
+
|
222 |
+
pos_count = sentiments.count("Positive")
|
223 |
+
if pos_count >= 2: # Require stronger positive consensus
|
224 |
+
return "Positive"
|
225 |
+
|
226 |
return "Neutral"
|
227 |
|
228 |
except Exception as e:
|
|
|
327 |
|
328 |
@spaces.GPU(duration=20)
|
329 |
def detect_events(self, text, entity):
|
|
|
330 |
if not text or not entity:
|
331 |
return "Нет", "Invalid input"
|
332 |
|
333 |
try:
|
334 |
+
# Improved prompt for MT5
|
335 |
+
prompt = f"""<s>Analyze this news about {entity}:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
336 |
|
337 |
+
Text: {text}
|
338 |
|
339 |
+
Classify this news into ONE of these categories:
|
340 |
+
1. "Отчетность" if about: financial reports, revenue, profit, EBITDA, financial results, quarterly/annual reports
|
341 |
+
2. "Суд" if about: court cases, lawsuits, arbitration, bankruptcy, legal proceedings
|
342 |
+
3. "РЦБ" if about: bonds, securities, defaults, debt restructuring, coupon payments
|
343 |
+
4. "Нет" if none of the above
|
344 |
|
345 |
+
Provide classification and 2-3 sentence summary focusing on key facts.
|
346 |
+
|
347 |
+
Format response exactly as:
|
348 |
+
Category: [category name]
|
349 |
+
Summary: [brief factual summary]</s>"""
|
350 |
+
|
351 |
+
inputs = self.tokenizer(
|
352 |
+
prompt,
|
353 |
+
return_tensors="pt",
|
354 |
+
padding=True,
|
355 |
+
truncation=True,
|
356 |
+
max_length=512
|
357 |
+
).to(self.device)
|
358 |
+
|
359 |
+
outputs = self.model.generate(
|
360 |
+
**inputs,
|
361 |
+
max_length=200,
|
362 |
+
num_return_sequences=1,
|
363 |
+
do_sample=False,
|
364 |
+
temperature=0.7,
|
365 |
+
top_p=0.9,
|
366 |
+
no_repeat_ngram_size=3
|
367 |
+
)
|
368 |
+
|
369 |
+
response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
370 |
+
|
371 |
+
# Extract category and summary
|
372 |
+
if "Category:" in response and "Summary:" in response:
|
373 |
+
parts = response.split("Summary:")
|
374 |
+
category = parts[0].split("Category:")[1].strip()
|
375 |
+
summary = parts[1].strip()
|
376 |
|
377 |
+
# Validate category
|
378 |
+
valid_categories = {"Отчетность", "Суд", "РЦБ", "Нет"}
|
379 |
+
category = category if category in valid_categories else "Нет"
|
380 |
|
381 |
+
return category, summary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
|
383 |
+
return "Нет", "Could not classify event"
|
|
|
|
|
384 |
|
385 |
except Exception as e:
|
386 |
logger.error(f"Event detection error: {str(e)}")
|
|
|
653 |
# Create state for file data
|
654 |
current_file = gr.State(None)
|
655 |
|
656 |
+
gr.Markdown("# AI-анализ мониторинга новостей v.1.53")
|
657 |
|
658 |
with gr.Row():
|
659 |
file_input = gr.File(
|