pentarosarium committed on
Commit
0a6a508
·
1 Parent(s): fad358d
Files changed (1) hide show
  1. app.py +71 -76
app.py CHANGED
@@ -174,7 +174,7 @@ class EventDetector:
174
 
175
  @spaces.GPU(duration=20)
176
  def analyze_sentiment(self, text):
177
- """Analyze sentiment of text (should be in English)"""
178
  try:
179
  if not text or not isinstance(text, str):
180
  return "Neutral"
@@ -183,32 +183,46 @@ class EventDetector:
183
  if not text:
184
  return "Neutral"
185
 
186
- # Get predictions from all models
187
  finbert_result = self.finbert(text)[0]
188
  roberta_result = self.roberta(text)[0]
189
  finbert_tone_result = self.finbert_tone(text)[0]
190
 
191
- # Map labels to standard format
192
  def map_sentiment(result):
193
  label = result['label'].lower()
194
- if label in ['positive', 'pos', 'positive tone']:
 
 
 
195
  return "Positive"
196
- elif label in ['negative', 'neg', 'negative tone']:
 
197
  return "Negative"
198
- return "Neutral"
 
 
 
 
 
199
 
200
- # Get mapped sentiments
201
  sentiments = [
202
  map_sentiment(finbert_result),
203
  map_sentiment(roberta_result),
204
  map_sentiment(finbert_tone_result)
205
  ]
206
 
207
- # Use majority voting
208
- sentiment_counts = pd.Series(sentiments).value_counts()
209
- if sentiment_counts.iloc[0] >= 2:
210
- return sentiment_counts.index[0]
211
-
 
 
 
 
 
212
  return "Neutral"
213
 
214
  except Exception as e:
@@ -313,79 +327,60 @@ class EventDetector:
313
 
314
  @spaces.GPU(duration=20)
315
  def detect_events(self, text, entity):
316
- """Rest of the detect_events method remains the same"""
317
  if not text or not entity:
318
  return "Нет", "Invalid input"
319
 
320
  try:
321
- text = str(text).strip()
322
- entity = str(entity).strip()
323
-
324
- if not text or not entity:
325
- return "Нет", "Empty input"
326
-
327
- # First check for keyword matches
328
- text_lower = text.lower()
329
- keywords = {
330
- 'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат', 'показател'],
331
- 'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги', 'долг'],
332
- 'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств', 'банкрот']
333
- }
334
-
335
- # Check keywords first
336
- detected_event = None
337
- for event_type, terms in keywords.items():
338
- if any(term in text_lower for term in terms):
339
- detected_event = event_type
340
- break
341
-
342
- if detected_event:
343
- # Prepare prompt for summary
344
- prompt = f"""<s>Summarize this {detected_event} news about {entity}:
345
 
346
- Text: {text}
347
 
348
- Create a brief, factual summary focusing on the main points.
 
 
 
 
349
 
350
- Format:
351
- Summary: [2-3 sentence summary]</s>"""
352
-
353
- # Generate summary
354
- inputs = self.tokenizer(
355
- prompt,
356
- return_tensors="pt",
357
- padding=True,
358
- truncation=True,
359
- max_length=512
360
- ).to(self.device)
361
-
362
- outputs = self.model.generate(
363
- **inputs,
364
- max_length=200,
365
- num_return_sequences=1,
366
- do_sample=False,
367
- pad_token_id=self.tokenizer.pad_token_id,
368
- eos_token_id=self.tokenizer.eos_token_id,
369
- no_repeat_ngram_size=3
370
- )
 
 
 
 
 
 
 
 
 
 
371
 
372
- response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
373
 
374
- # Extract summary
375
- if "Summary:" in response:
376
- summary = response.split("Summary:")[1].strip()
377
- summary = summary.replace('<s>', '').replace('</s>', '').strip()
378
- else:
379
- if detected_event == 'Отчетность':
380
- summary = f"Компания {entity} опубликовала финансовые показатели."
381
- elif detected_event == 'РЦБ':
382
- summary = f"Обнаружена информация о ценных бумагах компании {entity}."
383
- elif detected_event == 'Суд':
384
- summary = f"Компания {entity} участвует в судебном разбирательстве."
385
 
386
- return detected_event, summary
387
-
388
- return "Нет", "No significant event detected"
389
 
390
  except Exception as e:
391
  logger.error(f"Event detection error: {str(e)}")
@@ -658,7 +653,7 @@ def create_interface():
658
  # Create state for file data
659
  current_file = gr.State(None)
660
 
661
- gr.Markdown("# AI-анализ мониторинга новостей v.1.51")
662
 
663
  with gr.Row():
664
  file_input = gr.File(
 
174
 
175
  @spaces.GPU(duration=20)
176
  def analyze_sentiment(self, text):
177
+ """Enhanced sentiment analysis with better negative detection"""
178
  try:
179
  if not text or not isinstance(text, str):
180
  return "Neutral"
 
183
  if not text:
184
  return "Neutral"
185
 
186
+ # Get predictions with confidence scores
187
  finbert_result = self.finbert(text)[0]
188
  roberta_result = self.roberta(text)[0]
189
  finbert_tone_result = self.finbert_tone(text)[0]
190
 
191
+ # Enhanced sentiment mapping with confidence thresholds
192
  def map_sentiment(result):
193
  label = result['label'].lower()
194
+ score = result['score']
195
+
196
+ # Higher threshold for positive to reduce false positives
197
+ if label in ['positive', 'pos', 'positive tone'] and score > 0.75:
198
  return "Positive"
199
+ # Lower threshold for negative to catch more cases
200
+ elif label in ['negative', 'neg', 'negative tone'] and score > 0.6:
201
  return "Negative"
202
+ # Consider high-confidence neutral predictions
203
+ elif label == 'neutral' and score > 0.8:
204
+ return "Neutral"
205
+ # Default to negative for uncertain cases in financial context
206
+ else:
207
+ return "Negative" if score > 0.4 else "Neutral"
208
 
209
+ # Get mapped sentiments with confidence-based logic
210
  sentiments = [
211
  map_sentiment(finbert_result),
212
  map_sentiment(roberta_result),
213
  map_sentiment(finbert_tone_result)
214
  ]
215
 
216
+ # Weighted voting - prioritize negative signals
217
+ if "Negative" in sentiments:
218
+ neg_count = sentiments.count("Negative")
219
+ if neg_count >= 1: # More sensitive to negative sentiment
220
+ return "Negative"
221
+
222
+ pos_count = sentiments.count("Positive")
223
+ if pos_count >= 2: # Require stronger positive consensus
224
+ return "Positive"
225
+
226
  return "Neutral"
227
 
228
  except Exception as e:
 
327
 
328
  @spaces.GPU(duration=20)
329
  def detect_events(self, text, entity):
 
330
  if not text or not entity:
331
  return "Нет", "Invalid input"
332
 
333
  try:
334
+ # Improved prompt for MT5
335
+ prompt = f"""<s>Analyze this news about {entity}:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
 
337
+ Text: {text}
338
 
339
+ Classify this news into ONE of these categories:
340
+ 1. "Отчетность" if about: financial reports, revenue, profit, EBITDA, financial results, quarterly/annual reports
341
+ 2. "Суд" if about: court cases, lawsuits, arbitration, bankruptcy, legal proceedings
342
+ 3. "РЦБ" if about: bonds, securities, defaults, debt restructuring, coupon payments
343
+ 4. "Нет" if none of the above
344
 
345
+ Provide classification and 2-3 sentence summary focusing on key facts.
346
+
347
+ Format response exactly as:
348
+ Category: [category name]
349
+ Summary: [brief factual summary]</s>"""
350
+
351
+ inputs = self.tokenizer(
352
+ prompt,
353
+ return_tensors="pt",
354
+ padding=True,
355
+ truncation=True,
356
+ max_length=512
357
+ ).to(self.device)
358
+
359
+ outputs = self.model.generate(
360
+ **inputs,
361
+ max_length=200,
362
+ num_return_sequences=1,
363
+ do_sample=False,
364
+ temperature=0.7,
365
+ top_p=0.9,
366
+ no_repeat_ngram_size=3
367
+ )
368
+
369
+ response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
370
+
371
+ # Extract category and summary
372
+ if "Category:" in response and "Summary:" in response:
373
+ parts = response.split("Summary:")
374
+ category = parts[0].split("Category:")[1].strip()
375
+ summary = parts[1].strip()
376
 
377
+ # Validate category
378
+ valid_categories = {"Отчетность", "Суд", "РЦБ", "Нет"}
379
+ category = category if category in valid_categories else "Нет"
380
 
381
+ return category, summary
 
 
 
 
 
 
 
 
 
 
382
 
383
+ return "Нет", "Could not classify event"
 
 
384
 
385
  except Exception as e:
386
  logger.error(f"Event detection error: {str(e)}")
 
653
  # Create state for file data
654
  current_file = gr.State(None)
655
 
656
+ gr.Markdown("# AI-анализ мониторинга новостей v.1.53")
657
 
658
  with gr.Row():
659
  file_input = gr.File(