pentarosarium committed on
Commit
758ab80
·
1 Parent(s): 9cb77e8
Files changed (1) hide show
  1. app.py +68 -51
app.py CHANGED
@@ -71,17 +71,16 @@ class ProcessControl:
71
  class EventDetector:
72
  def __init__(self):
73
  try:
74
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
75
- logger.info(f"Initializing MT5 model on device: {self.device}")
76
-
77
- # Initialize MT5 model
78
- self.model_name = "google/mt5-small"
79
- self.tokenizer = AutoTokenizer.from_pretrained(
80
- self.model_name,
81
- legacy=True
82
- )
83
- self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(self.device)
84
- logger.info("MT5 model initialized successfully")
85
 
86
  except Exception as e:
87
  logger.error(f"Error in EventDetector initialization: {str(e)}")
@@ -314,7 +313,7 @@ class EventDetector:
314
 
315
  @spaces.GPU(duration=20)
316
  def detect_events(self, text, entity):
317
- """Detect events in text using MT5 model"""
318
  if not text or not entity:
319
  return "Нет", "Invalid input"
320
 
@@ -324,56 +323,69 @@ class EventDetector:
324
 
325
  if not text or not entity:
326
  return "Нет", "Empty input"
327
-
328
- # Prepare prompt for event detection and summary
329
- prompt = f"""<s>Analyze this news about {entity}:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
 
331
  Text: {text}
332
 
333
- Is there a significant event? Answer only YES or NO.
334
- If YES, create a brief, factual summary focusing on key points.
335
 
336
  Format:
337
- Event: [YES/NO]
338
- Summary: [2-3 sentence summary if event is YES]</s>"""
339
 
340
- # Generate response
341
- inputs = self.tokenizer(
342
- prompt,
343
- return_tensors="pt",
344
- padding=True,
345
- truncation=True,
346
- max_length=512
347
- ).to(self.device)
348
 
349
- outputs = self.model.generate(
350
- **inputs,
351
- max_length=200,
352
- num_return_sequences=1,
353
- do_sample=False,
354
- pad_token_id=self.tokenizer.pad_token_id,
355
- eos_token_id=self.tokenizer.eos_token_id,
356
- no_repeat_ngram_size=3
357
- )
358
 
359
- response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
360
-
361
- # Parse response for event detection and summary
362
- event_detected = False
363
- summary = ""
364
-
365
- if "Event:" in response:
366
- event_line = response.split("Event:")[1].split("\n")[0].strip()
367
- event_detected = "YES" in event_line.upper()
368
 
369
- if event_detected and "Summary:" in response:
 
370
  summary = response.split("Summary:")[1].strip()
371
- summary = summary.replace("<s>", "").replace("</s>", "").strip()
372
  else:
373
- summary = "Event detected but no summary generated"
 
 
 
 
 
 
 
374
 
375
- event_type = "Событие" if event_detected else "Нет"
376
- return event_type, summary
377
 
378
  except Exception as e:
379
  logger.error(f"Event detection error: {str(e)}")
@@ -383,7 +395,12 @@ Summary: [2-3 sentence summary if event is YES]</s>"""
383
  """Clean up GPU resources"""
384
  try:
385
  self.model = None
 
 
 
 
386
  torch.cuda.empty_cache()
 
387
  logger.info("Cleaned up GPU resources")
388
  except Exception as e:
389
  logger.error(f"Error in cleanup: {str(e)}")
@@ -641,7 +658,7 @@ def create_interface():
641
  # Create state for file data
642
  current_file = gr.State(None)
643
 
644
- gr.Markdown("# AI-анализ мониторинга новостей v.1.49")
645
 
646
  with gr.Row():
647
  file_input = gr.File(
 
71
  class EventDetector:
72
  def __init__(self):
73
  try:
74
+ # Initialize models
75
+ device = "cuda" if torch.cuda.is_available() else "cpu"
76
+ logger.info(f"Initializing models on device: {device}")
77
+
78
+ # Initialize all models
79
+ self.initialize_models(device) # Move initialization to separate method
80
+
81
+ self.device = device
82
+ self.initialized = True
83
+ logger.info("All models initialized successfully")
 
84
 
85
  except Exception as e:
86
  logger.error(f"Error in EventDetector initialization: {str(e)}")
 
313
 
314
  @spaces.GPU(duration=20)
315
  def detect_events(self, text, entity):
316
+ """Rest of the detect_events method remains the same"""
317
  if not text or not entity:
318
  return "Нет", "Invalid input"
319
 
 
323
 
324
  if not text or not entity:
325
  return "Нет", "Empty input"
326
+
327
+ # First check for keyword matches
328
+ text_lower = text.lower()
329
+ keywords = {
330
+ 'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат', 'показател'],
331
+ 'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги', 'долг'],
332
+ 'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств', 'банкрот']
333
+ }
334
+
335
+ # Check keywords first
336
+ detected_event = None
337
+ for event_type, terms in keywords.items():
338
+ if any(term in text_lower for term in terms):
339
+ detected_event = event_type
340
+ break
341
+
342
+ if detected_event:
343
+ # Prepare prompt for summary
344
+ prompt = f"""<s>Summarize this {detected_event} news about {entity}:
345
 
346
  Text: {text}
347
 
348
+ Create a brief, factual summary focusing on the main points.
 
349
 
350
  Format:
351
+ Summary: [2-3 sentence summary]</s>"""
 
352
 
353
+ # Generate summary
354
+ inputs = self.tokenizer(
355
+ prompt,
356
+ return_tensors="pt",
357
+ padding=True,
358
+ truncation=True,
359
+ max_length=512
360
+ ).to(self.device)
361
 
362
+ outputs = self.model.generate(
363
+ **inputs,
364
+ max_length=200,
365
+ num_return_sequences=1,
366
+ do_sample=False,
367
+ pad_token_id=self.tokenizer.pad_token_id,
368
+ eos_token_id=self.tokenizer.eos_token_id,
369
+ no_repeat_ngram_size=3
370
+ )
371
 
372
+ response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
373
 
374
+ # Extract summary
375
+ if "Summary:" in response:
376
  summary = response.split("Summary:")[1].strip()
377
+ summary = summary.replace('<s>', '').replace('</s>', '').strip()
378
  else:
379
+ if detected_event == 'Отчетность':
380
+ summary = f"Компания {entity} опубликовала финансовые показатели."
381
+ elif detected_event == 'РЦБ':
382
+ summary = f"Обнаружена информация о ценных бумагах компании {entity}."
383
+ elif detected_event == 'Суд':
384
+ summary = f"Компания {entity} участвует в судебном разбирательстве."
385
+
386
+ return detected_event, summary
387
 
388
+ return "Нет", "No significant event detected"
 
389
 
390
  except Exception as e:
391
  logger.error(f"Event detection error: {str(e)}")
 
395
  """Clean up GPU resources"""
396
  try:
397
  self.model = None
398
+ self.translator = None
399
+ self.finbert = None
400
+ self.roberta = None
401
+ self.finbert_tone = None
402
  torch.cuda.empty_cache()
403
+ self.initialized = False
404
  logger.info("Cleaned up GPU resources")
405
  except Exception as e:
406
  logger.error(f"Error in cleanup: {str(e)}")
 
658
  # Create state for file data
659
  current_file = gr.State(None)
660
 
661
+ gr.Markdown("# AI-анализ мониторинга новостей v.1.50")
662
 
663
  with gr.Row():
664
  file_input = gr.File(