pentarosarium committed
Commit 9cb77e8 · 1 Parent(s): 0d417b5
Files changed (1)
  1. app.py +50 -67
app.py CHANGED
@@ -71,16 +71,17 @@ class ProcessControl:
 class EventDetector:
     def __init__(self):
         try:
-            # Initialize models
-            device = "cuda" if torch.cuda.is_available() else "cpu"
-            logger.info(f"Initializing models on device: {device}")
-
-            # Initialize all models
-            self.initialize_models(device)  # Move initialization to separate method
-
-            self.device = device
-            self.initialized = True
-            logger.info("All models initialized successfully")
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+            logger.info(f"Initializing MT5 model on device: {self.device}")
+
+            # Initialize MT5 model
+            self.model_name = "google/mt5-small"
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.model_name,
+                legacy=True
+            )
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(self.device)
+            logger.info("MT5 model initialized successfully")
 
         except Exception as e:
             logger.error(f"Error in EventDetector initialization: {str(e)}")
@@ -313,7 +314,7 @@ class EventDetector:
 
     @spaces.GPU(duration=20)
     def detect_events(self, text, entity):
-        """Rest of the detect_events method remains the same"""
+        """Detect events in text using MT5 model"""
         if not text or not entity:
             return "Нет", "Invalid input"
 
@@ -323,69 +324,56 @@
 
             if not text or not entity:
                 return "Нет", "Empty input"
-
-            # First check for keyword matches
-            text_lower = text.lower()
-            keywords = {
-                'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат', 'показател'],
-                'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги', 'долг'],
-                'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств', 'банкрот']
-            }
-
-            # Check keywords first
-            detected_event = None
-            for event_type, terms in keywords.items():
-                if any(term in text_lower for term in terms):
-                    detected_event = event_type
-                    break
-
-            if detected_event:
-                # Prepare prompt for summary
-                prompt = f"""<s>Summarize this {detected_event} news about {entity}:
+
+            # Prepare prompt for event detection and summary
+            prompt = f"""<s>Analyze this news about {entity}:
 
             Text: {text}
 
-            Create a brief, factual summary focusing on the main points.
+            Is there a significant event? Answer only YES or NO.
+            If YES, create a brief, factual summary focusing on key points.
 
             Format:
-            Summary: [2-3 sentence summary]</s>"""
+            Event: [YES/NO]
+            Summary: [2-3 sentence summary if event is YES]</s>"""
 
-                # Generate summary
-                inputs = self.tokenizer(
-                    prompt,
-                    return_tensors="pt",
-                    padding=True,
-                    truncation=True,
-                    max_length=512
-                ).to(self.device)
+            # Generate response
+            inputs = self.tokenizer(
+                prompt,
+                return_tensors="pt",
+                padding=True,
+                truncation=True,
+                max_length=512
+            ).to(self.device)
 
-                outputs = self.model.generate(
-                    **inputs,
-                    max_length=200,
-                    num_return_sequences=1,
-                    do_sample=False,
-                    pad_token_id=self.tokenizer.pad_token_id,
-                    eos_token_id=self.tokenizer.eos_token_id,
-                    no_repeat_ngram_size=3
-                )
+            outputs = self.model.generate(
+                **inputs,
+                max_length=200,
+                num_return_sequences=1,
+                do_sample=False,
+                pad_token_id=self.tokenizer.pad_token_id,
+                eos_token_id=self.tokenizer.eos_token_id,
+                no_repeat_ngram_size=3
+            )
 
-                response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+            # Parse response for event detection and summary
+            event_detected = False
+            summary = ""
+
+            if "Event:" in response:
+                event_line = response.split("Event:")[1].split("\n")[0].strip()
+                event_detected = "YES" in event_line.upper()
 
-                # Extract summary
-                if "Summary:" in response:
+            if event_detected and "Summary:" in response:
                 summary = response.split("Summary:")[1].strip()
-                    summary = summary.replace('<s>', '').replace('</s>', '').strip()
+                summary = summary.replace("<s>", "").replace("</s>", "").strip()
             else:
-                    if detected_event == 'Отчетность':
-                        summary = f"Компания {entity} опубликовала финансовые показатели."
-                    elif detected_event == 'РЦБ':
-                        summary = f"Обнаружена информация о ценных бумагах компании {entity}."
-                    elif detected_event == 'Суд':
-                        summary = f"Компания {entity} участвует в судебном разбирательстве."
-
-                return detected_event, summary
+                summary = "Event detected but no summary generated"
 
-            return "Нет", "No significant event detected"
+            event_type = "Событие" if event_detected else "Нет"
+            return event_type, summary
 
         except Exception as e:
             logger.error(f"Event detection error: {str(e)}")
@@ -395,12 +383,7 @@ Summary: [2-3 sentence summary]</s>"""
         """Clean up GPU resources"""
        try:
             self.model = None
-            self.translator = None
-            self.finbert = None
-            self.roberta = None
-            self.finbert_tone = None
             torch.cuda.empty_cache()
-            self.initialized = False
             logger.info("Cleaned up GPU resources")
         except Exception as e:
             logger.error(f"Error in cleanup: {str(e)}")