Spaces:
Sleeping
Sleeping
Commit
·
9cb77e8
1
Parent(s):
0d417b5
v.1.49++
Browse files
app.py
CHANGED
@@ -71,16 +71,17 @@ class ProcessControl:
|
|
71 |
class EventDetector:
|
72 |
def __init__(self):
|
73 |
try:
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
self.
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
84 |
|
85 |
except Exception as e:
|
86 |
logger.error(f"Error in EventDetector initialization: {str(e)}")
|
@@ -313,7 +314,7 @@ class EventDetector:
|
|
313 |
|
314 |
@spaces.GPU(duration=20)
|
315 |
def detect_events(self, text, entity):
|
316 |
-
"""
|
317 |
if not text or not entity:
|
318 |
return "Нет", "Invalid input"
|
319 |
|
@@ -323,69 +324,56 @@ class EventDetector:
|
|
323 |
|
324 |
if not text or not entity:
|
325 |
return "Нет", "Empty input"
|
326 |
-
|
327 |
-
#
|
328 |
-
|
329 |
-
keywords = {
|
330 |
-
'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат', 'показател'],
|
331 |
-
'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги', 'долг'],
|
332 |
-
'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств', 'банкрот']
|
333 |
-
}
|
334 |
-
|
335 |
-
# Check keywords first
|
336 |
-
detected_event = None
|
337 |
-
for event_type, terms in keywords.items():
|
338 |
-
if any(term in text_lower for term in terms):
|
339 |
-
detected_event = event_type
|
340 |
-
break
|
341 |
-
|
342 |
-
if detected_event:
|
343 |
-
# Prepare prompt for summary
|
344 |
-
prompt = f"""<s>Summarize this {detected_event} news about {entity}:
|
345 |
|
346 |
Text: {text}
|
347 |
|
348 |
-
|
|
|
349 |
|
350 |
Format:
|
351 |
-
|
|
|
352 |
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
|
372 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
373 |
|
374 |
-
|
375 |
-
if "Summary:" in response:
|
376 |
summary = response.split("Summary:")[1].strip()
|
377 |
-
summary = summary.replace(
|
378 |
else:
|
379 |
-
|
380 |
-
summary = f"Компания {entity} опубликовала финансовые показатели."
|
381 |
-
elif detected_event == 'РЦБ':
|
382 |
-
summary = f"Обнаружена информация о ценных бумагах компании {entity}."
|
383 |
-
elif detected_event == 'Суд':
|
384 |
-
summary = f"Компания {entity} участвует в судебном разбирательстве."
|
385 |
-
|
386 |
-
return detected_event, summary
|
387 |
|
388 |
-
|
|
|
389 |
|
390 |
except Exception as e:
|
391 |
logger.error(f"Event detection error: {str(e)}")
|
@@ -395,12 +383,7 @@ Summary: [2-3 sentence summary]</s>"""
|
|
395 |
"""Clean up GPU resources"""
|
396 |
try:
|
397 |
self.model = None
|
398 |
-
self.translator = None
|
399 |
-
self.finbert = None
|
400 |
-
self.roberta = None
|
401 |
-
self.finbert_tone = None
|
402 |
torch.cuda.empty_cache()
|
403 |
-
self.initialized = False
|
404 |
logger.info("Cleaned up GPU resources")
|
405 |
except Exception as e:
|
406 |
logger.error(f"Error in cleanup: {str(e)}")
|
|
|
71 |
class EventDetector:
|
72 |
def __init__(self):
|
73 |
try:
|
74 |
+
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
75 |
+
logger.info(f"Initializing MT5 model on device: {self.device}")
|
76 |
+
|
77 |
+
# Initialize MT5 model
|
78 |
+
self.model_name = "google/mt5-small"
|
79 |
+
self.tokenizer = AutoTokenizer.from_pretrained(
|
80 |
+
self.model_name,
|
81 |
+
legacy=True
|
82 |
+
)
|
83 |
+
self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(self.device)
|
84 |
+
logger.info("MT5 model initialized successfully")
|
85 |
|
86 |
except Exception as e:
|
87 |
logger.error(f"Error in EventDetector initialization: {str(e)}")
|
|
|
314 |
|
315 |
@spaces.GPU(duration=20)
|
316 |
def detect_events(self, text, entity):
|
317 |
+
"""Detect events in text using MT5 model"""
|
318 |
if not text or not entity:
|
319 |
return "Нет", "Invalid input"
|
320 |
|
|
|
324 |
|
325 |
if not text or not entity:
|
326 |
return "Нет", "Empty input"
|
327 |
+
|
328 |
+
# Prepare prompt for event detection and summary
|
329 |
+
prompt = f"""<s>Analyze this news about {entity}:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
330 |
|
331 |
Text: {text}
|
332 |
|
333 |
+
Is there a significant event? Answer only YES or NO.
|
334 |
+
If YES, create a brief, factual summary focusing on key points.
|
335 |
|
336 |
Format:
|
337 |
+
Event: [YES/NO]
|
338 |
+
Summary: [2-3 sentence summary if event is YES]</s>"""
|
339 |
|
340 |
+
# Generate response
|
341 |
+
inputs = self.tokenizer(
|
342 |
+
prompt,
|
343 |
+
return_tensors="pt",
|
344 |
+
padding=True,
|
345 |
+
truncation=True,
|
346 |
+
max_length=512
|
347 |
+
).to(self.device)
|
348 |
|
349 |
+
outputs = self.model.generate(
|
350 |
+
**inputs,
|
351 |
+
max_length=200,
|
352 |
+
num_return_sequences=1,
|
353 |
+
do_sample=False,
|
354 |
+
pad_token_id=self.tokenizer.pad_token_id,
|
355 |
+
eos_token_id=self.tokenizer.eos_token_id,
|
356 |
+
no_repeat_ngram_size=3
|
357 |
+
)
|
358 |
|
359 |
+
response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
360 |
+
|
361 |
+
# Parse response for event detection and summary
|
362 |
+
event_detected = False
|
363 |
+
summary = ""
|
364 |
+
|
365 |
+
if "Event:" in response:
|
366 |
+
event_line = response.split("Event:")[1].split("\n")[0].strip()
|
367 |
+
event_detected = "YES" in event_line.upper()
|
368 |
|
369 |
+
if event_detected and "Summary:" in response:
|
|
|
370 |
summary = response.split("Summary:")[1].strip()
|
371 |
+
summary = summary.replace("<s>", "").replace("</s>", "").strip()
|
372 |
else:
|
373 |
+
summary = "Event detected but no summary generated"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
|
375 |
+
event_type = "Событие" if event_detected else "Нет"
|
376 |
+
return event_type, summary
|
377 |
|
378 |
except Exception as e:
|
379 |
logger.error(f"Event detection error: {str(e)}")
|
|
|
383 |
"""Clean up GPU resources"""
|
384 |
try:
|
385 |
self.model = None
|
|
|
|
|
|
|
|
|
386 |
torch.cuda.empty_cache()
|
|
|
387 |
logger.info("Cleaned up GPU resources")
|
388 |
except Exception as e:
|
389 |
logger.error(f"Error in cleanup: {str(e)}")
|