Spaces:

pentarosarium
/

gprocess

Sleeping

App Files Community

pentarosarium committed on Nov 25, 2024

Commit

9cb77e8

1 Parent(s): 0d417b5

v.1.49++

Browse files

Files changed (1) hide show

app.py +50 -67

app.py CHANGED Viewed

@@ -71,16 +71,17 @@ class ProcessControl:
 class EventDetector:
     def __init__(self):
         try:
-            # Initialize models
-            device = "cuda" if torch.cuda.is_available() else "cpu"
-            logger.info(f"Initializing models on device: {device}")
-            # Initialize all models
-            self.initialize_models(device)  # Move initialization to separate method
-            self.device = device
-            self.initialized = True
-            logger.info("All models initialized successfully")
         except Exception as e:
             logger.error(f"Error in EventDetector initialization: {str(e)}")
@@ -313,7 +314,7 @@ class EventDetector:
     @spaces.GPU(duration=20)
     def detect_events(self, text, entity):
-        """Rest of the detect_events method remains the same"""
         if not text or not entity:
             return "Нет", "Invalid input"
@@ -323,69 +324,56 @@ class EventDetector:
             if not text or not entity:
                 return "Нет", "Empty input"
-            # First check for keyword matches
-            text_lower = text.lower()
-            keywords = {
-                'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат', 'показател'],
-                'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги', 'долг'],
-                'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств', 'банкрот']
-            }
-            # Check keywords first
-            detected_event = None
-            for event_type, terms in keywords.items():
-                if any(term in text_lower for term in terms):
-                    detected_event = event_type
-                    break
-            if detected_event:
-                # Prepare prompt for summary
-                prompt = f"""<s>Summarize this {detected_event} news about {entity}:
 Text: {text}
-Create a brief, factual summary focusing on the main points.
 Format:
-Summary: [2-3 sentence summary]</s>"""
-                # Generate summary
-                inputs = self.tokenizer(
-                    prompt,
-                    return_tensors="pt",
-                    padding=True,
-                    truncation=True,
-                    max_length=512
-                ).to(self.device)
-                outputs = self.model.generate(
-                    **inputs,
-                    max_length=200,
-                    num_return_sequences=1,
-                    do_sample=False,
-                    pad_token_id=self.tokenizer.pad_token_id,
-                    eos_token_id=self.tokenizer.eos_token_id,
-                    no_repeat_ngram_size=3
-                )
-                response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-                # Extract summary
-                if "Summary:" in response:
                     summary = response.split("Summary:")[1].strip()
-                    summary = summary.replace('<s>', '').replace('</s>', '').strip()
                 else:
-                    if detected_event == 'Отчетность':
-                        summary = f"Компания {entity} опубликовала финансовые показатели."
-                    elif detected_event == 'РЦБ':
-                        summary = f"Обнаружена информация о ценных бумагах компании {entity}."
-                    elif detected_event == 'Суд':
-                        summary = f"Компания {entity} участвует в судебном разбирательстве."
-                return detected_event, summary
-            return "Нет", "No significant event detected"
         except Exception as e:
             logger.error(f"Event detection error: {str(e)}")
@@ -395,12 +383,7 @@ Summary: [2-3 sentence summary]</s>"""
         """Clean up GPU resources"""
         try:
             self.model = None
-            self.translator = None
-            self.finbert = None
-            self.roberta = None
-            self.finbert_tone = None
             torch.cuda.empty_cache()
-            self.initialized = False
             logger.info("Cleaned up GPU resources")
         except Exception as e:
             logger.error(f"Error in cleanup: {str(e)}")

 class EventDetector:
     def __init__(self):
         try:
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+            logger.info(f"Initializing MT5 model on device: {self.device}")
+            # Initialize MT5 model
+            self.model_name = "google/mt5-small"
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.model_name,
+                legacy=True
+            )
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(self.device)
+            logger.info("MT5 model initialized successfully")
         except Exception as e:
             logger.error(f"Error in EventDetector initialization: {str(e)}")
     @spaces.GPU(duration=20)
     def detect_events(self, text, entity):
+        """Detect events in text using MT5 model"""
         if not text or not entity:
             return "Нет", "Invalid input"
             if not text or not entity:
                 return "Нет", "Empty input"
+            # Prepare prompt for event detection and summary
+            prompt = f"""<s>Analyze this news about {entity}:
 Text: {text}
+Is there a significant event? Answer only YES or NO.
+If YES, create a brief, factual summary focusing on key points.
 Format:
+Event: [YES/NO]
+Summary: [2-3 sentence summary if event is YES]</s>"""
+            # Generate response
+            inputs = self.tokenizer(
+                prompt,
+                return_tensors="pt",
+                padding=True,
+                truncation=True,
+                max_length=512
+            ).to(self.device)
+            outputs = self.model.generate(
+                **inputs,
+                max_length=200,
+                num_return_sequences=1,
+                do_sample=False,
+                pad_token_id=self.tokenizer.pad_token_id,
+                eos_token_id=self.tokenizer.eos_token_id,
+                no_repeat_ngram_size=3
+            )
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            # Parse response for event detection and summary
+            event_detected = False
+            summary = ""
+            if "Event:" in response:
+                event_line = response.split("Event:")[1].split("\n")[0].strip()
+                event_detected = "YES" in event_line.upper()
+                if event_detected and "Summary:" in response:
                     summary = response.split("Summary:")[1].strip()
+                    summary = summary.replace("<s>", "").replace("</s>", "").strip()
                 else:
+                    summary = "Event detected but no summary generated"
+            event_type = "Событие" if event_detected else "Нет"
+            return event_type, summary
         except Exception as e:
             logger.error(f"Event detection error: {str(e)}")
         """Clean up GPU resources"""
         try:
             self.model = None
             torch.cuda.empty_cache()
             logger.info("Cleaned up GPU resources")
         except Exception as e:
             logger.error(f"Error in cleanup: {str(e)}")