Spaces:

pentarosarium
/

gprocess

Sleeping

App Files Community

pentarosarium committed on Nov 20, 2024

Commit

33771c2

1 Parent(s): 23332bc

v.1.16

Browse files

Files changed (1) hide show

app.py +135 -19

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ import plotly.graph_objects as go
 import logging
 import io
 from rapidfuzz import fuzz
 def fuzzy_deduplicate(df, column, threshold=55):
     """Deduplicate rows based on fuzzy matching of text content"""
@@ -41,21 +42,49 @@ class ProcessControl:
     def reset(self):
         self.stop_requested = False
 class EventDetector:
     def __init__(self):
-        self.model_name = "google/mt5-small"
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            self.model_name,
-            legacy=True
-        )
-        self.model = None
-        self.finbert = None
-        self.roberta = None
-        self.finbert_tone = None
-        self.last_gpu_use = 0
-    @spaces.GPU(duration=30)  # Reduced duration
     def initialize_models(self):
         try:
             current_time = time.time()
             if current_time - self.last_gpu_use < 2:
@@ -65,29 +94,116 @@ class EventDetector:
             logger.info(f"Initializing models on device: {device}")
             self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
-            self.finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert", device=device)
-            self.roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment", device=device)
-            self.finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone", device=device)
             self.last_gpu_use = time.time()
             return True
         except Exception as e:
             logger.error(f"Model initialization error: {str(e)}")
-            return False
-    @spaces.GPU(duration=20)  # Reduced duration
     def detect_events(self, text, entity):
         if not text or not entity:
             return "Нет", "Invalid input"
         try:
             current_time = time.time()
             if current_time - self.last_gpu_use < 2:
                 time.sleep(2)
-            # Rest of the method remains the same...
             self.last_gpu_use = time.time()
             return event_type, response
@@ -232,7 +348,7 @@ def create_interface():
     control = ProcessControl()
     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        gr.Markdown("# AI-анализ мониторинга новостей v.1.15")
         with gr.Row():
             file_input = gr.File(

 import logging
 import io
 from rapidfuzz import fuzz
+import time
 def fuzzy_deduplicate(df, column, threshold=55):
     """Deduplicate rows based on fuzzy matching of text content"""
     def reset(self):
         self.stop_requested = False
+class ProcessControl:
+    def __init__(self):
+        self.stop_requested = False
+        self.error = None
+    def request_stop(self):
+        self.stop_requested = True
+    def should_stop(self):
+        return self.stop_requested
+    def reset(self):
+        self.stop_requested = False
+        self.error = None
+    def set_error(self, error):
+        self.error = error
+        self.stop_requested = True
 class EventDetector:
     def __init__(self):
+        try:
+            self.model_name = "google/mt5-small"
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.model_name,
+                legacy=True
+            )
+            self.model = None
+            self.finbert = None
+            self.roberta = None
+            self.finbert_tone = None
+            self.last_gpu_use = 0
+            self.initialized = False
+            logger.info("EventDetector initialized successfully")
+        except Exception as e:
+            logger.error(f"Error in EventDetector initialization: {e}")
+            raise
+    @spaces.GPU(duration=30)
     def initialize_models(self):
+        if self.initialized:
+            return True
         try:
             current_time = time.time()
             if current_time - self.last_gpu_use < 2:
             logger.info(f"Initializing models on device: {device}")
             self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
+            # Initialize sentiment models with proper error handling
+            try:
+                self.finbert = pipeline(
+                    "sentiment-analysis",
+                    model="ProsusAI/finbert",
+                    device=device,
+                    truncation=True,
+                    max_length=512
+                )
+            except Exception as e:
+                logger.error(f"Error initializing finbert: {e}")
+                raise
+            try:
+                self.roberta = pipeline(
+                    "sentiment-analysis",
+                    model="cardiffnlp/twitter-roberta-base-sentiment",
+                    device=device,
+                    truncation=True,
+                    max_length=512
+                )
+            except Exception as e:
+                logger.error(f"Error initializing roberta: {e}")
+                raise
+            try:
+                self.finbert_tone = pipeline(
+                    "sentiment-analysis",
+                    model="yiyanghkust/finbert-tone",
+                    device=device,
+                    truncation=True,
+                    max_length=512
+                )
+            except Exception as e:
+                logger.error(f"Error initializing finbert_tone: {e}")
+                raise
             self.last_gpu_use = time.time()
+            self.initialized = True
+            logger.info("All models initialized successfully")
             return True
         except Exception as e:
+            self.initialized = False
             logger.error(f"Model initialization error: {str(e)}")
+            # Clean up any partially initialized models
+            self.cleanup()
+            raise
+    def cleanup(self):
+        """Clean up GPU resources"""
+        try:
+            self.model = None
+            self.finbert = None
+            self.roberta = None
+            self.finbert_tone = None
+            torch.cuda.empty_cache()
+            self.initialized = False
+        except Exception as e:
+            logger.error(f"Error in cleanup: {e}")
+    @spaces.GPU(duration=20)
     def detect_events(self, text, entity):
         if not text or not entity:
             return "Нет", "Invalid input"
         try:
+            if not self.initialized:
+                if not self.initialize_models():
+                    return "Нет", "Model initialization failed"
             current_time = time.time()
             if current_time - self.last_gpu_use < 2:
                 time.sleep(2)
+            text = text[:500]  # Truncate text
+            prompt = f"""<s>Analyze the following news about {entity}:
+            Text: {text}
+            Task: Identify the main event type and provide a brief summary.</s>"""
+            device = self.model.device
+            inputs = self.tokenizer(
+                prompt,
+                return_tensors="pt",
+                padding=True,
+                truncation=True,
+                max_length=512
+            ).to(device)
+            outputs = self.model.generate(
+                **inputs,
+                max_length=300,
+                num_return_sequences=1,
+                pad_token_id=self.tokenizer.pad_token_id,
+                eos_token_id=self.tokenizer.eos_token_id
+            )
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            # Event classification
+            event_type = "Нет"
+            if any(term in text.lower() for term in ['отчет', 'выручка', 'прибыль', 'ebitda']):
+                event_type = "Отчетность"
+            elif any(term in text.lower() for term in ['облигаци', 'купон', 'дефолт']):
+                event_type = "РЦБ"
+            elif any(term in text.lower() for term in ['суд', 'иск', 'арбитраж']):
+                event_type = "Суд"
             self.last_gpu_use = time.time()
             return event_type, response
     control = ProcessControl()
     with gr.Blocks(theme=gr.themes.Soft()) as app:
+        gr.Markdown("# AI-анализ мониторинга новостей v.1.16")
         with gr.Row():
             file_input = gr.File(