Spaces:

pentarosarium
/

gprocess

Sleeping

App Files Community

pentarosarium committed on Nov 20, 2024

Commit

f0111d1

1 Parent(s): ce3b970

v.1.10

Browse files

Files changed (1) hide show

app.py +42 -50

app.py CHANGED Viewed

@@ -12,36 +12,49 @@ logger = logging.getLogger(__name__)
 class EventDetector:
     def __init__(self):
         try:
-            logger.info(f"Using device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")
-            self.device = "cuda" if torch.cuda.is_available() else "cpu"
-            self.model_name = "google/mt5-small"
-            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(self.device)
-            self.finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert", device=self.device)
-            self.roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment", device=self.device)
-            self.finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone", device=self.device)
-            logger.info("Models initialized successfully")
         except Exception as e:
-            logger.error(f"Model initialization error: {e}")
-            raise
-    @spaces.GPU(duration=120)
     def detect_events(self, text, entity):
         if not text or not entity:
             return "Нет", "Invalid input"
         try:
             prompt = f"""<s>Analyze the following news about {entity}:
             Text: {text}
             Task: Identify the main event type and provide a brief summary.</s>"""
             inputs = self.tokenizer(prompt, return_tensors="pt", padding=True,
-                                  truncation=True, max_length=512).to(self.device)
             outputs = self.model.generate(**inputs, max_length=300, num_return_sequences=1)
             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -57,12 +70,17 @@ class EventDetector:
             return event_type, response
         except Exception as e:
-            logger.error(f"Event detection error: {e}")
             return "Нет", f"Error: {str(e)}"
-    @spaces.GPU(duration=60)
     def analyze_sentiment(self, text):
         try:
             results = []
             texts = [text[:512]]  # Truncate to avoid token length issues
@@ -116,44 +134,27 @@ def create_visualizations(df):
         logger.error(f"Visualization error: {e}")
         return None, None
 def process_file(file_obj):
     try:
-        # Debug print
         logger.info("Starting to read Excel file...")
-        # Read Excel with error details
-        try:
-            df = pd.read_excel(file_obj, sheet_name='Публикации')
-            logger.info(f"Successfully read Excel file. Shape: {df.shape}")
-            logger.info(f"Columns: {df.columns.tolist()}")
-        except Exception as e:
-            logger.error(f"Failed to read Excel file: {str(e)}")
-            raise
         detector = EventDetector()
         processed_rows = []
         total = len(df)
-        current_status = "0%"
-        # Create progress counter
-        progress_text = gr.Textbox.update(
-            value=f"Обработано 0 из {total} строк (0%)"
-        )
         for idx, row in df.iterrows():
             try:
-                # Get text and entity with validation
                 text = str(row.get('Выдержки из текста', ''))
                 if not text.strip():
-                    logger.warning(f"Empty text at row {idx}")
                     continue
                 entity = str(row.get('Объект', ''))
                 if not entity.strip():
-                    logger.warning(f"Empty entity at row {idx}")
                     continue
-                # Process the row
                 event_type, event_summary = detector.detect_events(text, entity)
                 sentiment = detector.analyze_sentiment(text)
@@ -166,25 +167,16 @@ def process_file(file_obj):
                     'Текст': text
                 })
-                # Update progress every 5 rows
-                if idx % 5 == 0 or idx == total - 1:
-                    percentage = round((idx + 1) / total * 100)
-                    if percentage != current_status:
-                        current_status = percentage
-                        logger.info(f"Processed {idx + 1}/{total} rows ({percentage}%)")
             except Exception as e:
                 logger.error(f"Error processing row {idx}: {str(e)}")
                 continue
-        # Create final DataFrame
         result_df = pd.DataFrame(processed_rows)
         logger.info(f"Processing complete. Final DataFrame shape: {result_df.shape}")
-        if result_df.empty:
-            logger.error("No rows were processed successfully")
-            raise ValueError("No data was processed successfully")
         return result_df
     except Exception as e:
@@ -193,7 +185,7 @@ def process_file(file_obj):
 def create_interface():
     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        gr.Markdown("# AI-анализ мониторинга новостей v.1.09")
         with gr.Row():
             file_input = gr.File(

 class EventDetector:
     def __init__(self):
+        self.model_name = "google/mt5-small"
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+        # Don't initialize models in __init__
+        self.model = None
+        self.finbert = None
+        self.roberta = None
+        self.finbert_tone = None
+    @spaces.GPU
+    def initialize_models(self):
+        """Initialize all models with GPU support"""
         try:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            logger.info(f"Initializing models on device: {device}")
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
+            self.finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert", device=device)
+            self.roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment", device=device)
+            self.finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone", device=device)
+            return True
         except Exception as e:
+            logger.error(f"Model initialization error: {str(e)}")
+            return False
+    @spaces.GPU
     def detect_events(self, text, entity):
         if not text or not entity:
             return "Нет", "Invalid input"
         try:
+            # Initialize models if needed
+            if self.model is None:
+                if not self.initialize_models():
+                    return "Нет", "Model initialization failed"
+            device = "cuda" if torch.cuda.is_available() else "cpu"
             prompt = f"""<s>Analyze the following news about {entity}:
             Text: {text}
             Task: Identify the main event type and provide a brief summary.</s>"""
             inputs = self.tokenizer(prompt, return_tensors="pt", padding=True,
+                                  truncation=True, max_length=512).to(device)
             outputs = self.model.generate(**inputs, max_length=300, num_return_sequences=1)
             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             return event_type, response
         except Exception as e:
+            logger.error(f"Event detection error: {str(e)}")
             return "Нет", f"Error: {str(e)}"
+    @spaces.GPU
     def analyze_sentiment(self, text):
         try:
+            # Initialize models if needed
+            if self.finbert is None:
+                if not self.initialize_models():
+                    return "Neutral"
             results = []
             texts = [text[:512]]  # Truncate to avoid token length issues
         logger.error(f"Visualization error: {e}")
         return None, None
+@spaces.GPU
 def process_file(file_obj):
     try:
         logger.info("Starting to read Excel file...")
+        df = pd.read_excel(file_obj, sheet_name='Публикации')
+        logger.info(f"Successfully read Excel file. Shape: {df.shape}")
         detector = EventDetector()
         processed_rows = []
         total = len(df)
         for idx, row in df.iterrows():
             try:
                 text = str(row.get('Выдержки из текста', ''))
                 if not text.strip():
                     continue
                 entity = str(row.get('Объект', ''))
                 if not entity.strip():
                     continue
                 event_type, event_summary = detector.detect_events(text, entity)
                 sentiment = detector.analyze_sentiment(text)
                     'Текст': text
                 })
+                if idx % 5 == 0:
+                    logger.info(f"Processed {idx + 1}/{total} rows")
             except Exception as e:
                 logger.error(f"Error processing row {idx}: {str(e)}")
                 continue
         result_df = pd.DataFrame(processed_rows)
         logger.info(f"Processing complete. Final DataFrame shape: {result_df.shape}")
         return result_df
     except Exception as e:
 def create_interface():
     with gr.Blocks(theme=gr.themes.Soft()) as app:
+        gr.Markdown("# AI-анализ мониторинга новостей v.1.10")
         with gr.Row():
             file_input = gr.File(