Spaces:

pentarosarium
/

gprocess

Sleeping

App Files Community

pentarosarium committed on Nov 25, 2024

Commit

0a6a508

1 Parent(s): fad358d

v.1.53

Browse files

Files changed (1) hide show

app.py +71 -76

app.py CHANGED Viewed

@@ -174,7 +174,7 @@ class EventDetector:
     @spaces.GPU(duration=20)
     def analyze_sentiment(self, text):
-        """Analyze sentiment of text (should be in English)"""
         try:
             if not text or not isinstance(text, str):
                 return "Neutral"
@@ -183,32 +183,46 @@ class EventDetector:
             if not text:
                 return "Neutral"
-            # Get predictions from all models
             finbert_result = self.finbert(text)[0]
             roberta_result = self.roberta(text)[0]
             finbert_tone_result = self.finbert_tone(text)[0]
-            # Map labels to standard format
             def map_sentiment(result):
                 label = result['label'].lower()
-                if label in ['positive', 'pos', 'positive tone']:
                     return "Positive"
-                elif label in ['negative', 'neg', 'negative tone']:
                     return "Negative"
-                return "Neutral"
-            # Get mapped sentiments
             sentiments = [
                 map_sentiment(finbert_result),
                 map_sentiment(roberta_result),
                 map_sentiment(finbert_tone_result)
             ]
-            # Use majority voting
-            sentiment_counts = pd.Series(sentiments).value_counts()
-            if sentiment_counts.iloc[0] >= 2:
-                return sentiment_counts.index[0]
             return "Neutral"
         except Exception as e:
@@ -313,79 +327,60 @@ class EventDetector:
     @spaces.GPU(duration=20)
     def detect_events(self, text, entity):
-        """Rest of the detect_events method remains the same"""
         if not text or not entity:
             return "Нет", "Invalid input"
         try:
-            text = str(text).strip()
-            entity = str(entity).strip()
-            if not text or not entity:
-                return "Нет", "Empty input"
-            # First check for keyword matches
-            text_lower = text.lower()
-            keywords = {
-                'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат', 'показател'],
-                'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги', 'долг'],
-                'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств', 'банкрот']
-            }
-            # Check keywords first
-            detected_event = None
-            for event_type, terms in keywords.items():
-                if any(term in text_lower for term in terms):
-                    detected_event = event_type
-                    break
-            if detected_event:
-                # Prepare prompt for summary
-                prompt = f"""<s>Summarize this {detected_event} news about {entity}:
-Text: {text}
-Create a brief, factual summary focusing on the main points.
-Format:
-Summary: [2-3 sentence summary]</s>"""
-                # Generate summary
-                inputs = self.tokenizer(
-                    prompt,
-                    return_tensors="pt",
-                    padding=True,
-                    truncation=True,
-                    max_length=512
-                ).to(self.device)
-                outputs = self.model.generate(
-                    **inputs,
-                    max_length=200,
-                    num_return_sequences=1,
-                    do_sample=False,
-                    pad_token_id=self.tokenizer.pad_token_id,
-                    eos_token_id=self.tokenizer.eos_token_id,
-                    no_repeat_ngram_size=3
-                )
-                response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-                # Extract summary
-                if "Summary:" in response:
-                    summary = response.split("Summary:")[1].strip()
-                    summary = summary.replace('<s>', '').replace('</s>', '').strip()
-                else:
-                    if detected_event == 'Отчетность':
-                        summary = f"Компания {entity} опубликовала финансовые показатели."
-                    elif detected_event == 'РЦБ':
-                        summary = f"Обнаружена информация о ценных бумагах компании {entity}."
-                    elif detected_event == 'Суд':
-                        summary = f"Компания {entity} участвует в судебном разбирательстве."
-                return detected_event, summary
-            return "Нет", "No significant event detected"
         except Exception as e:
             logger.error(f"Event detection error: {str(e)}")
@@ -658,7 +653,7 @@ def create_interface():
         # Create state for file data
         current_file = gr.State(None)
-        gr.Markdown("# AI-анализ мониторинга новостей v.1.51")
         with gr.Row():
             file_input = gr.File(

     @spaces.GPU(duration=20)
     def analyze_sentiment(self, text):
+        """Enhanced sentiment analysis with better negative detection"""
         try:
             if not text or not isinstance(text, str):
                 return "Neutral"
             if not text:
                 return "Neutral"
+            # Get predictions with confidence scores
             finbert_result = self.finbert(text)[0]
             roberta_result = self.roberta(text)[0]
             finbert_tone_result = self.finbert_tone(text)[0]
+            # Enhanced sentiment mapping with confidence thresholds
             def map_sentiment(result):
                 label = result['label'].lower()
+                score = result['score']
+                # Higher threshold for positive to reduce false positives
+                if label in ['positive', 'pos', 'positive tone'] and score > 0.75:
                     return "Positive"
+                # Lower threshold for negative to catch more cases
+                elif label in ['negative', 'neg', 'negative tone'] and score > 0.6:
                     return "Negative"
+                # Consider high-confidence neutral predictions
+                elif label == 'neutral' and score > 0.8:
+                    return "Neutral"
+                # Default to negative for uncertain cases in financial context
+                else:
+                    return "Negative" if score > 0.4 else "Neutral"
+            # Get mapped sentiments with confidence-based logic
             sentiments = [
                 map_sentiment(finbert_result),
                 map_sentiment(roberta_result),
                 map_sentiment(finbert_tone_result)
             ]
+            # Weighted voting - prioritize negative signals
+            if "Negative" in sentiments:
+                neg_count = sentiments.count("Negative")
+                if neg_count >= 1:  # More sensitive to negative sentiment
+                    return "Negative"
+            pos_count = sentiments.count("Positive")
+            if pos_count >= 2:  # Require stronger positive consensus
+                return "Positive"
             return "Neutral"
         except Exception as e:
     @spaces.GPU(duration=20)
     def detect_events(self, text, entity):
         if not text or not entity:
             return "Нет", "Invalid input"
         try:
+            # Improved prompt for MT5
+            prompt = f"""<s>Analyze this news about {entity}:
+    Text: {text}
+    Classify this news into ONE of these categories:
+    1. "Отчетность" if about: financial reports, revenue, profit, EBITDA, financial results, quarterly/annual reports
+    2. "Суд" if about: court cases, lawsuits, arbitration, bankruptcy, legal proceedings
+    3. "РЦБ" if about: bonds, securities, defaults, debt restructuring, coupon payments
+    4. "Нет" if none of the above
+    Provide classification and 2-3 sentence summary focusing on key facts.
+    Format response exactly as:
+    Category: [category name]
+    Summary: [brief factual summary]</s>"""
+            inputs = self.tokenizer(
+                prompt,
+                return_tensors="pt",
+                padding=True,
+                truncation=True,
+                max_length=512
+            ).to(self.device)
+            outputs = self.model.generate(
+                **inputs,
+                max_length=200,
+                num_return_sequences=1,
+                do_sample=False,
+                temperature=0.7,
+                top_p=0.9,
+                no_repeat_ngram_size=3
+            )
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            # Extract category and summary
+            if "Category:" in response and "Summary:" in response:
+                parts = response.split("Summary:")
+                category = parts[0].split("Category:")[1].strip()
+                summary = parts[1].strip()
+                # Validate category
+                valid_categories = {"Отчетность", "Суд", "РЦБ", "Нет"}
+                category = category if category in valid_categories else "Нет"
+                return category, summary
+            return "Нет", "Could not classify event"
         except Exception as e:
             logger.error(f"Event detection error: {str(e)}")
         # Create state for file data
         current_file = gr.State(None)
+        gr.Markdown("# AI-анализ мониторинга новостей v.1.53")
         with gr.Row():
             file_input = gr.File(