pentarosarium committed on
Commit cd6115e · 1 Parent(s): 2bf1f83
Files changed (1)
  1. app.py +67 -47
app.py CHANGED
@@ -64,22 +64,29 @@ class ProcessControl:
 class EventDetector:
     def __init__(self):
         try:
+            # Initialize sentiment models with GPU support
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            logger.info(f"Initializing models on device: {device}")
+
             # Initialize sentiment models
             self.finbert = pipeline(
                 "sentiment-analysis",
                 model="ProsusAI/finbert",
+                device=device,
                 truncation=True,
                 max_length=512
             )
             self.roberta = pipeline(
                 "sentiment-analysis",
                 model="cardiffnlp/twitter-roberta-base-sentiment",
+                device=device,
                 truncation=True,
                 max_length=512
             )
             self.finbert_tone = pipeline(
                 "sentiment-analysis",
                 model="yiyanghkust/finbert-tone",
+                device=device,
                 truncation=True,
                 max_length=512
             )
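The hunk above adds a `device` argument to each `transformers.pipeline` call. A minimal standalone sketch of that pattern, assuming only that `torch` and `transformers` are installed; the integer device index is used here because older transformers releases accept only an index, not a string like `"cuda"`:

```python
# Sketch only: device selection for one of the sentiment pipelines from the diff above.
import torch
from transformers import pipeline

# -1 selects the CPU, 0 selects the first GPU.
device = 0 if torch.cuda.is_available() else -1

finbert = pipeline(
    "sentiment-analysis",
    model="ProsusAI/finbert",
    device=device,
    truncation=True,
    max_length=512,
)

# Illustrative call; the exact label and score depend on the model.
print(finbert("Quarterly revenue came in well above expectations.")[0])
```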
@@ -90,16 +97,14 @@ class EventDetector:
                 self.model_name,
                 legacy=True
             )
-            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
 
-            # Set device
-            self.device = "cuda" if torch.cuda.is_available() else "cpu"
-            self.model = self.model.to(self.device)
+            self.device = device
             self.initialized = True
-            st.success(f"Models initialized successfully on {self.device}")
+            logger.info(f"All models initialized successfully on {device}")
 
         except Exception as e:
-            st.error(f"Error in EventDetector initialization: {str(e)}")
+            logger.error(f"Error in EventDetector initialization: {str(e)}")
             raise
 
     def analyze_sentiment(self, text):
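This hunk chains `.to(device)` onto `from_pretrained` instead of moving the model in a separate step. A rough sketch of that loading-plus-generation flow, reusing the `generate` settings that `detect_events` applies later in this diff; `google/flan-t5-small` is only a stand-in, since the actual `self.model_name` is defined elsewhere in app.py and is not visible in this commit:

```python
# Sketch only: load a seq2seq model onto the chosen device and run a deterministic generate().
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "google/flan-t5-small"  # placeholder; app.py sets its own model_name

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

inputs = tokenizer(
    "Summarize: the company reported higher quarterly revenue and profit.",
    return_tensors="pt",
    truncation=True,
    max_length=512,
).to(device)

outputs = model.generate(**inputs, max_length=200, do_sample=False, no_repeat_ngram_size=3)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```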
@@ -140,7 +145,7 @@ class EventDetector:
                 return "Neutral"
 
         except Exception as e:
-            st.warning(f"Sentiment analysis error: {str(e)}")
+            logger.error(f"Sentiment analysis error: {str(e)}")
             return "Neutral"
 
     def detect_events(self, text, entity):
@@ -157,59 +162,73 @@ class EventDetector:
             # First check for keyword matches
             text_lower = text.lower()
             keywords = {
-                'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат'],
-                'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги'],
-                'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств']
+                'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат', 'показател'],
+                'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги', 'долг'],
+                'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств', 'банкрот']
             }
 
             # Check keywords first
+            detected_event = None
             for event_type, terms in keywords.items():
                 if any(term in text_lower for term in terms):
-                    # Prepare prompt for summary
-                    prompt = f"""<s>Summarize the following news about {entity}:
+                    detected_event = event_type
+                    break
+
+            if detected_event:
+                # Prepare prompt for summary
+                prompt = f"""<s>Summarize this {detected_event} news about {entity}:
 
 Text: {text}
 
-Task: Create a 2-3 sentence summary focusing on the main {event_type} event.
+Create a brief, factual summary focusing on the main points.
 
-Required format:
-Event: {event_type}
-Summary: [your summary here]</s>"""
-
-                    # Generate summary
-                    inputs = self.tokenizer(
-                        prompt,
-                        return_tensors="pt",
-                        padding=True,
-                        truncation=True,
-                        max_length=512
-                    ).to(self.device)
-
-                    outputs = self.model.generate(
-                        **inputs,
-                        max_length=200,
-                        num_return_sequences=1,
-                        do_sample=False,
-                        pad_token_id=self.tokenizer.pad_token_id,
-                        eos_token_id=self.tokenizer.eos_token_id
-                    )
-
-                    response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-                    # Extract summary
-                    if "Summary:" in response:
-                        summary = response.split("Summary:")[1].strip()
-                    else:
-                        summary = f"Обнаружено событие типа {event_type} для компании {entity}"
-
-                    return event_type, summary
+Format:
+Summary: [2-3 sentence summary]</s>"""
+
+                # Generate summary
+                inputs = self.tokenizer(
+                    prompt,
+                    return_tensors="pt",
+                    padding=True,
+                    truncation=True,
+                    max_length=512
+                ).to(self.device)
+
+                outputs = self.model.generate(
+                    **inputs,
+                    max_length=200,
+                    num_return_sequences=1,
+                    do_sample=False,
+                    pad_token_id=self.tokenizer.pad_token_id,
+                    eos_token_id=self.tokenizer.eos_token_id,
+                    no_repeat_ngram_size=3  # Prevent repetition
+                )
+
+                response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+                # Extract summary
+                if "Summary:" in response:
+                    summary = response.split("Summary:")[1].strip()
+                    # Clean up any remaining prompt artifacts
+                    summary = summary.replace('<s>', '').replace('</s>', '').strip()
+                else:
+                    # Create a structured summary based on event type
+                    if detected_event == 'Отчетность':
+                        summary = f"Компания {entity} опубликовала финансовые показатели."
+                    elif detected_event == 'РЦБ':
+                        summary = f"Обнаружена информация о ценных бумагах компании {entity}."
+                    elif detected_event == 'Суд':
+                        summary = f"Компания {entity} участвует в судебном разбирательстве."
+
+                return detected_event, summary
 
             # If no keywords matched
             return "Нет", "No significant event detected"
 
         except Exception as e:
-            st.warning(f"Event detection error: {str(e)}")
+            logger.error(f"Event detection error: {str(e)}")
             return "Нет", f"Error in event detection: {str(e)}"
+
     def cleanup(self):
         """Clean up GPU resources"""
         try:
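The rewritten `detect_events` now resolves the event type from the keyword map first and only then builds a prompt for the summary model. A small self-contained sketch of that pre-filter, with the keyword lists copied from the hunk above and an invented sample headline:

```python
# Sketch only: the keyword pre-filter from detect_events, factored out as a plain function.
def match_event_type(text: str) -> str:
    keywords = {
        'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат', 'показател'],
        'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги', 'долг'],
        'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств', 'банкрот'],
    }
    text_lower = text.lower()
    for event_type, terms in keywords.items():
        if any(term in text_lower for term in terms):
            return event_type  # first matching category wins
    return "Нет"

print(match_event_type("Компания раскрыла выручку и прибыль за третий квартал"))  # -> Отчетность
```

Because the dictionary is scanned in insertion order and the loop breaks on the first hit, a text that mentions both financial results and a lawsuit is classified as 'Отчетность'; that ordering is the tie-breaking rule implied by the loop.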
@@ -219,8 +238,9 @@ Summary: [your summary here]</s>"""
             self.finbert_tone = None
             torch.cuda.empty_cache()
             self.initialized = False
+            logger.info("Cleaned up GPU resources")
         except Exception as e:
-            logger.error(f"Error in cleanup: {e}")
+            logger.error(f"Error in cleanup: {str(e)}")
 
 def create_visualizations(df):
     if df is None or df.empty:
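The cleanup path drops the pipeline references and then empties the CUDA cache; the new lines only add logging around it. A minimal sketch of the underlying release pattern, runnable on CPU-only machines as well:

```python
# Sketch only: the release pattern used in cleanup().
import torch

big = torch.zeros(256, 256, device="cuda") if torch.cuda.is_available() else None
big = None  # drop the reference so the allocator can reuse the block
if torch.cuda.is_available():
    torch.cuda.empty_cache()  # hand cached, unused blocks back to the GPU driver
```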
@@ -360,7 +380,7 @@ def create_interface():
     control = ProcessControl()
 
     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        gr.Markdown("# AI-анализ мониторинга новостей v.1.22")
+        gr.Markdown("# AI-анализ мониторинга новостей v.1.23")
 
         with gr.Row():
             file_input = gr.File(
 