pentarosarium committed on
Commit cd6115e · 1 Parent(s): 2bf1f83
Files changed (1)
  1. app.py +67 -47
app.py CHANGED
@@ -64,22 +64,29 @@ class ProcessControl:
 class EventDetector:
     def __init__(self):
         try:
+            # Initialize sentiment models with GPU support
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            logger.info(f"Initializing models on device: {device}")
+
             # Initialize sentiment models
             self.finbert = pipeline(
                 "sentiment-analysis",
                 model="ProsusAI/finbert",
+                device=device,
                 truncation=True,
                 max_length=512
             )
             self.roberta = pipeline(
                 "sentiment-analysis",
                 model="cardiffnlp/twitter-roberta-base-sentiment",
+                device=device,
                 truncation=True,
                 max_length=512
             )
             self.finbert_tone = pipeline(
                 "sentiment-analysis",
                 model="yiyanghkust/finbert-tone",
+                device=device,
                 truncation=True,
                 max_length=512
             )
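The hunk above adds a `device` argument to each `transformers.pipeline` call. A minimal standalone sketch of that pattern, assuming only that `torch` and `transformers` are installed; the integer device index is used here because older transformers releases accept only an index, not a string like `"cuda"`:

```python
# Sketch only: device selection for one of the sentiment pipelines from the diff above.
import torch
from transformers import pipeline

# -1 selects the CPU, 0 selects the first GPU.
device = 0 if torch.cuda.is_available() else -1

finbert = pipeline(
    "sentiment-analysis",
    model="ProsusAI/finbert",
    device=device,
    truncation=True,
    max_length=512,
)

# Illustrative call; the exact label and score depend on the model.
print(finbert("Quarterly revenue came in well above expectations.")[0])
```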
@@ -90,16 +97,14 @@ class EventDetector:
                 self.model_name,
                 legacy=True
             )
-            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
 
-            # Set device
-            self.device = "cuda" if torch.cuda.is_available() else "cpu"
-            self.model = self.model.to(self.device)
+            self.device = device
             self.initialized = True
-            st.success(f"Models initialized successfully on {self.device}")
+            logger.info(f"All models initialized successfully on {device}")
 
         except Exception as e:
-            st.error(f"Error in EventDetector initialization: {str(e)}")
+            logger.error(f"Error in EventDetector initialization: {str(e)}")
             raise
 
     def analyze_sentiment(self, text):
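This hunk chains `.to(device)` onto `from_pretrained` instead of moving the model in a separate step. A rough sketch of that loading-plus-generation flow, reusing the `generate` settings that `detect_events` applies later in this diff; `google/flan-t5-small` is only a stand-in, since the actual `self.model_name` is defined elsewhere in app.py and is not visible in this commit:

```python
# Sketch only: load a seq2seq model onto the chosen device and run a deterministic generate().
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "google/flan-t5-small"  # placeholder; app.py sets its own model_name

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

inputs = tokenizer(
    "Summarize: the company reported higher quarterly revenue and profit.",
    return_tensors="pt",
    truncation=True,
    max_length=512,
).to(device)

outputs = model.generate(**inputs, max_length=200, do_sample=False, no_repeat_ngram_size=3)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```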
@@ -140,7 +145,7 @@ class EventDetector:
                 return "Neutral"
 
         except Exception as e:
-            st.warning(f"Sentiment analysis error: {str(e)}")
+            logger.error(f"Sentiment analysis error: {str(e)}")
             return "Neutral"
 
     def detect_events(self, text, entity):
@@ -157,59 +162,73 @@ class EventDetector:
             # First check for keyword matches
             text_lower = text.lower()
             keywords = {
-                'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат'],
-                'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги'],
-                'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств']
+                'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат', 'показател'],
+                'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги', 'долг'],
+                'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств', 'банкрот']
             }
 
             # Check keywords first
+            detected_event = None
             for event_type, terms in keywords.items():
                 if any(term in text_lower for term in terms):
-                    # Prepare prompt for summary
-                    prompt = f"""<s>Summarize the following news about {entity}:
+                    detected_event = event_type
+                    break
+
+            if detected_event:
+                # Prepare prompt for summary
+                prompt = f"""<s>Summarize this {detected_event} news about {entity}:
 
 Text: {text}
 
-Task: Create a 2-3 sentence summary focusing on the main {event_type} event.
+Create a brief, factual summary focusing on the main points.
 
-Required format:
-Event: {event_type}
-Summary: [your summary here]</s>"""
-
-                    # Generate summary
-                    inputs = self.tokenizer(
-                        prompt,
-                        return_tensors="pt",
-                        padding=True,
-                        truncation=True,
-                        max_length=512
-                    ).to(self.device)
-
-                    outputs = self.model.generate(
-                        **inputs,
-                        max_length=200,
-                        num_return_sequences=1,
-                        do_sample=False,
-                        pad_token_id=self.tokenizer.pad_token_id,
-                        eos_token_id=self.tokenizer.eos_token_id
-                    )
-
-                    response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-                    # Extract summary
-                    if "Summary:" in response:
-                        summary = response.split("Summary:")[1].strip()
-                    else:
-                        summary = f"Обнаружено событие типа {event_type} для компании {entity}"
-
-                    return event_type, summary
+Format:
+Summary: [2-3 sentence summary]</s>"""
+
+                # Generate summary
+                inputs = self.tokenizer(
+                    prompt,
+                    return_tensors="pt",
+                    padding=True,
+                    truncation=True,
+                    max_length=512
+                ).to(self.device)
+
+                outputs = self.model.generate(
+                    **inputs,
+                    max_length=200,
+                    num_return_sequences=1,
+                    do_sample=False,
+                    pad_token_id=self.tokenizer.pad_token_id,
+                    eos_token_id=self.tokenizer.eos_token_id,
+                    no_repeat_ngram_size=3  # Prevent repetition
+                )
+
+                response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+                # Extract summary
+                if "Summary:" in response:
+                    summary = response.split("Summary:")[1].strip()
+                    # Clean up any remaining prompt artifacts
+                    summary = summary.replace('<s>', '').replace('</s>', '').strip()
+                else:
+                    # Create a structured summary based on event type
+                    if detected_event == 'Отчетность':
+                        summary = f"Компания {entity} опубликовала финансовые показатели."
+                    elif detected_event == 'РЦБ':
+                        summary = f"Обнаружена информация о ценных бумагах компании {entity}."
+                    elif detected_event == 'Суд':
+                        summary = f"Компания {entity} участвует в судебном разбирательстве."
+
+                return detected_event, summary
 
             # If no keywords matched
             return "Нет", "No significant event detected"
 
         except Exception as e:
-            st.warning(f"Event detection error: {str(e)}")
+            logger.error(f"Event detection error: {str(e)}")
             return "Нет", f"Error in event detection: {str(e)}"
+
     def cleanup(self):
         """Clean up GPU resources"""
         try:
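The rewritten `detect_events` now resolves the event type from the keyword map first and only then builds a prompt for the summary model. A small self-contained sketch of that pre-filter, with the keyword lists copied from the hunk above and an invented sample headline:

```python
# Sketch only: the keyword pre-filter from detect_events, factored out as a plain function.
def match_event_type(text: str) -> str:
    keywords = {
        'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат', 'показател'],
        'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги', 'долг'],
        'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств', 'банкрот'],
    }
    text_lower = text.lower()
    for event_type, terms in keywords.items():
        if any(term in text_lower for term in terms):
            return event_type  # first matching category wins
    return "Нет"

print(match_event_type("Компания раскрыла выручку и прибыль за третий квартал"))  # -> Отчетность
```

Because the dictionary is scanned in insertion order and the loop breaks on the first hit, a text that mentions both financial results and a lawsuit is classified as 'Отчетность'; that ordering is the tie-breaking rule implied by the loop.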
@@ -219,8 +238,9 @@ Summary: [your summary here]</s>"""
             self.finbert_tone = None
             torch.cuda.empty_cache()
             self.initialized = False
+            logger.info("Cleaned up GPU resources")
         except Exception as e:
-            logger.error(f"Error in cleanup: {e}")
+            logger.error(f"Error in cleanup: {str(e)}")
 
 def create_visualizations(df):
     if df is None or df.empty:
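The cleanup path drops the pipeline references and then empties the CUDA cache; the new lines only add logging around it. A minimal sketch of the underlying release pattern, runnable on CPU-only machines as well:

```python
# Sketch only: the release pattern used in cleanup().
import torch

big = torch.zeros(256, 256, device="cuda") if torch.cuda.is_available() else None
big = None  # drop the reference so the allocator can reuse the block
if torch.cuda.is_available():
    torch.cuda.empty_cache()  # hand cached, unused blocks back to the GPU driver
```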
@@ -360,7 +380,7 @@ def create_interface():
     control = ProcessControl()
 
     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        gr.Markdown("# AI-анализ мониторинга новостей v.1.22")
+        gr.Markdown("# AI-анализ мониторинга новостей v.1.23")
 
         with gr.Row():
             file_input = gr.File(
 