pentarosarium committed on
Commit 2bf1f83 · 1 Parent(s): 3390451
Files changed (1)
  1. app.py +116 -179
app.py CHANGED
@@ -64,215 +64,152 @@ class ProcessControl:
 class EventDetector:
     def __init__(self):
         try:
             self.model_name = "google/mt5-small"
             self.tokenizer = AutoTokenizer.from_pretrained(
                 self.model_name,
                 legacy=True
             )
-            self.model = None
-            self.finbert = None
-            self.roberta = None
-            self.finbert_tone = None
-            self.last_gpu_use = 0
-            self.initialized = False
-            logger.info("EventDetector initialized successfully")
-        except Exception as e:
-            logger.error(f"Error in EventDetector initialization: {e}")
-            raise
-
-    def get_sentiment_label(self, result):
-        """
-        Convert model output to standardized sentiment label
-        """
-        try:
-            # Handle different model output formats
-            if isinstance(result, dict):
-                label = result.get('label', '').lower()
-            else:
-                return "Neutral"
-
-            # Map different model outputs to standard labels
-            if label in ['positive', 'pos', 'positive tone']:
-                return "Positive"
-            elif label in ['negative', 'neg', 'negative tone']:
-                return "Negative"
-            else:
-                return "Neutral"
-
-        except Exception as e:
-            logger.error(f"Error in get_sentiment_label: {e}")
-            return "Neutral"
-
-    @spaces.GPU(duration=30)
-    def initialize_models(self):
-        if self.initialized:
-            return True
-
-        try:
-            current_time = time.time()
-            if current_time - self.last_gpu_use < 2:
-                time.sleep(2)
-
-            device = "cuda" if torch.cuda.is_available() else "cpu"
-            logger.info(f"Initializing models on device: {device}")
-
-            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
-
-            # Initialize sentiment models with proper error handling
-            try:
-                self.finbert = pipeline(
-                    "sentiment-analysis",
-                    model="ProsusAI/finbert",
-                    device=device,
-                    truncation=True,
-                    max_length=512
-                )
-            except Exception as e:
-                logger.error(f"Error initializing finbert: {e}")
-                raise
-
-            try:
-                self.roberta = pipeline(
-                    "sentiment-analysis",
-                    model="cardiffnlp/twitter-roberta-base-sentiment",
-                    device=device,
-                    truncation=True,
-                    max_length=512
-                )
-            except Exception as e:
-                logger.error(f"Error initializing roberta: {e}")
-                raise
-
-            try:
-                self.finbert_tone = pipeline(
-                    "sentiment-analysis",
-                    model="yiyanghkust/finbert-tone",
-                    device=device,
-                    truncation=True,
-                    max_length=512
-                )
-            except Exception as e:
-                logger.error(f"Error initializing finbert_tone: {e}")
-                raise

-            self.last_gpu_use = time.time()
             self.initialized = True
-            logger.info("All models initialized successfully")
-            return True

         except Exception as e:
-            self.initialized = False
-            logger.error(f"Model initialization error: {str(e)}")
-            # Clean up any partially initialized models
-            self.cleanup()
             raise

-    @spaces.GPU(duration=20)
     def analyze_sentiment(self, text):
         try:
-            if not self.initialized:
-                if not self.initialize_models():
-                    return "Neutral"
-
-            current_time = time.time()
-            if current_time - self.last_gpu_use < 2:
-                time.sleep(2)

-            truncated_text = text[:500]
-            results = []

-            try:
-                inputs = [truncated_text]
-                sentiment_results = []
-
-                # Process each model separately with delay
-                if self.finbert:
-                    finbert_result = self.finbert(inputs, truncation=True, max_length=512)[0]
-                    results.append(self.get_sentiment_label(finbert_result))
-                    time.sleep(0.5)
-
-                if self.roberta:
-                    roberta_result = self.roberta(inputs, truncation=True, max_length=512)[0]
-                    results.append(self.get_sentiment_label(roberta_result))
-                    time.sleep(0.5)
-
-                if self.finbert_tone:
-                    finbert_tone_result = self.finbert_tone(inputs, truncation=True, max_length=512)[0]
-                    results.append(self.get_sentiment_label(finbert_tone_result))
-
-                # Get majority vote
-                if results:
-                    sentiment_counts = pd.Series(results).value_counts()
-                    final_sentiment = sentiment_counts.index[0] if sentiment_counts.iloc[0] >= 2 else "Neutral"
-                else:
-                    final_sentiment = "Neutral"
-
-                self.last_gpu_use = time.time()
-                return final_sentiment
-
-            except Exception as e:
-                logger.error(f"Model inference error: {e}")
                 return "Neutral"

         except Exception as e:
-            logger.error(f"Sentiment analysis error: {e}")
             return "Neutral"

-    @spaces.GPU(duration=20)
     def detect_events(self, text, entity):
         if not text or not entity:
             return "Нет", "Invalid input"

         try:
-            if not self.initialized:
-                if not self.initialize_models():
-                    return "Нет", "Model initialization failed"
-
-            current_time = time.time()
-            if current_time - self.last_gpu_use < 2:
-                time.sleep(2)
-
-            text = text[:500] # Truncate text

-            prompt = f"""<s>Analyze the following news about {entity}:
-            Text: {text}
-            Task: Identify the main event type and provide a brief summary.</s>"""

-            device = self.model.device
-            inputs = self.tokenizer(
-                prompt,
-                return_tensors="pt",
-                padding=True,
-                truncation=True,
-                max_length=512
-            ).to(device)

-            outputs = self.model.generate(
-                **inputs,
-                max_length=300,
-                num_return_sequences=1,
-                pad_token_id=self.tokenizer.pad_token_id,
-                eos_token_id=self.tokenizer.eos_token_id
-            )
-
-            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

-            # Event classification
-            event_type = "Нет"
-            if any(term in text.lower() for term in ['отчет', 'выручка', 'прибыль', 'ebitda']):
-                event_type = "Отчетность"
-            elif any(term in text.lower() for term in ['облигаци', 'купон', 'дефолт']):
-                event_type = "РЦБ"
-            elif any(term in text.lower() for term in ['суд', 'иск', 'арбитраж']):
-                event_type = "Суд"
-
-            self.last_gpu_use = time.time()
-            return event_type, response

         except Exception as e:
-            logger.error(f"Event detection error: {str(e)}")
-            return "Нет", f"Error: {str(e)}"
-
     def cleanup(self):
         """Clean up GPU resources"""
         try:
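In the version being removed, every GPU entry point was wrapped in a ZeroGPU decorator (@spaces.GPU(duration=...)) and the class throttled itself through self.last_gpu_use, sleeping two seconds whenever the previous GPU call was less than two seconds old. A minimal, standalone sketch of that cooldown pattern (class and method names here are illustrative, not taken from app.py):

import time

class GpuCooldown:
    """Re-statement of the throttling the removed code applied around GPU calls."""

    def __init__(self, min_interval: float = 2.0):
        self.min_interval = min_interval
        self.last_use = 0.0

    def wait_if_needed(self):
        # Mirror the removed logic: sleep a fixed two seconds if the last call was recent
        if time.time() - self.last_use < self.min_interval:
            time.sleep(self.min_interval)

    def mark_used(self):
        self.last_use = time.time()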
@@ -423,7 +360,7 @@ def create_interface():
     control = ProcessControl()

     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        gr.Markdown("# AI-анализ мониторинга новостей v.1.21+")

         with gr.Row():
             file_input = gr.File(
 
@@ -64,215 +64,152 @@ class ProcessControl:
 class EventDetector:
     def __init__(self):
         try:
+            # Initialize sentiment models
+            self.finbert = pipeline(
+                "sentiment-analysis",
+                model="ProsusAI/finbert",
+                truncation=True,
+                max_length=512
+            )
+            self.roberta = pipeline(
+                "sentiment-analysis",
+                model="cardiffnlp/twitter-roberta-base-sentiment",
+                truncation=True,
+                max_length=512
+            )
+            self.finbert_tone = pipeline(
+                "sentiment-analysis",
+                model="yiyanghkust/finbert-tone",
+                truncation=True,
+                max_length=512
+            )
+
+            # Initialize MT5 model for event detection
             self.model_name = "google/mt5-small"
             self.tokenizer = AutoTokenizer.from_pretrained(
                 self.model_name,
                 legacy=True
             )
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
+
+            # Set device
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+            self.model = self.model.to(self.device)
             self.initialized = True
+            st.success(f"Models initialized successfully on {self.device}")

         except Exception as e:
+            st.error(f"Error in EventDetector initialization: {str(e)}")
             raise
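Note that in this revision the three sentiment pipelines are built without a device argument (the old code passed device=device), so they default to CPU; only the mT5 model is moved to self.device. If GPU inference is also wanted for the classifiers, one option is to pass a device index to pipeline(); a minimal sketch under that assumption (device_index is an illustrative addition, not part of this commit):

import torch
from transformers import pipeline

# -1 keeps the pipeline on CPU, 0 selects the first CUDA device
device_index = 0 if torch.cuda.is_available() else -1

finbert = pipeline(
    "sentiment-analysis",
    model="ProsusAI/finbert",
    device=device_index,  # illustrative; the committed code omits this argument
    truncation=True,
    max_length=512,
)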
     def analyze_sentiment(self, text):
         try:
+            if not text or not isinstance(text, str):
+                return "Neutral"
+
+            text = text.strip()
+            if not text:
+                return "Neutral"

+            # Get predictions from all models
+            finbert_result = self.finbert(text)[0]
+            roberta_result = self.roberta(text)[0]
+            finbert_tone_result = self.finbert_tone(text)[0]

+            # Map labels to standard format
+            def map_sentiment(result):
+                label = result['label'].lower()
+                if label in ['positive', 'pos', 'positive tone']:
+                    return "Positive"
+                elif label in ['negative', 'neg', 'negative tone']:
+                    return "Negative"
                 return "Neutral"

+            # Get mapped sentiments
+            sentiments = [
+                map_sentiment(finbert_result),
+                map_sentiment(roberta_result),
+                map_sentiment(finbert_tone_result)
+            ]
+
+            # Use majority voting
+            sentiment_counts = pd.Series(sentiments).value_counts()
+            if sentiment_counts.iloc[0] >= 2:
+                return sentiment_counts.index[0]
+
+            return "Neutral"
+
         except Exception as e:
+            st.warning(f"Sentiment analysis error: {str(e)}")
             return "Neutral"
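The rewritten analyze_sentiment is a plain ensemble: each of the three classifiers casts one vote, and a label is returned only if at least two agree, otherwise the result falls back to "Neutral". A minimal sketch of just the voting step, runnable on its own with pandas (the label lists are made-up inputs for illustration):

import pandas as pd

def majority_vote(labels, quorum=2, fallback="Neutral"):
    """Return the most frequent label if it reaches the quorum, else the fallback."""
    counts = pd.Series(labels).value_counts()
    return counts.index[0] if counts.iloc[0] >= quorum else fallback

print(majority_vote(["Positive", "Positive", "Neutral"]))  # Positive (2 of 3 agree)
print(majority_vote(["Positive", "Negative", "Neutral"]))  # Neutral (no label reaches 2 votes)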
     def detect_events(self, text, entity):
         if not text or not entity:
             return "Нет", "Invalid input"

         try:
+            text = str(text).strip()
+            entity = str(entity).strip()

+            if not text or not entity:
+                return "Нет", "Empty input"

+            # First check for keyword matches
+            text_lower = text.lower()
+            keywords = {
+                'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат'],
+                'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги'],
+                'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств']
+            }

+            # Check keywords first
+            for event_type, terms in keywords.items():
+                if any(term in text_lower for term in terms):
+                    # Prepare prompt for summary
+                    prompt = f"""<s>Summarize the following news about {entity}:
+
+                    Text: {text}
+
+                    Task: Create a 2-3 sentence summary focusing on the main {event_type} event.
+
+                    Required format:
+                    Event: {event_type}
+                    Summary: [your summary here]</s>"""
+
+                    # Generate summary
+                    inputs = self.tokenizer(
+                        prompt,
+                        return_tensors="pt",
+                        padding=True,
+                        truncation=True,
+                        max_length=512
+                    ).to(self.device)
+
+                    outputs = self.model.generate(
+                        **inputs,
+                        max_length=200,
+                        num_return_sequences=1,
+                        do_sample=False,
+                        pad_token_id=self.tokenizer.pad_token_id,
+                        eos_token_id=self.tokenizer.eos_token_id
+                    )
+
+                    response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+                    # Extract summary
+                    if "Summary:" in response:
+                        summary = response.split("Summary:")[1].strip()
+                    else:
+                        summary = f"Обнаружено событие типа {event_type} для компании {entity}"
+
+                    return event_type, summary

+            # If no keywords matched
+            return "Нет", "No significant event detected"

         except Exception as e:
+            st.warning(f"Event detection error: {str(e)}")
+            return "Нет", f"Error in event detection: {str(e)}"
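Event detection is now keyword-first: the text is scanned against three keyword groups (Отчетность / financial reporting, РЦБ / securities, Суд / legal), and only when a group matches is mT5 asked for a short summary; with no match the method returns "Нет" without touching the model. Because the groups are checked in dictionary order, a text that mentions both reporting and legal terms is labelled with the first match. A minimal sketch of just the keyword stage, with a hypothetical helper name and sample headlines for illustration:

# Hypothetical standalone version of the keyword stage inside detect_events.
KEYWORDS = {
    'Отчетность': ['отчет', 'выручка', 'прибыль', 'ebitda', 'финансов', 'результат'],
    'РЦБ': ['облигаци', 'купон', 'дефолт', 'реструктуризац', 'ценные бумаги'],
    'Суд': ['суд', 'иск', 'арбитраж', 'разбирательств'],
}

def classify_event(text: str) -> str:
    """Return the first matching event type, or 'Нет' when no keyword group matches."""
    text_lower = str(text).lower()
    for event_type, terms in KEYWORDS.items():
        if any(term in text_lower for term in terms):
            return event_type
    return "Нет"

print(classify_event("Компания опубликовала отчет: выручка выросла"))  # Отчетность
print(classify_event("Завод запустил новую линию"))                    # Нет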
     def cleanup(self):
         """Clean up GPU resources"""
         try:

@@ -423,7 +360,7 @@ def create_interface():
     control = ProcessControl()

     with gr.Blocks(theme=gr.themes.Soft()) as app:
+        gr.Markdown("# AI-анализ мониторинга новостей v.1.22")

         with gr.Row():
             file_input = gr.File(