pentarosarium commited on
Commit
8934356
·
1 Parent(s): b4a7d5f

back 2 async fix

Browse files
Files changed (1) hide show
  1. app.py +815 -180
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  import pandas as pd
3
  import torch
4
  from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, AutoModel
@@ -7,15 +8,94 @@ import logging
7
  import io
8
  from rapidfuzz import fuzz
9
  import time
 
 
 
 
 
 
 
 
 
10
  from typing import List, Set, Tuple
11
 
12
- # Configure logging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  logging.basicConfig(level=logging.INFO)
14
  logger = logging.getLogger(__name__)
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  class ProcessControl:
17
  def __init__(self):
18
  self.stop_requested = False
 
19
 
20
  def request_stop(self):
21
  self.stop_requested = True
@@ -25,121 +105,190 @@ class ProcessControl:
25
 
26
  def reset(self):
27
  self.stop_requested = False
 
 
 
 
 
28
 
29
  class EventDetector:
30
  def __init__(self):
31
- """Initialize with device-agnostic setup"""
32
  try:
33
- # Check CUDA availability but default to CPU
34
- self.device = "cpu"
35
- if torch.cuda.is_available():
36
- try:
37
- # Test CUDA availability
38
- torch.tensor([1.0]).cuda()
39
- self.device = "cuda"
40
- except RuntimeError:
41
- logger.warning("CUDA initialization failed, falling back to CPU")
42
-
43
- logger.info(f"Initializing models on device: {self.device}")
44
-
45
- # Initialize models
46
- self.initialize_models()
47
-
48
- # Initialize transformer for text analysis
49
- self.tokenizer_cluster = AutoTokenizer.from_pretrained(
50
- 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'
51
- )
52
- self.model_cluster = AutoModel.from_pretrained(
53
- 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'
54
- ).to(self.device)
55
 
 
56
  self.initialized = True
57
- logger.info("Models initialized successfully")
58
 
59
  except Exception as e:
60
  logger.error(f"Error in EventDetector initialization: {str(e)}")
61
  raise
62
 
63
- def initialize_models(self):
64
- """Initialize models with CPU fallback"""
65
- try:
66
- # Initialize translation model
67
- self.translator = pipeline(
68
- "translation",
69
- model="Helsinki-NLP/opus-mt-ru-en",
70
- device=self.device
71
- )
72
-
73
- # Initialize sentiment model
74
- self.sentiment = pipeline(
75
- "sentiment-analysis",
76
- model="cardiffnlp/twitter-roberta-base-sentiment",
77
- device=self.device
78
- )
79
 
80
- # Initialize MT5 model
81
- self.model_name = "google/mt5-small"
82
- self.tokenizer = AutoTokenizer.from_pretrained(
83
- self.model_name,
84
- legacy=True
85
- )
86
- self.model = AutoModelForSeq2SeqLM.from_pretrained(
87
- self.model_name
88
- ).to(self.device)
89
-
90
- logger.info("Models loaded successfully")
91
 
92
- except Exception as e:
93
- logger.error(f"Model initialization error: {str(e)}")
94
- raise
 
 
95
 
96
- def process_text(self, text: str, entity: str) -> dict:
97
- """Process text with simplified analysis"""
98
  try:
99
- # Translate text
100
- translated = self._translate_text(text)
101
 
102
- # Analyze sentiment
103
- sentiment = self.analyze_sentiment(translated)
 
104
 
105
- # Detect events
106
- event_type, summary = self.detect_events(text, entity)
107
 
108
- return {
109
- 'translated_text': translated,
110
- 'sentiment': sentiment,
111
- 'event_type': event_type,
112
- 'event_summary': summary
113
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  except Exception as e:
116
- logger.error(f"Text processing error: {str(e)}")
117
- return {
118
- 'translated_text': '',
119
- 'sentiment': 'Neutral',
120
- 'event_type': 'Нет',
121
- 'event_summary': str(e)
122
- }
 
 
 
 
 
 
 
 
 
 
 
123
 
124
- def _translate_text(self, text: str) -> str:
125
- """Translate text with proper chunking"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  try:
127
- if not isinstance(text, str) or not text.strip():
 
 
 
 
128
  return ""
129
 
130
- # Split into smaller chunks to avoid memory issues
131
  max_length = 450
132
  chunks = [text[i:i + max_length] for i in range(0, len(text), max_length)]
133
-
134
  translated_chunks = []
 
135
  for chunk in chunks:
136
- try:
137
- result = self.translator(chunk, max_length=512)[0]['translation_text']
138
- translated_chunks.append(result)
139
- time.sleep(0.1) # Rate limiting
140
- except Exception as e:
141
- logger.error(f"Chunk translation error: {str(e)}")
142
- translated_chunks.append(chunk)
143
 
144
  return " ".join(translated_chunks)
145
 
@@ -147,35 +296,179 @@ class EventDetector:
147
  logger.error(f"Translation error: {str(e)}")
148
  return text
149
 
150
- def analyze_sentiment(self, text: str) -> str:
151
- """Simple sentiment analysis"""
 
152
  try:
153
- if not isinstance(text, str) or not text.strip():
 
 
 
 
154
  return "Neutral"
155
 
156
- result = self.sentiment(text[:512])[0] # Limit text length
157
- label = result['label'].lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
- if 'positive' in label:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  return "Positive"
161
- elif 'negative' in label:
162
- return "Negative"
163
  return "Neutral"
164
 
165
  except Exception as e:
166
  logger.error(f"Sentiment analysis error: {str(e)}")
167
  return "Neutral"
168
 
169
- def detect_events(self, text: str, entity: str) -> tuple:
170
- """Basic event detection"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  try:
172
- if not text or not entity:
173
- return "Нет", ""
174
 
175
- # Create prompt
176
- prompt = f"<s>News about {entity}: {text[:500]}</s>" # Limit text length
177
 
178
- # Tokenize
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  inputs = self.tokenizer(
180
  prompt,
181
  return_tensors="pt",
@@ -184,50 +477,55 @@ class EventDetector:
184
  max_length=512
185
  ).to(self.device)
186
 
187
- # Generate response
188
  outputs = self.model.generate(
189
  **inputs,
190
- max_length=100,
191
  num_return_sequences=1,
192
- do_sample=False
 
 
 
193
  )
194
 
195
  response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
196
 
197
- # Simple classification
198
- if any(word in response.lower() for word in ['financial', 'revenue', 'profit']):
199
- return "Отчетность", "Financial report detected"
200
- elif any(word in response.lower() for word in ['court', 'lawsuit']):
201
- return "Суд", "Legal proceedings detected"
202
- elif any(word in response.lower() for word in ['bonds', 'securities']):
203
- return "РЦБ", "Securities-related news detected"
204
-
205
- return "Нет", "No specific event detected"
 
 
 
 
206
 
207
  except Exception as e:
208
  logger.error(f"Event detection error: {str(e)}")
209
- return "Нет", str(e)
210
 
211
  def cleanup(self):
212
- """Clean up resources"""
213
  try:
214
  self.model = None
215
  self.translator = None
216
- self.sentiment = None
217
- if torch.cuda.is_available():
218
- torch.cuda.empty_cache()
 
219
  self.initialized = False
220
- logger.info("Resources cleaned up")
221
  except Exception as e:
222
- logger.error(f"Cleanup error: {str(e)}")
223
 
224
  def create_visualizations(df):
225
- """Create basic visualizations"""
226
  if df is None or df.empty:
227
  return None, None
228
 
229
  try:
230
- # Sentiment distribution
231
  sentiments = df['Sentiment'].value_counts()
232
  fig_sentiment = go.Figure(data=[go.Pie(
233
  labels=sentiments.index,
@@ -236,7 +534,6 @@ def create_visualizations(df):
236
  )])
237
  fig_sentiment.update_layout(title="Распределение тональности")
238
 
239
- # Event distribution
240
  events = df['Event_Type'].value_counts()
241
  fig_events = go.Figure(data=[go.Bar(
242
  x=events.index,
@@ -248,92 +545,430 @@ def create_visualizations(df):
248
  return fig_sentiment, fig_events
249
 
250
  except Exception as e:
251
- logger.error(f"Visualization error: {str(e)}")
252
  return None, None
 
253
 
254
- def process_file(file, progress=gr.Progress()):
255
- """Process file with progress updates"""
256
  try:
257
- if file is None:
258
- return None, None, None, "Пожалуйста, загрузите файл"
259
-
260
- # Read Excel file
261
- df = pd.read_excel(file.name)
262
 
263
- # Initialize detector
264
- detector = EventDetector()
 
 
265
 
 
266
  processed_rows = []
267
  total = len(df)
268
 
269
- # Process rows with progress updates
270
- for idx, row in enumerate(progress.tqdm(df.iterrows(), total=total)):
271
- text = str(row[1].get('Выдержки из текста', '')).strip()
272
- entity = str(row[1].get('Объект', '')).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
- if text and entity:
275
- results = detector.process_text(text, entity)
276
- processed_rows.append({
277
- 'Объект': entity,
278
- 'Заголовок': str(row[1].get('Заголовок', '')),
279
- 'Sentiment': results['sentiment'],
280
- 'Event_Type': results['event_type'],
281
- 'Event_Summary': results['event_summary'],
282
- 'Текст': text[:1000]
283
- })
284
 
285
- # Create final dataframe
286
- final_df = pd.DataFrame(processed_rows)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
- # Generate visualizations
289
- fig_sentiment, fig_events = create_visualizations(final_df)
290
 
291
- return final_df, fig_sentiment, fig_events, "Обработка завершена!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
 
293
- except Exception as e:
294
- error_msg = f"Ошибка анализа: {str(e)}"
295
- logger.error(error_msg)
296
- return None, None, None, error_msg
297
 
298
- finally:
299
- if 'detector' in locals():
300
- detector.cleanup()
 
 
 
 
 
 
 
 
 
 
 
 
 
301
 
302
  def create_interface():
303
- """Create Gradio interface"""
304
- with gr.Blocks() as app:
305
- gr.Markdown("# AI-анализ мониторинга новостей v.2.0")
 
 
 
 
306
 
307
  with gr.Row():
308
  file_input = gr.File(
309
  label="Загрузите Excel файл",
310
- file_types=[".xlsx"]
 
311
  )
312
 
313
- progress = gr.Textbox(
314
- label="Статус обработки",
315
- value="Ожидание файла..."
316
- )
 
 
 
 
 
 
 
 
 
317
 
318
- stats = gr.DataFrame(label="Результаты анализа")
 
 
 
 
 
 
 
 
 
 
 
 
319
 
320
  with gr.Row():
321
- sentiment_plot = gr.Plot(label="Распределение тональности")
322
- events_plot = gr.Plot(label="Распределение событий")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
- analyze_btn = gr.Button("Начать анализ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
 
 
326
  analyze_btn.click(
327
- fn=process_file,
328
  inputs=[file_input],
329
- outputs=[stats, sentiment_plot, events_plot, progress]
 
 
 
 
 
 
 
330
  )
331
-
332
  return app
333
 
334
  if __name__ == "__main__":
335
  app = create_interface()
336
- app.launch(
337
- server_name="0.0.0.0",
338
- share=False
339
- )
 
1
  import gradio as gr
2
+ import spaces
3
  import pandas as pd
4
  import torch
5
  from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, AutoModel
 
8
  import io
9
  from rapidfuzz import fuzz
10
  import time
11
+ import os
12
+ groq_key = os.environ['groq_key']
13
+ from langchain_openai import ChatOpenAI
14
+ from langchain.prompts import PromptTemplate
15
+ from openpyxl import load_workbook
16
+ from openpyxl.utils.dataframe import dataframe_to_rows
17
+ import torch.nn.functional as F
18
+ import numpy as np
19
+ import logging
20
  from typing import List, Set, Tuple
21
 
22
+ def fuzzy_deduplicate(df, column, threshold=55):
23
+ """Deduplicate rows based on fuzzy matching of text content"""
24
+ seen_texts = []
25
+ indices_to_keep = []
26
+
27
+ for i, text in enumerate(df[column]):
28
+ if pd.isna(text):
29
+ indices_to_keep.append(i)
30
+ continue
31
+
32
+ text = str(text)
33
+ if not seen_texts or all(fuzz.ratio(text, seen) < threshold for seen in seen_texts):
34
+ seen_texts.append(text)
35
+ indices_to_keep.append(i)
36
+
37
+ return df.iloc[indices_to_keep]
38
+
39
  logging.basicConfig(level=logging.INFO)
40
  logger = logging.getLogger(__name__)
41
 
42
+
43
+ class GPUTaskManager:
44
+ def __init__(self, max_retries=3, retry_delay=30, cleanup_callback=None):
45
+ self.max_retries = max_retries
46
+ self.retry_delay = retry_delay
47
+ self.cleanup_callback = cleanup_callback
48
+
49
+ async def run_with_retry(self, task_func, *args, **kwargs):
50
+ """Execute a GPU task with retry logic"""
51
+ for attempt in range(self.max_retries):
52
+ try:
53
+ return await task_func(*args, **kwargs)
54
+ except Exception as e:
55
+ if "GPU task aborted" in str(e) or "GPU quota" in str(e):
56
+ if attempt < self.max_retries - 1:
57
+ if self.cleanup_callback:
58
+ self.cleanup_callback()
59
+ torch.cuda.empty_cache()
60
+ await asyncio.sleep(self.retry_delay)
61
+ continue
62
+ raise
63
+
64
+ @staticmethod
65
+ def batch_process(items, batch_size=3):
66
+ """Split items into smaller batches"""
67
+ return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]
68
+
69
+ @staticmethod
70
+ def is_gpu_error(error):
71
+ """Check if an error is GPU-related"""
72
+ error_msg = str(error).lower()
73
+ return any(msg in error_msg for msg in [
74
+ "gpu task aborted",
75
+ "gpu quota",
76
+ "cuda out of memory",
77
+ "device-side assert"
78
+ ])
79
+
80
+
81
+
82
+ class ProcessControl:
83
+ def __init__(self):
84
+ self.stop_requested = False
85
+
86
+ def request_stop(self):
87
+ self.stop_requested = True
88
+
89
+ def should_stop(self):
90
+ return self.stop_requested
91
+
92
+ def reset(self):
93
+ self.stop_requested = False
94
+
95
  class ProcessControl:
96
  def __init__(self):
97
  self.stop_requested = False
98
+ self.error = None
99
 
100
  def request_stop(self):
101
  self.stop_requested = True
 
105
 
106
  def reset(self):
107
  self.stop_requested = False
108
+ self.error = None
109
+
110
+ def set_error(self, error):
111
+ self.error = error
112
+ self.stop_requested = True
113
 
114
  class EventDetector:
115
  def __init__(self):
 
116
  try:
117
+ device = "cuda" if torch.cuda.is_available() else "cpu"
118
+ logger.info(f"Initializing models on device: {device}")
119
+
120
+ # Initialize all models
121
+ self.initialize_models(device)
122
+
123
+ # Initialize transformer for declusterization
124
+ self.tokenizer_cluster = AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
125
+ self.model_cluster = AutoModel.from_pretrained('sentence-transformers/paraphrase-multilingual-mpnet-base-v2').to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
+ self.device = device
128
  self.initialized = True
129
+ logger.info("All models initialized successfully")
130
 
131
  except Exception as e:
132
  logger.error(f"Error in EventDetector initialization: {str(e)}")
133
  raise
134
 
135
+ def mean_pooling(self, model_output, attention_mask):
136
+ token_embeddings = model_output[0]
137
+ input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
138
+ return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
+ def encode_text(self, text):
141
+ if pd.isna(text):
142
+ text = ""
143
+ text = str(text)
 
 
 
 
 
 
 
144
 
145
+ encoded_input = self.tokenizer_cluster(text, padding=True, truncation=True, max_length=512, return_tensors='pt').to(self.device)
146
+ with torch.no_grad():
147
+ model_output = self.model_cluster(**encoded_input)
148
+ sentence_embeddings = self.mean_pooling(model_output, encoded_input['attention_mask'])
149
+ return torch.nn.functional.normalize(sentence_embeddings[0], p=2, dim=0)
150
 
151
+ @spaces.GPU(duration=20)
152
+ def decluster_texts(self, df, text_column, similarity_threshold=0.75, time_threshold=24):
153
  try:
154
+ if df.empty:
155
+ return df
156
 
157
+ # Sort by datetime if available
158
+ if 'datetime' in df.columns:
159
+ df = df.sort_values('datetime')
160
 
161
+ # Initialize lists and sets for tracking
162
+ indices_to_delete = set()
163
 
164
+ # Process each text
165
+ for i in df.index:
166
+ if i in indices_to_delete: # Skip if already marked for deletion
167
+ continue
168
+
169
+ text1 = df.loc[i, text_column]
170
+ if pd.isna(text1):
171
+ continue
172
+
173
+ text1_embedding = self.encode_text(text1)
174
+ current_cluster = []
175
+
176
+ # Compare with other texts
177
+ for j in df.index:
178
+ if i == j or j in indices_to_delete: # Skip same text or already marked
179
+ continue
180
+
181
+ text2 = df.loc[j, text_column]
182
+ if pd.isna(text2):
183
+ continue
184
+
185
+ # Check time difference if datetime available
186
+ if 'datetime' in df.columns:
187
+ time_diff = pd.to_datetime(df.loc[j, 'datetime']) - pd.to_datetime(df.loc[i, 'datetime'])
188
+ if abs(time_diff.total_seconds() / 3600) > time_threshold:
189
+ continue
190
+
191
+ text2_embedding = self.encode_text(text2)
192
+ similarity = torch.dot(text1_embedding, text2_embedding).item()
193
+
194
+ if similarity >= similarity_threshold:
195
+ current_cluster.append(j)
196
+
197
+ # If we found similar texts, keep the longest one
198
+ if current_cluster:
199
+ current_cluster.append(i) # Add the current text to cluster
200
+ text_lengths = df.loc[current_cluster, text_column].fillna('').str.len()
201
+ longest_text_idx = text_lengths.idxmax()
202
+
203
+ # Mark all except longest for deletion
204
+ indices_to_delete.update(set(current_cluster) - {longest_text_idx})
205
+
206
+ # Return DataFrame without deleted rows
207
+ return df.drop(index=list(indices_to_delete))
208
 
209
  except Exception as e:
210
+ logger.error(f"Declusterization error: {str(e)}")
211
+ return df
212
+
213
+ @spaces.GPU(duration=30)
214
+ def initialize_models(self, device):
215
+ """Initialize all models with GPU support"""
216
+ # Initialize translation model
217
+ self.translator = pipeline(
218
+ "translation",
219
+ model="Helsinki-NLP/opus-mt-ru-en",
220
+ device=device
221
+ )
222
+
223
+ self.rutranslator = pipeline(
224
+ "translation",
225
+ model="Helsinki-NLP/opus-mt-en-ru",
226
+ device=device
227
+ )
228
 
229
+ # Initialize sentiment models
230
+ self.finbert = pipeline(
231
+ "sentiment-analysis",
232
+ model="ProsusAI/finbert",
233
+ device=device,
234
+ truncation=True,
235
+ max_length=512
236
+ )
237
+ self.roberta = pipeline(
238
+ "sentiment-analysis",
239
+ model="cardiffnlp/twitter-roberta-base-sentiment",
240
+ device=device,
241
+ truncation=True,
242
+ max_length=512
243
+ )
244
+ self.finbert_tone = pipeline(
245
+ "sentiment-analysis",
246
+ model="yiyanghkust/finbert-tone",
247
+ device=device,
248
+ truncation=True,
249
+ max_length=512
250
+ )
251
+
252
+ # Initialize MT5 model
253
+ self.model_name = "google/mt5-small"
254
+ self.tokenizer = AutoTokenizer.from_pretrained(
255
+ self.model_name,
256
+ legacy=True
257
+ )
258
+ self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
259
+
260
+ # Initialize Groq
261
+ if 'groq_key':
262
+ self.groq = ChatOpenAI(
263
+ base_url="https://api.groq.com/openai/v1",
264
+ model="llama-3.1-70b-versatile",
265
+ openai_api_key=groq_key,
266
+ temperature=0.0
267
+ )
268
+ else:
269
+ logger.warning("Groq API key not found, impact estimation will be limited")
270
+ self.groq = None
271
+
272
+ @spaces.GPU(duration=20)
273
+ def _translate_text(self, text):
274
+ """Translate Russian text to English"""
275
  try:
276
+ if not text or not isinstance(text, str):
277
+ return ""
278
+
279
+ text = text.strip()
280
+ if not text:
281
  return ""
282
 
283
+ # Split into manageable chunks
284
  max_length = 450
285
  chunks = [text[i:i + max_length] for i in range(0, len(text), max_length)]
 
286
  translated_chunks = []
287
+
288
  for chunk in chunks:
289
+ result = self.translator(chunk)[0]['translation_text']
290
+ translated_chunks.append(result)
291
+ time.sleep(0.1) # Rate limiting
 
 
 
 
292
 
293
  return " ".join(translated_chunks)
294
 
 
296
  logger.error(f"Translation error: {str(e)}")
297
  return text
298
 
299
+ @spaces.GPU(duration=20)
300
+ def analyze_sentiment(self, text):
301
+ """Enhanced sentiment analysis with better negative detection"""
302
  try:
303
+ if not text or not isinstance(text, str):
304
+ return "Neutral"
305
+
306
+ text = text.strip()
307
+ if not text:
308
  return "Neutral"
309
 
310
+ # Get predictions with confidence scores
311
+ finbert_result = self.finbert(text)[0]
312
+ roberta_result = self.roberta(text)[0]
313
+ finbert_tone_result = self.finbert_tone(text)[0]
314
+
315
+ # Enhanced sentiment mapping with confidence thresholds
316
+ def map_sentiment(result):
317
+ label = result['label'].lower()
318
+ score = result['score']
319
+
320
+ # Higher threshold for positive to reduce false positives
321
+ if label in ['positive', 'pos', 'positive tone'] and score > 0.75:
322
+ return "Positive"
323
+ # Lower threshold for negative to catch more cases
324
+ elif label in ['negative', 'neg', 'negative tone'] and score > 0.75:
325
+ return "Negative"
326
+ # Consider high-confidence neutral predictions
327
+ elif label == 'neutral' and score > 0.8:
328
+ return "Neutral"
329
+ # Default to negative for uncertain cases in financial context
330
+ else:
331
+ return "Negative" if score > 0.4 else "Neutral"
332
 
333
+ # Get mapped sentiments with confidence-based logic
334
+ sentiments = [
335
+ map_sentiment(finbert_result),
336
+ map_sentiment(roberta_result),
337
+ map_sentiment(finbert_tone_result)
338
+ ]
339
+
340
+ # Weighted voting - prioritize negative signals
341
+ if "Negative" in sentiments:
342
+ neg_count = sentiments.count("Negative")
343
+ if neg_count >= 2: # negative should be consensus
344
+ return "Negative"
345
+
346
+ pos_count = sentiments.count("Positive")
347
+ if pos_count >= 2: # Require stronger positive consensus
348
  return "Positive"
349
+
 
350
  return "Neutral"
351
 
352
  except Exception as e:
353
  logger.error(f"Sentiment analysis error: {str(e)}")
354
  return "Neutral"
355
 
356
+ def estimate_impact(self, text, entity):
357
+ """Estimate impact using Groq for negative sentiment texts"""
358
+ try:
359
+ if not self.groq:
360
+ return "Неопределенный эффект", "Groq API недоступен"
361
+
362
+ template = """
363
+ You are a financial analyst. Analyze this news about {entity} and assess its potential impact.
364
+
365
+ News: {news}
366
+
367
+ Classify the impact into one of these categories:
368
+ 1. "Значительный риск убытков" (Significant loss risk)
369
+ 2. "Умеренный риск убытков" (Moderate loss risk)
370
+ 3. "Незначительный риск убытков" (Minor loss risk)
371
+ 4. "Вероятность прибыли" (Potential profit)
372
+ 5. "Неопределенный эффект" (Uncertain effect)
373
+
374
+ Format your response exactly as:
375
+ Impact: [category]
376
+ Reasoning: [explanation in 2-3 sentences]
377
+ """
378
+
379
+ prompt = PromptTemplate(template=template, input_variables=["entity", "news"])
380
+ chain = prompt | self.groq
381
+
382
+ response = chain.invoke({
383
+ "entity": entity,
384
+ "news": text
385
+ })
386
+
387
+ # Parse response
388
+ response_text = response.content if hasattr(response, 'content') else str(response)
389
+
390
+ if "Impact:" in response_text and "Reasoning:" in response_text:
391
+ parts = response_text.split("Reasoning:")
392
+ impact = parts[0].split("Impact:")[1].strip()
393
+ reasoning = parts[1].strip()
394
+ else:
395
+ impact = "Неопределенный эффект"
396
+ reasoning = "Не удалось определить влияние"
397
+
398
+ return impact, reasoning
399
+
400
+ except Exception as e:
401
+ logger.error(f"Impact estimation error: {str(e)}")
402
+ return "Неопределенный эффект", f"Ошибка анализа: {str(e)}"
403
+
404
+
405
+ @spaces.GPU(duration=60)
406
+ def process_text(self, text, entity):
407
+ """Process text with Groq-driven sentiment analysis"""
408
  try:
409
+ translated_text = self._translate_text(text)
410
+ initial_sentiment = self.analyze_sentiment(translated_text)
411
 
412
+ impact = "Неопределенный эффект"
413
+ reasoning = ""
414
 
415
+ # Always get Groq analysis for all texts
416
+ impact, reasoning = self.estimate_impact(translated_text, entity)
417
+ reasoning = self.rutranslator(reasoning)[0]['translation_text']
418
+
419
+ # Override sentiment based on Groq impact
420
+ final_sentiment = initial_sentiment
421
+ if impact == "Вероятность прибыли":
422
+ final_sentiment = "Positive"
423
+
424
+ event_type, event_summary = self.detect_events(text, entity)
425
+
426
+ return {
427
+ 'translated_text': translated_text,
428
+ 'sentiment': final_sentiment,
429
+ 'impact': impact,
430
+ 'reasoning': reasoning,
431
+ 'event_type': event_type,
432
+ 'event_summary': event_summary
433
+ }
434
+
435
+ except Exception as e:
436
+ logger.error(f"Text processing error: {str(e)}")
437
+ return {
438
+ 'translated_text': '',
439
+ 'sentiment': 'Neutral',
440
+ 'impact': 'Неопределенный эффект',
441
+ 'reasoning': f'Ошибка обработки: {str(e)}',
442
+ 'event_type': 'Нет',
443
+ 'event_summary': ''
444
+ }
445
+
446
+
447
+
448
+
449
+ @spaces.GPU(duration=20)
450
+ def detect_events(self, text, entity):
451
+ if not text or not entity:
452
+ return "Нет", "Invalid input"
453
+
454
+ try:
455
+ # Improved prompt for MT5
456
+ prompt = f"""<s>Analyze this news about {entity}:
457
+
458
+ Text: {text}
459
+
460
+ Classify this news into ONE of these categories:
461
+ 1. "Отчетность" if about: financial reports, revenue, profit, EBITDA, financial results, quarterly/annual reports
462
+ 2. "Суд" if about: court cases, lawsuits, arbitration, bankruptcy, legal proceedings
463
+ 3. "РЦБ" if about: bonds, securities, defaults, debt restructuring, coupon payments
464
+ 4. "Нет" if none of the above
465
+
466
+ Provide classification and 2-3 sentence summary focusing on key facts.
467
+
468
+ Format response exactly as:
469
+ Category: [category name]
470
+ Summary: [brief factual summary]</s>"""
471
+
472
  inputs = self.tokenizer(
473
  prompt,
474
  return_tensors="pt",
 
477
  max_length=512
478
  ).to(self.device)
479
 
 
480
  outputs = self.model.generate(
481
  **inputs,
482
+ max_length=200,
483
  num_return_sequences=1,
484
+ do_sample=False,
485
+ temperature=0.7,
486
+ top_p=0.9,
487
+ no_repeat_ngram_size=3
488
  )
489
 
490
  response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
491
 
492
+ # Extract category and summary
493
+ if "Category:" in response and "Summary:" in response:
494
+ parts = response.split("Summary:")
495
+ category = parts[0].split("Category:")[1].strip()
496
+ summary = parts[1].strip()
497
+
498
+ # Validate category
499
+ valid_categories = {"Отчетность", "Суд", "РЦБ", "Нет"}
500
+ category = category if category in valid_categories else "Нет"
501
+
502
+ return category, summary
503
+
504
+ return "Нет", "Could not classify event"
505
 
506
  except Exception as e:
507
  logger.error(f"Event detection error: {str(e)}")
508
+ return "Нет", f"Error in event detection: {str(e)}"
509
 
510
  def cleanup(self):
511
+ """Clean up GPU resources"""
512
  try:
513
  self.model = None
514
  self.translator = None
515
+ self.finbert = None
516
+ self.roberta = None
517
+ self.finbert_tone = None
518
+ torch.cuda.empty_cache()
519
  self.initialized = False
520
+ logger.info("Cleaned up GPU resources")
521
  except Exception as e:
522
+ logger.error(f"Error in cleanup: {str(e)}")
523
 
524
  def create_visualizations(df):
 
525
  if df is None or df.empty:
526
  return None, None
527
 
528
  try:
 
529
  sentiments = df['Sentiment'].value_counts()
530
  fig_sentiment = go.Figure(data=[go.Pie(
531
  labels=sentiments.index,
 
534
  )])
535
  fig_sentiment.update_layout(title="Распределение тональности")
536
 
 
537
  events = df['Event_Type'].value_counts()
538
  fig_events = go.Figure(data=[go.Bar(
539
  x=events.index,
 
545
  return fig_sentiment, fig_events
546
 
547
  except Exception as e:
548
+ logger.error(f"Visualization error: {e}")
549
  return None, None
550
+
551
 
552
+ @spaces.GPU
553
+ def process_file(file_obj):
554
  try:
555
+ logger.info("Starting to read Excel file...")
556
+ df = pd.read_excel(file_obj, sheet_name='Публикации')
557
+ logger.info(f"Successfully read Excel file. Shape: {df.shape}")
 
 
558
 
559
+ # Deduplication
560
+ original_count = len(df)
561
+ df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
562
+ logger.info(f"Removed {original_count - len(df)} duplicate entries")
563
 
564
+ detector = EventDetector()
565
  processed_rows = []
566
  total = len(df)
567
 
568
+ # Process in smaller batches with quota management
569
+ BATCH_SIZE = 3 # Reduced batch size
570
+ QUOTA_WAIT_TIME = 60 # Wait time when quota is exceeded
571
+
572
+ for batch_start in range(0, total, BATCH_SIZE):
573
+ try:
574
+ batch_end = min(batch_start + BATCH_SIZE, total)
575
+ batch = df.iloc[batch_start:batch_end]
576
+
577
+ # Initialize models for batch
578
+ if not detector.initialized:
579
+ detector.initialize_models()
580
+ time.sleep(1) # Wait after initialization
581
+
582
+ for idx, row in batch.iterrows():
583
+ try:
584
+ text = str(row.get('Выдержки из текста', ''))
585
+ if not text.strip():
586
+ continue
587
+
588
+ entity = str(row.get('Объект', ''))
589
+ if not entity.strip():
590
+ continue
591
+
592
+ # Process with GPU quota management
593
+ event_type = "Нет"
594
+ event_summary = ""
595
+ sentiment = "Neutral"
596
+
597
+ try:
598
+ event_type, event_summary = detector.detect_events(text, entity)
599
+ time.sleep(1) # Wait between GPU operations
600
+ sentiment = detector.analyze_sentiment(text)
601
+ except Exception as e:
602
+ if "GPU quota" in str(e):
603
+ logger.warning("GPU quota exceeded, waiting...")
604
+ time.sleep(QUOTA_WAIT_TIME)
605
+ continue
606
+ else:
607
+ raise e
608
+
609
+ processed_rows.append({
610
+ 'Объект': entity,
611
+ 'Заголовок': str(row.get('Заголовок', '')),
612
+ 'Sentiment': sentiment,
613
+ 'Event_Type': event_type,
614
+ 'Event_Summary': event_summary,
615
+ 'Текст': text[:1000]
616
+ })
617
+
618
+ logger.info(f"Processed {idx + 1}/{total} rows")
619
+
620
+ except Exception as e:
621
+ logger.error(f"Error processing row {idx}: {str(e)}")
622
+ continue
623
+
624
+ # Create intermediate results
625
+ if processed_rows:
626
+ intermediate_df = pd.DataFrame(processed_rows)
627
+ yield (
628
+ intermediate_df,
629
+ None,
630
+ None,
631
+ f"Обработано {len(processed_rows)}/{total} строк"
632
+ )
633
+
634
+ # Wait between batches
635
+ time.sleep(2)
636
+
637
+ # Cleanup GPU resources after each batch
638
+ torch.cuda.empty_cache()
639
+
640
+ except Exception as e:
641
+ logger.error(f"Batch processing error: {str(e)}")
642
+ if "GPU quota" in str(e):
643
+ time.sleep(QUOTA_WAIT_TIME)
644
+ continue
645
+
646
+ # Final results
647
+ if processed_rows:
648
+ result_df = pd.DataFrame(processed_rows)
649
+ fig_sentiment, fig_events = create_visualizations(result_df)
650
+ return result_df, fig_sentiment, fig_events, "Обработка завершена!"
651
+ else:
652
+ return None, None, None, "Нет обработанных данных"
653
 
654
+ except Exception as e:
655
+ logger.error(f"File processing error: {str(e)}")
656
+ raise
657
+
658
+ def create_output_file(df, uploaded_file):
659
+ """Create Excel file with multiple sheets from processed DataFrame"""
660
+ try:
661
+ wb = load_workbook("sample_file.xlsx")
 
 
662
 
663
+ # 1. Update 'Публикации' sheet
664
+ ws = wb['Публикации']
665
+ for r_idx, row in enumerate(dataframe_to_rows(df, index=False, header=True), start=1):
666
+ for c_idx, value in enumerate(row, start=1):
667
+ ws.cell(row=r_idx, column=c_idx, value=value)
668
+
669
+ # 2. Update 'Мониторинг' sheet with events
670
+ ws = wb['Мониторинг']
671
+ row_idx = 4
672
+ events_df = df[df['Event_Type'] != 'Нет'].copy()
673
+ for _, row in events_df.iterrows():
674
+ ws.cell(row=row_idx, column=5, value=row['Объект'])
675
+ ws.cell(row=row_idx, column=6, value=row['Заголовок'])
676
+ ws.cell(row=row_idx, column=7, value=row['Event_Type'])
677
+ ws.cell(row=row_idx, column=8, value=row['Event_Summary'])
678
+ ws.cell(row=row_idx, column=9, value=row['Выдержки из текста'])
679
+ row_idx += 1
680
+
681
+ # 3. Update 'Сводка' sheet
682
+ ws = wb['Сводка']
683
+ unique_entities = df['Объект'].unique()
684
+ entity_stats = []
685
+ for entity in unique_entities:
686
+ entity_df = df[df['Объект'] == entity]
687
+ stats = {
688
+ 'Объект': entity,
689
+ 'Всего': len(entity_df),
690
+ 'Негативные': len(entity_df[entity_df['Sentiment'] == 'Negative']),
691
+ 'Позитивные': len(entity_df[entity_df['Sentiment'] == 'Positive'])
692
+ }
693
+
694
+ # Get most severe impact for entity
695
+ negative_df = entity_df[entity_df['Sentiment'] == 'Negative']
696
+ if len(negative_df) > 0:
697
+ impacts = negative_df['Impact'].dropna()
698
+ if len(impacts) > 0:
699
+ stats['Impact'] = impacts.iloc[0]
700
+ else:
701
+ stats['Impact'] = 'Неопределенный эффект'
702
+ else:
703
+ stats['Impact'] = 'Неопределенный эффект'
704
+
705
+ entity_stats.append(stats)
706
 
707
+ # Sort by number of negative mentions
708
+ entity_stats = sorted(entity_stats, key=lambda x: x['Негативные'], reverse=True)
709
 
710
+ # Write to sheet
711
+ row_idx = 4 # Starting row in Сводка sheet
712
+ for stats in entity_stats:
713
+ ws.cell(row=row_idx, column=5, value=stats['Объект'])
714
+ ws.cell(row=row_idx, column=6, value=stats['Всего'])
715
+ ws.cell(row=row_idx, column=7, value=stats['Негативные'])
716
+ ws.cell(row=row_idx, column=8, value=stats['Позитивные'])
717
+ ws.cell(row=row_idx, column=9, value=stats['Impact'])
718
+ row_idx += 1
719
+
720
+ # 4. Update 'Значимые' sheet
721
+ ws = wb['Значимые']
722
+ row_idx = 3
723
+ sentiment_df = df[df['Sentiment'].isin(['Negative', 'Positive'])].copy()
724
+ for _, row in sentiment_df.iterrows():
725
+ ws.cell(row=row_idx, column=3, value=row['Объек��'])
726
+ ws.cell(row=row_idx, column=4, value='релевантно')
727
+ ws.cell(row=row_idx, column=5, value=row['Sentiment'])
728
+ ws.cell(row=row_idx, column=6, value=row.get('Impact', '-'))
729
+ ws.cell(row=row_idx, column=7, value=row['Заголовок'])
730
+ ws.cell(row=row_idx, column=8, value=row['Выдержки из текста'])
731
+ row_idx += 1
732
+
733
+ # 5. Update 'Анализ' sheet
734
+ ws = wb['Анализ']
735
+ row_idx = 4
736
+ negative_df = df[df['Sentiment'] == 'Negative'].copy()
737
+ for _, row in negative_df.iterrows():
738
+ ws.cell(row=row_idx, column=5, value=row['Объект'])
739
+ ws.cell(row=row_idx, column=6, value=row['Заголовок'])
740
+ ws.cell(row=row_idx, column=7, value="Риск убытка")
741
+ ws.cell(row=row_idx, column=8, value=row.get('Reasoning', '-'))
742
+ ws.cell(row=row_idx, column=9, value=row['Выдержки из текста'])
743
+ row_idx += 1
744
+
745
+ # 6. Update 'Тех.приложение' sheet
746
+ if 'Тех.приложение' not in wb.sheetnames:
747
+ wb.create_sheet('Тех.приложение')
748
+ ws = wb['Тех.приложение']
749
 
750
+ tech_cols = ['Объект', 'Заголовок', 'Выдержки из текста', 'Translated', 'Sentiment', 'Impact', 'Reasoning']
751
+ tech_df = df[tech_cols].copy()
 
 
752
 
753
+ for r_idx, row in enumerate(dataframe_to_rows(tech_df, index=False, header=True), start=1):
754
+ for c_idx, value in enumerate(row, start=1):
755
+ ws.cell(row=r_idx, column=c_idx, value=value)
756
+
757
+ # Save workbook
758
+ output = io.BytesIO()
759
+ wb.save(output)
760
+ output.seek(0)
761
+ return output
762
+
763
+ except Exception as e:
764
+ logger.error(f"Error creating output file: {str(e)}")
765
+ logger.error(f"DataFrame shape: {df.shape}")
766
+ logger.error(f"Available columns: {df.columns.tolist()}")
767
+ return None
768
+
769
 
770
  def create_interface():
771
+ control = ProcessControl()
772
+
773
+ with gr.Blocks(theme=gr.themes.Soft()) as app:
774
+ # Create state for file data
775
+ current_file = gr.State(None)
776
+
777
+ gr.Markdown("# AI-анализ мониторинга новостей v.2.0 + добавка")
778
 
779
  with gr.Row():
780
  file_input = gr.File(
781
  label="Загрузите Excel файл",
782
+ file_types=[".xlsx"],
783
+ type="binary"
784
  )
785
 
786
+ with gr.Row():
787
+ with gr.Column(scale=1):
788
+ analyze_btn = gr.Button(
789
+ "▶️ Начать анализ",
790
+ variant="primary",
791
+ size="lg"
792
+ )
793
+ with gr.Column(scale=1):
794
+ stop_btn = gr.Button(
795
+ "⏹️ Остановить",
796
+ variant="stop",
797
+ size="lg"
798
+ )
799
 
800
+ with gr.Row():
801
+ status_box = gr.Textbox(
802
+ label="Статус дедупликации",
803
+ interactive=False,
804
+ value=""
805
+ )
806
+
807
+ with gr.Row():
808
+ progress = gr.Textbox(
809
+ label="Статус обработки",
810
+ interactive=False,
811
+ value="Ожидание файла..."
812
+ )
813
 
814
  with gr.Row():
815
+ stats = gr.DataFrame(
816
+ label="Результаты анализа",
817
+ interactive=False,
818
+ wrap=True
819
+ )
820
+
821
+ with gr.Row():
822
+ with gr.Column(scale=1):
823
+ sentiment_plot = gr.Plot(label="Распределение тональности")
824
+ with gr.Column(scale=1):
825
+ events_plot = gr.Plot(label="Распределение событий")
826
+
827
+ # Create a download row with file component only
828
+ with gr.Row():
829
+ file_output = gr.File(
830
+ label="Скачать результаты",
831
+ visible=True,
832
+ interactive=True
833
+ )
834
+
835
+ def stop_processing():
836
+ control.request_stop()
837
+ return "Остановка обработки..."
838
+
839
 
840
+ @spaces.GPU(duration=300)
841
+ async def process_and_download(file_bytes):
842
+ if file_bytes is None:
843
+ gr.Warning("Пожалуйста, загрузите файл")
844
+ yield (pd.DataFrame(), None, None, None, "Ожидание файла...", "")
845
+ return
846
+
847
+ detector = None
848
+ gpu_manager = GPUTaskManager(
849
+ max_retries=3,
850
+ retry_delay=30,
851
+ cleanup_callback=lambda: detector.cleanup() if detector else None
852
+ )
853
+
854
+ try:
855
+ file_obj = io.BytesIO(file_bytes)
856
+ logger.info("File loaded into BytesIO successfully")
857
+
858
+ detector = EventDetector()
859
+
860
+ # Read and deduplicate data with retry
861
+ async def read_and_dedupe():
862
+ df = pd.read_excel(file_obj, sheet_name='Публикации')
863
+ original_count = len(df)
864
+ df = fuzzy_deduplicate(df, 'Выдержки из текста', threshold=55)
865
+ return df, original_count
866
+
867
+ df, original_count = await gpu_manager.run_with_retry(read_and_dedupe)
868
+
869
+ # Process in smaller batches with better error handling
870
+ processed_rows = []
871
+ batches = gpu_manager.batch_process(list(df.iterrows()), batch_size=3)
872
+
873
+ for batch in batches:
874
+ if control.should_stop():
875
+ break
876
+
877
+ try:
878
+ # Process batch with retry mechanism
879
+ async def process_batch():
880
+ batch_results = []
881
+ for idx, row in batch:
882
+ text = str(row.get('Выдержки из текста', '')).strip()
883
+ entity = str(row.get('Объект', '')).strip()
884
+
885
+ if text and entity:
886
+ results = detector.process_text(text, entity)
887
+ batch_results.append({
888
+ 'Объект': entity,
889
+ 'Заголовок': str(row.get('Заголовок', '')),
890
+ 'Translated': results['translated_text'],
891
+ 'Sentiment': results['sentiment'],
892
+ 'Impact': results['impact'],
893
+ 'Reasoning': results['reasoning'],
894
+ 'Event_Type': results['event_type'],
895
+ 'Event_Summary': results['event_summary'],
896
+ 'Выдержки из текста': text
897
+ })
898
+ return batch_results
899
+
900
+ batch_results = await gpu_manager.run_with_retry(process_batch)
901
+ processed_rows.extend(batch_results)
902
+
903
+ # Create intermediate results
904
+ if processed_rows:
905
+ result_df = pd.DataFrame(processed_rows)
906
+ yield (
907
+ result_df,
908
+ None, None, None,
909
+ f"Обработано {len(processed_rows)}/{len(df)} строк",
910
+ f"Удалено {original_count - len(df)} дубликатов"
911
+ )
912
+
913
+ except Exception as e:
914
+ if gpu_manager.is_gpu_error(e):
915
+ logger.warning(f"GPU error in batch processing: {str(e)}")
916
+ continue
917
+ else:
918
+ logger.error(f"Non-GPU error in batch processing: {str(e)}")
919
+
920
+ finally:
921
+ torch.cuda.empty_cache()
922
+
923
+ # Create final results
924
+ if processed_rows:
925
+ result_df = pd.DataFrame(processed_rows)
926
+ output_bytes_io = create_output_file(result_df, file_obj)
927
+ fig_sentiment, fig_events = create_visualizations(result_df)
928
+
929
+ if output_bytes_io:
930
+ temp_file = "results.xlsx"
931
+ with open(temp_file, "wb") as f:
932
+ f.write(output_bytes_io.getvalue())
933
+ yield (
934
+ result_df,
935
+ fig_sentiment,
936
+ fig_events,
937
+ temp_file,
938
+ "Обработка завершена!",
939
+ f"Удалено {original_count - len(df)} дубликатов"
940
+ )
941
+ return
942
+
943
+ yield (pd.DataFrame(), None, None, None, "Нет обработанных данных", "")
944
+
945
+ except Exception as e:
946
+ error_msg = f"Ошибка анализа: {str(e)}"
947
+ logger.error(error_msg)
948
+ yield (pd.DataFrame(), None, None, None, error_msg, "")
949
+
950
+ finally:
951
+ if detector:
952
+ detector.cleanup()
953
+
954
+ stop_btn.click(fn=stop_processing, outputs=[progress])
955
 
956
+ # Main processing - simplified outputs
957
  analyze_btn.click(
958
+ fn=process_and_download,
959
  inputs=[file_input],
960
+ outputs=[
961
+ stats,
962
+ sentiment_plot,
963
+ events_plot,
964
+ file_output,
965
+ progress,
966
+ status_box
967
+ ]
968
  )
969
+
970
  return app
971
 
972
  if __name__ == "__main__":
973
  app = create_interface()
974
+ app.launch(share=True)