pentarosarium committed
Commit 33771c2 · 1 Parent(s): 23332bc
Files changed (1)
  1. app.py +135 -19
app.py CHANGED
@@ -7,6 +7,7 @@ import plotly.graph_objects as go
 import logging
 import io
 from rapidfuzz import fuzz
+import time
 
 def fuzzy_deduplicate(df, column, threshold=55):
     """Deduplicate rows based on fuzzy matching of text content"""
@@ -41,21 +42,49 @@ class ProcessControl:
     def reset(self):
         self.stop_requested = False
 
+class ProcessControl:
+    def __init__(self):
+        self.stop_requested = False
+        self.error = None
+
+    def request_stop(self):
+        self.stop_requested = True
+
+    def should_stop(self):
+        return self.stop_requested
+
+    def reset(self):
+        self.stop_requested = False
+        self.error = None
+
+    def set_error(self, error):
+        self.error = error
+        self.stop_requested = True
+
 class EventDetector:
     def __init__(self):
-        self.model_name = "google/mt5-small"
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            self.model_name,
-            legacy=True
-        )
-        self.model = None
-        self.finbert = None
-        self.roberta = None
-        self.finbert_tone = None
-        self.last_gpu_use = 0
+        try:
+            self.model_name = "google/mt5-small"
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.model_name,
+                legacy=True
+            )
+            self.model = None
+            self.finbert = None
+            self.roberta = None
+            self.finbert_tone = None
+            self.last_gpu_use = 0
+            self.initialized = False
+            logger.info("EventDetector initialized successfully")
+        except Exception as e:
+            logger.error(f"Error in EventDetector initialization: {e}")
+            raise
 
-    @spaces.GPU(duration=30)  # Reduced duration
+    @spaces.GPU(duration=30)
     def initialize_models(self):
+        if self.initialized:
+            return True
+
         try:
             current_time = time.time()
             if current_time - self.last_gpu_use < 2:
@@ -65,29 +94,116 @@ class EventDetector:
             logger.info(f"Initializing models on device: {device}")
 
             self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
-            self.finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert", device=device)
-            self.roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment", device=device)
-            self.finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone", device=device)
+
+            # Initialize sentiment models with proper error handling
+            try:
+                self.finbert = pipeline(
+                    "sentiment-analysis",
+                    model="ProsusAI/finbert",
+                    device=device,
+                    truncation=True,
+                    max_length=512
+                )
+            except Exception as e:
+                logger.error(f"Error initializing finbert: {e}")
+                raise
+
+            try:
+                self.roberta = pipeline(
+                    "sentiment-analysis",
+                    model="cardiffnlp/twitter-roberta-base-sentiment",
+                    device=device,
+                    truncation=True,
+                    max_length=512
+                )
+            except Exception as e:
+                logger.error(f"Error initializing roberta: {e}")
+                raise
+
+            try:
+                self.finbert_tone = pipeline(
+                    "sentiment-analysis",
+                    model="yiyanghkust/finbert-tone",
+                    device=device,
+                    truncation=True,
+                    max_length=512
+                )
+            except Exception as e:
+                logger.error(f"Error initializing finbert_tone: {e}")
+                raise
 
             self.last_gpu_use = time.time()
+            self.initialized = True
+            logger.info("All models initialized successfully")
             return True
 
         except Exception as e:
+            self.initialized = False
             logger.error(f"Model initialization error: {str(e)}")
-            return False
+            # Clean up any partially initialized models
+            self.cleanup()
+            raise
 
-    @spaces.GPU(duration=20)  # Reduced duration
+    def cleanup(self):
+        """Clean up GPU resources"""
+        try:
+            self.model = None
+            self.finbert = None
+            self.roberta = None
+            self.finbert_tone = None
+            torch.cuda.empty_cache()
+            self.initialized = False
+        except Exception as e:
+            logger.error(f"Error in cleanup: {e}")
+
+    @spaces.GPU(duration=20)
     def detect_events(self, text, entity):
         if not text or not entity:
             return "Нет", "Invalid input"
 
         try:
+            if not self.initialized:
+                if not self.initialize_models():
+                    return "Нет", "Model initialization failed"
+
             current_time = time.time()
             if current_time - self.last_gpu_use < 2:
                 time.sleep(2)
-
-            # Rest of the method remains the same...
 
+            text = text[:500]  # Truncate text
+
+            prompt = f"""<s>Analyze the following news about {entity}:
+            Text: {text}
+            Task: Identify the main event type and provide a brief summary.</s>"""
+
+            device = self.model.device
+            inputs = self.tokenizer(
+                prompt,
+                return_tensors="pt",
+                padding=True,
+                truncation=True,
+                max_length=512
+            ).to(device)
+
+            outputs = self.model.generate(
+                **inputs,
+                max_length=300,
+                num_return_sequences=1,
+                pad_token_id=self.tokenizer.pad_token_id,
+                eos_token_id=self.tokenizer.eos_token_id
+            )
+
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+            # Event classification
+            event_type = "Нет"
+            if any(term in text.lower() for term in ['отчет', 'выручка', 'прибыль', 'ebitda']):
+                event_type = "Отчетность"
+            elif any(term in text.lower() for term in ['облигаци', 'купон', 'дефолт']):
+                event_type = "РЦБ"
+            elif any(term in text.lower() for term in ['суд', 'иск', 'арбитраж']):
+                event_type = "Суд"
+
             self.last_gpu_use = time.time()
             return event_type, response
 
@@ -232,7 +348,7 @@ def create_interface():
     control = ProcessControl()
 
     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        gr.Markdown("# AI-анализ мониторинга новостей v.1.15")
+        gr.Markdown("# AI-анализ мониторинга новостей v.1.16")
 
         with gr.Row():
             file_input = gr.File(
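
For context, a minimal usage sketch of the reworked EventDetector outside the Gradio UI (not part of the commit). It assumes EventDetector can be imported from app.py, that app.py's module-level logger and transformers/torch imports are available, and that the @spaces.GPU decorators behave as no-ops off ZeroGPU hardware:

# Hypothetical smoke test; assumes the model weights can be downloaded
# in this environment and that importing app.py has no blocking side effects.
from app import EventDetector

detector = EventDetector()  # lightweight: heavy models are loaded lazily in detect_events
try:
    sample = "Компания опубликовала отчет: выручка и EBITDA выросли."
    event_type, summary = detector.detect_events(sample, "Компания")
    print(event_type)   # keyword match on 'отчет'/'выручка'/'ebitda' -> "Отчетность"
    print(summary)      # free-form mt5-small generation
finally:
    detector.cleanup()  # drop model references and empty the CUDA cache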