pentarosarium commited on
Commit
f0111d1
·
1 Parent(s): ce3b970
Files changed (1) hide show
  1. app.py +42 -50
app.py CHANGED
@@ -12,36 +12,49 @@ logger = logging.getLogger(__name__)
12
 
13
  class EventDetector:
14
  def __init__(self):
 
 
 
 
 
 
 
 
 
 
 
15
  try:
16
- logger.info(f"Using device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")
17
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
18
-
19
- self.model_name = "google/mt5-small"
20
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
21
- self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(self.device)
22
 
23
- self.finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert", device=self.device)
24
- self.roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment", device=self.device)
25
- self.finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone", device=self.device)
26
-
27
- logger.info("Models initialized successfully")
28
 
 
29
  except Exception as e:
30
- logger.error(f"Model initialization error: {e}")
31
- raise
32
 
33
- @spaces.GPU(duration=120)
34
  def detect_events(self, text, entity):
35
  if not text or not entity:
36
  return "Нет", "Invalid input"
37
 
38
  try:
 
 
 
 
 
 
39
  prompt = f"""<s>Analyze the following news about {entity}:
40
  Text: {text}
41
  Task: Identify the main event type and provide a brief summary.</s>"""
42
 
43
  inputs = self.tokenizer(prompt, return_tensors="pt", padding=True,
44
- truncation=True, max_length=512).to(self.device)
45
 
46
  outputs = self.model.generate(**inputs, max_length=300, num_return_sequences=1)
47
  response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -57,12 +70,17 @@ class EventDetector:
57
  return event_type, response
58
 
59
  except Exception as e:
60
- logger.error(f"Event detection error: {e}")
61
  return "Нет", f"Error: {str(e)}"
62
 
63
- @spaces.GPU(duration=60)
64
  def analyze_sentiment(self, text):
65
  try:
 
 
 
 
 
66
  results = []
67
  texts = [text[:512]] # Truncate to avoid token length issues
68
 
@@ -116,44 +134,27 @@ def create_visualizations(df):
116
  logger.error(f"Visualization error: {e}")
117
  return None, None
118
 
 
119
  def process_file(file_obj):
120
  try:
121
- # Debug print
122
  logger.info("Starting to read Excel file...")
 
 
123
 
124
- # Read Excel with error details
125
- try:
126
- df = pd.read_excel(file_obj, sheet_name='Публикации')
127
- logger.info(f"Successfully read Excel file. Shape: {df.shape}")
128
- logger.info(f"Columns: {df.columns.tolist()}")
129
- except Exception as e:
130
- logger.error(f"Failed to read Excel file: {str(e)}")
131
- raise
132
-
133
  detector = EventDetector()
134
  processed_rows = []
135
  total = len(df)
136
- current_status = "0%"
137
-
138
- # Create progress counter
139
- progress_text = gr.Textbox.update(
140
- value=f"Обработано 0 из {total} строк (0%)"
141
- )
142
 
143
  for idx, row in df.iterrows():
144
  try:
145
- # Get text and entity with validation
146
  text = str(row.get('Выдержки из текста', ''))
147
  if not text.strip():
148
- logger.warning(f"Empty text at row {idx}")
149
  continue
150
 
151
  entity = str(row.get('Объект', ''))
152
  if not entity.strip():
153
- logger.warning(f"Empty entity at row {idx}")
154
  continue
155
 
156
- # Process the row
157
  event_type, event_summary = detector.detect_events(text, entity)
158
  sentiment = detector.analyze_sentiment(text)
159
 
@@ -166,25 +167,16 @@ def process_file(file_obj):
166
  'Текст': text
167
  })
168
 
169
- # Update progress every 5 rows
170
- if idx % 5 == 0 or idx == total - 1:
171
- percentage = round((idx + 1) / total * 100)
172
- if percentage != current_status:
173
- current_status = percentage
174
- logger.info(f"Processed {idx + 1}/{total} rows ({percentage}%)")
175
-
176
  except Exception as e:
177
  logger.error(f"Error processing row {idx}: {str(e)}")
178
  continue
179
 
180
- # Create final DataFrame
181
  result_df = pd.DataFrame(processed_rows)
182
  logger.info(f"Processing complete. Final DataFrame shape: {result_df.shape}")
183
 
184
- if result_df.empty:
185
- logger.error("No rows were processed successfully")
186
- raise ValueError("No data was processed successfully")
187
-
188
  return result_df
189
 
190
  except Exception as e:
@@ -193,7 +185,7 @@ def process_file(file_obj):
193
 
194
  def create_interface():
195
  with gr.Blocks(theme=gr.themes.Soft()) as app:
196
- gr.Markdown("# AI-анализ мониторинга новостей v.1.09")
197
 
198
  with gr.Row():
199
  file_input = gr.File(
 
12
 
13
class EventDetector:
    def __init__(self):
        """Prepare the detector: load only the tokenizer eagerly.

        Heavy model weights are deliberately NOT loaded here — they are
        deferred to initialize_models() so that GPU allocation happens
        inside a @spaces.GPU-decorated call (ZeroGPU-friendly pattern).
        """
        self.model_name = "google/mt5-small"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        # Lazy-loaded in initialize_models(); None signals "not yet loaded".
        self.model = None
        self.finbert = None
        self.roberta = None
        self.finbert_tone = None
22
+
23
+ @spaces.GPU
24
+ def initialize_models(self):
25
+ """Initialize all models with GPU support"""
26
  try:
27
+ device = "cuda" if torch.cuda.is_available() else "cpu"
28
+ logger.info(f"Initializing models on device: {device}")
 
 
 
 
29
 
30
+ self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
31
+ self.finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert", device=device)
32
+ self.roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment", device=device)
33
+ self.finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone", device=device)
 
34
 
35
+ return True
36
  except Exception as e:
37
+ logger.error(f"Model initialization error: {str(e)}")
38
+ return False
39
 
40
+ @spaces.GPU
41
  def detect_events(self, text, entity):
42
  if not text or not entity:
43
  return "Нет", "Invalid input"
44
 
45
  try:
46
+ # Initialize models if needed
47
+ if self.model is None:
48
+ if not self.initialize_models():
49
+ return "Нет", "Model initialization failed"
50
+
51
+ device = "cuda" if torch.cuda.is_available() else "cpu"
52
  prompt = f"""<s>Analyze the following news about {entity}:
53
  Text: {text}
54
  Task: Identify the main event type and provide a brief summary.</s>"""
55
 
56
  inputs = self.tokenizer(prompt, return_tensors="pt", padding=True,
57
+ truncation=True, max_length=512).to(device)
58
 
59
  outputs = self.model.generate(**inputs, max_length=300, num_return_sequences=1)
60
  response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
70
  return event_type, response
71
 
72
  except Exception as e:
73
+ logger.error(f"Event detection error: {str(e)}")
74
  return "Нет", f"Error: {str(e)}"
75
 
76
+ @spaces.GPU
77
  def analyze_sentiment(self, text):
78
  try:
79
+ # Initialize models if needed
80
+ if self.finbert is None:
81
+ if not self.initialize_models():
82
+ return "Neutral"
83
+
84
  results = []
85
  texts = [text[:512]] # Truncate to avoid token length issues
86
 
 
134
  logger.error(f"Visualization error: {e}")
135
  return None, None
136
 
137
+ @spaces.GPU
138
  def process_file(file_obj):
139
  try:
 
140
  logger.info("Starting to read Excel file...")
141
+ df = pd.read_excel(file_obj, sheet_name='Публикации')
142
+ logger.info(f"Successfully read Excel file. Shape: {df.shape}")
143
 
 
 
 
 
 
 
 
 
 
144
  detector = EventDetector()
145
  processed_rows = []
146
  total = len(df)
 
 
 
 
 
 
147
 
148
  for idx, row in df.iterrows():
149
  try:
 
150
  text = str(row.get('Выдержки из текста', ''))
151
  if not text.strip():
 
152
  continue
153
 
154
  entity = str(row.get('Объект', ''))
155
  if not entity.strip():
 
156
  continue
157
 
 
158
  event_type, event_summary = detector.detect_events(text, entity)
159
  sentiment = detector.analyze_sentiment(text)
160
 
 
167
  'Текст': text
168
  })
169
 
170
+ if idx % 5 == 0:
171
+ logger.info(f"Processed {idx + 1}/{total} rows")
172
+
 
 
 
 
173
  except Exception as e:
174
  logger.error(f"Error processing row {idx}: {str(e)}")
175
  continue
176
 
 
177
  result_df = pd.DataFrame(processed_rows)
178
  logger.info(f"Processing complete. Final DataFrame shape: {result_df.shape}")
179
 
 
 
 
 
180
  return result_df
181
 
182
  except Exception as e:
 
185
 
186
  def create_interface():
187
  with gr.Blocks(theme=gr.themes.Soft()) as app:
188
+ gr.Markdown("# AI-анализ мониторинга новостей v.1.10")
189
 
190
  with gr.Row():
191
  file_input = gr.File(