pentarosarium committed on
Commit
ce3b970
·
1 Parent(s): 0c8faca
Files changed (1) hide show
  1. app.py +100 -44
app.py CHANGED
@@ -116,9 +116,84 @@ def create_visualizations(df):
116
  logger.error(f"Visualization error: {e}")
117
  return None, None
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  def create_interface():
120
  with gr.Blocks(theme=gr.themes.Soft()) as app:
121
- gr.Markdown("# AI-анализ мониторинга новостей v.1.08+")
122
 
123
  with gr.Row():
124
  file_input = gr.File(
@@ -135,7 +210,7 @@ def create_interface():
135
 
136
  with gr.Row():
137
  progress = gr.Textbox(
138
- label="Статус",
139
  interactive=False,
140
  value="Ожидание файла..."
141
  )
@@ -157,21 +232,38 @@ def create_interface():
157
  if file_bytes is None:
158
  gr.Warning("Пожалуйста, загрузите файл")
159
  return None, None, None, "Ожидание файла..."
 
160
  try:
161
- # Convert bytes to BytesIO for pandas to read
162
  file_obj = io.BytesIO(file_bytes)
 
 
 
 
 
 
163
  df = process_file(file_obj)
164
 
165
  if df.empty:
166
  return None, None, None, "Нет данных для обработки"
167
-
 
 
 
168
  fig_sentiment, fig_events = create_visualizations(df)
169
- return df, fig_sentiment, fig_events, "Обработка завершена успешно!"
 
 
 
 
 
 
170
 
171
  except Exception as e:
172
- logger.error(f"Analysis error: {e}")
173
- gr.Error(f"Ошибка анализа: {str(e)}")
174
- return None, None, None, f"Ошибка: {str(e)}"
 
175
 
176
  analyze_btn.click(
177
  fn=analyze,
@@ -181,42 +273,6 @@ def create_interface():
181
 
182
  return app
183
 
184
- def process_file(file_obj):
185
- try:
186
- # Read Excel directly from BytesIO object
187
- df = pd.read_excel(file_obj, sheet_name='Публикации')
188
- detector = EventDetector()
189
- processed_rows = []
190
- total = len(df)
191
-
192
- for idx, row in df.iterrows():
193
- text = str(row.get('Выдержки из текста', ''))
194
- entity = str(row.get('Объект', ''))
195
-
196
- event_type, event_summary = detector.detect_events(text, entity)
197
- sentiment = detector.analyze_sentiment(text)
198
-
199
- processed_rows.append({
200
- 'Объект': entity,
201
- 'Заголовок': str(row.get('Заголовок', '')),
202
- 'Sentiment': sentiment,
203
- 'Event_Type': event_type,
204
- 'Event_Summary': event_summary,
205
- 'Текст': text
206
- })
207
-
208
- if idx % 10 == 0:
209
- logger.info(f"Processed {idx}/{total} rows")
210
-
211
- result_df = pd.DataFrame(processed_rows)
212
- logger.info("File processing complete!")
213
- return result_df
214
-
215
- except Exception as e:
216
- logger.error(f"File processing error: {e}")
217
- gr.Error(f"Error processing file: {str(e)}")
218
- return pd.DataFrame(columns=['Объект', 'Заголовок', 'Sentiment', 'Event_Type', 'Event_Summary', 'Текст'])
219
-
220
  if __name__ == "__main__":
221
  app = create_interface()
222
  app.launch(share=True)
 
116
  logger.error(f"Visualization error: {e}")
117
  return None, None
118
 
119
def _cell_text(row, column):
    """Return the value of *column* in *row* as text, or '' when it is missing.

    Empty Excel cells are loaded by pandas as NaN/None; calling ``str()`` on
    them yields the literal strings "nan"/"None", which would slip past the
    emptiness checks in the caller. Those values are mapped to '' instead.
    """
    value = row.get(column, '')
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ''
    return str(value)


def _analyze_row(detector, row, label):
    """Build the output record for one input row, or return None to skip it.

    Args:
        detector: object exposing ``detect_events(text, entity)`` (returning an
            ``(event_type, event_summary)`` pair) and ``analyze_sentiment(text)``.
        row: one row of the 'Публикации' sheet, as yielded by ``df.iterrows()``.
        label: index label of the row, used only in log messages.

    Returns:
        A dict with the output columns, or None when the row has no text or
        no entity (a warning is logged in that case).
    """
    text = _cell_text(row, 'Выдержки из текста')
    if not text.strip():
        logger.warning(f"Empty text at row {label}")
        return None

    entity = _cell_text(row, 'Объект')
    if not entity.strip():
        logger.warning(f"Empty entity at row {label}")
        return None

    event_type, event_summary = detector.detect_events(text, entity)
    sentiment = detector.analyze_sentiment(text)

    return {
        'Объект': entity,
        'Заголовок': _cell_text(row, 'Заголовок'),
        'Sentiment': sentiment,
        'Event_Type': event_type,
        'Event_Summary': event_summary,
        'Текст': text,
    }


def process_file(file_obj):
    """Read the 'Публикации' sheet from an Excel file and analyse every row.

    Each row with a non-empty text excerpt and entity is passed to an
    ``EventDetector`` for event detection and sentiment analysis. Rows that
    are empty or that raise during analysis are logged and skipped, so one
    bad row does not abort the whole file.

    Args:
        file_obj: anything ``pandas.read_excel`` accepts (the caller passes an
            ``io.BytesIO`` with the uploaded file's bytes).

    Returns:
        A non-empty DataFrame with the columns 'Объект', 'Заголовок',
        'Sentiment', 'Event_Type', 'Event_Summary' and 'Текст'.

    Raises:
        ValueError: if no row could be processed successfully.
        Exception: whatever ``pandas.read_excel`` raises is logged and
            re-raised unchanged.
    """
    try:
        logger.info("Starting to read Excel file...")

        try:
            df = pd.read_excel(file_obj, sheet_name='Публикации')
        except Exception as e:
            logger.error(f"Failed to read Excel file: {str(e)}")
            raise
        logger.info(f"Successfully read Excel file. Shape: {df.shape}")
        logger.info(f"Columns: {df.columns.tolist()}")

        detector = EventDetector()
        processed_rows = []
        total = len(df)
        last_logged = None  # last percentage written to the log

        # `position` drives the progress arithmetic; `label` (the DataFrame
        # index value) is only used to identify the row in log messages, so
        # progress stays correct even if the index is not 0..n-1.
        for position, (label, row) in enumerate(df.iterrows()):
            try:
                record = _analyze_row(detector, row, label)
            except Exception as e:
                # Best effort: a failing row is reported and skipped.
                logger.error(f"Error processing row {label}: {str(e)}")
                record = None

            if record is not None:
                processed_rows.append(record)

            # Report progress every 5 rows and on the last row, including
            # rows that were skipped, so the log never stalls silently.
            if position % 5 == 0 or position == total - 1:
                percentage = round((position + 1) / total * 100)
                if percentage != last_logged:
                    last_logged = percentage
                    logger.info(f"Processed {position + 1}/{total} rows ({percentage}%)")

        result_df = pd.DataFrame(processed_rows)
        logger.info(f"Processing complete. Final DataFrame shape: {result_df.shape}")

        if result_df.empty:
            logger.error("No rows were processed successfully")
            raise ValueError("No data was processed successfully")

        return result_df

    except Exception as e:
        logger.error(f"File processing error: {str(e)}")
        raise
193
+
194
  def create_interface():
195
  with gr.Blocks(theme=gr.themes.Soft()) as app:
196
+ gr.Markdown("# AI-анализ мониторинга новостей v.1.09")
197
 
198
  with gr.Row():
199
  file_input = gr.File(
 
210
 
211
  with gr.Row():
212
  progress = gr.Textbox(
213
+ label="Статус обработки",
214
  interactive=False,
215
  value="Ожидание файла..."
216
  )
 
232
  if file_bytes is None:
233
  gr.Warning("Пожалуйста, загрузите файл")
234
  return None, None, None, "Ожидание файла..."
235
+
236
  try:
237
+ # Create BytesIO object and debug print its content
238
  file_obj = io.BytesIO(file_bytes)
239
+ logger.info("File loaded into BytesIO successfully")
240
+
241
+ # Process file with progress updates
242
+ progress_status = "Начинаем обработку файла..."
243
+ yield None, None, None, progress_status
244
+
245
  df = process_file(file_obj)
246
 
247
  if df.empty:
248
  return None, None, None, "Нет данных для обработки"
249
+
250
+ progress_status = f"Создание визуализаций..."
251
+ yield None, None, None, progress_status
252
+
253
  fig_sentiment, fig_events = create_visualizations(df)
254
+
255
+ return (
256
+ df,
257
+ fig_sentiment,
258
+ fig_events,
259
+ f"Обработка завершена успешно! Обработано {len(df)} строк"
260
+ )
261
 
262
  except Exception as e:
263
+ error_msg = f"Ошибка анализа: {str(e)}"
264
+ logger.error(error_msg)
265
+ gr.Error(error_msg)
266
+ return None, None, None, error_msg
267
 
268
  analyze_btn.click(
269
  fn=analyze,
 
273
 
274
  return app
275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  if __name__ == "__main__":
277
  app = create_interface()
278
  app.launch(share=True)