pentarosarium committed on
Commit
d7392b8
·
1 Parent(s): 87fd9e7

progress more 33+

Browse files
Files changed (1) hide show
  1. app.py +75 -55
app.py CHANGED
@@ -11,6 +11,8 @@ from tqdm.auto import tqdm
11
  import time
12
  import torch
13
  from openpyxl import load_workbook
 
 
14
 
15
  # Initialize pymystem3 for lemmatization
16
  mystem = Mystem()
@@ -23,6 +25,13 @@ finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone")
23
  rubert1 = pipeline("sentiment-analysis", model = "DeepPavlov/rubert-base-cased")
24
  rubert2 = pipeline("sentiment-analysis", model = "blanchefort/rubert-base-cased-sentiment")
25
 
 
 
 
 
 
 
 
26
 
27
  # Function for lemmatizing Russian text
28
  def lemmatize_text(text):
@@ -163,66 +172,73 @@ def process_file(uploaded_file):
163
 
164
  return df
165
 
166
def create_output_file(df):
    """Fill the 'sample_file.xlsx' template with analysis results.

    The template workbook is loaded into memory and the result sheets are
    written on top of it. The previous approach assigned ``writer.book`` and
    ``writer.sheets``; pandas 2.x exposes these as read-only properties, so
    the template is instead opened through the documented append mode
    (``mode='a'``, ``if_sheet_exists='overlay'``; the latter needs
    pandas >= 1.4).

    Args:
        df: Processed news DataFrame with the columns 'Объект', 'Заголовок',
            'Выдержки из текста', 'FinBERT', 'RoBERTa' and 'FinBERT-Tone'.

    Returns:
        io.BytesIO positioned at offset 0 that holds the resulting .xlsx file.

    Raises:
        FileNotFoundError: If 'sample_file.xlsx' is not in the working directory.
        KeyError: If ``df`` lacks one of the required columns.
    """
    models = ['FinBERT', 'RoBERTa', 'FinBERT-Tone']
    text_columns = ['Объект', 'Заголовок', 'Выдержки из текста']

    # Row-level flags, computed once and reused for every sheet below.
    has_negative = df[models].eq('Negative').any(axis=1)
    has_positive = df[models].eq('Positive').any(axis=1)

    # 'Сводка': per-entity totals, most negative entities first.
    summary_data = []
    for entity in df['Объект'].unique():
        of_entity = df['Объект'] == entity
        summary_data.append([
            entity,
            int(of_entity.sum()),
            int((of_entity & has_negative).sum()),
            int((of_entity & has_positive).sum()),
        ])
    summary_df = pd.DataFrame(
        summary_data,
        columns=['Объект', 'Всего новостей', 'Отрицательные', 'Положительные'],
    )
    summary_df = summary_df.sort_values('Отрицательные', ascending=False)

    # 'Значимые': rows with any non-neutral verdict; a negative verdict wins
    # over a positive one when the models disagree.
    is_significant = has_negative | has_positive
    significant_df = (
        df.loc[is_significant, text_columns]
        .rename(columns={'Выдержки из текста': 'Текст'})
        .reset_index(drop=True)
    )
    significant_df.insert(
        1,
        'Окраска',
        ['Negative' if neg else 'Positive' for neg in has_negative[is_significant]],
    )

    # 'Анализ': rows with at least one negative verdict.
    analysis_df = (
        df.loc[has_negative, text_columns]
        .rename(columns={'Выдержки из текста': 'Текст'})
        .reset_index(drop=True)
    )
    analysis_df.insert(1, 'Тип риска', 'РИСК УБЫТКА')

    # Seed the output buffer with the template so append mode can load it.
    with open("sample_file.xlsx", "rb") as template:
        output = io.BytesIO(template.read())

    with pd.ExcelWriter(output, engine='openpyxl', mode='a',
                        if_sheet_exists='overlay') as writer:
        summary_df.to_excel(writer, sheet_name='Сводка', startrow=3, startcol=4,
                            index=False, header=False)
        significant_df.to_excel(writer, sheet_name='Значимые', startrow=2,
                                startcol=2, index=False)
        analysis_df.to_excel(writer, sheet_name='Анализ', startrow=3, startcol=4,
                             index=False)
        # Both sheets receive the processed frame, as before.
        df.to_excel(writer, sheet_name='Публикации', index=False)
        df.to_excel(writer, sheet_name='Тех.приложение', index=False)

    output.seek(0)
    return output
223
 
224
  def main():
225
- st.title("... приступим к анализу... версия 32+")
226
 
227
  uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
228
 
@@ -247,7 +263,11 @@ def main():
247
 
248
  plt.tight_layout()
249
  st.pyplot(fig)
250
-
 
 
 
 
251
  # Offer download of results
252
  output = create_output_file(df)
253
  st.download_button(
 
11
  import time
12
  import torch
13
  from openpyxl import load_workbook
14
+ from openpyxl import Workbook
15
+ from openpyxl.utils.dataframe import dataframe_to_rows
16
 
17
  # Initialize pymystem3 for lemmatization
18
  mystem = Mystem()
 
25
  rubert1 = pipeline("sentiment-analysis", model = "DeepPavlov/rubert-base-cased")
26
  rubert2 = pipeline("sentiment-analysis", model = "blanchefort/rubert-base-cased-sentiment")
27
 
28
def create_analysis_data(df, models=('FinBERT', 'RoBERTa', 'FinBERT-Tone')):
    """Build the 'Анализ' table of news items flagged as a loss risk.

    A row of ``df`` is flagged when at least one of the ``models`` columns
    holds the label ``'Negative'``.

    Args:
        df: DataFrame with the columns 'Объект', 'Заголовок',
            'Выдержки из текста' and one column per name in ``models``.
        models: Names of the sentiment columns to inspect. The default is the
            set of three columns that used to be hard-coded here.

    Returns:
        A new DataFrame with the columns 'Объект', 'Тип риска', 'Заголовок'
        and 'Текст', one row per flagged item in the original row order and
        indexed from 0. 'Тип риска' is always 'РИСК УБЫТКА'.

    Raises:
        KeyError: If ``df`` lacks one of the required columns.
    """
    # One vectorised mask replaces the per-row Python loop.
    is_negative = df[list(models)].eq('Negative').any(axis=1)

    analysis_df = (
        df.loc[is_negative, ['Объект', 'Заголовок', 'Выдержки из текста']]
        .rename(columns={'Выдержки из текста': 'Текст'})
        .reset_index(drop=True)
    )
    analysis_df.insert(1, 'Тип риска', 'РИСК УБЫТКА')
    return analysis_df
34
+
35
 
36
  # Function for lemmatizing Russian text
37
  def lemmatize_text(text):
 
172
 
173
  return df
174
 
175
def create_output_file(df, uploaded_file=None, analysis_df=None):
    """Assemble the result workbook and return it as an in-memory .xlsx file.

    The workbook has five sheets, in this order: 'Сводка' (per-entity counts,
    no header row), 'Значимые' (items with a non-neutral verdict), 'Анализ'
    (loss-risk items), 'Публикации' (source publications) and
    'Тех.приложение' (the full processed frame).

    ``uploaded_file`` and ``analysis_df`` are optional because ``main()`` in
    this file still calls ``create_output_file(df)`` with one argument; with
    both required that call raises ``TypeError``.

    Args:
        df: Processed news DataFrame with the columns 'Объект', 'Заголовок',
            'Выдержки из текста', 'FinBERT', 'RoBERTa' and 'FinBERT-Tone'.
        uploaded_file: Path or file-like object of the uploaded workbook; its
            'Публикации' sheet is copied into the output. When ``None`` the
            processed ``df`` is written to that sheet instead.
        analysis_df: Pre-built frame for the 'Анализ' sheet. When ``None`` it
            is derived from ``df`` with ``create_analysis_data``.

    Returns:
        io.BytesIO positioned at offset 0 that holds the .xlsx content.

    Raises:
        KeyError: If ``df`` lacks one of the required columns.
        Exception: Whatever ``pd.read_excel`` raises when ``uploaded_file``
            cannot be read or has no 'Публикации' sheet.
    """
    models = ['FinBERT', 'RoBERTa', 'FinBERT-Tone']

    if analysis_df is None:
        analysis_df = create_analysis_data(df)

    if uploaded_file is None:
        publications_df = df
    else:
        # NOTE(review): the upload has presumably been read once already by
        # process_file; rewinding first is harmless and avoids depending on
        # the reader to reset the stream position.
        if hasattr(uploaded_file, 'seek'):
            uploaded_file.seek(0)
        publications_df = pd.read_excel(uploaded_file, sheet_name='Публикации')

    # Row-level flags, computed once and reused for summary and 'Значимые'.
    has_negative = df[models].eq('Negative').any(axis=1)
    has_positive = df[models].eq('Positive').any(axis=1)

    # 'Сводка': per-entity totals, most negative entities first.
    summary_data = []
    for entity in df['Объект'].unique():
        of_entity = df['Объект'] == entity
        summary_data.append([
            entity,
            int(of_entity.sum()),
            int((of_entity & has_negative).sum()),
            int((of_entity & has_positive).sum()),
        ])
    summary_df = pd.DataFrame(
        summary_data,
        columns=['Объект', 'Всего новостей', 'Отрицательные', 'Положительные'],
    )
    summary_df = summary_df.sort_values('Отрицательные', ascending=False)

    # 'Значимые': rows with any non-neutral verdict; a negative verdict wins
    # over a positive one when the models disagree.
    is_significant = has_negative | has_positive
    significant_df = (
        df.loc[is_significant, ['Объект', 'Заголовок', 'Выдержки из текста']]
        .rename(columns={'Выдержки из текста': 'Текст'})
        .reset_index(drop=True)
    )
    significant_df.insert(
        1,
        'Окраска',
        ['Negative' if neg else 'Positive' for neg in has_negative[is_significant]],
    )

    # Start from an empty workbook: drop the default sheet openpyxl creates.
    wb = Workbook()
    wb.remove(wb.active)

    # (sheet title, frame, write header row?) in output order.
    sheets = [
        ('Сводка', summary_df, False),
        ('Значимые', significant_df, True),
        ('Анализ', analysis_df, True),
        ('Публикации', publications_df, True),
        ('Тех.приложение', df, True),
    ]
    for title, frame, with_header in sheets:
        ws = wb.create_sheet(title)
        for row in dataframe_to_rows(frame, index=False, header=with_header):
            ws.append(row)

    output = io.BytesIO()
    wb.save(output)
    output.seek(0)
    return output
239
 
240
  def main():
241
+ st.title("... приступим к анализу... версия 33+")
242
 
243
  uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
244
 
 
263
 
264
  plt.tight_layout()
265
  st.pyplot(fig)
266
+ analysis_df = create_analysis_data(df)
267
+ st.subheader("Анализ")
268
+ st.dataframe(analysis_df)
269
+
270
+
271
  # Offer download of results
272
  output = create_output_file(df)
273
  st.download_button(