Spaces:
Running
Running
Commit
·
d7392b8
1
Parent(s):
87fd9e7
progress more 33+
Browse files
app.py
CHANGED
@@ -11,6 +11,8 @@ from tqdm.auto import tqdm
|
|
11 |
import time
|
12 |
import torch
|
13 |
from openpyxl import load_workbook
|
|
|
|
|
14 |
|
15 |
# Initialize pymystem3 for lemmatization
|
16 |
mystem = Mystem()
|
@@ -23,6 +25,13 @@ finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone")
|
|
23 |
rubert1 = pipeline("sentiment-analysis", model = "DeepPavlov/rubert-base-cased")
|
24 |
rubert2 = pipeline("sentiment-analysis", model = "blanchefort/rubert-base-cased-sentiment")
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
# Function for lemmatizing Russian text
|
28 |
def lemmatize_text(text):
|
@@ -163,66 +172,73 @@ def process_file(uploaded_file):
|
|
163 |
|
164 |
return df
|
165 |
|
166 |
-
def create_output_file(df):
|
167 |
-
#
|
168 |
-
|
169 |
|
170 |
-
#
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
summary_df = pd.DataFrame(summary_data, columns=['Объект', 'Всего новостей', 'Отрицательные', 'Положительные'])
|
191 |
-
summary_df = summary_df.sort_values('Отрицательные', ascending=False)
|
192 |
-
|
193 |
-
# Write 'Сводка' sheet
|
194 |
-
summary_df.to_excel(writer, sheet_name='Сводка', startrow=3, startcol=4, index=False, header=False)
|
195 |
-
|
196 |
-
# Process data for 'Значимые' and 'Анализ' sheets
|
197 |
-
significant_data = []
|
198 |
-
analysis_data = []
|
199 |
-
for _, row in df.iterrows():
|
200 |
-
if any(row[model] in ['Negative', 'Positive'] for model in ['FinBERT', 'RoBERTa', 'FinBERT-Tone']):
|
201 |
-
sentiment = 'Negative' if any(row[model] == 'Negative' for model in ['FinBERT', 'RoBERTa', 'FinBERT-Tone']) else 'Positive'
|
202 |
-
significant_data.append([row['Объект'], sentiment, row['Заголовок'], row['Выдержки из текста']])
|
203 |
-
|
204 |
-
if any(row[model] == 'Negative' for model in ['FinBERT', 'RoBERTa', 'FinBERT-Tone']):
|
205 |
-
analysis_data.append([row['Объект'], 'РИСК УБЫТКА', row['Заголовок'], row['Выдержки из текста']])
|
206 |
-
|
207 |
-
# Write 'Значимые' sheet
|
208 |
-
significant_df = pd.DataFrame(significant_data, columns=['Объект', 'Окраска', 'Заголовок', 'Текст'])
|
209 |
-
significant_df.to_excel(writer, sheet_name='Значимые', startrow=2, startcol=2, index=False)
|
210 |
-
|
211 |
-
# Write 'Анализ' sheet
|
212 |
-
analysis_df = pd.DataFrame(analysis_data, columns=['Объект', 'Тип риска', 'Заголовок', 'Текст'])
|
213 |
-
analysis_df.to_excel(writer, sheet_name='Анализ', startrow=3, startcol=4, index=False)
|
214 |
-
|
215 |
-
# Copy 'Публикации' sheet from original file
|
216 |
-
df.to_excel(writer, sheet_name='Публикации', index=False)
|
217 |
-
|
218 |
-
# Add 'Тех.приложение' sheet
|
219 |
-
df.to_excel(writer, sheet_name='Тех.приложение', index=False)
|
220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
output.seek(0)
|
|
|
222 |
return output
|
223 |
|
224 |
def main():
|
225 |
-
st.title("... приступим к анализу... версия
|
226 |
|
227 |
uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
|
228 |
|
@@ -247,7 +263,11 @@ def main():
|
|
247 |
|
248 |
plt.tight_layout()
|
249 |
st.pyplot(fig)
|
250 |
-
|
|
|
|
|
|
|
|
|
251 |
# Offer download of results
|
252 |
output = create_output_file(df)
|
253 |
st.download_button(
|
|
|
11 |
import time
|
12 |
import torch
|
13 |
from openpyxl import load_workbook
|
14 |
+
from openpyxl import Workbook
|
15 |
+
from openpyxl.utils.dataframe import dataframe_to_rows
|
16 |
|
17 |
# Initialize pymystem3 for lemmatization
|
18 |
mystem = Mystem()
|
|
|
25 |
rubert1 = pipeline("sentiment-analysis", model = "DeepPavlov/rubert-base-cased")
|
26 |
rubert2 = pipeline("sentiment-analysis", model = "blanchefort/rubert-base-cased-sentiment")
|
27 |
|
28 |
+
def create_analysis_data(df):
    """Build the 'Анализ' table from news items any model rated 'Negative'.

    Args:
        df: DataFrame holding the sentiment columns 'FinBERT', 'RoBERTa' and
            'FinBERT-Tone' together with 'Объект', 'Заголовок' and
            'Выдержки из текста'.

    Returns:
        DataFrame with columns 'Объект', 'Тип риска', 'Заголовок', 'Текст'.
        It has one row per input row where at least one of the three model
        columns equals 'Negative'; 'Тип риска' is always 'РИСК УБЫТКА'.
        Rows keep the order they have in ``df``.

    Raises:
        KeyError: if ``df`` has rows but lacks one of the columns above.
    """
    output_columns = ['Объект', 'Тип риска', 'Заголовок', 'Текст']
    if len(df) == 0:
        # No rows to inspect: return the empty table without touching columns.
        return pd.DataFrame([], columns=output_columns)

    model_columns = ['FinBERT', 'RoBERTa', 'FinBERT-Tone']
    # One vectorized comparison replaces building a Series per row via
    # iterrows() just to test three cells.
    is_negative = df[model_columns].eq('Negative').any(axis=1)
    flagged = df.loc[is_negative, ['Объект', 'Заголовок', 'Выдержки из текста']]

    rows = [
        [entity, 'РИСК УБЫТКА', title, excerpt]
        for entity, title, excerpt in flagged.itertuples(index=False, name=None)
    ]
    return pd.DataFrame(rows, columns=output_columns)
|
34 |
+
|
35 |
|
36 |
# Function for lemmatizing Russian text
|
37 |
def lemmatize_text(text):
|
|
|
172 |
|
173 |
return df
|
174 |
|
175 |
+
def _append_dataframe_sheet(wb, title, frame, header=True):
    """Create worksheet ``title`` in ``wb`` and append ``frame`` row by row."""
    ws = wb.create_sheet(title)
    for sheet_row in dataframe_to_rows(frame, index=False, header=header):
        ws.append(sheet_row)
    return ws


def create_output_file(df, uploaded_file=None, analysis_df=None):
    """Assemble the result workbook and return it as an in-memory buffer.

    Sheets are written in this order: 'Сводка', 'Значимые', 'Анализ',
    'Публикации', 'Тех.приложение'.

    Args:
        df: Processed DataFrame with 'Объект', 'Заголовок',
            'Выдержки из текста' and the sentiment columns 'FinBERT',
            'RoBERTa', 'FinBERT-Tone'.
        uploaded_file: Optional path or file-like object of the original
            workbook; its 'Публикации' sheet is copied into the output. When
            omitted, ``df`` is written to 'Публикации' instead, so callers
            that pass only ``df`` keep working.
        analysis_df: Optional pre-built table for the 'Анализ' sheet. When
            omitted it is computed with ``create_analysis_data(df)``.

    Returns:
        io.BytesIO positioned at offset 0 containing the .xlsx bytes.
    """
    model_columns = ['FinBERT', 'RoBERTa', 'FinBERT-Tone']

    # Start from a workbook without the default sheet openpyxl creates.
    wb = Workbook()
    wb.remove(wb.active)

    # Row-level flags, computed once and reused by the summary and by the
    # 'Значимые' sheet instead of re-comparing inside every loop iteration.
    any_negative = df[model_columns].eq('Negative').any(axis=1)
    any_positive = df[model_columns].eq('Positive').any(axis=1)

    # 'Сводка': per-entity totals, most negative first, written without header.
    summary_data = []
    for entity in df['Объект'].unique():
        entity_mask = df['Объект'] == entity
        summary_data.append([
            entity,
            int(entity_mask.sum()),
            int((entity_mask & any_negative).sum()),
            int((entity_mask & any_positive).sum()),
        ])
    summary_df = pd.DataFrame(
        summary_data,
        columns=['Объект', 'Всего новостей', 'Отрицательные', 'Положительные'],
    )
    summary_df = summary_df.sort_values('Отрицательные', ascending=False)
    _append_dataframe_sheet(wb, 'Сводка', summary_df, header=False)

    # 'Значимые': rows with at least one Negative or Positive verdict;
    # Negative wins when the models disagree.
    significant = any_negative | any_positive
    significant_data = [
        [entity, 'Negative' if negative else 'Positive', title, excerpt]
        for entity, title, excerpt, negative in zip(
            df.loc[significant, 'Объект'],
            df.loc[significant, 'Заголовок'],
            df.loc[significant, 'Выдержки из текста'],
            any_negative[significant],
        )
    ]
    significant_df = pd.DataFrame(
        significant_data, columns=['Объект', 'Окраска', 'Заголовок', 'Текст']
    )
    _append_dataframe_sheet(wb, 'Значимые', significant_df)

    # 'Анализ': use the caller's table when given, otherwise derive it here.
    if analysis_df is None:
        analysis_df = create_analysis_data(df)
    _append_dataframe_sheet(wb, 'Анализ', analysis_df)

    # 'Публикации': copy from the original upload when it is available.
    if uploaded_file is not None:
        if hasattr(uploaded_file, 'seek'):
            # NOTE(review): the upload has presumably been read once already
            # while producing df; rewind so the re-read starts at the
            # beginning regardless of pandas version.
            uploaded_file.seek(0)
        publications_df = pd.read_excel(uploaded_file, sheet_name='Публикации')
    else:
        publications_df = df
    _append_dataframe_sheet(wb, 'Публикации', publications_df)

    # 'Тех.приложение': the full processed data.
    _append_dataframe_sheet(wb, 'Тех.приложение', df)

    # Serialize into memory and rewind so the caller can read from the start.
    output = io.BytesIO()
    wb.save(output)
    output.seek(0)
    return output
|
239 |
|
240 |
def main():
|
241 |
+
st.title("... приступим к анализу... версия 33+")
|
242 |
|
243 |
uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
|
244 |
|
|
|
263 |
|
264 |
plt.tight_layout()
|
265 |
st.pyplot(fig)
|
266 |
+
analysis_df = create_analysis_data(df)
|
267 |
+
st.subheader("Анализ")
|
268 |
+
st.dataframe(analysis_df)
|
269 |
+
|
270 |
+
|
271 |
# Offer download of results
|
272 |
output = create_output_file(df)
|
273 |
st.download_button(
|