Spaces:
Running
Running
Commit
·
55dd648
1
Parent(s):
b4b8d2a
progress more 21
Browse files
app.py
CHANGED
@@ -16,10 +16,11 @@ import torch
|
|
16 |
mystem = Mystem()
|
17 |
|
18 |
# Set up the sentiment analyzers
|
19 |
-
|
20 |
finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
|
21 |
roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
|
22 |
finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone")
|
|
|
23 |
|
24 |
# Function for lemmatizing Russian text
|
25 |
def lemmatize_text(text):
|
@@ -59,16 +60,6 @@ def translate(text):
|
|
59 |
return translated_text
|
60 |
|
61 |
|
62 |
-
|
63 |
-
# Function for VADER sentiment analysis with label mapping
|
64 |
-
def get_vader_sentiment(text):
|
65 |
-
score = vader_analyzer.polarity_scores(text)["compound"]
|
66 |
-
if score > 0.2:
|
67 |
-
return "Positive"
|
68 |
-
elif score < -0.2:
|
69 |
-
return "Negative"
|
70 |
-
return "Neutral"
|
71 |
-
|
72 |
# Functions for FinBERT, RoBERTa, and FinBERT-Tone with label mapping
|
73 |
def get_mapped_sentiment(result):
|
74 |
label = result['label'].lower()
|
@@ -78,6 +69,11 @@ def get_mapped_sentiment(result):
|
|
78 |
return "Negative"
|
79 |
return "Neutral"
|
80 |
|
|
|
|
|
|
|
|
|
|
|
81 |
def get_finbert_sentiment(text):
|
82 |
result = finbert(text, truncation=True, max_length=512)[0]
|
83 |
return get_mapped_sentiment(result)
|
@@ -135,26 +131,26 @@ def process_file(uploaded_file):
|
|
135 |
progress_text.text(f"{i + 1} из {total_news} сообщений переведено")
|
136 |
|
137 |
# Perform sentiment analysis
|
138 |
-
|
139 |
finbert_results = [get_finbert_sentiment(text) for text in translated_texts]
|
140 |
roberta_results = [get_roberta_sentiment(text) for text in translated_texts]
|
141 |
finbert_tone_results = [get_finbert_tone_sentiment(text) for text in translated_texts]
|
142 |
|
143 |
# Add results to DataFrame
|
144 |
-
df['
|
145 |
df['FinBERT'] = finbert_results
|
146 |
df['RoBERTa'] = roberta_results
|
147 |
df['FinBERT-Tone'] = finbert_tone_results
|
148 |
df['Translated']
|
149 |
|
150 |
# Reorder columns
|
151 |
-
columns_order = ['Объект', '
|
152 |
df = df[columns_order]
|
153 |
|
154 |
return df
|
155 |
|
156 |
def main():
|
157 |
-
st.title("... приступим к анализу... версия
|
158 |
|
159 |
uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
|
160 |
|
@@ -168,7 +164,7 @@ def main():
|
|
168 |
fig, axs = plt.subplots(2, 2, figsize=(12, 8))
|
169 |
fig.suptitle("Распределение окраски по моделям")
|
170 |
|
171 |
-
models = ['
|
172 |
for i, model in enumerate(models):
|
173 |
ax = axs[i // 2, i % 2]
|
174 |
sentiment_counts = df[model].value_counts()
|
|
|
16 |
mystem = Mystem()
|
17 |
|
18 |
# Set up the sentiment analyzers
|
19 |
+
|
20 |
finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
|
21 |
roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
|
22 |
finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone")
|
23 |
+
sberubert = pipeline("sentiment-analysis", model = "ai-forever/ruBert-base")
|
24 |
|
25 |
# Function for lemmatizing Russian text
|
26 |
def lemmatize_text(text):
|
|
|
60 |
return translated_text
|
61 |
|
62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
# Functions for FinBERT, RoBERTa, and FinBERT-Tone with label mapping
|
64 |
def get_mapped_sentiment(result):
|
65 |
label = result['label'].lower()
|
|
|
69 |
return "Negative"
|
70 |
return "Neutral"
|
71 |
|
72 |
+
def get_sberubert_sentiment(text):
|
73 |
+
result = sberubert(text, truncation=True, max_length=512)[0]
|
74 |
+
return get_mapped_sentiment(result)
|
75 |
+
|
76 |
+
|
77 |
def get_finbert_sentiment(text):
|
78 |
result = finbert(text, truncation=True, max_length=512)[0]
|
79 |
return get_mapped_sentiment(result)
|
|
|
131 |
progress_text.text(f"{i + 1} из {total_news} сообщений переведено")
|
132 |
|
133 |
# Perform sentiment analysis
|
134 |
+
rubert_results = [get_sberubert_sentiment(text) for text in translated_texts]
|
135 |
finbert_results = [get_finbert_sentiment(text) for text in translated_texts]
|
136 |
roberta_results = [get_roberta_sentiment(text) for text in translated_texts]
|
137 |
finbert_tone_results = [get_finbert_tone_sentiment(text) for text in translated_texts]
|
138 |
|
139 |
# Add results to DataFrame
|
140 |
+
df['ruBERT'] = rubert_results
|
141 |
df['FinBERT'] = finbert_results
|
142 |
df['RoBERTa'] = roberta_results
|
143 |
df['FinBERT-Tone'] = finbert_tone_results
|
144 |
df['Translated']
|
145 |
|
146 |
# Reorder columns
|
147 |
+
columns_order = ['Объект', 'ruBERT', 'FinBERT', 'RoBERTa', 'FinBERT-Tone', 'Выдержки из текста', 'Translated' ]
|
148 |
df = df[columns_order]
|
149 |
|
150 |
return df
|
151 |
|
152 |
def main():
|
153 |
+
st.title("... приступим к анализу... версия 21")
|
154 |
|
155 |
uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
|
156 |
|
|
|
164 |
fig, axs = plt.subplots(2, 2, figsize=(12, 8))
|
165 |
fig.suptitle("Распределение окраски по моделям")
|
166 |
|
167 |
+
models = ['ruBERT', 'FinBERT', 'RoBERTa', 'FinBERT-Tone']
|
168 |
for i, model in enumerate(models):
|
169 |
ax = axs[i // 2, i % 2]
|
170 |
sentiment_counts = df[model].value_counts()
|