pentarosarium committed on
Commit
55dd648
·
1 Parent(s): b4b8d2a

progress more 21

Browse files
Files changed (1) hide show
  1. app.py +12 -16
app.py CHANGED
@@ -16,10 +16,11 @@ import torch
16
  mystem = Mystem()
17
 
18
  # Set up the sentiment analyzers
19
- vader_analyzer = SentimentIntensityAnalyzer()
20
  finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
21
  roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
22
  finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone")
 
23
 
24
  # Function for lemmatizing Russian text
25
  def lemmatize_text(text):
@@ -59,16 +60,6 @@ def translate(text):
59
  return translated_text
60
 
61
 
62
-
63
- # Function for VADER sentiment analysis with label mapping
64
- def get_vader_sentiment(text):
65
- score = vader_analyzer.polarity_scores(text)["compound"]
66
- if score > 0.2:
67
- return "Positive"
68
- elif score < -0.2:
69
- return "Negative"
70
- return "Neutral"
71
-
72
  # Functions for FinBERT, RoBERTa, and FinBERT-Tone with label mapping
73
  def get_mapped_sentiment(result):
74
  label = result['label'].lower()
@@ -78,6 +69,11 @@ def get_mapped_sentiment(result):
78
  return "Negative"
79
  return "Neutral"
80
 
 
 
 
 
 
81
  def get_finbert_sentiment(text):
82
  result = finbert(text, truncation=True, max_length=512)[0]
83
  return get_mapped_sentiment(result)
@@ -135,26 +131,26 @@ def process_file(uploaded_file):
135
  progress_text.text(f"{i + 1} из {total_news} сообщений переведено")
136
 
137
  # Perform sentiment analysis
138
- vader_results = [get_vader_sentiment(text) for text in translated_texts]
139
  finbert_results = [get_finbert_sentiment(text) for text in translated_texts]
140
  roberta_results = [get_roberta_sentiment(text) for text in translated_texts]
141
  finbert_tone_results = [get_finbert_tone_sentiment(text) for text in translated_texts]
142
 
143
  # Add results to DataFrame
144
- df['VADER'] = vader_results
145
  df['FinBERT'] = finbert_results
146
  df['RoBERTa'] = roberta_results
147
  df['FinBERT-Tone'] = finbert_tone_results
148
  df['Translated']
149
 
150
  # Reorder columns
151
- columns_order = ['Объект', 'VADER', 'FinBERT', 'RoBERTa', 'FinBERT-Tone', 'Выдержки из текста', 'Translated' ]
152
  df = df[columns_order]
153
 
154
  return df
155
 
156
  def main():
157
- st.title("... приступим к анализу... версия 20")
158
 
159
  uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
160
 
@@ -168,7 +164,7 @@ def main():
168
  fig, axs = plt.subplots(2, 2, figsize=(12, 8))
169
  fig.suptitle("Распределение окраски по моделям")
170
 
171
- models = ['VADER', 'FinBERT', 'RoBERTa', 'FinBERT-Tone']
172
  for i, model in enumerate(models):
173
  ax = axs[i // 2, i % 2]
174
  sentiment_counts = df[model].value_counts()
 
16
  mystem = Mystem()
17
 
18
  # Set up the sentiment analyzers
19
+
20
  finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
21
  roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
22
  finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone")
23
+ sberubert = pipeline("sentiment-analysis", model = "ai-forever/ruBert-base")
24
 
25
  # Function for lemmatizing Russian text
26
  def lemmatize_text(text):
 
60
  return translated_text
61
 
62
 
 
 
 
 
 
 
 
 
 
 
63
  # Functions for FinBERT, RoBERTa, and FinBERT-Tone with label mapping
64
  def get_mapped_sentiment(result):
65
  label = result['label'].lower()
 
69
  return "Negative"
70
  return "Neutral"
71
 
72
+ def get_sberubert_sentiment(text):
73
+ result = sberubert(text, truncation=True, max_length=512)[0]
74
+ return get_mapped_sentiment(result)
75
+
76
+
77
  def get_finbert_sentiment(text):
78
  result = finbert(text, truncation=True, max_length=512)[0]
79
  return get_mapped_sentiment(result)
 
131
  progress_text.text(f"{i + 1} из {total_news} сообщений переведено")
132
 
133
  # Perform sentiment analysis
134
+ rubert_results = [get_sberubert_sentiment(text) for text in translated_texts]
135
  finbert_results = [get_finbert_sentiment(text) for text in translated_texts]
136
  roberta_results = [get_roberta_sentiment(text) for text in translated_texts]
137
  finbert_tone_results = [get_finbert_tone_sentiment(text) for text in translated_texts]
138
 
139
  # Add results to DataFrame
140
+ df['ruBERT'] = rubert_results
141
  df['FinBERT'] = finbert_results
142
  df['RoBERTa'] = roberta_results
143
  df['FinBERT-Tone'] = finbert_tone_results
144
  df['Translated']
145
 
146
  # Reorder columns
147
+ columns_order = ['Объект', 'ruBERT', 'FinBERT', 'RoBERTa', 'FinBERT-Tone', 'Выдержки из текста', 'Translated' ]
148
  df = df[columns_order]
149
 
150
  return df
151
 
152
  def main():
153
+ st.title("... приступим к анализу... версия 21")
154
 
155
  uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
156
 
 
164
  fig, axs = plt.subplots(2, 2, figsize=(12, 8))
165
  fig.suptitle("Распределение окраски по моделям")
166
 
167
+ models = ['ruBERT', 'FinBERT', 'RoBERTa', 'FinBERT-Tone']
168
  for i, model in enumerate(models):
169
  ax = axs[i // 2, i % 2]
170
  sentiment_counts = df[model].value_counts()