Spaces:
Running
Running
Commit
·
2d683e0
1
Parent(s):
d851af8
russification and optimization
Browse files
app.py
CHANGED
@@ -2,8 +2,8 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
import time
|
4 |
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
5 |
-
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
|
6 |
-
from transformers import MarianMTModel, MarianTokenizer
|
7 |
import matplotlib.pyplot as plt
|
8 |
from pymystem3 import Mystem
|
9 |
import io
|
@@ -25,13 +25,13 @@ def lemmatize_text(text):
|
|
25 |
|
26 |
# Translation model for Russian to English
|
27 |
model_name = "Helsinki-NLP/opus-mt-ru-en"
|
28 |
-
translation_tokenizer =
|
29 |
-
translation_model =
|
|
|
|
|
30 |
|
31 |
def translate(text):
|
32 |
-
|
33 |
-
translated_tokens = translation_model.generate(**inputs)
|
34 |
-
return translation_tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
|
35 |
|
36 |
# Function for VADER sentiment analysis with label mapping
|
37 |
def get_vader_sentiment(text):
|
@@ -111,19 +111,19 @@ def process_file(uploaded_file):
|
|
111 |
return df
|
112 |
|
113 |
def main():
|
114 |
-
st.title("
|
115 |
|
116 |
-
uploaded_file = st.file_uploader("
|
117 |
|
118 |
if uploaded_file is not None:
|
119 |
df = process_file(uploaded_file)
|
120 |
|
121 |
-
st.subheader("
|
122 |
st.write(df.head())
|
123 |
|
124 |
-
st.subheader("
|
125 |
fig, axs = plt.subplots(2, 2, figsize=(12, 8))
|
126 |
-
fig.suptitle("
|
127 |
|
128 |
models = ['VADER', 'FinBERT', 'RoBERTa', 'FinBERT-Tone']
|
129 |
for i, model in enumerate(models):
|
@@ -143,7 +143,7 @@ def main():
|
|
143 |
df.to_excel(writer, index=False)
|
144 |
output.seek(0)
|
145 |
st.download_button(
|
146 |
-
label="
|
147 |
data=output,
|
148 |
file_name="sentiment_analysis_results.xlsx",
|
149 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
|
2 |
import pandas as pd
|
3 |
import time
|
4 |
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
5 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
6 |
+
#from transformers import MarianMTModel, MarianTokenizer
|
7 |
import matplotlib.pyplot as plt
|
8 |
from pymystem3 import Mystem
|
9 |
import io
|
|
|
25 |
|
26 |
# Translation model for Russian to English
|
27 |
model_name = "Helsinki-NLP/opus-mt-ru-en"
|
28 |
+
translation_tokenizer = AutoTokenizer.from_pretrained(model_name)
|
29 |
+
translation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
30 |
+
|
31 |
+
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ru-en")
|
32 |
|
33 |
def translate(text):
|
34 |
+
return translator(text)[0]['translation_text']
|
|
|
|
|
35 |
|
36 |
# Function for VADER sentiment analysis with label mapping
|
37 |
def get_vader_sentiment(text):
|
|
|
111 |
return df
|
112 |
|
113 |
def main():
|
114 |
+
st.title("... приступим к анализу...")
|
115 |
|
116 |
+
uploaded_file = st.file_uploader("ВЫБИРАЙТЕ EXCEL-файл", type="xlsx")
|
117 |
|
118 |
if uploaded_file is not None:
|
119 |
df = process_file(uploaded_file)
|
120 |
|
121 |
+
st.subheader("Предпросмотр данных")
|
122 |
st.write(df.head())
|
123 |
|
124 |
+
st.subheader("Распределение окраски")
|
125 |
fig, axs = plt.subplots(2, 2, figsize=(12, 8))
|
126 |
+
fig.suptitle("Распределение окраски по моделям")
|
127 |
|
128 |
models = ['VADER', 'FinBERT', 'RoBERTa', 'FinBERT-Tone']
|
129 |
for i, model in enumerate(models):
|
|
|
143 |
df.to_excel(writer, index=False)
|
144 |
output.seek(0)
|
145 |
st.download_button(
|
146 |
+
label="Хотите загрузить результат? Вот он",
|
147 |
data=output,
|
148 |
file_name="sentiment_analysis_results.xlsx",
|
149 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|