import streamlit as st import pandas as pd import time from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer from transformers import pipeline, MarianMTModel, MarianTokenizer import matplotlib.pyplot as plt from pymystem3 import Mystem import io from rapidfuzz import fuzz # Initialize components (VADER, FinBERT, RoBERTa, FinBERT-Tone, Mystem, translation model) # (Copy the initialization code from your original script) # Define helper functions (lemmatize_text, translate, get_vader_sentiment...) # (Copy these functions from your original script) def process_file(uploaded_file): df = pd.read_excel(uploaded_file, sheet_name='Публикации') # Apply fuzzy deduplication df = df.groupby('Объект').apply(lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)).reset_index(drop=True) # Translate texts translated_texts = [] progress_bar = st.progress(0) for i, text in enumerate(df['Выдержки из текста']): translated_text = translate(str(text)) translated_texts.append(translated_text) progress_bar.progress((i + 1) / len(df)) # Perform sentiment analysis vader_results = [get_vader_sentiment(text) for text in translated_texts] finbert_results = [get_finbert_sentiment(text) for text in translated_texts] roberta_results = [get_roberta_sentiment(text) for text in translated_texts] finbert_tone_results = [get_finbert_tone_sentiment(text) for text in translated_texts] # Add results to DataFrame df['VADER'] = vader_results df['FinBERT'] = finbert_results df['RoBERTa'] = roberta_results df['FinBERT-Tone'] = finbert_tone_results # Reorder columns columns_order = ['Объект', 'VADER', 'FinBERT', 'RoBERTa', 'FinBERT-Tone', 'Выдержки из текста'] df = df[columns_order] return df def main(): st.title("Sentiment Analysis App") uploaded_file = st.file_uploader("Choose an Excel file", type="xlsx") if uploaded_file is not None: df = process_file(uploaded_file) st.subheader("Data Preview") st.write(df.head()) st.subheader("Sentiment Distribution") fig, axs = plt.subplots(2, 2, figsize=(12, 8)) fig.suptitle("Sentiment Distribution for Each Model") models = ['VADER', 'FinBERT', 'RoBERTa', 'FinBERT-Tone'] for i, model in enumerate(models): ax = axs[i // 2, i % 2] sentiment_counts = df[model].value_counts() sentiment_counts.plot(kind='bar', ax=ax) ax.set_title(f"{model} Sentiment") ax.set_xlabel("Sentiment") ax.set_ylabel("Count") plt.tight_layout() st.pyplot(fig) # Offer download of results output = io.BytesIO() with pd.ExcelWriter(output, engine='openpyxl') as writer: df.to_excel(writer, index=False) output.seek(0) st.download_button( label="Download results as Excel", data=output, file_name="sentiment_analysis_results.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ) if __name__ == "__main__": main()