Spaces:

Apasalic
/

sentiment-analyzer

Sleeping

File size: 6,644 Bytes

e50c383

import streamlit as st
import pandas as pd
from transformers import pipeline
import re

# Model name handed to the transformers pipeline for each selectable language.
# The keys double as the options shown in the language selector.
MODELS = dict(
    English="MarieAngeA13/Sentiment-Analysis-BERT",
    Danish="larskjeldgaard/senda",
)

# Page config: browser tab title, icon, and wide layout.
st.set_page_config(
    page_title="Multi-language Sentiment Analyzer",
    page_icon="🎭",
    layout="wide",
)

# Load custom CSS and inject it into the page as an inline <style> element.
# The encoding is stated explicitly so the stylesheet is decoded the same way
# regardless of the host's locale default.
with open('style.css', encoding='utf-8') as css_file:
    st.markdown(f'<style>{css_file.read()}</style>', unsafe_allow_html=True)

def process_sentiment(text, pipeline):
    """Classify a single text entry and return ``(label, score)``.

    Args:
        text: The entry to classify. It is converted with ``str()`` before
            use, so non-string values are accepted.
        pipeline: Callable classifier. It is called with the text and must
            return a sequence whose first item is a mapping with ``'label'``
            and ``'score'`` keys. Inside this function the parameter name
            shadows any module-level ``pipeline``.

    Returns:
        ``(label.lower(), score)`` on success. If the classifier (or reading
        its result) raises, a Streamlit warning is shown and
        ``("unknown", 0.0)`` is returned instead.
    """
    # Normalise once so the classifier call and the warning preview both work
    # on a string; slicing the raw value would fail for e.g. floats or ints.
    text = str(text)
    try:
        top_result = pipeline(text)[0]
        # Convert sentiment to lowercase
        return top_result['label'].lower(), top_result['score']
    except Exception as e:
        # Best-effort: one bad entry must not abort the surrounding batch.
        st.warning(f"Error processing text: {text[:50]}... Error: {str(e)}")
        return "unknown", 0.0

# Sentiment labels grouped by polarity. Both spellings the app recognises are
# listed ("positive"/"negative" and "positiv"/"negativ"), so counting and
# highlighting share one definition instead of branching on the language.
_POSITIVE_LABELS = ("positive", "positiv")
_NEUTRAL_LABELS = ("neutral",)
_NEGATIVE_LABELS = ("negative", "negativ")

# Transcript markers that are removed from the text before classification.
_SPEAKER_TIMESTAMP_RE = re.compile(
    r'Speaker: Speaker [A-Z], Start Time: \d+\.\d+ - End Time: \d+\.\d+'
)
_TIMESTAMP_RE = re.compile(r'Start Time: \d+\.\d+ - End Time: \d+\.\d+')
_WHITESPACE_RE = re.compile(r'\s+')


def clean_transcript_text(text):
    """Strip speaker/timestamp markers and collapse whitespace.

    Args:
        text: One cell of the uploaded ``text`` column. Missing values (NaN)
            are treated as an empty string; other non-string values are
            converted with ``str()``.

    Returns:
        The text without transcript markers, with every whitespace run
        replaced by a single space and no leading/trailing whitespace.
    """
    if not isinstance(text, str):
        # Empty CSV cells are read by pandas as NaN (a float); passing that to
        # a regex would raise and abort the whole file.
        text = "" if pd.isna(text) else str(text)

    cleaned_text = _SPEAKER_TIMESTAMP_RE.sub('', text)
    if cleaned_text == text:
        # No speaker-prefixed marker matched; fall back to bare timestamps.
        cleaned_text = _TIMESTAMP_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', cleaned_text).strip()


def highlight_sentiment(val):
    """Return the CSS background for a sentiment label ('' if unrecognised)."""
    if val in _POSITIVE_LABELS:
        return 'background-color: rgba(0, 255, 0, 0.2)'
    if val in _NEGATIVE_LABELS:
        return 'background-color: rgba(255, 0, 0, 0.2)'
    if val in _NEUTRAL_LABELS:
        return 'background-color: rgba(128, 128, 128, 0.2)'
    return ''


# App layout: all content lives in the wide middle column of three.
_left, col2, _right = st.columns([1, 2, 1])
with col2:
    st.title("🎭 Multi-language Sentiment Analysis")

    selected_language = st.selectbox(
        "Select Language",
        options=list(MODELS.keys()),
        index=0
    )

    st.markdown("""
        <div class="privacy-notice">
            ⚠️ <b>Privacy Notice:</b> Your data is processed in memory and not stored.
        </div>
        """, unsafe_allow_html=True)

    uploaded_file = st.file_uploader("Upload a CSV file with text", type=["csv"])

    if uploaded_file:
        try:
            df = pd.read_csv(uploaded_file)
            if "text" not in df.columns:
                st.error("CSV must contain a 'text' column")
            elif df.empty:
                # Without this guard the percentage metrics below would divide
                # by zero for a header-only file.
                st.warning("The uploaded CSV contains no rows to analyze.")
            else:
                with st.spinner(f"📊 Analyzing sentiments in {selected_language}..."):
                    df['cleaned_text'] = df['text'].apply(clean_transcript_text)

                    # NOTE(review): the model is loaded each time this branch
                    # runs; if that proves slow, consider caching the pipeline
                    # (e.g. st.cache_resource) - confirm the installed
                    # Streamlit version provides it.
                    sentiment_pipeline = pipeline(
                        "text-classification",
                        model=MODELS[selected_language],
                        truncation=True,
                        max_length=512
                    )

                    results = [
                        process_sentiment(text, sentiment_pipeline)
                        for text in df["cleaned_text"]
                    ]
                    df["sentiment"] = [label for label, _ in results]
                    df["confidence"] = [score for _, score in results]

                st.markdown("### 📈 Analysis Results")

                total_rows = len(df)  # > 0, guaranteed by the df.empty guard
                sentiment_col = df["sentiment"]
                label_counts = [
                    ("Positive Sentiments", int(sentiment_col.isin(_POSITIVE_LABELS).sum())),
                    ("Neutral Sentiments", int(sentiment_col.isin(_NEUTRAL_LABELS).sum())),
                    ("Negative Sentiments", int(sentiment_col.isin(_NEGATIVE_LABELS).sum())),
                ]

                # Three count metrics followed by the average confidence.
                *count_cols, confidence_col = st.columns(4)
                for metric_col, (title, count) in zip(count_cols, label_counts):
                    with metric_col:
                        st.metric(title, f"{count} ({count / total_rows * 100:.1f}%)")
                with confidence_col:
                    st.metric(
                        "Average Confidence",
                        f"{df['confidence'].mean():.1%}"
                    )

                st.markdown("#### Preview")

                # Copy the slice so formatting the preview neither touches df
                # nor triggers pandas' chained-assignment warning.
                preview_df = df[["cleaned_text", "sentiment", "confidence"]].head().copy()
                preview_df["confidence"] = preview_df["confidence"].apply(lambda x: f"{x:.1%}")

                # Styler.applymap is deprecated in favour of Styler.map in
                # newer pandas; use the new name when it exists and fall back
                # to the old one otherwise.
                styler = preview_df.style
                style_cells = getattr(styler, "map", None) or styler.applymap
                st.dataframe(
                    style_cells(highlight_sentiment, subset=['sentiment']),
                    use_container_width=True
                )

                st.markdown("### 💾 Download Results")
                csv_data = df.to_csv(index=False)
                st.download_button(
                    label="Download Complete Analysis",
                    data=csv_data,
                    file_name=f"sentiment_results_{selected_language.lower()}.csv",
                    mime="text/csv"
                )

        except Exception as e:
            # Top-level boundary of the upload flow: report the failure in the
            # UI instead of letting the script crash.
            st.error(f"Error processing file: {str(e)}")
            st.error("Full error details:")
            st.code(str(e))
    else:
        st.markdown("""
            <div class="instructions">
                <h4>📝 How to use:</h4>
                <ol>
                    <li>Select your desired language</li>
                    <li>Prepare a CSV file with a column named "text"</li>
                    <li>Upload your file using the button above</li>
                    <li>Wait for the analysis to complete</li>
                    <li>Download the results with sentiment labels</li>
                </ol>
            </div>
            """, unsafe_allow_html=True)