Spaces:
Sleeping
Sleeping
File size: 6,644 Bytes
e50c383 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import streamlit as st
import pandas as pd
from transformers import pipeline
import re
# Model configurations: maps the language name offered in the UI to the
# Hugging Face model identifier used to classify text in that language.
MODELS = {
    "English": "MarieAngeA13/Sentiment-Analysis-BERT",
    "Danish": "larskjeldgaard/senda",
}

# Page config
# NOTE(review): "π" looks like the remnant of a mis-decoded emoji; restore the
# intended icon if it is known.
st.set_page_config(
    page_title="Multi-language Sentiment Analyzer",
    page_icon="π",
    layout="wide",
)

# Load custom CSS and inject it into the page as an inline <style> element.
# The encoding is explicit so the stylesheet is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open('style.css', encoding='utf-8') as f:
    st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
def process_sentiment(text, pipeline):
    """Process sentiment for a single text entry.

    Args:
        text: The entry to classify. It is converted with ``str()`` before
            being handed to the classifier, so non-string values are accepted.
        pipeline: Callable that takes a string and returns a sequence whose
            first item is a mapping with ``'label'`` and ``'score'`` keys.

    Returns:
        A ``(label, score)`` tuple with the label lower-cased. If the
        classifier call or result lookup raises, a Streamlit warning is shown
        and ``("unknown", 0.0)`` is returned instead.
    """
    # Coerce once, up front: the warning below slices the text, and slicing a
    # non-string (e.g. a float) would raise inside the except handler.
    text = str(text)
    try:
        top_result = pipeline(text)[0]
        # Convert sentiment to lowercase
        return top_result['label'].lower(), top_result['score']
    except Exception as e:
        # Report the failure and fall back to a sentinel result rather than
        # raising.
        st.warning(f"Error processing text: {text[:50]}... Error: {str(e)}")
        return "unknown", 0.0
# HTML for the privacy banner rendered below the language selector.
# The warning sign was previously stored as mis-decoded bytes ("β οΈ").
PRIVACY_NOTICE_HTML = """
<div class="privacy-notice">
⚠️ <b>Privacy Notice:</b> Your data is processed in memory and not stored.
</div>
"""

# App layout: three columns in a 1:2:1 ratio; the title and the input widgets
# are placed in the middle one.
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
    # NOTE(review): the leading "π" looks like the remnant of a mis-decoded
    # emoji; restore the intended character if it is known.
    st.title("π Multi-language Sentiment Analysis")
    # The selected name is later used as a key into MODELS.
    selected_language = st.selectbox(
        "Select Language",
        options=list(MODELS),
        index=0,
    )
    st.markdown(PRIVACY_NOTICE_HTML, unsafe_allow_html=True)
    uploaded_file = st.file_uploader("Upload a CSV file with text", type=["csv"])
# Label spellings this script counts and highlights: the English branch used
# "positive"/"negative", the Danish branch "positiv"/"negativ".
POSITIVE_LABELS = ("positive", "positiv")
NEUTRAL_LABELS = ("neutral",)
NEGATIVE_LABELS = ("negative", "negativ")

# Transcript metadata patterns, compiled once instead of once per row.
SPEAKER_TIMESTAMP_RE = re.compile(
    r'Speaker: Speaker [A-Z], Start Time: \d+\.\d+ - End Time: \d+\.\d+'
)
TIMESTAMP_RE = re.compile(r'Start Time: \d+\.\d+ - End Time: \d+\.\d+')
WHITESPACE_RE = re.compile(r'\s+')

# Usage instructions shown while no file has been uploaded.
# NOTE(review): the "π" before "How to use" looks like the remnant of a
# mis-decoded emoji; restore the intended character if it is known.
INSTRUCTIONS_HTML = """
<div class="instructions">
<h4>π How to use:</h4>
<ol>
<li>Select your desired language</li>
<li>Prepare a CSV file with a column named "text"</li>
<li>Upload your file using the button above</li>
<li>Wait for the analysis to complete</li>
<li>Download the results with sentiment labels</li>
</ol>
</div>
"""


def clean_transcript_text(text):
    """Strip speaker/timestamp markers from a transcript cell.

    Speaker-prefixed timestamps are removed first; only when none are found
    are bare timestamps removed. Whitespace runs are then collapsed to single
    spaces and the result is trimmed.

    Args:
        text: Cell value from the ``text`` column. Missing values become an
            empty string and other non-string values are converted with
            ``str()``.

    Returns:
        The cleaned text as a string.
    """
    if not isinstance(text, str):
        # Empty CSV cells are read as NaN and numeric cells as numbers;
        # the regex calls below would raise TypeError on either.
        text = "" if pd.isna(text) else str(text)
    cleaned_text = SPEAKER_TIMESTAMP_RE.sub('', text)
    if cleaned_text == text:
        cleaned_text = TIMESTAMP_RE.sub('', text)
    return WHITESPACE_RE.sub(' ', cleaned_text).strip()


def highlight_sentiment(val):
    """Return the CSS background rule for a sentiment label ('' if unknown)."""
    if val in POSITIVE_LABELS:
        return 'background-color: rgba(0, 255, 0, 0.2)'
    if val in NEGATIVE_LABELS:
        return 'background-color: rgba(255, 0, 0, 0.2)'
    if val in NEUTRAL_LABELS:
        return 'background-color: rgba(128, 128, 128, 0.2)'
    return ''


def format_share(count, total):
    """Return ``count`` followed by its percentage of ``total``, e.g. ``3 (37.5%)``."""
    return f"{count} ({count / total * 100:.1f}%)"


if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
        if "text" not in df.columns:
            st.error("CSV must contain a 'text' column")
        elif df.empty:
            # A header-only file would otherwise fail with a division by zero
            # when the percentages are computed.
            st.warning("The uploaded CSV contains no rows to analyze.")
        else:
            # NOTE(review): the "π" in the spinner and heading texts below
            # looks like the remnant of a mis-decoded emoji.
            with st.spinner(f"π Analyzing sentiments in {selected_language}..."):
                df['cleaned_text'] = df['text'].apply(clean_transcript_text)
                # NOTE(review): the model pipeline is rebuilt each time this
                # branch runs; consider caching it if reloads prove slow.
                sentiment_pipeline = pipeline(
                    "text-classification",
                    model=MODELS[selected_language],
                    truncation=True,
                    max_length=512,
                )
                results = [
                    process_sentiment(text, sentiment_pipeline)
                    for text in df["cleaned_text"]
                ]
                df["sentiment"] = [r[0] for r in results]
                df["confidence"] = [r[1] for r in results]

            st.markdown("### π Analysis Results")
            # Count with the same label sets used for highlighting, so both
            # label spellings are handled in one place.
            total = len(df)
            sentiments = df["sentiment"]
            pos_count = int(sentiments.isin(POSITIVE_LABELS).sum())
            neu_count = int(sentiments.isin(NEUTRAL_LABELS).sum())
            neg_count = int(sentiments.isin(NEGATIVE_LABELS).sum())

            metric_col1, metric_col2, metric_col3, metric_col4 = st.columns(4)
            with metric_col1:
                st.metric("Positive Sentiments", format_share(pos_count, total))
            with metric_col2:
                st.metric("Neutral Sentiments", format_share(neu_count, total))
            with metric_col3:
                st.metric("Negative Sentiments", format_share(neg_count, total))
            with metric_col4:
                st.metric(
                    "Average Confidence",
                    f"{df['confidence'].mean():.1%}"
                )

            st.markdown("#### Preview")
            # Copy the slice so formatting the column does not write into a
            # view of ``df`` (pandas warns about that).
            preview_df = df[["cleaned_text", "sentiment", "confidence"]].head().copy()
            preview_df["confidence"] = preview_df["confidence"].apply(lambda x: f"{x:.1%}")
            styler = preview_df.style
            # Styler.applymap is deprecated in favour of Styler.map in newer
            # pandas; use whichever this installation provides.
            style_cells = getattr(styler, "map", None) or styler.applymap
            st.dataframe(
                style_cells(highlight_sentiment, subset=['sentiment']),
                use_container_width=True
            )

            # The disk icon was previously stored as mis-decoded bytes ("πΎ").
            st.markdown("### 💾 Download Results")
            csv_data = df.to_csv(index=False)
            st.download_button(
                label="Download Complete Analysis",
                data=csv_data,
                file_name=f"sentiment_results_{selected_language.lower()}.csv",
                mime="text/csv"
            )
    except Exception as e:
        # Top-level boundary for the upload flow: surface any failure in the
        # UI instead of letting the script crash.
        st.error(f"Error processing file: {str(e)}")
        st.error("Full error details:")
        st.code(str(e))
else:
    st.markdown(INSTRUCTIONS_HTML, unsafe_allow_html=True)