import streamlit as st
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
from transformers import pipeline
import matplotlib.pyplot as plt
import base64
import os
import re  # used for keyword-frequency matching below
from wordcloud import WordCloud
import time
# Function to perform sentiment analysis using a Hugging Face model
hf_sentiment_analyzer = pipeline(
    "sentiment-analysis", model="Dmyadav2001/Sentimental-Analysis"
)


def analyze_hf_sentiment(text):
    # Rough guard against the model's 512-token input limit (truncates by characters)
    if len(text) > 512:
        text = text[:511]
    result = hf_sentiment_analyzer(text)
    label = result[0]["label"]
    if label == "LABEL_1":
        return "Positive"
    elif label == "LABEL_0":
        return "Negative"
    elif label == "LABEL_2":
        return "Neutral"


# Function to perform sentiment analysis using VADER
def analyze_vader_sentiment(text):
    analyzer = SentimentIntensityAnalyzer()
    vader_score = analyzer.polarity_scores(text)["compound"]
    if vader_score > 0:
        return "Positive"
    elif vader_score == 0:
        return "Neutral"
    else:
        return "Negative"


# Function to perform sentiment analysis using TextBlob
def analyze_textblob_sentiment(text):
    analysis = TextBlob(text)
    sentiment_score = analysis.sentiment.polarity
    if sentiment_score > 0:
        return "Positive"
    elif sentiment_score == 0:
        return "Neutral"
    else:
        return "Negative"


# Function to display DataFrame with updated sentiment column
def display_dataframe(df):
    st.write(df)


# Function to display pie chart for sentiment distribution
def display_pie_chart(df, column):
    sentiment_counts = df[column].value_counts()
    fig, ax = plt.subplots()
    ax.pie(
        sentiment_counts,
        labels=sentiment_counts.index,
        autopct="%1.1f%%",
        startangle=140,
    )
    ax.axis("equal")
    st.pyplot(fig)
    # Add a download button
    if st.button("Download Pie Chart"):
        # Save this specific figure (not just the current pyplot figure) as an image file
        fig.savefig("pie_chart.png")
        # Offer the image file for download
        st.download_button(
            label="Download Pie Chart Image",
            data=open("pie_chart.png", "rb").read(),
            file_name="pie_chart.png",
            mime="image/png",
        )


# Function to display word cloud
def display_wordcloud(text_data):
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(
        text_data
    )
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    st.pyplot(fig)
    # Add a download button
    if st.button("Download Word Cloud"):
        # Save this figure as an image file
        fig.savefig("word_cloud.png")
        # Offer the image file for download
        st.download_button(
            label="Download Word Cloud Image",
            data=open("word_cloud.png", "rb").read(),
            file_name="word_cloud.png",
            mime="image/png",
        )


# Function to download CSV file
def download_csv(df):
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()  # Base64-encode the CSV for a data-URI link
    href = f'<a href="data:file/csv;base64,{b64}" download="sentiment_analysis_results.csv">Download CSV File</a>'
    st.markdown(href, unsafe_allow_html=True)
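# Note: Streamlit's built-in widget would achieve the same without hand-building the link, e.g.
#   st.download_button("Download CSV File", df.to_csv(index=False),
#                      file_name="sentiment_analysis_results.csv", mime="text/csv")
# The base64 anchor above is kept to preserve the original behaviour.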


# Function to count occurrences of keywords and sentiment distribution
def count_reviews_with_keywords(df, keywords):
    # e.g. keywords = ['logistics', 'supply chain', 'cargo', 'shipment', 'freight', 'package', 'tracking']
    keyword_counts = {
        keyword: {"Positive": 0, "Negative": 0, "Total": 0} for keyword in keywords
    }
    for _, row in df.iterrows():
        review_text = str(row["review_text"])  # cast to str so missing values don't break .lower()
        sentiment = row["Sentiment"]
        for keyword in keywords:
            if keyword.lower() in review_text.lower():
                keyword_counts[keyword]["Total"] += 1
                if sentiment == "Positive":
                    keyword_counts[keyword]["Positive"] += 1
                elif sentiment == "Negative":
                    keyword_counts[keyword]["Negative"] += 1
    return keyword_counts
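# Example with a hypothetical one-row DataFrame:
#   count_reviews_with_keywords(
#       pd.DataFrame({"review_text": ["Fast delivery"], "Sentiment": ["Positive"]}),
#       ["delivery"],
#   )
#   -> {"delivery": {"Positive": 1, "Negative": 0, "Total": 1}}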


# Streamlit UI
st.set_page_config(page_title="SentimentAnalysis App", page_icon=":smiley:")

# st.title("SentimentAnalysis App")
text = "SentimentAnalysis App"
t = st.empty()
# Simple typing animation for the app title
for i in range(len(text) + 1):
    t.markdown("## %s" % text[0:i])
    time.sleep(0.1)

# Sidebar
st.sidebar.title("Options")
input_option = st.sidebar.radio("Select Input Option", ("Free Text", "CSV Files"))
selected_model = st.sidebar.radio(
    "Select Sentiment Analysis Model", ("VADER", "TextBlob", "Hugging Face")
)
result_option = st.sidebar.radio(
    "Select Result Display Option",
    (
        "DataFrame",
        "Pie Chart",
        "Bar Chart",
        "Keyword Frequency",
        "Word Cloud",
        "Comparative Sentiment Analysis",
    ),
)

# Main content
progress_label = st.empty()  # Define progress label
progress_bar = st.progress(0)
progress = 0

# Directory path to store processed files
processed_directory = "processed_files"
# Ensure the directory exists; if not, create it
os.makedirs(processed_directory, exist_ok=True)

# List to store processed filenames
processed_files = []


# Function to get filenames from the processed directory
def get_processed_filenames():
    return [
        f
        for f in os.listdir(processed_directory)
        if os.path.isfile(os.path.join(processed_directory, f))
    ]


if input_option == "Free Text":
    st.subheader("Enter review for sentiment analysis:")
    user_input = st.text_area("", "")
    if not user_input:
        st.info("Enter some text above for sentiment analysis.")
    else:
        with st.spinner("Analyzing..."):
            if selected_model == "Hugging Face":
                result = analyze_hf_sentiment(user_input)
            elif selected_model == "VADER":
                result = analyze_vader_sentiment(user_input)
            elif selected_model == "TextBlob":
                result = analyze_textblob_sentiment(user_input)
            st.write("Sentiment:", result)

if input_option == "CSV Files":
    st.subheader("Select CSV files for sentiment analysis:")
    # Uploading new files
    files = st.file_uploader(
        "Upload New File", type=["csv"], accept_multiple_files=True
    )
    if files:
        # Process uploaded new files
        for file in files:
            if file.type != "text/csv":
                st.warning(
                    "Uploaded file is not a CSV file. Please upload a CSV file only."
                )
            else:
                df = pd.read_csv(file)
                if "review_text" not in df.columns:
                    st.warning(
                        "Uploaded CSV file doesn't contain a 'review_text' column. Please check the CSV file format."
                    )
                else:
                    total_rows = len(df)
                    sentiments_v = []
                    sentiments_tb = []
                    sentiments_hf = []
                    for review_text in df["review_text"]:
                        review_text = str(review_text)  # guard against missing/non-string values
                        sentiments_v.append(analyze_vader_sentiment(review_text))
                        sentiments_tb.append(analyze_textblob_sentiment(review_text))
                        sentiments_hf.append(analyze_hf_sentiment(review_text))
                        progress += 1
                        progress_label.text(f"{progress}/{total_rows}")
                        progress_bar.progress(min(progress / total_rows, 1.0))
                    df["VADER Sentiment"] = sentiments_v
                    df["TextBlob Sentiment"] = sentiments_tb
                    df["HuggingFace Sentiment"] = sentiments_hf
                    # Save processed file with modified filename
                    new_filename = os.path.splitext(file.name)[0] + "1.csv"
                    df.to_csv(
                        os.path.join(processed_directory, new_filename), index=False
                    )
                    st.success(f"New file processed and saved as {new_filename}")

    # List of already processed files
    processed_files = get_processed_filenames()
    selected_files = st.multiselect("Select from Processed Files", processed_files)
    if not files and not selected_files:
        st.info(
            "Upload a new CSV file or select from processed files above for sentiment analysis."
        )
    all_dfs = []
    # Process already selected files
    for file_name in selected_files:
        df = pd.read_csv(os.path.join(processed_directory, file_name))
        all_dfs.append(df)

    # Results
    if all_dfs:
        combined_df = pd.concat(all_dfs, ignore_index=True)
        if selected_model == "TextBlob":
            result = "TextBlob Sentiment"
            combined_df.drop(
                columns=["VADER Sentiment", "HuggingFace Sentiment"],
                inplace=True,
            )
        elif selected_model == "VADER":
            result = "VADER Sentiment"
            combined_df.drop(
                columns=["TextBlob Sentiment", "HuggingFace Sentiment"],
                inplace=True,
            )
        elif selected_model == "Hugging Face":
            result = "HuggingFace Sentiment"
            combined_df.drop(
                columns=["TextBlob Sentiment", "VADER Sentiment"],
                inplace=True,
            )
        combined_df.rename(columns={result: "Sentiment"}, inplace=True)
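        # combined_df now holds the original review columns plus a single "Sentiment"
        # column for the selected model.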
        if result_option == "DataFrame":
            st.subheader("Sentiment Analysis Results")
            display_dataframe(combined_df)
            download_csv(combined_df)
        elif result_option == "Pie Chart":
            st.subheader("Sentiment Distribution")
            display_pie_chart(combined_df, "Sentiment")
        elif result_option == "Bar Chart":
            # Calculate value counts
            sentiment_counts = combined_df["Sentiment"].value_counts()
            # Display bar chart
            st.bar_chart(sentiment_counts)
            # Add a download button
            if st.button("Download Sentiment Counts Chart"):
                # Plot the bar chart
                fig, ax = plt.subplots()
                sentiment_counts.plot(kind="bar", ax=ax)
                plt.xlabel("Sentiment")
                plt.ylabel("Count")
                plt.title("Sentiment Counts")
                plt.xticks(rotation=45, ha="right")
                plt.tight_layout()
                # Save the bar chart as an image file
                fig.savefig("sentiment_counts_chart.png")
                # Offer the image file for download
                st.download_button(
                    label="Download Sentiment Counts Chart Image",
                    data=open("sentiment_counts_chart.png", "rb").read(),
                    file_name="sentiment_counts_chart.png",
                    mime="image/png",
                )
        elif result_option == "Keyword Frequency":
            st.subheader("Keyword Frequency")
            # List of keywords
            keywords = [
                "delivery",
                "shipping",
                "parcel",
                "package",
                "tracking",
                "shipment",
                "cargo",
                "freight",
                "automation",
                "automated",
                "robotic",
                "robots",
                "AI",
                "artificial intelligence",
                "machine learning",
                "chatbot",
                "virtual assistant",
                "customer support",
                "real-time",
                "instant",
                "live update",
                "status",
                "IoT",
                "internet of things",
                "connected devices",
                "smart technology",
                "blockchain",
                "ledger",
                "transparency",
                "security",
                "sustainability",
                "eco-friendly",
                "green logistics",
                "carbon footprint",
                "customer service",
                "support",
                "experience",
                "satisfaction",
                "data analytics",
                "big data",
                "analysis",
                "insights",
                "cloud computing",
                "cloud-based",
                "digital infrastructure",
                "storage",
                "5G",
                "connectivity",
                "network speed",
                "wireless",
                "drone",
                "aerial delivery",
                "UAV",
                "drone shipping",
                "augmented reality",
                "AR",
                "virtual reality",
                "VR",
                "3D printing",
                "additive manufacturing",
                "custom parts",
                "prototyping",
                "inventory management",
                "stock levels",
                "warehouse management",
                "storage solutions",
                "supply chain",
                "logistics",
                "supply network",
                "distribution",
                "eco-packaging",
                "sustainable materials",
                "recycling",
                "waste reduction",
                "digital platform",
                "e-commerce",
                "online shopping",
                "online order",
                "cybersecurity",
                "data protection",
                "privacy",
                "encryption",
                "predictive modeling",
                "forecasting",
                "demand planning",
                "trend analysis",
                "robotics",
                "automated vehicles",
                "self-driving cars",
                "logistics automation",
                "visibility",
                "supply chain visibility",
                "track and trace",
                "monitoring",
                "integration",
                "ERP",
                "supply chain integration",
                "software",
                "optimization",
                "efficiency",
                "process improvement",
                "lean logistics",
                "personalization",
                "customization",
                "tailored services",
                "personal touch",
                "ethical sourcing",
                "fair trade",
                "labor rights",
                "ethical business",
                "user experience",
                "UX",
                "customer journey",
                "service design",
                "visibility",
            ]
text_data = " ".join(combined_df["review_text"]) | |
keyword_frequency = ( | |
pd.Series(text_data.split()).value_counts().reset_index() | |
) | |
keyword_frequency.columns = ["Keyword", "Frequency"] | |
# Filter keyword frequency for specific keywords | |
filtered_keyword_frequency = keyword_frequency[ | |
keyword_frequency["Keyword"].isin(keywords) | |
] | |
# Display bar chart for filtered keyword frequency | |
st.bar_chart(filtered_keyword_frequency.set_index("Keyword")) | |
# Add a download button | |
if st.button('Download Keyword Frequency Chart'): | |
# Plot the bar chart | |
fig, ax = plt.subplots() | |
filtered_keyword_frequency.plot(kind='bar', x='Keyword', y='Frequency', ax=ax) | |
plt.xticks(rotation=45, ha='right') | |
plt.tight_layout() | |
# Save the bar chart as an image file | |
plt.savefig('keyword_frequency_chart.png') | |
# Offer the image file for download | |
st.download_button(label='Download Keyword Frequency Chart Image', data=open('keyword_frequency_chart.png', 'rb').read(), file_name='keyword_frequency_chart.png', mime='image/png') | |
        elif result_option == "Word Cloud":
            st.subheader("Word Cloud")
            text_data = " ".join(combined_df["review_text"].astype(str))
            display_wordcloud(text_data)
        else:
            st.subheader("Comparative Sentiment Analysis")
            supply_chain_areas = {
                "logistics": ["logistics", "supply chain", "cargo", "shipment", "freight", "package", "tracking"],
                "delivery": ["delivery", "shipping", "courier", "postal", "parcel"],
                "inventory": ["inventory", "stock", "storage", "warehouse", "security"],
                "customer service": ["customer service", "support", "helpdesk", "service center", "experience", "refund"],
                "procurement": ["procurement", "sourcing", "purchasing", "buying", "order"],
                "distribution": ["distribution", "supply network", "distribution center"],
                "manufacturing": ["manufacturing", "production", "assembly", "quality", "defect"],
            }
            supply_chain_area = st.sidebar.radio(
                "Select Supply Chain Area",
                ("logistics", "delivery", "inventory", "customer service", "procurement", "distribution", "manufacturing"),
            )
            # Count keyword occurrences and their sentiment distribution for the selected area
            keyword_counts = count_reviews_with_keywords(
                combined_df, supply_chain_areas[supply_chain_area]
            )
            # Convert keyword_counts to DataFrame
            df_counts = pd.DataFrame(keyword_counts).transpose()
            # Plot grouped bar chart of Positive vs. Negative counts per keyword
            st.bar_chart(df_counts[["Positive", "Negative"]], use_container_width=True, height=500)
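# To run this app locally (assuming the script is saved as app.py and Streamlit is installed):
#   streamlit run app.py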