Spaces:

NIXBLACK
/

SentimentAnalysis_LASER_

Sleeping

App Files Files Community

NIXBLACK committed on Dec 1, 2023

Commit

2bc2000

1 Parent(s): 5275263

Create app.py

Browse files

Files changed (1) hide show

app.py +134 -0

app.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import streamlit as st
+import numpy as np
+import pandas as pd
+import chardet
+import matplotlib.pyplot as plt
+from laser_encoders import LaserEncoderPipeline
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import LabelEncoder
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense
+from tqdm import tqdm
+with open('./train.csv', 'rb') as f:
+    result = chardet.detect(f.read())
+# Use the detected encoding when reading the CSV file
+data = pd.read_csv('./train.csv', encoding=result['encoding'])
+data = data[['sentiment', 'text']]
+sentiments = []
+texts = []
+for index, row in data.iterrows():
+    sentiment = row['sentiment'].lower()  # Convert to lowercase for case-insensitivity
+    if sentiment == 'neutral':
+        sentiments.append(1)
+    elif sentiment == 'positive':
+        sentiments.append(2)
+    elif sentiment == 'negative':
+        sentiments.append(3)
+    else:
+        # Handle the case where sentiment is not one of the expected values
+        # You may choose to skip this row or handle it differently based on your requirements
+        print(f"Warning: Unknown sentiment '{sentiment}' in row {index}")
+        continue  # Skip the rest of the loop for this row
+    text = row['text']
+    if not isinstance(text, float):
+        texts.append(text)
+    else:
+        # Skip the sentiment for this row as well
+        print(f"Warning: Skipping row {index} with float text value")
+        sentiments.pop()  # Remove the last added sentiment
+label_encoder = LabelEncoder()
+encoded_sentiments = label_encoder.fit_transform(sentiments)
+# Split the data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(texts, encoded_sentiments, test_size=0.2, random_state=42)
+# Initialize the LaserEncoder
+encoder = LaserEncoderPipeline(lang="eng_Latn")
+# Initialize empty arrays to store embeddings
+X_train_embeddings = []
+X_test_embeddings = []
+for sentence in tqdm(X_train):
+    embeddings = encoder.encode_sentences([sentence])[0]
+    X_train_embeddings.append(embeddings)
+for sentence in tqdm(X_test):
+    embeddings = encoder.encode_sentences([sentence])[0]
+    X_test_embeddings.append(embeddings)
+# Convert lists to numpy arrays
+X_train_embeddings = np.array(X_train_embeddings)
+X_test_embeddings = np.array(X_test_embeddings)
+# Sentiment Prediction with RNN Neural Network and Confusion Matrix
+from keras.models import Sequential
+from keras.layers import Dense, SimpleRNN, Reshape, Dropout
+from keras.optimizers import Adam
+from keras.callbacks import LearningRateScheduler
+from sklearn.metrics import confusion_matrix
+import seaborn as sns
+import matplotlib.pyplot as plt
+import numpy as np
+# Build a neural network model with RNN
+model = Sequential()
+model.add(Dense(256, input_shape=(1024,), activation='tanh'))
+model.add(Reshape((1, 256)))
+model.add(SimpleRNN(128, activation='relu'))
+model.add(Dense(64, activation='relu'))
+model.add(Dropout(0.5))  # Adding dropout for regularization
+model.add(Dense(3, activation='softmax'))
+# Use a learning rate scheduler
+def lr_schedule(epoch):
+    return 0.0001 * 0.9 ** epoch
+opt = Adam(learning_rate=0.0001)
+lr_scheduler = LearningRateScheduler(lr_schedule)
+#
+# Compile the model
+model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
+# Print model summary to check the architecture
+model.summary()
+# Train the model with the learning rate scheduler
+model.fit(X_train_embeddings, y_train, epochs=30, batch_size=32, validation_split=0.1, callbacks=[lr_scheduler])
+# Evaluate the model on the test set
+accuracy = model.evaluate(X_test_embeddings, y_test)[1]
+# Predictions on the test set
+y_pred_probabilities = model.predict(X_test_embeddings)
+y_pred = np.argmax(y_pred_probabilities, axis=1)
+language = st.slider('Enter the language:')
+user_text = st.slider('Enter the text:')
+encoder = LaserEncoderPipeline(lang=language)
+user_text_embedding = encoder.encode_sentences([user_text])[0]
+user_text_embedding = np.reshape(user_text_embedding, (1, -1))
+predicted_sentiment = np.argmax(model.predict(user_text_embedding))
+predicted_sentiment_no = label_encoder.inverse_transform([predicted_sentiment])[0]
+if predicted_sentiment_no == 1:
+  predicted_sentiment_label = 'neutral'
+elif predicted_sentiment_no == 2:
+  predicted_sentiment_label = 'positive'
+else:
+  predicted_sentiment_label = 'negative'
+st.write("Predicted Sentiment:"+predicted_sentiment_label)