Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import chardet
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
from laser_encoders import LaserEncoderPipeline
|
7 |
+
from sklearn.model_selection import train_test_split
|
8 |
+
from sklearn.metrics import accuracy_score
|
9 |
+
from sklearn.linear_model import LogisticRegression
|
10 |
+
from sklearn.preprocessing import LabelEncoder
|
11 |
+
from tensorflow.keras.models import Sequential
|
12 |
+
from tensorflow.keras.layers import Dense
|
13 |
+
from tqdm import tqdm
|
14 |
+
|
15 |
+
# Detect the file's character encoding from its raw bytes.
with open('./train.csv', 'rb') as raw_file:
    raw_bytes = raw_file.read()
result = chardet.detect(raw_bytes)

# Decode the CSV with the detected encoding, then keep only the label and
# text columns (in that order).
data = pd.read_csv('./train.csv', encoding=result['encoding'])
data = data.loc[:, ['sentiment', 'text']]
|
21 |
+
|
22 |
+
# Integer code assigned to each recognised sentiment label.
SENTIMENT_CODES = {'neutral': 1, 'positive': 2, 'negative': 3}

sentiments = []
texts = []

# Walk the rows once and keep only those with a recognised sentiment and a
# usable text. Both checks run before anything is appended, so `sentiments`
# and `texts` always stay the same length.
for index, raw_sentiment, text in data[['sentiment', 'text']].itertuples(name=None):
    # A missing label cell typically arrives as a float NaN, which has no
    # .lower(); report it like any other unknown label instead of crashing.
    if isinstance(raw_sentiment, str):
        sentiment = raw_sentiment.lower()  # case-insensitive match
    else:
        sentiment = raw_sentiment

    code = SENTIMENT_CODES.get(sentiment)
    if code is None:
        print(f"Warning: Unknown sentiment '{sentiment}' in row {index}")
        continue

    # A float here means the text cell is not a string (e.g. NaN), so the
    # whole row is dropped.
    if isinstance(text, float):
        print(f"Warning: Skipping row {index} with float text value")
        continue

    sentiments.append(code)
    texts.append(text)
|
46 |
+
|
47 |
+
|
48 |
+
# Fit the encoder on the sentiment codes and translate them to encoded labels.
label_encoder = LabelEncoder()
encoded_sentiments = label_encoder.fit(sentiments).transform(sentiments)

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    encoded_sentiments,
    test_size=0.2,
    random_state=42,
)
|
53 |
+
|
54 |
+
# Initialize the LaserEncoder
|
55 |
+
encoder = LaserEncoderPipeline(lang="eng_Latn")


def _embed_one_by_one(pipeline, sentences):
    """Encode each sentence on its own and stack the vectors into one array.

    A tqdm progress bar is shown while iterating over ``sentences``.
    """
    vectors = [pipeline.encode_sentences([text])[0] for text in tqdm(sentences)]
    return np.array(vectors)


# Embed the training sentences first, then the test sentences.
X_train_embeddings = _embed_one_by_one(encoder, X_train)
X_test_embeddings = _embed_one_by_one(encoder, X_test)
|
73 |
+
|
74 |
+
# Sentiment Prediction with RNN Neural Network and Confusion Matrix
|
75 |
+
|
76 |
+
from keras.models import Sequential
|
77 |
+
from keras.layers import Dense, SimpleRNN, Reshape, Dropout
|
78 |
+
from keras.optimizers import Adam
|
79 |
+
from keras.callbacks import LearningRateScheduler
|
80 |
+
from sklearn.metrics import confusion_matrix
|
81 |
+
import seaborn as sns
|
82 |
+
import matplotlib.pyplot as plt
|
83 |
+
import numpy as np
|
84 |
+
|
85 |
+
# Build a neural network model with RNN
|
86 |
+
# Layer stack over a 1024-dimensional input vector, ending in a 3-way softmax.
model = Sequential([
    Dense(256, input_shape=(1024,), activation='tanh'),
    Reshape((1, 256)),  # reshape the 256 features to (1, 256) ahead of the RNN layer
    SimpleRNN(128, activation='relu'),
    Dense(64, activation='relu'),
    Dropout(0.5),  # Adding dropout for regularization
    Dense(3, activation='softmax'),
])
|
93 |
+
|
94 |
+
# Use a learning rate scheduler
|
95 |
+
def lr_schedule(epoch):
    """Return the learning rate for ``epoch``: 0.0001 scaled by 0.9 per epoch."""
    base_rate = 0.0001
    decay_factor = 0.9
    return base_rate * decay_factor ** epoch
|
97 |
+
|
98 |
+
# Adam starts at the same rate the schedule returns for epoch 0.
opt = Adam(learning_rate=0.0001)
lr_scheduler = LearningRateScheduler(lr_schedule)

# Compile the model
model.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Print model summary to check the architecture
model.summary()

# Train for 30 epochs with the learning rate scheduler, holding back 10% of
# the training data for validation.
model.fit(
    X_train_embeddings,
    y_train,
    epochs=30,
    batch_size=32,
    validation_split=0.1,
    callbacks=[lr_scheduler],
)

# Evaluate on the test set; index 1 of the result is the accuracy metric.
test_metrics = model.evaluate(X_test_embeddings, y_test)
accuracy = test_metrics[1]

# Predictions on the test set: pick the most probable class per sample.
y_pred_probabilities = model.predict(X_test_embeddings)
y_pred = y_pred_probabilities.argmax(axis=1)
|
116 |
+
|
117 |
+
# Free-text inputs. st.slider only yields numbers, so it can supply neither a
# language code nor a sentence to classify.
language = st.text_input('Enter the language:', value='eng_Latn').strip()
user_text = st.text_input('Enter the text:').strip()

# Sentiment name for each numeric code used when building the training labels;
# any other code falls back to 'negative', as in the original if/elif chain.
SENTIMENT_NAMES = {1: 'neutral', 2: 'positive'}

# Text inputs are empty until the user types something; only predict once both
# a language code and a sentence are available.
if language and user_text:
    encoder = LaserEncoderPipeline(lang=language)

    # Embed the single sentence and reshape it into a batch of one row.
    user_text_embedding = encoder.encode_sentences([user_text])[0]
    user_text_embedding = np.reshape(user_text_embedding, (1, -1))

    # Most probable class index, mapped back to the original sentiment code.
    predicted_sentiment = np.argmax(model.predict(user_text_embedding))
    predicted_sentiment_no = label_encoder.inverse_transform([predicted_sentiment])[0]
    predicted_sentiment_label = SENTIMENT_NAMES.get(int(predicted_sentiment_no), 'negative')

    st.write("Predicted Sentiment:" + predicted_sentiment_label)
|