Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,174 +1,10 @@
|
|
1 |
import streamlit as st
|
2 |
import numpy as np
|
3 |
-
import pandas as pd
|
4 |
-
import chardet
|
5 |
-
import matplotlib.pyplot as plt
|
6 |
from laser_encoders import LaserEncoderPipeline
|
7 |
-
from sklearn.model_selection import train_test_split
|
8 |
-
from sklearn.metrics import accuracy_score
|
9 |
-
from sklearn.linear_model import LogisticRegression
|
10 |
-
from sklearn.preprocessing import LabelEncoder
|
11 |
-
from tensorflow.keras.models import Sequential
|
12 |
-
from tensorflow.keras.layers import Dense
|
13 |
-
from tqdm import tqdm
|
14 |
-
|
15 |
-
with open('./train.csv', 'rb') as f:
|
16 |
-
result = chardet.detect(f.read())
|
17 |
|
18 |
-
#
|
19 |
-
|
20 |
-
data = data[['sentiment', 'text']]
|
21 |
-
|
22 |
-
sentiments = []
|
23 |
-
texts = []
|
24 |
-
|
25 |
-
for index, row in data.iterrows():
|
26 |
-
sentiment = row['sentiment'].lower() # Convert to lowercase for case-insensitivity
|
27 |
-
if sentiment == 'neutral':
|
28 |
-
sentiments.append(1)
|
29 |
-
elif sentiment == 'positive':
|
30 |
-
sentiments.append(2)
|
31 |
-
elif sentiment == 'negative':
|
32 |
-
sentiments.append(3)
|
33 |
-
else:
|
34 |
-
# Handle the case where sentiment is not one of the expected values
|
35 |
-
# You may choose to skip this row or handle it differently based on your requirements
|
36 |
-
print(f"Warning: Unknown sentiment '{sentiment}' in row {index}")
|
37 |
-
continue # Skip the rest of the loop for this row
|
38 |
-
|
39 |
-
text = row['text']
|
40 |
-
if not isinstance(text, float):
|
41 |
-
texts.append(text)
|
42 |
-
else:
|
43 |
-
# Skip the sentiment for this row as well
|
44 |
-
print(f"Warning: Skipping row {index} with float text value")
|
45 |
-
sentiments.pop() # Remove the last added sentiment
|
46 |
-
|
47 |
-
|
48 |
-
label_encoder = LabelEncoder()
|
49 |
-
encoded_sentiments = label_encoder.fit_transform(sentiments)
|
50 |
-
|
51 |
-
# Split the data into training and testing sets
|
52 |
-
X_train, X_test, y_train, y_test = train_test_split(texts, encoded_sentiments, test_size=0.2, random_state=42)
|
53 |
-
|
54 |
-
# Initialize the LaserEncoder
|
55 |
-
encoder = LaserEncoderPipeline(lang="eng_Latn")
|
56 |
-
|
57 |
-
# Initialize empty arrays to store embeddings
|
58 |
-
X_train_embeddings = []
|
59 |
-
X_test_embeddings = []
|
60 |
-
|
61 |
-
|
62 |
-
for sentence in tqdm(X_train):
|
63 |
-
embeddings = encoder.encode_sentences([sentence])[0]
|
64 |
-
X_train_embeddings.append(embeddings)
|
65 |
-
|
66 |
-
for sentence in tqdm(X_test):
|
67 |
-
embeddings = encoder.encode_sentences([sentence])[0]
|
68 |
-
X_test_embeddings.append(embeddings)
|
69 |
-
|
70 |
-
# Convert lists to numpy arrays
|
71 |
-
X_train_embeddings = np.array(X_train_embeddings)
|
72 |
-
X_test_embeddings = np.array(X_test_embeddings)
|
73 |
-
|
74 |
-
# Convert lists to numpy arrays
|
75 |
-
X_train = np.array(X_train_embeddings)
|
76 |
-
X_test = np.array(X_test_embeddings)
|
77 |
-
|
78 |
-
# # Sentiment Prediction with RNN Neural Network and Confusion Matrix
|
79 |
-
|
80 |
-
# from keras.models import Sequential
|
81 |
-
# from keras.layers import Dense, SimpleRNN, Reshape, Dropout
|
82 |
-
# from keras.optimizers import Adam
|
83 |
-
# from keras.callbacks import LearningRateScheduler
|
84 |
-
# from sklearn.metrics import confusion_matrix
|
85 |
-
# import seaborn as sns
|
86 |
-
# import matplotlib.pyplot as plt
|
87 |
-
# import numpy as np
|
88 |
-
# from keras.models import Sequential
|
89 |
-
# from keras.layers import Embedding, SpatialDropout1D, LSTM, Dense
|
90 |
-
|
91 |
-
# max_features = 10000 # Adjust this based on your vocabulary size
|
92 |
-
# embed_dim = 128 # Adjust this based on the desired dimension of the embedding space
|
93 |
-
# lstm_out = 64 # Adjust this based on the number of LSTM units
|
94 |
-
|
95 |
-
# model = Sequential()
|
96 |
-
# model.add(Embedding(max_features, embed_dim, input_length=X_train.shape[1]))
|
97 |
-
# model.add(SpatialDropout1D(0.4))
|
98 |
-
# model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
|
99 |
-
# model.add(Dense(3, activation='softmax')) # Adjust the number of units based on your task
|
100 |
-
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
|
101 |
-
# print(model.summary())
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
# # Use a learning rate scheduler
|
106 |
-
# def lr_schedule(epoch):
|
107 |
-
# return 0.0001 * 0.9 ** epoch
|
108 |
-
|
109 |
-
# opt = Adam(learning_rate=0.0001)
|
110 |
-
# lr_scheduler = LearningRateScheduler(lr_schedule)
|
111 |
-
|
112 |
-
# # Compile the model
|
113 |
-
# model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
|
114 |
-
|
115 |
-
# # Print model summary to check the architecture
|
116 |
-
# model.summary()
|
117 |
-
|
118 |
-
# # Train the model with the learning rate scheduler
|
119 |
-
# model.fit(X_train_embeddings, y_train, epochs=1, batch_size=32, validation_split=0.1, callbacks=[lr_scheduler])
|
120 |
-
|
121 |
-
# # Evaluate the model on the test set
|
122 |
-
# accuracy = model.evaluate(X_test_embeddings, y_test)[1]
|
123 |
-
# print(f"Accuracy: {accuracy * 100:.2f}%")
|
124 |
-
|
125 |
-
# # Predictions on the test set
|
126 |
-
# y_pred_probabilities = model.predict(X_test_embeddings)
|
127 |
-
# y_pred = np.argmax(y_pred_probabilities, axis=1)
|
128 |
-
|
129 |
-
|
130 |
-
# Sentiment Prediction with RNN Neural Network and Confusion Matrix
|
131 |
-
|
132 |
-
from keras.models import Sequential
|
133 |
-
from keras.layers import Dense, SimpleRNN, Reshape, Dropout
|
134 |
-
from keras.optimizers import Adam
|
135 |
-
from keras.callbacks import LearningRateScheduler
|
136 |
-
from sklearn.metrics import confusion_matrix
|
137 |
-
import seaborn as sns
|
138 |
-
import matplotlib.pyplot as plt
|
139 |
-
import numpy as np
|
140 |
-
|
141 |
-
# Build a neural network model with RNN
|
142 |
-
model = Sequential()
|
143 |
-
model.add(Dense(256, input_shape=(1024,), activation='tanh'))
|
144 |
-
model.add(Reshape((1, 256)))
|
145 |
-
model.add(SimpleRNN(128, activation='relu'))
|
146 |
-
model.add(Dense(64, activation='relu'))
|
147 |
-
model.add(Dropout(0.5)) # Adding dropout for regularization
|
148 |
-
model.add(Dense(3, activation='softmax'))
|
149 |
-
|
150 |
-
# Use a learning rate scheduler
|
151 |
-
def lr_schedule(epoch):
|
152 |
-
return 0.0001 * 0.9 ** epoch
|
153 |
-
|
154 |
-
opt = Adam(learning_rate=0.0001)
|
155 |
-
lr_scheduler = LearningRateScheduler(lr_schedule)
|
156 |
-
#
|
157 |
-
# Compile the model
|
158 |
-
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
|
159 |
-
|
160 |
-
# Print model summary to check the architecture
|
161 |
-
model.summary()
|
162 |
-
|
163 |
-
# Train the model with the learning rate scheduler
|
164 |
-
model.fit(X_train_embeddings, y_train, epochs=30, batch_size=32, validation_split=0.1, callbacks=[lr_scheduler])
|
165 |
-
|
166 |
-
# Evaluate the model on the test set
|
167 |
-
accuracy = model.evaluate(X_test_embeddings, y_test)[1]
|
168 |
-
|
169 |
-
# Predictions on the test set
|
170 |
-
y_pred_probabilities = model.predict(X_test_embeddings)
|
171 |
-
y_pred = np.argmax(y_pred_probabilities, axis=1)
|
172 |
|
173 |
language = st.slider('Enter the language:')
|
174 |
user_text = st.slider('Enter the text:')
|
@@ -187,4 +23,4 @@ elif predicted_sentiment_no == 2:
|
|
187 |
else:
|
188 |
predicted_sentiment_label = 'negative'
|
189 |
|
190 |
-
st.write("Predicted Sentiment:"+predicted_sentiment_label)
|
|
|
1 |
import streamlit as st
|
2 |
import numpy as np
|
|
|
|
|
|
|
3 |
from laser_encoders import LaserEncoderPipeline
|
4 |
+
from keras.models import load_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
# Load the saved model
|
7 |
+
model = load_model("sentiment_model.h5")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
language = st.slider('Enter the language:')
|
10 |
user_text = st.slider('Enter the text:')
|
|
|
23 |
else:
|
24 |
predicted_sentiment_label = 'negative'
|
25 |
|
26 |
+
st.write("Predicted Sentiment:" + predicted_sentiment_label)
|