NIXBLACK committed on
Commit
9e77460
·
1 Parent(s): bf6bb57

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -168
app.py CHANGED
@@ -1,174 +1,10 @@
1
  import streamlit as st
2
  import numpy as np
3
- import pandas as pd
4
- import chardet
5
- import matplotlib.pyplot as plt
6
  from laser_encoders import LaserEncoderPipeline
7
- from sklearn.model_selection import train_test_split
8
- from sklearn.metrics import accuracy_score
9
- from sklearn.linear_model import LogisticRegression
10
- from sklearn.preprocessing import LabelEncoder
11
- from tensorflow.keras.models import Sequential
12
- from tensorflow.keras.layers import Dense
13
- from tqdm import tqdm
14
-
15
- with open('./train.csv', 'rb') as f:
16
- result = chardet.detect(f.read())
17
 
18
- # Use the detected encoding when reading the CSV file
19
- data = pd.read_csv('./train.csv', encoding=result['encoding'])
20
- data = data[['sentiment', 'text']]
21
-
22
- sentiments = []
23
- texts = []
24
-
25
- for index, row in data.iterrows():
26
- sentiment = row['sentiment'].lower() # Convert to lowercase for case-insensitivity
27
- if sentiment == 'neutral':
28
- sentiments.append(1)
29
- elif sentiment == 'positive':
30
- sentiments.append(2)
31
- elif sentiment == 'negative':
32
- sentiments.append(3)
33
- else:
34
- # Handle the case where sentiment is not one of the expected values
35
- # You may choose to skip this row or handle it differently based on your requirements
36
- print(f"Warning: Unknown sentiment '{sentiment}' in row {index}")
37
- continue # Skip the rest of the loop for this row
38
-
39
- text = row['text']
40
- if not isinstance(text, float):
41
- texts.append(text)
42
- else:
43
- # Skip the sentiment for this row as well
44
- print(f"Warning: Skipping row {index} with float text value")
45
- sentiments.pop() # Remove the last added sentiment
46
-
47
-
48
- label_encoder = LabelEncoder()
49
- encoded_sentiments = label_encoder.fit_transform(sentiments)
50
-
51
- # Split the data into training and testing sets
52
- X_train, X_test, y_train, y_test = train_test_split(texts, encoded_sentiments, test_size=0.2, random_state=42)
53
-
54
- # Initialize the LaserEncoder
55
- encoder = LaserEncoderPipeline(lang="eng_Latn")
56
-
57
- # Initialize empty arrays to store embeddings
58
- X_train_embeddings = []
59
- X_test_embeddings = []
60
-
61
-
62
- for sentence in tqdm(X_train):
63
- embeddings = encoder.encode_sentences([sentence])[0]
64
- X_train_embeddings.append(embeddings)
65
-
66
- for sentence in tqdm(X_test):
67
- embeddings = encoder.encode_sentences([sentence])[0]
68
- X_test_embeddings.append(embeddings)
69
-
70
- # Convert lists to numpy arrays
71
- X_train_embeddings = np.array(X_train_embeddings)
72
- X_test_embeddings = np.array(X_test_embeddings)
73
-
74
- # Convert lists to numpy arrays
75
- X_train = np.array(X_train_embeddings)
76
- X_test = np.array(X_test_embeddings)
77
-
78
- # # Sentiment Prediction with RNN Neural Network and Confusion Matrix
79
-
80
- # from keras.models import Sequential
81
- # from keras.layers import Dense, SimpleRNN, Reshape, Dropout
82
- # from keras.optimizers import Adam
83
- # from keras.callbacks import LearningRateScheduler
84
- # from sklearn.metrics import confusion_matrix
85
- # import seaborn as sns
86
- # import matplotlib.pyplot as plt
87
- # import numpy as np
88
- # from keras.models import Sequential
89
- # from keras.layers import Embedding, SpatialDropout1D, LSTM, Dense
90
-
91
- # max_features = 10000 # Adjust this based on your vocabulary size
92
- # embed_dim = 128 # Adjust this based on the desired dimension of the embedding space
93
- # lstm_out = 64 # Adjust this based on the number of LSTM units
94
-
95
- # model = Sequential()
96
- # model.add(Embedding(max_features, embed_dim, input_length=X_train.shape[1]))
97
- # model.add(SpatialDropout1D(0.4))
98
- # model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
99
- # model.add(Dense(3, activation='softmax')) # Adjust the number of units based on your task
100
- # model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
101
- # print(model.summary())
102
-
103
-
104
-
105
- # # Use a learning rate scheduler
106
- # def lr_schedule(epoch):
107
- # return 0.0001 * 0.9 ** epoch
108
-
109
- # opt = Adam(learning_rate=0.0001)
110
- # lr_scheduler = LearningRateScheduler(lr_schedule)
111
-
112
- # # Compile the model
113
- # model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
114
-
115
- # # Print model summary to check the architecture
116
- # model.summary()
117
-
118
- # # Train the model with the learning rate scheduler
119
- # model.fit(X_train_embeddings, y_train, epochs=1, batch_size=32, validation_split=0.1, callbacks=[lr_scheduler])
120
-
121
- # # Evaluate the model on the test set
122
- # accuracy = model.evaluate(X_test_embeddings, y_test)[1]
123
- # print(f"Accuracy: {accuracy * 100:.2f}%")
124
-
125
- # # Predictions on the test set
126
- # y_pred_probabilities = model.predict(X_test_embeddings)
127
- # y_pred = np.argmax(y_pred_probabilities, axis=1)
128
-
129
-
130
- # Sentiment Prediction with RNN Neural Network and Confusion Matrix
131
-
132
- from keras.models import Sequential
133
- from keras.layers import Dense, SimpleRNN, Reshape, Dropout
134
- from keras.optimizers import Adam
135
- from keras.callbacks import LearningRateScheduler
136
- from sklearn.metrics import confusion_matrix
137
- import seaborn as sns
138
- import matplotlib.pyplot as plt
139
- import numpy as np
140
-
141
- # Build a neural network model with RNN
142
- model = Sequential()
143
- model.add(Dense(256, input_shape=(1024,), activation='tanh'))
144
- model.add(Reshape((1, 256)))
145
- model.add(SimpleRNN(128, activation='relu'))
146
- model.add(Dense(64, activation='relu'))
147
- model.add(Dropout(0.5)) # Adding dropout for regularization
148
- model.add(Dense(3, activation='softmax'))
149
-
150
- # Use a learning rate scheduler
151
- def lr_schedule(epoch):
152
- return 0.0001 * 0.9 ** epoch
153
-
154
- opt = Adam(learning_rate=0.0001)
155
- lr_scheduler = LearningRateScheduler(lr_schedule)
156
- #
157
- # Compile the model
158
- model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
159
-
160
- # Print model summary to check the architecture
161
- model.summary()
162
-
163
- # Train the model with the learning rate scheduler
164
- model.fit(X_train_embeddings, y_train, epochs=30, batch_size=32, validation_split=0.1, callbacks=[lr_scheduler])
165
-
166
- # Evaluate the model on the test set
167
- accuracy = model.evaluate(X_test_embeddings, y_test)[1]
168
-
169
- # Predictions on the test set
170
- y_pred_probabilities = model.predict(X_test_embeddings)
171
- y_pred = np.argmax(y_pred_probabilities, axis=1)
172
 
173
  language = st.slider('Enter the language:')
174
  user_text = st.slider('Enter the text:')
@@ -187,4 +23,4 @@ elif predicted_sentiment_no == 2:
187
  else:
188
  predicted_sentiment_label = 'negative'
189
 
190
- st.write("Predicted Sentiment:"+predicted_sentiment_label)
 
1
  import streamlit as st
2
  import numpy as np
 
 
 
3
  from laser_encoders import LaserEncoderPipeline
4
+ from keras.models import load_model
 
 
 
 
 
 
 
 
 
5
 
6
+ # Load the saved model
7
+ model = load_model("sentiment_model.h5")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  language = st.slider('Enter the language:')
10
  user_text = st.slider('Enter the text:')
 
23
  else:
24
  predicted_sentiment_label = 'negative'
25
 
26
+ st.write("Predicted Sentiment:" + predicted_sentiment_label)