NIXBLACK committed on
Commit
2bc2000
·
1 Parent(s): 5275263

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -0
app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import chardet
5
+ import matplotlib.pyplot as plt
6
+ from laser_encoders import LaserEncoderPipeline
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.metrics import accuracy_score
9
+ from sklearn.linear_model import LogisticRegression
10
+ from sklearn.preprocessing import LabelEncoder
11
+ from tensorflow.keras.models import Sequential
12
+ from tensorflow.keras.layers import Dense
13
+ from tqdm import tqdm
14
+
# Detect the CSV's character encoding from its raw bytes before parsing it.
with open('./train.csv', 'rb') as f:
    result = chardet.detect(f.read())

# Use the detected encoding when reading the CSV file
data = pd.read_csv('./train.csv', encoding=result['encoding'])
data = data[['sentiment', 'text']]

# Integer code assigned to each recognised sentiment name (matched
# case-insensitively).
_SENTIMENT_CODES = {'neutral': 1, 'positive': 2, 'negative': 3}

# Parallel lists: sentiments[i] is the code for texts[i].
sentiments = []
texts = []

for index, row in data.iterrows():
    raw_sentiment = row['sentiment']

    # A missing or non-string sentiment cell has no .lower(); report it the
    # same way as an unrecognised value instead of crashing.
    if not isinstance(raw_sentiment, str):
        print(f"Warning: Unknown sentiment '{raw_sentiment}' in row {index}")
        continue

    sentiment = raw_sentiment.lower()  # Convert to lowercase for case-insensitivity
    code = _SENTIMENT_CODES.get(sentiment)
    if code is None:
        print(f"Warning: Unknown sentiment '{sentiment}' in row {index}")
        continue

    text = row['text']
    if isinstance(text, float):
        # Float text values are skipped; presumably these are empty cells
        # that pandas read as NaN.
        print(f"Warning: Skipping row {index} with float text value")
        continue

    # Append to both lists together, only once the row is fully validated,
    # so they can never get out of step.
    sentiments.append(code)
    texts.append(text)


# Re-encode the sentiment codes as the integer class ids used for training.
# The fitted encoder is kept so predictions can be mapped back to the codes.
label_encoder = LabelEncoder()
encoded_sentiments = label_encoder.fit_transform(sentiments)
50
+
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    texts, encoded_sentiments, test_size=0.2, random_state=42
)

# Initialize the LaserEncoder
encoder = LaserEncoderPipeline(lang="eng_Latn")

# Number of sentences handed to the encoder per call.
_EMBEDDING_BATCH_SIZE = 64


def _embed_sentences(sentences, batch_size=_EMBEDDING_BATCH_SIZE):
    """Return a 2-D array with one LASER embedding per entry of *sentences*.

    Sentences are encoded in chunks of *batch_size* rather than one call per
    sentence, while tqdm still reports progress per chunk. An empty input
    yields an empty array.
    """
    chunks = [
        encoder.encode_sentences(sentences[start:start + batch_size])
        for start in tqdm(range(0, len(sentences), batch_size))
    ]
    if not chunks:
        return np.array([])
    return np.vstack(chunks)


# Embedding matrices for the two splits, row-aligned with y_train / y_test.
X_train_embeddings = _embed_sentences(X_train)
X_test_embeddings = _embed_sentences(X_test)
73
+
# Sentiment classifier: a Keras network with a SimpleRNN layer.

from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, Reshape, Dropout
from keras.optimizers import Adam
from keras.callbacks import LearningRateScheduler
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Layer stack, in order: project the 1024-wide input down to 256 features,
# reshape to a one-step sequence for the recurrent layer, then classify
# into three classes.
model = Sequential([
    Dense(256, input_shape=(1024,), activation='tanh'),
    Reshape((1, 256)),
    SimpleRNN(128, activation='relu'),
    Dense(64, activation='relu'),
    Dropout(0.5),  # dropout for regularization
    Dense(3, activation='softmax'),
])
94
+ # Use a learning rate scheduler
def lr_schedule(epoch):
    """Return the learning rate for *epoch*: 0.0001 scaled by 0.9 per epoch."""
    base_rate = 0.0001
    decay_factor = 0.9
    return base_rate * decay_factor ** epoch
97
+
# Adam optimizer; the scheduler callback supplies the per-epoch learning
# rate from lr_schedule during training.
opt = Adam(learning_rate=0.0001)
lr_scheduler = LearningRateScheduler(lr_schedule)

# Integer class labels, hence the sparse categorical loss.
model.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the architecture before training starts.
model.summary()

# Fit on the training embeddings, holding out 10% of them for validation.
model.fit(
    X_train_embeddings,
    y_train,
    epochs=30,
    batch_size=32,
    validation_split=0.1,
    callbacks=[lr_scheduler],
)

# evaluate() returns [loss, accuracy] for the single compiled metric.
_, accuracy = model.evaluate(X_test_embeddings, y_test)

# Class probabilities for the test set, reduced to the most likely class id.
y_pred_probabilities = model.predict(X_test_embeddings)
y_pred = y_pred_probabilities.argmax(axis=1)
116
+
# Ask for the LASER language code and the sentence to classify. Both are
# free-form strings, so they need text inputs; a slider only yields numbers.
# The default language matches the one used to embed the training data.
language = st.text_input('Enter the language:', value='eng_Latn')
user_text = st.text_input('Enter the text:')

# Sentiment codes (as produced during label preparation) -> display names.
_SENTIMENT_LABELS = {1: 'neutral', 2: 'positive', 3: 'negative'}

# Only run inference once both fields hold something; an empty field would
# otherwise be passed straight to the encoder.
if language.strip() and user_text.strip():
    encoder = LaserEncoderPipeline(lang=language.strip())

    # Encode the single sentence and shape it as a batch of one for the model.
    user_text_embedding = encoder.encode_sentences([user_text])[0]
    user_text_embedding = np.reshape(user_text_embedding, (1, -1))

    # Most likely class id, mapped back to the original sentiment code.
    predicted_sentiment = np.argmax(model.predict(user_text_embedding))
    predicted_sentiment_no = label_encoder.inverse_transform([predicted_sentiment])[0]

    # Any code other than 1 or 2 is reported as negative.
    predicted_sentiment_label = _SENTIMENT_LABELS.get(
        int(predicted_sentiment_no), 'negative'
    )

    st.write("Predicted Sentiment:"+predicted_sentiment_label)