Spaces:

NIXBLACK
/

SentimentAnalysis_LASER_

Sleeping

App Files Community

NIXBLACK committed on Dec 4, 2023

Commit

9e77460

1 Parent(s): bf6bb57

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -168

app.py CHANGED Viewed

@@ -1,174 +1,10 @@
 import streamlit as st
 import numpy as np
-import pandas as pd
-import chardet
-import matplotlib.pyplot as plt
 from laser_encoders import LaserEncoderPipeline
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import accuracy_score
-from sklearn.linear_model import LogisticRegression
-from sklearn.preprocessing import LabelEncoder
-from tensorflow.keras.models import Sequential
-from tensorflow.keras.layers import Dense
-from tqdm import tqdm
-with open('./train.csv', 'rb') as f:
-    result = chardet.detect(f.read())
-# Use the detected encoding when reading the CSV file
-data = pd.read_csv('./train.csv', encoding=result['encoding'])
-data = data[['sentiment', 'text']]
-sentiments = []
-texts = []
-for index, row in data.iterrows():
-    sentiment = row['sentiment'].lower()  # Convert to lowercase for case-insensitivity
-    if sentiment == 'neutral':
-        sentiments.append(1)
-    elif sentiment == 'positive':
-        sentiments.append(2)
-    elif sentiment == 'negative':
-        sentiments.append(3)
-    else:
-        # Handle the case where sentiment is not one of the expected values
-        # You may choose to skip this row or handle it differently based on your requirements
-        print(f"Warning: Unknown sentiment '{sentiment}' in row {index}")
-        continue  # Skip the rest of the loop for this row
-    text = row['text']
-    if not isinstance(text, float):
-        texts.append(text)
-    else:
-        # Skip the sentiment for this row as well
-        print(f"Warning: Skipping row {index} with float text value")
-        sentiments.pop()  # Remove the last added sentiment
-label_encoder = LabelEncoder()
-encoded_sentiments = label_encoder.fit_transform(sentiments)
-# Split the data into training and testing sets
-X_train, X_test, y_train, y_test = train_test_split(texts, encoded_sentiments, test_size=0.2, random_state=42)
-# Initialize the LaserEncoder
-encoder = LaserEncoderPipeline(lang="eng_Latn")
-# Initialize empty arrays to store embeddings
-X_train_embeddings = []
-X_test_embeddings = []
-for sentence in tqdm(X_train):
-    embeddings = encoder.encode_sentences([sentence])[0]
-    X_train_embeddings.append(embeddings)
-for sentence in tqdm(X_test):
-    embeddings = encoder.encode_sentences([sentence])[0]
-    X_test_embeddings.append(embeddings)
-# Convert lists to numpy arrays
-X_train_embeddings = np.array(X_train_embeddings)
-X_test_embeddings = np.array(X_test_embeddings)
-# Convert lists to numpy arrays
-X_train = np.array(X_train_embeddings)
-X_test = np.array(X_test_embeddings)
-# # Sentiment Prediction with RNN Neural Network and Confusion Matrix
-# from keras.models import Sequential
-# from keras.layers import Dense, SimpleRNN, Reshape, Dropout
-# from keras.optimizers import Adam
-# from keras.callbacks import LearningRateScheduler
-# from sklearn.metrics import confusion_matrix
-# import seaborn as sns
-# import matplotlib.pyplot as plt
-# import numpy as np
-# from keras.models import Sequential
-# from keras.layers import Embedding, SpatialDropout1D, LSTM, Dense
-# max_features = 10000  # Adjust this based on your vocabulary size
-# embed_dim = 128      # Adjust this based on the desired dimension of the embedding space
-# lstm_out = 64        # Adjust this based on the number of LSTM units
-# model = Sequential()
-# model.add(Embedding(max_features, embed_dim, input_length=X_train.shape[1]))
-# model.add(SpatialDropout1D(0.4))
-# model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
-# model.add(Dense(3, activation='softmax'))  # Adjust the number of units based on your task
-# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
-# print(model.summary())
-# # Use a learning rate scheduler
-# def lr_schedule(epoch):
-#     return 0.0001 * 0.9 ** epoch
-# opt = Adam(learning_rate=0.0001)
-# lr_scheduler = LearningRateScheduler(lr_schedule)
-# # Compile the model
-# model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
-# # Print model summary to check the architecture
-# model.summary()
-# # Train the model with the learning rate scheduler
-# model.fit(X_train_embeddings, y_train, epochs=1, batch_size=32, validation_split=0.1, callbacks=[lr_scheduler])
-# # Evaluate the model on the test set
-# accuracy = model.evaluate(X_test_embeddings, y_test)[1]
-# print(f"Accuracy: {accuracy * 100:.2f}%")
-# # Predictions on the test set
-# y_pred_probabilities = model.predict(X_test_embeddings)
-# y_pred = np.argmax(y_pred_probabilities, axis=1)
-# Sentiment Prediction with RNN Neural Network and Confusion Matrix
-from keras.models import Sequential
-from keras.layers import Dense, SimpleRNN, Reshape, Dropout
-from keras.optimizers import Adam
-from keras.callbacks import LearningRateScheduler
-from sklearn.metrics import confusion_matrix
-import seaborn as sns
-import matplotlib.pyplot as plt
-import numpy as np
-# Build a neural network model with RNN
-model = Sequential()
-model.add(Dense(256, input_shape=(1024,), activation='tanh'))
-model.add(Reshape((1, 256)))
-model.add(SimpleRNN(128, activation='relu'))
-model.add(Dense(64, activation='relu'))
-model.add(Dropout(0.5))  # Adding dropout for regularization
-model.add(Dense(3, activation='softmax'))
-# Use a learning rate scheduler
-def lr_schedule(epoch):
-    return 0.0001 * 0.9 ** epoch
-opt = Adam(learning_rate=0.0001)
-lr_scheduler = LearningRateScheduler(lr_schedule)
-#
-# Compile the model
-model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
-# Print model summary to check the architecture
-model.summary()
-# Train the model with the learning rate scheduler
-model.fit(X_train_embeddings, y_train, epochs=30, batch_size=32, validation_split=0.1, callbacks=[lr_scheduler])
-# Evaluate the model on the test set
-accuracy = model.evaluate(X_test_embeddings, y_test)[1]
-# Predictions on the test set
-y_pred_probabilities = model.predict(X_test_embeddings)
-y_pred = np.argmax(y_pred_probabilities, axis=1)
 language = st.slider('Enter the language:')
 user_text = st.slider('Enter the text:')
@@ -187,4 +23,4 @@ elif predicted_sentiment_no == 2:
 else:
   predicted_sentiment_label = 'negative'
-st.write("Predicted Sentiment:"+predicted_sentiment_label)

 import streamlit as st
 import numpy as np
 from laser_encoders import LaserEncoderPipeline
+from keras.models import load_model
+# Load the saved model
+model = load_model("sentiment_model.h5")
 language = st.slider('Enter the language:')
 user_text = st.slider('Enter the text:')
 else:
   predicted_sentiment_label = 'negative'
+st.write("Predicted Sentiment:" + predicted_sentiment_label)