|
import re |
|
import emoji |
|
import joblib |
|
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score |
|
|
|
import torch |
|
import torch.nn as nn |
|
import torchtext.vocab as vocab |
|
|
|
import gradio as gr |
|
|
|
|
|
# Pretrained GloVe word vectors: the "6B" release at 100 dimensions per word.
# The rest of this file reads both ``glove.vectors`` (embedding weights) and
# ``glove.stoi`` (token -> row index lookup) from this object.
glove = vocab.GloVe(dim=100, name="6B")
|
|
|
def remove_html(text):
    """Strip HTML-style tags from ``text``.

    Every shortest ``<...>`` span is deleted while the text between tags is
    kept. Because ``.`` does not match a newline here, a tag whose ``<`` and
    ``>`` sit on different lines is left untouched.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with all matched tags removed.
    """
    return re.sub(r"<.*?>", "", text)
|
|
|
def remove_url(text):
    """Delete web addresses from ``text``.

    A web address is anything starting with ``http://``, ``https://`` or
    ``www.`` and running up to the next whitespace character. Surrounding
    whitespace is not collapsed.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with every matched address removed.
    """
    return re.sub(r"https?://\S+|www\.\S+", "", text)
|
|
|
def emoji_to_text(text):
    """Replace every emoji in ``text`` with its space-padded ``:name:`` alias.

    The whole string is handed to ``emoji.demojize`` in one call, so emoji
    made of several code points (flags, ZWJ sequences, keycaps, emoji
    followed by a variation selector) are recognised as a single unit. A
    character-by-character scan would split or miss those. Each alias is
    emitted as ``" :name: "`` so that it becomes its own token once the text
    is split on whitespace.

    Args:
        text: The string to convert.

    Returns:
        ``text`` with emoji replaced by their textual aliases; all other
        characters are passed through unchanged.
    """
    # The delimiters carry the padding spaces, producing " :name: " per emoji.
    return emoji.demojize(text, delimiters=(" :", ": "))
|
|
|
def clean_review_text(text):
    """Normalise a raw review string before it is fed to the models.

    The steps run in this order: HTML tags are removed, URLs are removed,
    emoji are converted to text, and the result is lower-cased.

    Args:
        text: The raw review text.

    Returns:
        The cleaned, lower-cased review text.
    """
    for step in (remove_html, remove_url, emoji_to_text):
        text = step(text)
    return text.lower()
|
|
|
|
|
|
|
|
|
|
|
class CNNHotelReviewsModel(nn.Module):
    """Convolutional text classifier on top of frozen pretrained embeddings.

    The network embeds token indices, applies one 2-D convolution per filter
    size across the full embedding width, max-pools each feature map over
    the sequence axis, concatenates the pooled features, applies dropout and
    a linear layer, and squashes the result with a sigmoid.

    Args:
        embedding_dim: Width of one embedding vector; used as the second
            kernel dimension so each filter spans whole word vectors.
        n_filters: Number of output channels of every convolution.
        filter_sizes: Iterable of kernel heights (number of consecutive
            tokens each filter covers); one convolution is built per entry.
        output_dim: Number of output units of the final linear layer.
        dropout: Dropout probability applied to the concatenated features.
    """

    def __init__(self, embedding_dim, n_filters, filter_sizes, output_dim, dropout):
        super().__init__()

        # The embedding table comes from the module-level ``glove`` vectors
        # and is frozen, so it is not updated by training.
        self.embedding = nn.Embedding.from_pretrained(glove.vectors, freeze=True)
        self.convs = nn.ModuleList(
            [
                nn.Conv2d(
                    in_channels=1,
                    out_channels=n_filters,
                    kernel_size=(fs, embedding_dim),
                )
                for fs in filter_sizes
            ]
        )

        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.sigmoid = nn.Sigmoid()

    def forward(self, text):
        """Run the classifier on a batch of token-index sequences.

        Args:
            text: Integer tensor of token indices. The squeezes below expect
                it to be 2-D, i.e. ``(batch, seq_len)``, with ``seq_len`` at
                least as large as the biggest filter size.

        Returns:
            Sigmoid activations of the final layer with dimension 1
            squeezed away, i.e. shape ``(batch,)`` when ``output_dim`` is 1.
        """
        # Add a singleton channel axis so Conv2d sees (batch, 1, seq, emb).
        embedded = self.embedding(text).unsqueeze(1)
        # ``nn.functional`` is used directly: this file never imports an
        # ``F`` alias, so referring to ``F`` here would raise NameError.
        conved = [
            nn.functional.relu(conv(embedded)).squeeze(3) for conv in self.convs
        ]
        # Max over the whole remaining sequence axis of each feature map.
        pooled = [
            nn.functional.max_pool1d(fmap, fmap.shape[2]).squeeze(2)
            for fmap in conved
        ]
        cat = self.dropout(torch.cat(pooled, dim=1))
        return self.sigmoid(self.fc(cat)).squeeze(1)
|
|
|
|
|
|
|
|
|
|
|
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Hyperparameters passed to CNNHotelReviewsModel when the model is built.
# NOTE(review): presumably these must match the values used when the saved
# checkpoint was trained -- confirm before changing any of them.
EMBEDDING_DIM, OUTPUT_DIM = 100, 1
N_FILTERS, FILTER_SIZES, DROPOUT = 250, [2, 3, 4], 0.1
|
|
|
|
|
# Build the network from the hyperparameters above, restore its weights from
# the checkpoint file (tensors are mapped onto ``device`` while loading), then
# move the module to ``device`` and switch it to evaluation mode.
CNN_Model = CNNHotelReviewsModel(
    embedding_dim=EMBEDDING_DIM,
    n_filters=N_FILTERS,
    filter_sizes=FILTER_SIZES,
    output_dim=OUTPUT_DIM,
    dropout=DROPOUT,
)
CNN_Model.load_state_dict(
    torch.load("hotel_review_model.pth", map_location=device)
)
CNN_Model = CNN_Model.to(device).eval()
|
|
|
|
|
# Topic-model artifacts used by dominant_topic(): the fitted LDA model and the
# token dictionary that turns token lists into bag-of-words vectors.
# NOTE(review): joblib files are pickle-based; only load files from a trusted
# source.
lda_model, dictionary = (
    joblib.load(path) for path in ("lda_model.pkl", "dictionary.pkl")
)
|
|
|
|
|
def predict_review(model, review, max_len=128):
    """Score one review with ``model`` and return class probabilities.

    The review is split on whitespace, each token is mapped to its index in
    the module-level ``glove`` vocabulary (index 0 for tokens not found), and
    the index list is truncated or right-padded with 0 to exactly ``max_len``
    entries. The model is put in evaluation mode and run without gradient
    tracking on a batch holding this single sequence.

    Args:
        model: Callable module returning a single-element tensor for a
            ``(1, max_len)`` index tensor.
        review: The (already cleaned) review text.
        max_len: Fixed sequence length fed to the model.

    Returns:
        ``{'positive': p, 'negative': 1 - p}`` where ``p`` is the model's
        scalar output.
    """
    # Token ids, cut to at most ``max_len`` entries.
    token_ids = [glove.stoi.get(word, 0) for word in review.split()][:max_len]
    # Right-pad with id 0; a non-positive count adds nothing.
    token_ids.extend([0] * (max_len - len(token_ids)))

    # Leading batch axis of size 1.
    batch = torch.tensor(token_ids).unsqueeze(0)

    model.eval()
    with torch.no_grad():
        prob = model(batch.to(device)).item()

    return {'positive': prob, 'negative': 1 - prob}
|
|
|
|
|
|
|
|
|
# Human-readable name for each LDA topic id (0-9); a label's position in the
# tuple is its topic id.
aspect_label = dict(
    enumerate(
        (
            "Reception & Service Efficiency",
            "Transportation & Proximity",
            "Room Comfort & Staff Courtesy",
            "Location & Staff Quality",
            "Room Discrepancies",
            "Hotel Quality vs Price",
            "Booking & Payment Issues",
            "Room Ambiance & Noise",
            "Amenities & Value",
            "Room Size & Condition",
        )
    )
)
|
|
|
def dominant_topic(text):
    """Return the topic distribution of ``text`` keyed by aspect label.

    The text is split on whitespace, converted to a bag-of-words vector with
    the module-level ``dictionary``, and passed to ``lda_model``. Every
    ``(topic_id, weight)`` pair the model returns is included, not only the
    single highest-weighted topic.

    Args:
        text: The (already cleaned) review text.

    Returns:
        A dict mapping each returned topic's ``aspect_label`` name to its
        weight as a plain ``float``. The dict is empty if the model returns
        no topics.
    """
    tokens = text.split()
    bow = dictionary.doc2bow(tokens)
    topics = lda_model.get_document_topics(bow)
    # No separate max() over ``topics`` is computed: its result was never
    # used, and max() raises ValueError on an empty topic list.
    return {aspect_label[topic_id]: float(weight) for topic_id, weight in topics}
|
|
|
|
|
def gr_fun(Review):
    """Gradio callback: clean one review and run both models on it.

    Args:
        Review: Raw review text entered by the user.

    Returns:
        A 2-tuple ``(sentiment, aspects)``: the dict returned by
        ``predict_review`` for ``CNN_Model`` and the dict returned by
        ``dominant_topic``, both computed on the cleaned text.
    """
    cleaned = clean_review_text(Review)
    return predict_review(CNN_Model, cleaned), dominant_topic(cleaned)
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: one text box in, two label widgets out. The first label shows the
# sentiment dict from gr_fun; the second shows the aspect dict, limited to its
# five highest-scoring classes.
iface = gr.Interface(
    examples=[
        "room condition was very bad",
        "Staff where excellent and the room was lovely really great hotel will definitely be back",
        "Couldn t find ice machine The junior suite was excellent with a fantastic bar",
        "Furniture in the room was a bit worn and tired for the money you pay would just expect a bit more it was ok",
        "Room was West facing and was far too warm particularly as the a c didn t seem to be working to well The shower room was excellent and large enough for my lady and I to be rude in Loved it",
    ],
    outputs=[gr.Label(), gr.Label(num_top_classes=5)],
    inputs="text",
    fn=gr_fun,
)

# Start the web server as soon as this module is executed.
iface.launch(inline=False)