File size: 2,330 Bytes
3a5c4f5
 
 
b14184c
 
ebfb422
b14184c
 
3a5c4f5
55fdaad
b14184c
12ac323
3a5c4f5
b14184c
 
3a5c4f5
 
b14184c
12ac323
 
 
3a5c4f5
b14184c
3a5c4f5
 
b14184c
 
 
 
 
3a5c4f5
b14184c
3a5c4f5
b14184c
ebfb422
 
3a5c4f5
55fdaad
 
 
 
 
 
 
 
 
 
 
 
 
 
3a5c4f5
55fdaad
 
 
f94741c
12ac323
55fdaad
 
9cb81e0
 
7a2c3d1
55fdaad
 
 
 
 
9cb81e0
55fdaad
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
import numpy as np
import pandas as pd
import re
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense
from transformers import BertTokenizer, TFBertModel
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from nltk.corpus import stopwords
import tensorflow as tf
import nltk

# Download stopwords
# Fetches the NLTK 'stopwords' corpus at import time so that
# stopwords.words('english') in remove_stop_words can be resolved.
nltk.download('stopwords')

# Load tokenizer and model
# Both come from the same "bert-base-uncased" checkpoint and are loaded once
# at import time; bert_embeddings() reuses them for every request.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model = TFBertModel.from_pretrained("bert-base-uncased")


# Clean text
def remove_tags(txt):
    """Strip markup, URLs and punctuation from *txt* and lowercase it.

    The substitutions run in a fixed order:

    1. HTML-like tags (``<...>``) are deleted.
    2. ``http://`` / ``https://`` URLs are deleted.
    3. Every character that is not an ASCII letter, a digit or whitespace
       is replaced by a single space.

    Whitespace is not collapsed, so removed spans may leave runs of spaces.

    Args:
        txt: Raw review text.

    Returns:
        The cleaned, lowercased text.
    """
    # (pattern, replacement) pairs, applied top to bottom.
    substitutions = (
        (r'<[^>]+>', ''),
        (r'https?://\S+', ''),
        (r'[^a-zA-Z0-9\s]', ' '),
    )
    cleaned = txt
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.lower()

_ENGLISH_STOP_WORDS = None  # filled lazily by _english_stop_words()


def _english_stop_words():
    """Return the English stop-word set, reading the NLTK corpus only once."""
    global _ENGLISH_STOP_WORDS
    if _ENGLISH_STOP_WORDS is None:
        # Loading is deferred to first use so it happens after the
        # module-level nltk.download('stopwords') call has run.
        _ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOP_WORDS


def remove_stop_words(txt):
    """Drop English stop words from *txt*.

    The text is split on whitespace, tokens found in NLTK's English
    stop-word list are discarded, and the remaining tokens are re-joined
    with single spaces. Matching is an exact, case-sensitive string
    comparison, so callers should lowercase the text first (as
    ``remove_tags`` does).

    The stop-word set is cached after the first call instead of being
    re-read from the corpus and rebuilt for every review.

    Args:
        txt: Text to filter.

    Returns:
        The filtered text; an empty string if no tokens remain.
    """
    stop_words = _english_stop_words()
    return ' '.join(word for word in txt.split() if word not in stop_words)


# Load the trained model
# Keras model read from an HDF5 file, resolved relative to the working
# directory. fn() feeds it the embeddings produced by bert_embeddings() and
# treats an output above 0.5 as "Positive".
classifier=load_model('movie_sentiment_model.h5')

# Single input BERT embeddings
def bert_embeddings(text, max_length=64):
    """Embed a single text with the module-level BERT model.

    The text is tokenized as a batch of one, truncated to at most
    *max_length* tokens, and passed through ``bert_model``. The hidden
    state at sequence position 0 is used as the embedding.

    Args:
        text: The (already cleaned) text to embed.
        max_length: Maximum number of tokens kept by the tokenizer.

    Returns:
        A NumPy array holding the position-0 hidden state, with one row
        for the single input text.
    """
    encoded = tokenizer(
        [text],
        max_length=max_length,
        truncation=True,
        padding=True,
        return_tensors="tf",
    )
    token_ids = encoded['input_ids']
    mask = encoded['attention_mask']
    hidden_states = bert_model(token_ids, attention_mask=mask).last_hidden_state
    # Keep only the first token's vector for each sequence in the batch.
    first_token_vectors = hidden_states[:, 0, :]
    return first_token_vectors.numpy()

# Define Gradio function
def fn(test_review):
    """Classify a movie review as ``"Positive"`` or ``"Negative"``.

    The review is cleaned with ``remove_tags``, filtered with
    ``remove_stop_words``, embedded with ``bert_embeddings`` and scored by
    the module-level ``classifier``.

    Args:
        test_review: Raw review text entered by the user.

    Returns:
        ``"Positive"`` when the first prediction is above 0.5, otherwise
        ``"Negative"``.
    """
    cleaned = remove_stop_words(remove_tags(test_review))
    scores = classifier.predict(bert_embeddings(cleaned))
    if scores[0] > 0.5:
        return "Positive"
    return "Negative"

# Gradio Interface
# Blurb passed to gr.Interface as the app description.
description = "Give a review of a movie that you like (or hate, sarcasm ) and the model will let you know just how much your review truly reflects your emotions"
# One text box in (the raw review), one text box out (the label from fn).
input_text = gr.Textbox(label="Enter Movie Review Text in English")
output_text = gr.Textbox(label="Output Text")

# Wire fn() to the two text boxes.
# NOTE(review): allow_flagging="auto" presumably logs every submission into
# flagging_dir without showing a flag button — confirm against the installed
# Gradio version's documentation.
app = gr.Interface(
    fn=fn,
    inputs=input_text,
    outputs=output_text,
    title="Sentiment Analysis of Movie Reviews in English",
    description=description,
    allow_flagging="auto",
    flagging_dir='flagging_records'
)

# Start the web app; this runs unconditionally whenever the module is executed
# or imported (there is no __main__ guard).
app.launch(inline=False)