Spaces:
Sleeping
Sleeping
File size: 2,330 Bytes
3a5c4f5 b14184c ebfb422 b14184c 3a5c4f5 55fdaad b14184c 12ac323 3a5c4f5 b14184c 3a5c4f5 b14184c 12ac323 3a5c4f5 b14184c 3a5c4f5 b14184c 3a5c4f5 b14184c 3a5c4f5 b14184c ebfb422 3a5c4f5 55fdaad 3a5c4f5 55fdaad f94741c 12ac323 55fdaad 9cb81e0 7a2c3d1 55fdaad 9cb81e0 55fdaad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import gradio as gr
import numpy as np
import pandas as pd
import re
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense
from transformers import BertTokenizer, TFBertModel
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from nltk.corpus import stopwords
import tensorflow as tf
import nltk
# Fetch the NLTK stop-word corpus that remove_stop_words reads at call time
nltk.download('stopwords')

# The tokenizer and the encoder are both built from the same pretrained
# checkpoint, so the name is spelled once and shared.
BERT_CHECKPOINT = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
bert_model = TFBertModel.from_pretrained(BERT_CHECKPOINT)
# Clean text
def remove_tags(txt):
    """Normalise raw review text for the downstream tokenizer.

    The substitutions run in a fixed order:

    1. anything shaped like ``<...>`` is deleted (nothing is inserted in its
       place, so words on either side of a tag become adjacent);
    2. ``http://`` / ``https://`` URLs are deleted up to the next whitespace;
    3. every character that is not an ASCII letter, a digit, or whitespace is
       replaced by a single space.

    Args:
        txt: The raw text to clean.

    Returns:
        The cleaned text, lower-cased. Whitespace is not collapsed.
    """
    substitutions = (
        (r'<[^>]+>', ''),
        (r'https?://\S+', ''),
        (r'[^a-zA-Z0-9\s]', ' '),
    )
    cleaned = txt
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.lower()
# Per-process cache of stop-word sets, keyed by language name.
_STOP_WORD_CACHE = {}


def _english_stop_words():
    """Return the NLTK English stop-word set, loading it on first use only."""
    try:
        return _STOP_WORD_CACHE['english']
    except KeyError:
        # Reading the corpus goes to disk, so do it once per process rather
        # than once per review. Loading stays lazy so it still happens after
        # the module-level nltk.download call has run.
        words = frozenset(stopwords.words('english'))
        _STOP_WORD_CACHE['english'] = words
        return words


def remove_stop_words(txt):
    """Drop English stop words from whitespace-separated text.

    Matching is exact and case-sensitive against NLTK's English stop-word
    list, so callers should lower-case the text first.

    Args:
        txt: Text to filter; it is split on any run of whitespace.

    Returns:
        The remaining words joined by single spaces (an empty string when
        nothing is left).
    """
    stop_words = _english_stop_words()
    return ' '.join(word for word in txt.split() if word not in stop_words)
# Restore the trained classifier from its saved .h5 file (path is relative to
# the working directory the app is started from).
MODEL_PATH = 'movie_sentiment_model.h5'
classifier = load_model(MODEL_PATH)
# Single input BERT embeddings
def bert_embeddings(text, max_length=64):
    """Embed a single string with the module-level BERT model.

    Args:
        text: The string to encode; it is sent through the tokenizer as a
            one-item batch.
        max_length: Token limit handed to the tokenizer; longer inputs are
            truncated to it.

    Returns:
        A NumPy array with one row for the input, holding the hidden state of
        the first token position from BERT's last layer.
    """
    encoded = tokenizer(
        [text],
        max_length=max_length,
        truncation=True,
        padding=True,
        return_tensors="tf",
    )
    token_ids = encoded['input_ids']
    mask = encoded['attention_mask']
    hidden_states = bert_model(token_ids, attention_mask=mask).last_hidden_state
    # Position 0 of each sequence is the [CLS] token, used here as the
    # whole-sequence representation.
    return hidden_states[:, 0, :].numpy()
# Define Gradio function
def fn(test_review):
    """Classify one movie review for the Gradio interface.

    The review is cleaned with ``remove_tags`` and ``remove_stop_words``,
    embedded with ``bert_embeddings``, and scored by the loaded classifier.

    Args:
        test_review: Raw review text from the input textbox. ``None`` or a
            blank string is accepted and short-circuits before any model work.

    Returns:
        ``"Positive"`` when the classifier's score exceeds 0.5, ``"Negative"``
        otherwise, or a prompt message when no review text was supplied.
    """
    # Without this guard, None raises TypeError inside re.sub and a blank
    # string gets an arbitrary label; neither tells the user what went wrong.
    if test_review is None or not test_review.strip():
        return "Please enter a movie review."
    review = remove_tags(test_review)
    review = remove_stop_words(review)
    cls_embeddings = bert_embeddings(review)
    prediction = classifier.predict(cls_embeddings)
    # NOTE(review): assumes the classifier emits a single score per review, so
    # prediction[0] holds exactly one value — confirm against the saved model.
    return "Positive" if prediction[0] > 0.5 else "Negative"
# Gradio UI wiring
# Text passed to the interface as its description.
description = "Give a review of a movie that you like (or hate, sarcasm ) and the model will let you know just how much your review truly reflects your emotions"

# One free-text box for the review, one text box for the predicted label.
input_text = gr.Textbox(label="Enter Movie Review Text in English")
output_text = gr.Textbox(label="Output Text")

# Presentation and flagging options, gathered so the constructor call below
# only has to name the function and its input/output components.
interface_options = dict(
    title="Sentiment Analysis of Movie Reviews in English",
    description=description,
    allow_flagging="auto",
    flagging_dir='flagging_records',
)
app = gr.Interface(
    fn=fn,
    inputs=input_text,
    outputs=output_text,
    **interface_options,
)

# Runs at import time: start serving the interface.
app.launch(inline=False)
|