Spaces:

rviana
/

IMDb-Sentiment-Analysis

Sleeping

App Files Files Community

IMDb-Sentiment-Analysis / app.py

rviana

Update sentiment analysis app

d8dbb05 11 months ago

raw

history blame

1.8 kB

	import gradio as gr
	from datasets import load_dataset
	from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
	import torch

	# Pick the compute device: CUDA when torch reports it as available, CPU otherwise
	if torch.cuda.is_available():
	    device = torch.device("cuda")
	else:
	    device = torch.device("cpu")

	# Fetch the dataset registered under the 'imdb' identifier
	dataset = load_dataset('imdb')

	# Tokenizer matching the 'distilbert-base-uncased' checkpoint
	tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')

	# Sequence classifier with a two-label head, placed on the selected device
	model = AutoModelForSequenceClassification.from_pretrained(
	    'distilbert-base-uncased',
	    num_labels=2,
	).to(device)

	# Tokenize the dataset
	def tokenize_function(examples):
	    """Tokenize the ``'text'`` field of a batch of examples.

	    Args:
	        examples: Mapping with a ``'text'`` entry holding the raw text
	            to encode (this function is passed to ``dataset.map`` with
	            ``batched=True``).

	    Returns:
	        The module-level tokenizer's output for those texts, padded with
	        ``padding="max_length"`` and truncated.
	    """
	    texts = examples['text']
	    encoded = tokenizer(texts, padding="max_length", truncation=True)
	    return encoded

	# Apply the tokenizer across the loaded dataset, batch by batch
	tokenized_datasets = dataset.map(tokenize_function, batched=True)

	# Hyperparameters for a short fine-tuning run, evaluated at the end of each epoch
	training_args = TrainingArguments(
	    output_dir="./results",
	    num_train_epochs=1,  # a single epoch keeps the run quick
	    per_device_train_batch_size=16,
	    per_device_eval_batch_size=16,
	    learning_rate=2e-5,
	    weight_decay=0.01,
	    evaluation_strategy="epoch",
	)

	# Work on fixed-seed 1000-example subsets instead of the full splits for quicker runs
	train_subset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
	eval_subset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))

	# Bind the model, arguments and data subsets together
	trainer = Trainer(
	    model=model,
	    args=training_args,
	    train_dataset=train_subset,
	    eval_dataset=eval_subset,
	)

	# Fine-tune the model
	trainer.train()

	# Function to classify sentiment
	def classify_text(text):
	    """Classify ``text`` and return its sentiment label.

	    The text is tokenized with the module-level tokenizer, moved to
	    ``device`` and run through the module-level model; the index of the
	    largest logit decides the label.

	    Args:
	        text: The input string to classify.

	    Returns:
	        ``"Positive"`` when the predicted class index is 1, otherwise
	        ``"Negative"``.
	    """
	    # The model's train/eval mode at this point depends on what the training
	    # run did last; force eval mode so dropout is disabled and repeated calls
	    # on the same text give the same answer.
	    model.eval()
	    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
	    # Inference only: skip autograd bookkeeping to save memory and time.
	    with torch.no_grad():
	        outputs = model(**inputs)
	    prediction = torch.argmax(outputs.logits, dim=-1).item()
	    return "Positive" if prediction == 1 else "Negative"

	# Wrap classify_text in a text-in / text-out Gradio interface and start it
	iface = gr.Interface(
	    fn=classify_text,
	    inputs="text",
	    outputs="text",
	)
	iface.launch()