Spaces:
Sleeping
Sleeping
Update app.py to use a pre-trained sentiment-analysis pipeline
Browse files
app.py
CHANGED
@@ -1,53 +1,13 @@
|
|
1 |
import gradio as gr
|
2 |
-
from
|
3 |
-
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
|
4 |
-
import torch
|
5 |
|
6 |
-
#
|
7 |
-
|
8 |
-
|
9 |
-
# Load the IMDb dataset
|
10 |
-
dataset = load_dataset('imdb')
|
11 |
-
|
12 |
-
# Initialize the tokenizer and model
|
13 |
-
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
|
14 |
-
model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)
|
15 |
-
model.to(device)
|
16 |
-
|
17 |
-
# Tokenize the dataset
|
18 |
-
def tokenize_function(examples):
|
19 |
-
return tokenizer(examples['text'], padding="max_length", truncation=True)
|
20 |
-
|
21 |
-
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
22 |
-
|
23 |
-
# Set up training arguments
|
24 |
-
training_args = TrainingArguments(
|
25 |
-
output_dir="./results",
|
26 |
-
evaluation_strategy="epoch",
|
27 |
-
learning_rate=2e-5,
|
28 |
-
per_device_train_batch_size=16,
|
29 |
-
per_device_eval_batch_size=16,
|
30 |
-
num_train_epochs=1, # Start with fewer epochs for quicker runs
|
31 |
-
weight_decay=0.01,
|
32 |
-
)
|
33 |
-
|
34 |
-
# Initialize the Trainer
|
35 |
-
trainer = Trainer(
|
36 |
-
model=model,
|
37 |
-
args=training_args,
|
38 |
-
train_dataset=tokenized_datasets["train"].shuffle(seed=42).select(range(1000)), # Use a subset for quicker runs
|
39 |
-
eval_dataset=tokenized_datasets["test"].shuffle(seed=42).select(range(1000)),
|
40 |
-
)
|
41 |
-
|
42 |
-
# Train the model
|
43 |
-
trainer.train()
|
44 |
|
45 |
# Function to classify sentiment
|
46 |
def classify_text(text):
|
47 |
-
|
48 |
-
|
49 |
-
prediction = torch.argmax(outputs.logits, dim=-1).item()
|
50 |
-
return "Positive" if prediction == 1 else "Negative"
|
51 |
|
52 |
# Set up the Gradio interface
|
53 |
iface = gr.Interface(fn=classify_text, inputs="text", outputs="text")
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import pipeline
|
|
|
|
|
3 |
|
4 |
+
# Load the pre-trained sentiment-analysis pipeline
|
5 |
+
classifier = pipeline('sentiment-analysis')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
# Function to classify sentiment
|
8 |
def classify_text(text):
|
9 |
+
result = classifier(text)[0]
|
10 |
+
return f"{result['label']} with score {result['score']}"
|
|
|
|
|
11 |
|
12 |
# Set up the Gradio interface
|
13 |
iface = gr.Interface(fn=classify_text, inputs="text", outputs="text")
|