# vikigitonga11's picture
# Update app.py
# b212bb2 verified
import gradio as gr
import re
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Load the T5 paraphrase model (faster than PEGASUS).
model_name = "Vamsi/T5_Paraphrase_Paws"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Pick the device at runtime instead of hard-coding CPU.
# Half precision is only used on CUDA: on CPU, fp16 kernels are missing in
# older PyTorch builds (e.g. "addmm_impl_cpu_ not implemented for 'Half'")
# and slower than fp32 where they do exist.
use_gpu = torch.cuda.is_available()
model_dtype = torch.float16 if use_gpu else torch.float32
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=model_dtype)
model.to("cuda" if use_gpu else "cpu")

# Build the text2text generation pipeline around the preloaded model.
# The device is passed explicitly (0 = first GPU, -1 = CPU) so the pipeline
# does not move the model somewhere other than where it was just placed.
paraphrase_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
    device=0 if use_gpu else -1,
)
def split_sentences(text):
    """Split text into sentences using a regex (faster than nltk).

    A boundary is any run of whitespace that directly follows ``.``, ``!``
    or ``?``; the punctuation stays attached to the preceding sentence.
    This is a purely lexical rule, so abbreviations such as ``"Dr. Smith"``
    are split as well.

    Args:
        text: The raw input string.

    Returns:
        A list of sentence strings. Blank or whitespace-only input yields an
        empty list rather than a list holding one empty string.
    """
    stripped = text.strip()
    if not stripped:
        # re.split on "" would return [""], i.e. one bogus empty sentence.
        return []
    return re.split(r'(?<=[.!?])\s+', stripped)
def paraphrase_text(text):
    """Paraphrase the input text one sentence at a time.

    The text is split with ``split_sentences``; every non-empty sentence is
    wrapped in the ``"paraphrase: ... </s>"`` prompt and sent through
    ``paraphrase_pipeline`` in a single batched call. The generated
    sentences are joined with single spaces, in their original order.

    Args:
        text: The user-supplied text. ``None``, empty and whitespace-only
            values are all treated as "no input".

    Returns:
        The paraphrased text, or a warning message when there is nothing to
        paraphrase.
    """
    # `not text` also covers None, which would otherwise crash on .strip().
    if not text or not text.strip():
        return "⚠️ Please enter some text to paraphrase."

    prompts = [
        f"paraphrase: {sentence} </s>"
        for sentence in split_sentences(text)
        if sentence
    ]

    # Sampling is enabled, so repeated calls may give different wording.
    # max_length=50 caps each generated sentence (faster settings).
    paraphrased_results = paraphrase_pipeline(
        prompts,
        max_length=50,
        do_sample=True,
        batch_size=8,
        num_return_sequences=1,
    )

    return " ".join(result['generated_text'] for result in paraphrased_results)
# Gradio UI: one multi-line textbox in, one multi-line textbox out,
# wired to paraphrase_text.
source_box = gr.Textbox(
    label="Enter text",
    placeholder="Type your text to paraphrase...",
    lines=10,
)
result_box = gr.Textbox(label="Paraphrased Text", lines=10)

demo = gr.Interface(
    fn=paraphrase_text,
    inputs=source_box,
    outputs=result_box,
    title="🚀 Fast & Clean T5 Paraphraser",
    description="Enter text and let AI generate a paraphrased version using an optimized T5 model!",
    theme="huggingface",
)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()