"""Gradio demo that paraphrases text sentence-by-sentence with a T5 model."""

import re

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# T5 paraphrase checkpoint used by the demo.
model_name = "Vamsi/T5_Paraphrase_Paws"

# Half precision only pays off on a GPU; on CPU many fp16 kernels are slow or
# missing, so fall back to float32 there.
_USE_CUDA = torch.cuda.is_available()
_DEVICE = "cuda" if _USE_CUDA else "cpu"
_DTYPE = torch.float16 if _USE_CUDA else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=_DTYPE)
model.to(_DEVICE)

# The pipeline is told the same device explicitly so that tokenized inputs are
# placed where the model lives (0 = first CUDA device, -1 = CPU).
paraphrase_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
    device=0 if _USE_CUDA else -1,
)

# Split after '.', '!' or '?' when followed by whitespace; compiled once.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')


def split_sentences(text):
    """Split *text* into sentences with a regex.

    A boundary is any run of whitespace that directly follows ``.``, ``!`` or
    ``?``. Leading/trailing whitespace is stripped first and empty fragments
    are discarded, so blank input yields an empty list.

    Args:
        text: The raw input string.

    Returns:
        A list of non-empty sentence strings, in their original order.
    """
    return [part for part in _SENTENCE_BOUNDARY.split(text.strip()) if part]


def paraphrase_text(text):
    """Paraphrase *text* one sentence at a time and rejoin the results.

    Args:
        text: User-supplied text; ``None`` or whitespace-only input is treated
            as empty.

    Returns:
        The paraphrased sentences joined by single spaces, or a warning
        message when there is nothing to paraphrase.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some text to paraphrase."

    # Each sentence becomes its own "paraphrase: ..." prompt for the model.
    prompts = [
        f"paraphrase: {sentence} "
        for sentence in split_sentences(text)
        if sentence
    ]

    # Sampling makes the output non-deterministic; max_length bounds the
    # length of each generated sentence.
    paraphrased_results = paraphrase_pipeline(
        prompts,
        max_length=50,
        do_sample=True,
        batch_size=8,
        num_return_sequences=1,
    )

    return " ".join(result['generated_text'] for result in paraphrased_results)


# Define Gradio Interface
# NOTE(review): "huggingface" is a legacy theme name; newer Gradio releases
# may warn and fall back to the default theme — confirm against the installed
# Gradio version.
demo = gr.Interface(
    fn=paraphrase_text,
    inputs=gr.Textbox(
        label="Enter text",
        placeholder="Type your text to paraphrase...",
        lines=10,
    ),
    outputs=gr.Textbox(label="Paraphrased Text", lines=10),
    title="🚀 Fast & Clean T5 Paraphraser",
    description="Enter text and let AI generate a paraphrased version using an optimized T5 model!",
    theme="huggingface",
)

if __name__ == "__main__":
    demo.launch()