|
import gradio as gr |
|
import re |
|
import torch |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline |
|
|
|
|
|
# Hugging Face Hub identifier of the T5 paraphrasing checkpoint used by the app.
model_name = "Vamsi/T5_Paraphrase_Paws"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# The model is pinned to the CPU below, so load the weights in float32.
# Depending on the PyTorch version, half-precision CPU kernels are either
# unavailable (raising "... not implemented for 'Half'") or considerably
# slower than float32, so float16 buys nothing on this device.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float32)

model.to("cpu")

# Shared text2text pipeline; truncation=True clips over-long inputs to the
# tokenizer's maximum length instead of failing on them.
paraphrase_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
)
|
|
|
def split_sentences(text):
    """Split text into sentences using a regex (no NLTK dependency).

    A sentence boundary is any run of whitespace that immediately follows
    ``.``, ``!`` or ``?``; the punctuation stays attached to the sentence it
    ends.

    Args:
        text: The raw input string. Leading and trailing whitespace is
            ignored.

    Returns:
        A list of sentence strings in their original order. Empty or
        whitespace-only input yields an empty list.
    """
    stripped = text.strip()
    if not stripped:
        # re.split() would return [''] here, which is not a sentence and
        # would force every caller to filter it out again.
        return []
    return re.split(r'(?<=[.!?])\s+', stripped)
|
|
|
def paraphrase_text(text):
    """Paraphrase the input text one sentence at a time.

    The text is split into sentences, each sentence is sent to the
    paraphrase pipeline as its own prompt, and the generated sentences are
    joined back together with single spaces.

    Args:
        text: The text to paraphrase. ``None``, empty and whitespace-only
            values are all treated as "no input".

    Returns:
        The paraphrased text, or a short warning message when there is
        nothing to paraphrase. Generation uses sampling
        (``do_sample=True``), so repeated calls may return different text.
    """
    # Treat a missing value like a blank one so the caller gets the friendly
    # message rather than an AttributeError from None.strip().
    if text is None or not text.strip():
        return "⚠️ Please enter some text to paraphrase."

    # One prompt per non-empty sentence, in the prefix/suffix format the
    # pipeline is called with throughout this file.
    prompts = [
        f"paraphrase: {sentence} </s>"
        for sentence in split_sentences(text)
        if sentence
    ]

    # max_length=50 is passed per prompt, so each generated sentence is
    # capped independently of the others.
    generated = paraphrase_pipeline(
        prompts,
        max_length=50,
        do_sample=True,
        batch_size=8,
        num_return_sequences=1,
    )

    return " ".join(item['generated_text'] for item in generated)
|
|
|
|
|
# Build the two text areas first, then wire them into a single-function UI.
_input_box = gr.Textbox(
    label="Enter text",
    placeholder="Type your text to paraphrase...",
    lines=10,
)
_output_box = gr.Textbox(label="Paraphrased Text", lines=10)

# Gradio front end: one input box, one output box, backed by paraphrase_text.
demo = gr.Interface(
    fn=paraphrase_text,
    inputs=_input_box,
    outputs=_output_box,
    title="🚀 Fast & Clean T5 Paraphraser",
    description="Enter text and let AI generate a paraphrased version using an optimized T5 model!",
    theme="huggingface",
)


# Start the web app only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    demo.launch()
|
|