t5-paraphrase-generation

Running

App Files Files Community

vikigitonga11 committed on Mar 18

Commit

3a56540

verified ·

1 Parent(s): 356817f

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -13

app.py CHANGED Viewed

@@ -3,24 +3,26 @@ import re
 import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
-# Load T5-small paraphrase model
-model_name = "t5-small"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16)  # Use fp16 for speed
-# Move model to CPU (remove if using GPU)
-model.to("cpu")
-# Initialize paraphrase pipeline with optimized settings
 paraphrase_pipeline = pipeline(
     "text2text-generation",
     model=model,
     tokenizer=tokenizer,
-    truncation=True
 )
 def split_sentences(text):
-    """Split text into sentences using regex (faster than nltk)."""
     return re.split(r'(?<=[.!?])\s+', text.strip())
 def paraphrase_text(text):
@@ -30,10 +32,10 @@ def paraphrase_text(text):
     sentences = split_sentences(text)
-    # Apply T5 paraphrasing to each sentence
     paraphrased_results = paraphrase_pipeline(
-        [f"paraphrase: {sentence} </s>" for sentence in sentences if sentence],
-        max_length=50, do_sample=True, batch_size=8, num_return_sequences=1  # Faster settings
     )
     paraphrased_sentences = [result['generated_text'] for result in paraphrased_results]
@@ -44,8 +46,8 @@ demo = gr.Interface(
     fn=paraphrase_text,
     inputs=gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10),
     outputs=gr.Textbox(label="Paraphrased Text", lines=10),
-    title="🚀 Fast & Clean T5-Small Paraphraser",
-    description="Enter text and let AI generate a paraphrased version using an optimized T5-small model!",
     theme="huggingface"
 )

 import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+# Load FLAN-T5 Large model
+model_name = "google/flan-t5-large"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16)  # Use float16 for efficiency
+# Select the device: use the GPU ("cuda") when available, otherwise fall back to CPU
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
+# Initialize paraphrase pipeline
 paraphrase_pipeline = pipeline(
     "text2text-generation",
     model=model,
     tokenizer=tokenizer,
+    truncation=True,
+    device=0 if device == "cuda" else -1  # Use GPU if available
 )
 def split_sentences(text):
+    """Split text into sentences using regex."""
     return re.split(r'(?<=[.!?])\s+', text.strip())
 def paraphrase_text(text):
     sentences = split_sentences(text)
+    # Apply FLAN-T5 paraphrasing to each sentence
     paraphrased_results = paraphrase_pipeline(
+        [f"Rephrase this sentence: {sentence}" for sentence in sentences if sentence],
+        max_length=50, do_sample=True, batch_size=8, num_return_sequences=1
     )
     paraphrased_sentences = [result['generated_text'] for result in paraphrased_results]
     fn=paraphrase_text,
     inputs=gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10),
     outputs=gr.Textbox(label="Paraphrased Text", lines=10),
+    title="🚀 FLAN-T5 Paraphraser",
+    description="Enter text and let AI generate a paraphrased version using FLAN-T5-Large!",
     theme="huggingface"
 )