vikigitonga11 committed on
Commit
3a56540
·
verified ·
1 Parent(s): 356817f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -13
app.py CHANGED
@@ -3,24 +3,26 @@ import re
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
5
 
6
- # Load T5-small paraphrase model
7
- model_name = "t5-small"
8
  tokenizer = AutoTokenizer.from_pretrained(model_name)
9
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16) # Use fp16 for speed
10
 
11
- # Move model to CPU (remove if using GPU)
12
- model.to("cpu")
 
13
 
14
- # Initialize paraphrase pipeline with optimized settings
15
  paraphrase_pipeline = pipeline(
16
  "text2text-generation",
17
  model=model,
18
  tokenizer=tokenizer,
19
- truncation=True
 
20
  )
21
 
22
  def split_sentences(text):
23
- """Split text into sentences using regex (faster than nltk)."""
24
  return re.split(r'(?<=[.!?])\s+', text.strip())
25
 
26
  def paraphrase_text(text):
@@ -30,10 +32,10 @@ def paraphrase_text(text):
30
 
31
  sentences = split_sentences(text)
32
 
33
- # Apply T5 paraphrasing to each sentence
34
  paraphrased_results = paraphrase_pipeline(
35
- [f"paraphrase: {sentence} </s>" for sentence in sentences if sentence],
36
- max_length=50, do_sample=True, batch_size=8, num_return_sequences=1 # Faster settings
37
  )
38
 
39
  paraphrased_sentences = [result['generated_text'] for result in paraphrased_results]
@@ -44,8 +46,8 @@ demo = gr.Interface(
44
  fn=paraphrase_text,
45
  inputs=gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10),
46
  outputs=gr.Textbox(label="Paraphrased Text", lines=10),
47
- title="🚀 Fast & Clean T5-Small Paraphraser",
48
- description="Enter text and let AI generate a paraphrased version using an optimized T5-small model!",
49
  theme="huggingface"
50
  )
51
 
 
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
5
 
6
# Load the FLAN-T5 Large checkpoint and its tokenizer.
model_name = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Resolve the device first so the weight dtype can follow it.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only used on GPU. float16 inference on CPU is poorly
# supported by PyTorch (missing or slow kernels, depending on the version),
# so the CPU fallback loads the weights in float32 instead.
model_dtype = torch.float16 if device == "cuda" else torch.float32
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=model_dtype)
model.to(device)

# Build the text2text-generation pipeline on the same device as the model.
# The pipeline API takes a CUDA ordinal (0) for GPU and -1 for CPU.
paraphrase_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
    device=0 if device == "cuda" else -1,
)
23
 
24
  def split_sentences(text):
25
+ """Split text into sentences using regex."""
26
  return re.split(r'(?<=[.!?])\s+', text.strip())
27
 
28
  def paraphrase_text(text):
 
32
 
33
  sentences = split_sentences(text)
34
 
35
+ # Apply FLAN-T5 paraphrasing to each sentence
36
  paraphrased_results = paraphrase_pipeline(
37
+ [f"Rephrase this sentence: {sentence}" for sentence in sentences if sentence],
38
+ max_length=50, do_sample=True, batch_size=8, num_return_sequences=1
39
  )
40
 
41
  paraphrased_sentences = [result['generated_text'] for result in paraphrased_results]
 
46
  fn=paraphrase_text,
47
  inputs=gr.Textbox(label="Enter text", placeholder="Type your text to paraphrase...", lines=10),
48
  outputs=gr.Textbox(label="Paraphrased Text", lines=10),
49
+ title="🚀 FLAN-T5 Paraphraser",
50
+ description="Enter text and let AI generate a paraphrased version using FLAN-T5-Large!",
51
  theme="huggingface"
52
  )
53