Spaces:

ravindravala
/

humaniser

Running

App Files Files Community

ravindravala committed on Mar 16

Commit

3aeccd5

1 Parent(s): 25bb492

disable greedy search

Browse files

Files changed (1) hide show

app.py +24 -7

app.py CHANGED Viewed

@@ -12,7 +12,10 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 # Load the model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
-model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to(device)
 # Function to paraphrase text
 def humanize_text(text, temperature=0.7, max_length=512):
@@ -24,25 +27,35 @@ def humanize_text(text, temperature=0.7, max_length=512):
         truncation=True,
     ).input_ids.to(device)
     outputs = model.generate(
         input_ids,
         max_length=max_length,
-        temperature=temperature,
-        num_beams=1,
-        num_beam_groups=1,
-        num_return_sequences=1,
         repetition_penalty=2.0,
-        diversity_penalty=0.5,
         no_repeat_ngram_size=2,
     )
     paraphrased_texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)
     return random.choice(paraphrased_texts)
 # Function to split input into sentences
 def split_into_sentences(text):
     """Split *text* into pieces at whitespace that follows '.', '!' or '?'.

     The terminating punctuation stays attached to the piece before the
     split, because the pattern only looks behind for it and consumes just
     the whitespace run.
     """
     sentence_boundary = re.compile(r"(?<=[.!?])\s+")
     return sentence_boundary.split(text)
 # Function to process multi-line text
 def process_text(input_text):
     lines = input_text.split("\n")
@@ -53,11 +66,15 @@ def process_text(input_text):
             processed_lines.append(line)
         else:
             sentences = split_into_sentences(line)
-            processed_sentences = [humanize_text(sentence, max_length=len(sentence)) for sentence in sentences]
             processed_lines.append(" ".join(processed_sentences))
     return "\n".join(processed_lines)
 # Gradio Interface
 iface = gr.Interface(
     fn=process_text,

 # Load the model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
+model = AutoModelForSeq2SeqLM.from_pretrained(
+    "humarin/chatgpt_paraphraser_on_T5_base"
+).to(device)
 # Function to paraphrase text
 def humanize_text(text, temperature=0.7, max_length=512):
         truncation=True,
     ).input_ids.to(device)
+    # outputs = model.generate(
+    #     input_ids,
+    #     max_length=max_length,
+    #     temperature=temperature,
+    #     num_beams=1,
+    #     num_beam_groups=1,
+    #     num_return_sequences=1,
+    #     repetition_penalty=2.0,
+    #     diversity_penalty=0.5,
+    #     no_repeat_ngram_size=2,
+    # )
     outputs = model.generate(
         input_ids,
         max_length=max_length,
+        do_sample=False,
         repetition_penalty=2.0,
         no_repeat_ngram_size=2,
     )
     paraphrased_texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)
     return random.choice(paraphrased_texts)
 # Function to split input into sentences
 def split_into_sentences(text):
     """Split *text* into pieces at whitespace that follows '.', '!' or '?'.

     The terminating punctuation stays attached to the piece before the
     split, because the pattern only looks behind for it and consumes just
     the whitespace run.
     """
     sentence_boundary = re.compile(r"(?<=[.!?])\s+")
     return sentence_boundary.split(text)
 # Function to process multi-line text
 def process_text(input_text):
     lines = input_text.split("\n")
             processed_lines.append(line)
         else:
             sentences = split_into_sentences(line)
+            processed_sentences = [
+                humanize_text(sentence, max_length=len(sentence))
+                for sentence in sentences
+            ]
             processed_lines.append(" ".join(processed_sentences))
     return "\n".join(processed_lines)
 # Gradio Interface
 iface = gr.Interface(
     fn=process_text,