ravindravala committed on
Commit
3aeccd5
·
1 Parent(s): 25bb492

disable greedy search

Browse files
Files changed (1) hide show
  1. app.py +24 -7
app.py CHANGED
@@ -12,7 +12,10 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
12
 
13
  # Load the model and tokenizer
14
  tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
15
- model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to(device)
 
 
 
16
 
17
  # Function to paraphrase text
18
  def humanize_text(text, temperature=0.7, max_length=512):
@@ -24,25 +27,35 @@ def humanize_text(text, temperature=0.7, max_length=512):
24
  truncation=True,
25
  ).input_ids.to(device)
26
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  outputs = model.generate(
28
  input_ids,
29
  max_length=max_length,
30
- temperature=temperature,
31
- num_beams=1,
32
- num_beam_groups=1,
33
- num_return_sequences=1,
34
  repetition_penalty=2.0,
35
- diversity_penalty=0.5,
36
  no_repeat_ngram_size=2,
37
  )
38
 
39
  paraphrased_texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)
40
  return random.choice(paraphrased_texts)
41
 
 
42
  # Function to split input into sentences
43
  def split_into_sentences(text):
44
  return re.split(r"(?<=[.!?])\s+", text)
45
 
 
46
  # Function to process multi-line text
47
  def process_text(input_text):
48
  lines = input_text.split("\n")
@@ -53,11 +66,15 @@ def process_text(input_text):
53
  processed_lines.append(line)
54
  else:
55
  sentences = split_into_sentences(line)
56
- processed_sentences = [humanize_text(sentence, max_length=len(sentence)) for sentence in sentences]
 
 
 
57
  processed_lines.append(" ".join(processed_sentences))
58
 
59
  return "\n".join(processed_lines)
60
 
 
61
  # Gradio Interface
62
  iface = gr.Interface(
63
  fn=process_text,
 
12
 
13
  # Load the model and tokenizer
14
  tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
15
+ model = AutoModelForSeq2SeqLM.from_pretrained(
16
+ "humarin/chatgpt_paraphraser_on_T5_base"
17
+ ).to(device)
18
+
19
 
20
  # Function to paraphrase text
21
  def humanize_text(text, temperature=0.7, max_length=512):
 
27
  truncation=True,
28
  ).input_ids.to(device)
29
 
30
+ # outputs = model.generate(
31
+ # input_ids,
32
+ # max_length=max_length,
33
+ # temperature=temperature,
34
+ # num_beams=1,
35
+ # num_beam_groups=1,
36
+ # num_return_sequences=1,
37
+ # repetition_penalty=2.0,
38
+ # diversity_penalty=0.5,
39
+ # no_repeat_ngram_size=2,
40
+ # )
41
+
42
  outputs = model.generate(
43
  input_ids,
44
  max_length=max_length,
45
+ do_sample=False,
 
 
 
46
  repetition_penalty=2.0,
 
47
  no_repeat_ngram_size=2,
48
  )
49
 
50
  paraphrased_texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)
51
  return random.choice(paraphrased_texts)
52
 
53
+
54
  # Function to split input into sentences
55
  def split_into_sentences(text):
56
  return re.split(r"(?<=[.!?])\s+", text)
57
 
58
+
59
  # Function to process multi-line text
60
  def process_text(input_text):
61
  lines = input_text.split("\n")
 
66
  processed_lines.append(line)
67
  else:
68
  sentences = split_into_sentences(line)
69
+ processed_sentences = [
70
+ humanize_text(sentence, max_length=len(sentence))
71
+ for sentence in sentences
72
+ ]
73
  processed_lines.append(" ".join(processed_sentences))
74
 
75
  return "\n".join(processed_lines)
76
 
77
+
78
  # Gradio Interface
79
  iface = gr.Interface(
80
  fn=process_text,