mano96 committed on
Commit
5a794ad
·
1 Parent(s): b5db1e4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from newspaper import Article
2
+
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+
5
# Checkpoint identifier shared by the tokenizer and the seq2seq model.
_CHECKPOINT = "Vamsi/T5_Paraphrase_Paws"

# Loaded once at import time; my_paraphrase() reuses these module-level objects.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
7
+
8
import nltk

# sent_tokenize() needs the Punkt sentence-boundary data. Newer NLTK releases
# load it from the "punkt_tab" resource instead of the pickled "punkt" one, so
# fetch both to keep sentence splitting working whichever NLTK is installed.
for _resource in ("punkt", "punkt_tab"):
    nltk.download(_resource)

from nltk.tokenize import sent_tokenize
11
+
12
def my_paraphrase(sentence):
    """Return one sampled paraphrase of ``sentence``.

    The sentence is wrapped in the ``"paraphrase: ... </s>"`` prompt, encoded
    with the module-level ``tokenizer`` and passed to the module-level
    ``model`` for sampled generation (top-k / top-p). Because sampling is
    enabled, repeated calls with the same input may return different text.

    Args:
        sentence: The sentence to paraphrase.

    Returns:
        The decoded paraphrase with special tokens removed, or ``""`` when
        ``sentence`` is empty or contains only whitespace.
    """
    # Nothing to paraphrase: skip the model call instead of sampling from a
    # bare prompt, which would only produce unrelated text.
    if not sentence or not sentence.strip():
        return ""

    # NOTE(review): the explicit " </s>" suffix is kept unchanged; T5
    # tokenizers normally append EOS themselves -- confirm it is still needed.
    prompt = "paraphrase: " + sentence + " </s>"

    # Calling the tokenizer directly replaces the deprecated encode_plus().
    encoding = tokenizer(prompt, padding=True, return_tensors="pt")

    # early_stopping is intentionally not passed: it only affects beam search,
    # and this call samples with a single beam.
    outputs = model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        max_length=256,
        do_sample=True,
        top_k=120,
        top_p=0.95,
        num_return_sequences=1,
    )

    return tokenizer.decode(
        outputs[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )
29
+
30
def text(url):
    """Fetch the article at ``url`` and return its text, paraphrased.

    The page is downloaded and parsed with ``newspaper.Article``. The
    extracted text is split into sentences with ``sent_tokenize``, each
    sentence is rewritten by ``my_paraphrase``, and the rewritten sentences
    are joined with single spaces.

    Args:
        url: Address of the article to download.

    Returns:
        The paraphrased article text as one string.
    """
    page = Article(url)
    page.download()
    page.parse()

    rewritten = []
    for sentence in sent_tokenize(page.text):
        rewritten.append(my_paraphrase(sentence))

    return " ".join(rewritten)
39
+
40
+ import gradio as gr
41
def summarize(URL):
    """Gradio handler: paraphrase an article URL or a piece of plain text.

    The UI invites free text, so the input is not assumed to be a URL. A
    single http(s) address is fetched and paraphrased through ``text``; any
    other input is split into sentences and paraphrased directly.

    Args:
        URL: The raw textbox value -- an article URL or plain text.

    Returns:
        The paraphrased text, or ``""`` when the input is empty.
    """
    from urllib.parse import urlparse  # local import: only this handler needs it

    user_input = (URL or "").strip()
    if not user_input:
        return ""

    try:
        parsed = urlparse(user_input)
    except ValueError:
        # Malformed URL-like text (e.g. an unbalanced "[") is treated as prose.
        parsed = None

    # Treat the input as a URL only if it is one whitespace-free http(s) address.
    looks_like_url = (
        parsed is not None
        and parsed.scheme in ("http", "https")
        and bool(parsed.netloc)
        and len(user_input.split()) == 1
    )
    if looks_like_url:
        return text(user_input)

    return " ".join(my_paraphrase(sent) for sent in sent_tokenize(user_input))
45
# Build the web UI: one multi-line text input wired to summarize(), one
# labelled text output, and a single example row.
# NOTE(review): gr.inputs / gr.outputs look like Gradio's legacy component
# namespaces -- confirm they exist in the installed Gradio version.
input_box = gr.inputs.Textbox(lines=7, placeholder="Enter text here")
output_box = gr.outputs.Textbox(label="Paraphrased Text")

demo = gr.Interface(
    fn=summarize,
    inputs=input_box,
    outputs=[output_box],
    examples=[["developed by python team"]],
)
demo.launch(inline=False)