Shrey committed on
Commit
1bf0288
·
1 Parent(s): 2bea29b
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- Inference setup -------------------------------------------------------
# Load a custom-trained tokenizer and a TensorFlow masked-LM model, build a
# HuggingFace fill-mask pipeline, and run a quick smoke test.
from transformers import FillMaskPipeline, DistilBertTokenizer, TFAutoModelForMaskedLM
from transformers import BertTokenizer

# Load the tokenizer from a local vocab file.
# NOTE(review): hard-coded absolute path — this only works on the author's
# machine; consider a config value or HF Hub model id for deployment.
tokenizer_path_1 = "/Users/mv96/Downloads/vocabularies/trained_tokenizer/vocab.txt"
tokenizer_1 = BertTokenizer.from_pretrained(tokenizer_path_1)

# Load the pretrained masked language model (TensorFlow weights).
model_path = "/Users/mv96/Downloads/bert_lm_10"
model_1 = TFAutoModelForMaskedLM.from_pretrained(model_path)

# Build the fill-mask pipeline used for inference.
unmasker = FillMaskPipeline(model=model_1, tokenizer=tokenizer_1)

# Smoke test on a sample sentence (expected top suggestion: "reduction").
txt = "a polynomial [MASK] from 3-SAT."
# BUG FIX: this assignment was commented out in the original, so the loop
# below raised a NameError on `results`; restore it so the smoke test runs.
results = unmasker(txt, top_k=5)

# Show the smoke-test results.
for res in results:
    print(res["sequence"])
    print(res["score"])
24
+
25
# Wrap the fill-mask pipeline in a callable suitable for a Gradio callback.
def unmask_words(txt_with_mask, k_suggestions=5):
    """Return a {token: score} mapping of the top-k fill-mask suggestions.

    Spaces are stripped out of each suggested token string so every label
    renders as a single word in the UI.
    """
    predictions = unmasker(txt_with_mask, top_k=k_suggestions)
    return {
        "".join(pred["token_str"].split(" ")): pred["score"]
        for pred in predictions
    }
33
+
34
# --- Gradio front-end ------------------------------------------------------
import gradio as gr

# NOTE(review): gr.inputs / gr.outputs is the legacy (pre-3.x) Gradio
# namespace; kept as-is to match the Gradio version this app was built for.
description="""CC bert is a MLM model pretrained on data collected from ~200k papers in mainly Computational Complexity
or related domain. For more information visit [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)

"""

# One-column example inputs shown beneath the interface.
examples = [
    ["as pspace is [MASK] under complement."],
    ["n!-(n-1)[MASK]"],
    ["[MASK] these two classes is a major problem."],
    ["This would show that the polynomial heirarchy at the second [MASK], which is considered only"],
    ["""we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data,
and their combined [MASK]"""],
]

# A text box for the masked sentence plus a slider for the suggestion count.
masked_text_input = gr.inputs.Textbox(
    lines=20,
    placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",
    label="Enter the masked text:",
)
suggestion_slider = gr.inputs.Slider(1, 10, 1, 5, label="No of Suggestions:")

interface = gr.Interface(
    fn=unmask_words,
    inputs=[masked_text_input, suggestion_slider],
    outputs=gr.outputs.Label(label="top words:"),
    examples=examples,
    title="CC-Bert MLM",
    description=description,
)

# NOTE(review): hard-coded demo credentials — replace before any public deploy.
interface.launch(debug=True, share=True, auth=("test", "test"))