Spaces:

Shrey
/

cc-bert

Sleeping

App Files Files Community

Shrey committed on Oct 18, 2022

Commit

1bf0288

1 Parent(s): 2bea29b

first

Browse files

Files changed (1) hide show

app.py +60 -0

app.py ADDED Viewed

	@@ -0,0 +1,60 @@

+#the inference function
+import os
+from transformers import FillMaskPipeline ,DistilBertTokenizer,TFAutoModelForMaskedLM
+from transformers import BertTokenizer
+#load the tokenizer
+tokenizer_path_1="/Users/mv96/Downloads/vocabularies/trained_tokenizer/vocab.txt"
+tokenizer_1 = BertTokenizer.from_pretrained(tokenizer_path_1)
+#load the model path
+model_path="/Users/mv96/Downloads/bert_lm_10"
+model_1 = TFAutoModelForMaskedLM.from_pretrained(model_path)
+#build the unmasker pipeline using HF for inference
+unmasker = FillMaskPipeline(model=model_1,tokenizer=tokenizer_1)
+#try on a sample of txt
+txt="a polynomial [MASK] from 3-SAT." #reduction
+#results=unmasker(txt,top_k=5)
+#show the results
+for res in results:
+    print(res["sequence"])
+    print(res["score"])
+#make a function out of the unmasker
+def unmask_words(txt_with_mask,k_suggestions=5):
+    results=unmasker(txt_with_mask,top_k=k_suggestions)
+    labels={}
+    for res in results:
+        labels["".join(res["token_str"].split(" "))]=res["score"]
+    return labels
+#trying our function
+#val=unmask_words(txt)
+import gradio as gr
+description="""CC bert is a MLM model pretrained on data collected from ~200k papers in mainly Computational Complexity
+or related domain.  For more information visit [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)
+or contact [[email protected]]([email protected]).
+"""
+examples=[["as pspace is [MASK] under complement."],
+          ["n!-(n-1)[MASK]"],
+         ["[MASK] these two classes is a major problem."],
+          ["This would show that the polynomial heirarchy at the second [MASK], which is considered only"],
+          ["""we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data,
+    and their combined [MASK]"""]
+         ]
+input_box=gr.inputs.Textbox(lines=20,placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",label="Enter the masked text:")
+interface=gr.Interface(fn=unmask_words,inputs=[input_box,
+                                               gr.inputs.Slider(1,10,1,5,label="No of Suggestions:")],
+                       outputs=gr.outputs.Label(label="top words:"),
+                       examples=examples,
+                      title="CC-Bert MLM",description=description)
+interface.launch(debug=True,share=True,auth=("test", "test"))