Corran committed on
Commit
4ccdc70
·
verified ·
1 Parent(s): 79a1852

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ from usearch.index import Index
4
+ from sentence_transformers import SentenceTransformer
5
+ from datasets import load_dataset
6
+ from sentencex import segment
7
+
8
# Sentence-embedding model used for both the reference phrases and user text.
model = SentenceTransformer("Corran/SciGenAllMiniLM")

train = load_dataset("Corran/SciGenColbertTriplets")["train"]

# Unique query strings from the training split. Sorting makes the
# position -> phrase mapping deterministic across runs.
rf = sorted(set(train["query"]))
rf_emb = model.encode(rf)

# Vector index over the reference phrases. Each vector is stored under its
# position in `rf`, so a match's key can be used directly as `rf[key]`.
index = Index(ndim=rf_emb[0].size)
index.add(range(len(rf)), rf_emb)
16
+
17
def get_matches(input):
    """Return the closest reference phrases for the given text.

    Args:
        input: A string, or a list of strings, to embed with ``model`` and
            look up in ``index``. (The name shadows the builtin; it is kept
            so existing keyword callers continue to work.)

    Returns:
        A list of ``(phrase, distance)`` tuples, where ``phrase`` is taken
        from ``rf`` by the match key. For a list with more than one item
        the list holds the first match of each item, in input order.
        Otherwise it holds every match returned for the single query
        (up to the 4 requested). An empty list yields an empty list.
    """
    # Nothing to embed or search for an empty batch.
    if isinstance(input, list) and not input:
        return []

    emb = model.encode(input, batch_size=128)
    matches = index.search(emb, 4)

    if isinstance(input, list) and len(input) > 1:
        # Batched search: keep only the first match of each query.
        matches = [m[0] for m in matches]
    else:
        # NOTE(review): assumes a single query yields a flat sequence of
        # matches rather than a batch -- confirm against the usearch version
        # in use.
        matches = list(matches)

    return [(rf[m.key], m.distance) for m in matches]
26
+
27
+
28
+
29
def return_rf_scores(paragraph):
    """Map each sentence of *paragraph* to its match from ``get_matches``.

    The paragraph is split with ``segment("en", ...)``, the resulting
    sentences are passed to ``get_matches`` in one call, and sentences are
    paired with the returned entries positionally. If the same sentence text
    occurs more than once, the later pairing overwrites the earlier one.

    Args:
        paragraph: Text to split into sentences.

    Returns:
        A dict keyed by sentence text; each value is the corresponding
        entry returned by ``get_matches``.
    """
    sentences = list(segment("en", paragraph))
    return dict(zip(sentences, get_matches(sentences)))
39
+
40
# Gradio UI: a text input wired to return_rf_scores, shown as JSON.
demo = gr.Interface(
    fn=return_rf_scores,
    inputs="text",
    outputs="json",
)
demo.launch()