Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,55 @@ from usearch.index import Index
|
|
4 |
from sentence_transformers import SentenceTransformer
|
5 |
from datasets import load_dataset
|
6 |
from sentencex import segment
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
model = SentenceTransformer("Corran/SciGenAllMiniLM")
|
9 |
|
@@ -11,31 +60,42 @@ rf = load_dataset("Corran/RhetoricFunctionsList")['train']['rhetoric_function']
|
|
11 |
|
12 |
rf = list(rf)
|
13 |
rf_emb = model.encode(rf)
|
14 |
-
index = Index(ndim=rf_emb[0].size)
|
15 |
-
index.add(range(len(rf)), rf_emb)
|
16 |
|
17 |
-
|
|
|
18 |
global index, model, rf
|
19 |
-
|
20 |
-
matches = index.search(emb,4)
|
21 |
-
if type(input)==list and len(input)>1:
|
22 |
-
matches = [m[0] for m in matches]
|
23 |
-
else:
|
24 |
-
matches = [m for m in matches]
|
25 |
-
return [(rf[m.key],m.distance) for m in matches]
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
|
29 |
def return_rf_scores(paragraph):
|
30 |
|
31 |
sentences = list(segment("en", paragraph))
|
32 |
matches = get_matches(sentences)
|
33 |
-
|
34 |
-
output =
|
|
|
35 |
for s,m in zip(sentences,matches):
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
return output
|
39 |
|
40 |
-
|
|
|
41 |
demo.launch()
|
|
|
4 |
from sentence_transformers import SentenceTransformer
|
5 |
from datasets import load_dataset
|
6 |
from sentencex import segment
|
7 |
+
from usearch.index import search, MetricKind, Matches, BatchMatches
|
8 |
+
|
9 |
+
HTML_Output = """<html><head><style>/* Tooltip container */
|
10 |
+
.tooltip {
|
11 |
+
position: relative;
|
12 |
+
width: 600px;
|
13 |
+
display: inline-block;
|
14 |
+
border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
|
15 |
+
}
|
16 |
+
|
17 |
+
/* Tooltip text */
|
18 |
+
.tooltip .tooltiptext {
|
19 |
+
visibility: hidden;
|
20 |
+
width: "100%";
|
21 |
+
background-color: #555;
|
22 |
+
color: #34e1eb;
|
23 |
+
text-align: center;
|
24 |
+
padding: 5px 0;
|
25 |
+
border-radius: 6px;
|
26 |
+
|
27 |
+
/* Position the tooltip text */
|
28 |
+
position: absolute;
|
29 |
+
z-index: 1;
|
30 |
+
top: 125%;
|
31 |
+
left: 50%;
|
32 |
+
margin-left: -60px;
|
33 |
+
|
34 |
+
/* Fade in tooltip */
|
35 |
+
opacity: 0;
|
36 |
+
transition: opacity 0.3s;
|
37 |
+
}
|
38 |
+
|
39 |
+
/* Tooltip arrow */
|
40 |
+
.tooltip .tooltiptext::before {
|
41 |
+
content: "";
|
42 |
+
position: absolute;
|
43 |
+
bottom: 100%;
|
44 |
+
left: 50%;
|
45 |
+
margin-left: -5px;
|
46 |
+
border-width: 5px;
|
47 |
+
border-style: solid;
|
48 |
+
border-color: #555 transparent transparent transparent;
|
49 |
+
}
|
50 |
+
|
51 |
+
/* Show the tooltip text when you mouse over the tooltip container */
|
52 |
+
.tooltip:hover .tooltiptext {
|
53 |
+
visibility: visible;
|
54 |
+
opacity: 1;
|
55 |
+
}</style></head><body>"""
|
56 |
|
57 |
model = SentenceTransformer("Corran/SciGenAllMiniLM")
|
58 |
|
|
|
60 |
|
61 |
rf = list(rf)
|
62 |
rf_emb = model.encode(rf)
|
|
|
|
|
63 |
|
64 |
+
|
65 |
+
def get_matches(inputs):
    """Look up the nearest rhetoric functions for each input sentence.

    Every sentence is embedded with the module-level ``model`` and compared
    against the precomputed ``rf_emb`` matrix using an exact squared-L2
    search that requests three matches per sentence.

    Args:
        inputs: Iterable of sentence strings.

    Returns:
        A list with one entry per sentence. Each entry is a list of
        ``(rhetoric_function, distance)`` tuples, where ``distance`` is the
        match distance rounded to two decimals and converted to ``str``.
    """
    paragraph_matches = []
    # Sentences are searched one at a time so each result is the flat set of
    # matches for a single query vector (a batched search would return a
    # differently shaped result object).
    for sentence in inputs:
        emb = model.encode(sentence, batch_size=128)
        matches = search(rf_emb, emb, 3, MetricKind.L2sq, exact=True)
        # match.key is used as the position of the matched entry in ``rf``.
        paragraph_matches.append(
            [(rf[match.key], str(round(match.distance, 2))) for match in matches]
        )
    return paragraph_matches
|
79 |
|
80 |
|
81 |
def return_rf_scores(paragraph):
    """Render a paragraph as HTML with a hover tooltip for every sentence.

    The paragraph is split into English sentences with ``segment``; each
    sentence is passed to ``get_matches`` and emitted as a ``.tooltip``
    element whose tooltip text lists the matched rhetoric functions and
    their distances.

    Args:
        paragraph: Free text entered by the user.

    Returns:
        A complete HTML document as a string: ``HTML_Output`` (styles and
        opening tags), one tooltip ``<div>`` per sentence, then the closing
        ``</body></html>`` tags.
    """
    # Local import keeps this block self-contained.
    from html import escape

    sentences = list(segment("en", paragraph))
    matches = get_matches(sentences)

    parts = [HTML_Output]
    for sentence, sentence_matches in zip(sentences, matches):
        # One "<label> : <distance><br>" line per match. Values are escaped
        # so that markup characters in the data cannot break the page.
        tooltip = "\n".join(
            f"{escape(str(label))} : {escape(str(distance))}<br>"
            for label, distance in sentence_matches
        )
        # The sentence is user input, so it must be escaped before being
        # embedded in the HTML output.
        parts.append(
            f'<div class="tooltip">{escape(sentence)}\n'
            f'<span class="tooltiptext">{tooltip}</span>\n'
            "</div><br>"
        )
    parts.append("</body></html>")
    return "".join(parts)
|
98 |
|
99 |
+
|
100 |
+
demo = gr.Interface(fn=return_rf_scores, inputs="text", outputs="html")
|
101 |
demo.launch()
|