Corran committed on
Commit
192fef8
·
verified ·
1 Parent(s): eec35d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -14
app.py CHANGED
@@ -4,6 +4,55 @@ from usearch.index import Index
4
  from sentence_transformers import SentenceTransformer
5
  from datasets import load_dataset
6
  from sentencex import segment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  model = SentenceTransformer("Corran/SciGenAllMiniLM")
9
 
@@ -11,31 +60,42 @@ rf = load_dataset("Corran/RhetoricFunctionsList")['train']['rhetoric_function']
11
 
12
  rf = list(rf)
13
  rf_emb = model.encode(rf)
14
- index = Index(ndim=rf_emb[0].size)
15
- index.add(range(len(rf)), rf_emb)
16
 
17
- def get_matches(input):
 
18
  global index, model, rf
19
- emb = model.encode(input,batch_size=128)
20
- matches = index.search(emb,4)
21
- if type(input)==list and len(input)>1:
22
- matches = [m[0] for m in matches]
23
- else:
24
- matches = [m for m in matches]
25
- return [(rf[m.key],m.distance) for m in matches]
26
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  def return_rf_scores(paragraph):
30
 
31
  sentences = list(segment("en", paragraph))
32
  matches = get_matches(sentences)
33
-
34
- output = {}
 
35
  for s,m in zip(sentences,matches):
36
- output[s] = m
 
 
 
 
 
 
37
 
38
  return output
39
 
40
- demo = gr.Interface(fn=return_rf_scores, inputs="text", outputs="json")
 
41
  demo.launch()
 
4
  from sentence_transformers import SentenceTransformer
5
  from datasets import load_dataset
6
  from sentencex import segment
7
+ from usearch.index import search, MetricKind, Matches, BatchMatches
8
+
# Document prefix emitted before the per-sentence tooltip markup. It opens
# <html>/<head>/<style>, defines the tooltip CSS, and leaves <body> open so the
# caller can append content followed by "</body></html>".
#
# Fixes relative to the previous stylesheet:
#   * `width: "100%"` was a quoted value, which is invalid CSS and dropped by
#     browsers; it is now an unquoted percentage.
#   * With a full-width bubble the old `left: 50%; margin-left: -60px` offset
#     (meant for a 120px-wide box) no longer centres anything, so the bubble is
#     aligned to the container's left edge instead.
#   * The bubble is placed below the text (`top: 125%`), so the arrow on its
#     top edge must point up: only the bottom border is coloured.
HTML_Output = """<html><head><style>/* Tooltip container */
.tooltip {
  position: relative;
  width: 600px;
  display: inline-block;
  border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
}

/* Tooltip text */
.tooltip .tooltiptext {
  visibility: hidden;
  width: 100%;
  background-color: #555;
  color: #34e1eb;
  text-align: center;
  padding: 5px 0;
  border-radius: 6px;

  /* Position the tooltip text */
  position: absolute;
  z-index: 1;
  top: 125%;
  left: 0;

  /* Fade in tooltip */
  opacity: 0;
  transition: opacity 0.3s;
}

/* Tooltip arrow */
.tooltip .tooltiptext::before {
  content: "";
  position: absolute;
  bottom: 100%;
  left: 50%;
  margin-left: -5px;
  border-width: 5px;
  border-style: solid;
  border-color: transparent transparent #555 transparent;
}

/* Show the tooltip text when you mouse over the tooltip container */
.tooltip:hover .tooltiptext {
  visibility: visible;
  opacity: 1;
}</style></head><body>"""
56
 
57
  model = SentenceTransformer("Corran/SciGenAllMiniLM")
58
 
 
60
 
61
  rf = list(rf)
62
  rf_emb = model.encode(rf)
 
 
63
 
64
+
def get_matches(inputs, k=3):
    """Find the closest rhetoric functions for each input sentence.

    Args:
        inputs: An iterable of sentences. A single bare string is treated as
            one sentence instead of being iterated character by character.
        k: Number of nearest entries of ``rf`` to return per sentence
            (defaults to 3, the previously hard-coded value).

    Returns:
        A list with one entry per sentence, in input order. Each entry is a
        list of ``(rhetoric_function, distance)`` tuples as produced by the
        search, where ``distance`` is the squared-L2 distance rounded to two
        decimals and rendered as a string.
    """
    if isinstance(inputs, str):
        inputs = [inputs]
    sentences = list(inputs)
    if not sentences:
        # Nothing to encode; avoid calling the model with an empty batch.
        return []

    # Encode every sentence in a single call so that batch_size actually
    # batches, instead of invoking the model once per sentence.
    embeddings = model.encode(sentences, batch_size=128)

    paragraph_matches = []
    for embedding in embeddings:
        # Exact search of one query vector against the precomputed
        # rhetoric-function embeddings.
        matches = search(rf_emb, embedding, k, MetricKind.L2sq, exact=True)
        paragraph_matches.append(
            [(rf[match.key], str(round(match.distance, 2))) for match in matches]
        )

    return paragraph_matches
79
 
80
 
def return_rf_scores(paragraph):
    """Render a paragraph as HTML with per-sentence rhetoric-function tooltips.

    The paragraph is split into English sentences, each sentence is matched
    via ``get_matches``, and every sentence is emitted as a ``.tooltip`` block
    whose hover text lists its matches as ``label : distance`` lines.

    Args:
        paragraph: The raw text entered by the user.

    Returns:
        A complete HTML document string: ``HTML_Output`` followed by one block
        per sentence and the closing ``</body></html>``.
    """
    # Local import so this block stands alone without touching file imports.
    from html import escape

    sentences = list(segment("en", paragraph))
    matches = get_matches(sentences)

    parts = [HTML_Output]
    for sentence, sentence_matches in zip(sentences, matches):
        # The previous format string left a stray ")" after every distance.
        tooltip = "\n".join(
            f"{escape(str(label))} : {escape(str(distance))}<br>"
            for label, distance in sentence_matches
        )
        # The sentence is user-supplied text: escape it so characters such as
        # "<" or "&" are displayed literally rather than parsed as markup.
        parts.append(
            f'<div class="tooltip">{escape(sentence)}\n'
            f'<span class="tooltiptext">{tooltip}</span>\n'
            "</div><br>"
        )

    parts.append("</body></html>")
    return "".join(parts)
98
 
99
+
100
+ demo = gr.Interface(fn=return_rf_scores, inputs="text", outputs="html")
101
  demo.launch()