File size: 2,406 Bytes
4ccdc70
 
 
 
 
 
192fef8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ccdc70
35cc482
4ccdc70
eec35d0
4ccdc70
eec35d0
4ccdc70
 
192fef8
 
4ccdc70
192fef8
4ccdc70
192fef8
 
 
 
 
 
 
 
 
 
4ccdc70
 
53fe126
4ccdc70
53fe126
4ccdc70
192fef8
 
 
4ccdc70
192fef8
 
 
 
 
 
 
4ccdc70
 
 
192fef8
 
4ccdc70
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import gradio as gr
import numpy as np
from usearch.index import Index
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
from sentencex import segment
from usearch.index import search, MetricKind, Matches, BatchMatches

# Opening fragment of the HTML document returned to Gradio. It carries the CSS
# for the hover tooltips and deliberately ends with an open <body>:
# return_rf_scores appends one ".tooltip" block per sentence and then the
# closing </body></html>.
#
# CSS notes:
#   * the tooltip width is an unquoted percentage (a quoted "100%" is not a
#     valid CSS length and is dropped by browsers);
#   * the tooltip is placed below the sentence (top: 125%) and aligned with its
#     left edge so the full-width box stays inside the 600px container;
#   * the arrow sits on top of the tooltip box, so its *bottom* border is the
#     coloured one, making it point up towards the hovered sentence.
HTML_Output = """<html><head><style>/* Tooltip container */
.tooltip {
  position: relative;
  width: 600px;
  display: inline-block;
  border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
}

/* Tooltip text */
.tooltip .tooltiptext {
  visibility: hidden;
  width: 100%;
  background-color: #555;
  color: #34e1eb;
  text-align: center;
  padding: 5px 0;
  border-radius: 6px;

  /* Position the tooltip text */
  position: absolute;
  z-index: 1;
  top: 125%;
  left: 0;

  /* Fade in tooltip */
  opacity: 0;
  transition: opacity 0.3s;
}

/* Tooltip arrow */
.tooltip .tooltiptext::before {
  content: "";
  position: absolute;
  bottom: 100%;
  left: 50%;
  margin-left: -5px;
  border-width: 5px;
  border-style: solid;
  border-color: transparent transparent #555 transparent;
}

/* Show the tooltip text when you mouse over the tooltip container */
.tooltip:hover .tooltiptext {
  visibility: visible;
  opacity: 1;
}</style></head><body>"""

# Sentence-embedding model shared by the reference labels and the user input.
model = SentenceTransformer("Corran/SciGenAllMiniTripletLoss")

# Rhetoric-function labels from the "train" split, materialised as a plain
# list so they can be looked up by the integer keys returned from the search.
rf = list(
    load_dataset("Corran/RhetoricFunctionsList")["train"]["rhetoric_function"]
)

# Embeddings of every reference label, computed once when the module loads.
rf_emb = model.encode(rf)


def get_matches(inputs, top_k=3):
    """Look up the nearest rhetoric functions for each input sentence.

    Every sentence is embedded with the module-level ``model`` and compared
    against the precomputed ``rf_emb`` label embeddings using an exact
    squared-L2 search.

    Args:
        inputs: Iterable of sentence strings to classify.
        top_k: Number of reference labels to retrieve per sentence
            (defaults to 3, the value that was previously hard-coded).

    Returns:
        A list with one entry per input sentence. Each entry is a list of
        ``(rhetoric_function, distance)`` tuples, where ``rhetoric_function``
        is taken from ``rf`` and ``distance`` is the match distance rounded to
        two decimals and converted to a string.
    """
    paragraph_matches = []

    for sentence in inputs:
        # Sentences are encoded one at a time so that each search call yields
        # the matches for exactly one query.
        embedding = model.encode(sentence, batch_size=128)
        matches = search(rf_emb, embedding, top_k, MetricKind.L2sq, exact=True)

        paragraph_matches.append(
            [(rf[match.key], str(round(match.distance, 2))) for match in matches]
        )

    return paragraph_matches


def return_rf_scores(abstract):
    """Render an abstract as HTML with per-sentence rhetoric-function tooltips.

    The abstract is split into English sentences, each sentence is matched
    against the reference rhetoric functions via ``get_matches``, and the
    result is emitted as one hoverable ``.tooltip`` block per sentence whose
    tooltip lists the matched labels and their distances.

    Args:
        abstract: Free text entered by the user; ``None`` is treated as an
            empty string.

    Returns:
        A complete HTML document string: ``HTML_Output`` followed by the
        sentence blocks and the closing ``</body></html>``.
    """
    # Function-scope import so this block carries its own dependency.
    from html import escape

    sentences = list(segment("en", abstract or ""))
    matches = get_matches(sentences)

    parts = [HTML_Output]
    for sentence, sentence_matches in zip(sentences, matches):
        # Escape everything interpolated into markup: the sentence is raw user
        # input, and a stray "<" or "&" would otherwise corrupt the page.
        tooltip = "\n".join(
            f"{escape(str(label))} : {escape(str(distance))}<br>"
            for label, distance in sentence_matches
        )
        parts.append(
            f"""<div class="tooltip">{escape(sentence)}
  <span class="tooltiptext">{tooltip}</span>
</div><br>"""
        )

    parts.append("</body></html>")

    # Single join instead of repeated string concatenation.
    return "".join(parts)


# Gradio UI: a single free-text input whose result is rendered as HTML by
# return_rf_scores. The server is started as soon as the module is executed.
demo = gr.Interface(
    fn=return_rf_scores,
    inputs="text",
    outputs="html",
)
demo.launch()