Spaces:

KaiserML
/

Demo-Sci-Rhetoric-Classifier

Sleeping

File size: 2,406 Bytes

4ccdc70
 
 
 
 
 
192fef8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ccdc70
35cc482
4ccdc70
eec35d0
4ccdc70
eec35d0
4ccdc70
 
192fef8
 
4ccdc70
192fef8
4ccdc70
192fef8
 
 
 
 
 
 
 
 
 
4ccdc70
 
53fe126
4ccdc70
53fe126
4ccdc70
192fef8
 
 
4ccdc70
192fef8
 
 
 
 
 
 
4ccdc70
 
 
192fef8
 
4ccdc70

import gradio as gr
import numpy as np
from usearch.index import Index
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
from sentencex import segment
from usearch.index import search, MetricKind, Matches, BatchMatches

# Opening fragment of the HTML document returned to the UI: a <style> block
# that defines the hover tooltip (a `.tooltip` container whose hidden
# `.tooltiptext` child becomes visible and fades in on hover), followed by the
# opening <body> tag. The fragment is deliberately left unclosed; the code
# that uses it appends its own markup and the closing "</body></html>".
# NOTE(review): `width: "100%"` below is a quoted value; CSS percentages are
# normally written unquoted, so browsers presumably ignore that declaration --
# confirm whether a full-width tooltip was intended.
HTML_Output = """<html><head><style>/* Tooltip container */
.tooltip {
  position: relative;
  width: 600px;
  display: inline-block;
  border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
}

/* Tooltip text */
.tooltip .tooltiptext {
  visibility: hidden;
  width: "100%";
  background-color: #555;
  color: #34e1eb;
  text-align: center;
  padding: 5px 0;
  border-radius: 6px;

  /* Position the tooltip text */
  position: absolute;
  z-index: 1;
  top: 125%;
  left: 50%;
  margin-left: -60px;

  /* Fade in tooltip */
  opacity: 0;
  transition: opacity 0.3s;
}

/* Tooltip arrow */
.tooltip .tooltiptext::before {
  content: "";
  position: absolute;
  bottom: 100%;
  left: 50%;
  margin-left: -5px;
  border-width: 5px;
  border-style: solid;
  border-color: #555 transparent transparent transparent;
}

/* Show the tooltip text when you mouse over the tooltip container */
.tooltip:hover .tooltiptext {
  visibility: visible;
  opacity: 1;
}</style></head><body>"""

# Sentence-embedding model, loaded once at import time and reused per request.
model = SentenceTransformer("Corran/SciGenAllMiniTripletLoss")

# Rhetoric-function labels from the "train" split, materialised as a plain
# list so a label can be looked up by the integer key of a search hit.
rf = list(
    load_dataset("Corran/RhetoricFunctionsList")["train"]["rhetoric_function"]
)

# One embedding per label; this is the dataset that queries are searched against.
rf_emb = model.encode(rf)


def get_matches(inputs, top_k=3):
    """Return the closest rhetoric-function labels for each input sentence.

    Args:
        inputs: Iterable of sentence strings to classify.
        top_k: Number of nearest labels requested per sentence (default 3,
            the value that was previously hard-coded).

    Returns:
        A list with one entry per sentence, in input order. Each entry is a
        list of ``(label, distance)`` tuples as returned by the search, where
        ``label`` comes from the module-level ``rf`` list and ``distance`` is
        the match distance rounded to two decimals and converted to ``str``.
        An empty ``inputs`` yields an empty list.
    """
    sentences = list(inputs)
    if not sentences:
        # Nothing to embed; skip the model call entirely.
        return []

    # Encode every sentence in a single call so that `batch_size` actually
    # batches the work instead of running the model once per sentence.
    embeddings = model.encode(sentences, batch_size=128)

    paragraph_matches = []
    for embedding in embeddings:
        # Each row is searched on its own (one 1-D query at a time), so the
        # result is always a single set of matches exposing .key / .distance.
        nearest = search(rf_emb, embedding, top_k, MetricKind.L2sq, exact=True)
        paragraph_matches.append(
            [(rf[hit.key], str(round(hit.distance, 2))) for hit in nearest]
        )

    return paragraph_matches


def return_rf_scores(abstract):
    """Render an abstract as HTML with a rhetoric-function tooltip per sentence.

    The abstract is split into English sentences with ``segment``; each
    sentence is passed through ``get_matches`` and emitted as a
    ``<div class="tooltip">`` whose hidden ``<span class="tooltiptext">``
    lists the matched labels and their distances.

    Args:
        abstract: Free text entered by the user.

    Returns:
        A single HTML string: ``HTML_Output``, one tooltip ``<div>`` per
        sentence, then the closing ``</body></html>``.
    """
    # Imported locally so the escaping fix does not depend on the file's
    # top-level import block.
    from html import escape

    sentences = list(segment("en", abstract))
    matches = get_matches(sentences)

    # Collect fragments and join once instead of growing a string with `+=`.
    parts = [HTML_Output]
    for sentence, sentence_matches in zip(sentences, matches):
        # Labels and the sentence are escaped because the result is rendered
        # as raw HTML and the sentence text comes straight from the user.
        tooltip = "\n".join(
            f"{escape(str(label))} : {distance}<br>"
            for label, distance in sentence_matches
        )
        parts.append(
            f"""<div class="tooltip">{escape(sentence)}
  <span class="tooltiptext">{tooltip}</span>
</div><br>"""
        )

    parts.append("</body></html>")
    return "".join(parts)


# Gradio UI: a single text input wired to return_rf_scores, with the returned
# string displayed as HTML. The app is launched as soon as this module runs.
demo = gr.Interface(
    fn=return_rf_scores,
    inputs="text",
    outputs="html",
)
demo.launch()