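# NOTE: the lines below appear to be hosting-page residue captured with the
# file, not program code; kept verbatim as comments so the module parses.
# Corran's picture
# Create app.py
# 4ccdc70 verified
# raw
# history blame
# 1.07 kB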
import gradio as gr
import numpy as np
from usearch.index import Index
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
from sentencex import segment
# Embedding model shared by the reference phrases and by incoming queries.
model = SentenceTransformer("Corran/SciGenAllMiniLM")
train = load_dataset("Corran/SciGenColbertTriplets")['train']

# Distinct values of the dataset's "query" column, sorted so that each
# phrase has a stable position that can double as its index key.
rf = sorted(set(train['query']))
rf_emb = model.encode(rf)

# Vector index over the phrase embeddings; key i refers back to rf[i].
# (The original referenced undefined names `classes` / `classes_emb` here,
# which raised NameError at import time.)
index = Index(ndim=rf_emb.shape[1])
index.add(np.arange(len(rf)), rf_emb)
def get_matches(input):
    """Find the closest reference phrases for one text or a batch of texts.

    The parameter keeps its original name ``input`` (which shadows the
    builtin) so that existing keyword callers continue to work.

    Args:
        input: A single string, or a list/tuple of strings. It is embedded
            with the module-level ``model`` and searched in ``index``.

    Returns:
        A list of ``(phrase, distance)`` tuples, where ``phrase`` comes from
        ``rf``. For a batch of more than one text the list has one tuple per
        text (its top hit). Otherwise it holds every hit returned for the
        single query (the search asks for 4). An empty batch gives ``[]``.
    """
    is_batch = isinstance(input, (list, tuple))
    if is_batch and not input:
        # Nothing to embed or search; avoid handing an empty array to the index.
        return []

    emb = model.encode(input, batch_size=128)
    matches = index.search(emb, 4)

    if is_batch and len(input) > 1:
        # Batched search: one result set per text; keep only the best hit.
        hits = [per_query[0] for per_query in matches]
    else:
        # NOTE(review): for a single vector the search result is iterated
        # directly as individual hits — confirm against the usearch version.
        hits = list(matches)

    # Plain int/float so the result indexes `rf` and serializes to JSON cleanly.
    return [(rf[int(hit.key)], float(hit.distance)) for hit in hits]
def return_rf_scores(paragraph):
    """Split ``paragraph`` into sentences and pair each with its match.

    Args:
        paragraph: English text; it is segmented with ``segment("en", ...)``.

    Returns:
        A dict mapping each sentence to the corresponding entry returned by
        ``get_matches``. If the same sentence text occurs more than once, the
        last occurrence's match is kept. Returns ``{}`` when segmentation
        yields no sentences.
    """
    sentences = list(segment("en", paragraph))
    if not sentences:
        # Nothing to look up; skip the embedding/search step entirely.
        return {}
    return dict(zip(sentences, get_matches(sentences)))
# Web UI: a text box whose contents are passed to return_rf_scores, with the
# returned dict rendered as JSON.
demo = gr.Interface(
    fn=return_rf_scores,
    inputs="text",
    outputs="json",
)
demo.launch()