File size: 1,595 Bytes
7d81e6b
 
 
 
 
76e5451
 
7d81e6b
76e5451
 
 
 
 
 
7d81e6b
76e5451
 
 
 
 
 
7d81e6b
76e5451
7d81e6b
76e5451
7d81e6b
 
76e5451
7d81e6b
76e5451
7d81e6b
76e5451
 
7d81e6b
 
 
 
 
e7d3e05
 
7d81e6b
 
 
 
 
 
 
 
 
 
76e5451
7d81e6b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from functools import lru_cache

import gradio as gr
from sklearn.neighbors import NearestNeighbors
from transformers import AutoModel, AutoTokenizer


# Hugging Face checkpoints of the same RoBERTa architecture trained on
# Twitter data from different time periods (2019 vs. mid-2022), so the
# same word can be compared across epochs.
models = ['cardiffnlp/twitter-roberta-base-jun2022',
 'cardiffnlp/twitter-roberta-base-2019-90m']

@lru_cache(maxsize=None)
def topk_model(MODEL):
    """Build a cosine nearest-neighbour index over *MODEL*'s vocabulary.

    Loads the Hugging Face checkpoint named by ``MODEL``, takes its static
    input-embedding table (one row per vocabulary token) and fits a
    500-nearest-neighbour index over it.

    Results are memoized per model name: without the cache this function
    re-downloads the checkpoint and re-fits the k-NN index over the entire
    vocabulary on every UI interaction, which takes minutes.

    Returns:
        (distances, indices, tokenizer) where ``distances`` and ``indices``
        are ``(vocab_size, 500)`` arrays from ``NearestNeighbors.kneighbors``
        and ``tokenizer`` is the model's ``AutoTokenizer``.
    """
    model = AutoModel.from_pretrained(MODEL)
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    # Static (context-free) word-embedding matrix, shape (vocab_size, dim).
    embedding_matrix = model.embeddings.word_embeddings.weight.detach().numpy()

    knn_model = NearestNeighbors(n_neighbors=500,
                                 metric='cosine',
                                 algorithm='auto',
                                 n_jobs=3)
    nbrs = knn_model.fit(embedding_matrix)

    # Neighbours of every vocabulary row at once; row i lists the 500
    # tokens closest to token i in embedding space.
    distances, indices = nbrs.kneighbors(embedding_matrix)

    return distances, indices, tokenizer


title = "How does a word's meaning change with time?"

def topk(word, model):
    """Return the 500 vocabulary tokens nearest to *word* in *model*.

    Args:
        word: surface word typed by the user.
        model: Hugging Face checkpoint name (one of ``models``).

    Returns:
        List of 500 decoded token strings, closest first.
    """
    distances, indices, tokenizer = topk_model(model)

    # encode() frames the input as [BOS, first-subword, ..., EOS], so
    # position 1 is the first sub-word piece of *word*.  NOTE(review):
    # multi-piece words are represented only by their first piece — confirm
    # this is the intended behaviour.
    token_ids = tokenizer.encode(word)
    return [tokenizer.decode(i) for i in indices[token_ids[1]]]

# Gradio UI: a word box plus model picker; clicking "Compute" shows the
# 500 nearest vocabulary tokens for the chosen model epoch.
with gr.Blocks() as demo:
    gr.Markdown(f" # {title}")
    with gr.Row():
        word = gr.Textbox(label="Word")
        # Name the dropdown so it can be wired to the click handler below.
        model_choice = gr.Dropdown(models, label="Model")
    with gr.Row():
        greet_btn = gr.Button("Compute")
    with gr.Row():
        # gr.outputs.Textbox() belongs to the legacy pre-3.0 namespace and
        # raises AttributeError on current Gradio; use gr.Textbox directly.
        output = gr.Textbox(label="Nearest tokens")
    greet_btn.click(fn=topk, inputs=[word, model_choice], outputs=output)

demo.launch()