bluuebunny committed
Commit e9ce7de · verified · 1 Parent(s): a1f0d23

Update app.py

Files changed (1)
  1. app.py +26 -31
app.py CHANGED
@@ -1,38 +1,33 @@
- # Import required libraries
- import gradio as gr # For interface
- from sentence_transformers import SentenceTransformer # For embedding the text
- import torch # For gpu
- import numpy as np

- # Make the app device agnostic
- device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

- # Load a pretrained Sentence Transformer model and move it to the appropriate device
- model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
- model = model.to(device)

- # Function that does the embedding
- def predict(input_text):

-     # Calculate embeddings by calling model.encode(), specifying the device
-     embeddings = model.encode(input_text, device=device)
-
-     # Set the print options to avoid truncation and use fixed-point notation
-     np.set_printoptions(threshold=np.inf, precision=8, suppress=True, floatmode='fixed')

-     # Convert the array to a string for display
-     embeddings_str = np.array2string(embeddings, separator=',')
-
-     return embeddings_str

- # Gradio app interface
- gradio_app = gr.Interface(
-     predict,
-     inputs=gr.Textbox(placeholder="Insert Text", label='Text'),
-     outputs=gr.Textbox(max_lines=1, placeholder='Vector of dimensions 1024', label='Vector', show_label=True, show_copy_button=True),
-     title="Text to Vector Generator",
-     description="Embedding model: mixedbread-ai/mxbai-embed-large-v1."
- )

- if __name__ == "__main__":
-     gradio_app.launch()

+ from sentence_transformers import SentenceTransformer
+ import gradio as gr
+ import numpy as np
+ import torch

+ # Function to convert dense vector to binary vector
+ def dense_to_binary(dense_vector):
+     return np.packbits(np.where(dense_vector >= 0, 1, 0)).tobytes()

+ # Load the model
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1", device=device)

+ def embed(text: str):

+     # Float embedding
+     float_vector = model.encode(text, convert_to_numpy=True)

+     # Convert to binary vector
+     binary_vector = dense_to_binary(float_vector)

+     # Return both vectors
+     return float_vector, binary_vector
+
+
+ # Gradio interface
+ interface = gr.Interface(
+     fn=embed,
+     inputs=["text"],
+     outputs=["json", "text"]
+ )

+ interface.launch(server_port=7860)
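
Note (not part of the commit): the new dense_to_binary helper keeps only the sign of each embedding dimension. mxbai-embed-large-v1 produces a 1024-dimensional float vector, so packing one bit per dimension with np.packbits yields 128 bytes; the 0/1 sign pattern can be recovered with np.unpackbits, but the float magnitudes cannot. A minimal round-trip sketch:

# Illustrative sketch, not part of app.py: the sign-bit packing used by dense_to_binary.
import numpy as np

rng = np.random.default_rng(0)
float_vector = rng.standard_normal(1024).astype(np.float32)  # stand-in for a model embedding

# Same packing as dense_to_binary: dimensions >= 0 map to 1, negatives to 0
binary_vector = np.packbits(np.where(float_vector >= 0, 1, 0)).tobytes()
print(len(binary_vector))  # 128 bytes for a 1024-dim embedding

# The 0/1 sign pattern is recoverable; the original float values are not
bits = np.unpackbits(np.frombuffer(binary_vector, dtype=np.uint8))
assert np.array_equal(bits, (float_vector >= 0).astype(np.uint8))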
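
With the app running on port 7860 (as set by interface.launch above), embed can also be called programmatically. A sketch using the gradio_client package, assuming a local launch and the default /predict endpoint that gr.Interface exposes; the sample text is arbitrary:

# Illustrative sketch, assuming the app is running locally on port 7860.
from gradio_client import Client

client = Client("http://127.0.0.1:7860/")

# gr.Interface registers its function under /predict by default;
# result holds the two outputs of embed as serialized by Gradio.
result = client.predict("hello world", api_name="/predict")
print(result)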