Spaces:

codeblacks
/

embed

Running

File size: 1,265 Bytes

a5cc7a0
 
a7357eb
c44dddd
a5cc7a0
 
 
 
 
a7357eb
 
 
 
 
ef16b6d
a7357eb
 
 
 
 
 
 
 
 
a5cc7a0
 
0551a11
a5cc7a0
a7357eb
c44dddd
a5cc7a0
 
 
 
 
0551a11

from sentence_transformers import SentenceTransformer
import gradio as gr
import torch
import numpy as np

# Identifier of the pre-trained model handed to SentenceTransformer.
_MODEL_NAME = 'all-MiniLM-L6-v2'

# Build the encoder once at module import; the request handler below reuses
# this single instance.
embedding_model = SentenceTransformer(_MODEL_NAME)

# Request handler used as the Gradio interface callback
def generate_embeddings(text):
    """Embed every newline-separated chunk of ``text``.

    Args:
        text: Input string. It is split on newline characters and each
            resulting piece (empty pieces included) is passed to the model.

    Returns:
        A 2-tuple ``(values, shape)``: ``values`` is the embeddings tensor
        converted to nested Python lists, and ``shape`` is that tensor's
        ``torch.Size``. The tensor always carries an extra leading
        dimension of size 1 in front of whatever the model returned.
    """
    # One entry per input line
    lines = text.split('\n')

    # Request non-tensor output from the encoder, then wrap it in a tensor
    vectors = embedding_model.encode(lines, convert_to_tensor=False)

    # unsqueeze(0) prepends the size-1 leading dimension
    batched = torch.tensor(vectors).unsqueeze(0)

    return batched.tolist(), batched.shape

# Define the Gradio interface.
# The handler returns (nested list, torch.Size). torch.Size is a tuple
# subclass, and gr.Label only accepts a str/int/float label or a
# {label: confidence} dict -- anything else raises ValueError while the
# output is being post-processed. The shape is therefore shown in a Textbox,
# which renders whatever it receives via str().
interface = gr.Interface(
    fn=generate_embeddings,
    inputs=gr.Textbox(lines=5, placeholder="Enter text chunks here..."),
    outputs=[gr.JSON(label="Embeddings"), gr.Textbox(label="Shape")],
    title="Sentence Transformer Embeddings",
    description="Generate embeddings for input text chunks."
)

# Launch the Gradio app (kept at module level so running the file starts it)
interface.launch()