# Spaces: Sleeping
import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors
# Sentence-embedding model used both to index documents and to embed queries.
# It is created once, when the module is loaded.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Index state shared between upload_and_index() and semantic_search().
# All three stay empty/None until a file has been indexed.
global_docs = []        # one entry per non-blank line of the uploaded file
doc_embeddings = None   # output of model.encode(global_docs)
nn_model = None         # NearestNeighbors instance fitted on doc_embeddings
# Function for semantic search
def semantic_search(query, top_k=3):
    """Return the indexed documents nearest to ``query`` as formatted text.

    Args:
        query: Free-text query. It is embedded with the module-level
            ``model`` before the neighbour lookup.
        top_k: Maximum number of neighbours to report. The value is capped
            at the number of indexed documents.

    Returns:
        A newline-joined string with one "Rank / Document / Distance" entry
        per neighbour, or a short prompt when the query is blank or nothing
        has been indexed yet.
    """
    if not query or not query.strip():
        return "Please enter a query."
    if nn_model is None or not global_docs:
        return "Please upload and index a file first."

    # kneighbors() raises ValueError when asked for more neighbours than
    # there are fitted samples, so cap the request for small corpora.
    k = min(top_k, len(global_docs))
    query_vec = model.encode([query])
    distances, indices = nn_model.kneighbors(query_vec, n_neighbors=k)
    results = [
        f"Rank {rank}:\nDocument: {global_docs[idx]}\nDistance: {dist:.4f}\n"
        for rank, (idx, dist) in enumerate(zip(indices[0], distances[0]), start=1)
    ]
    return "\n".join(results)
# Function to upload and index documents
def upload_and_index(file):
    """Read an uploaded text file and rebuild the nearest-neighbour index.

    Every non-blank line of the file (stripped of surrounding whitespace)
    becomes one document. The documents are embedded with the module-level
    ``model`` and a Euclidean ``NearestNeighbors`` index is fitted on them.

    Args:
        file: The uploaded file as handed over by the UI: either an object
            whose ``name`` attribute is the file path, or the path string
            itself. ``None`` means nothing was uploaded.

    Returns:
        A status message for the UI. Failures are reported as an
        ``"Error: ..."`` string instead of being raised.
    """
    global global_docs, nn_model, doc_embeddings

    if file is None:
        return "Please select a .txt file to upload first."

    # Accept both a file-like wrapper exposing ``.name`` and a bare path.
    path = getattr(file, "name", file)

    try:
        with open(path, "r", encoding="utf-8") as f:
            content = f.read()
        docs = [line.strip() for line in content.splitlines() if line.strip()]
        if not docs:
            return "No non-empty lines found in the file; nothing was indexed."

        embeddings = model.encode(docs)
        index = NearestNeighbors(n_neighbors=min(3, len(docs)), metric="euclidean")
        index.fit(embeddings)
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing the app.
        return f"Error: {str(e)}"

    # Commit only after every step succeeded, so the document list, the
    # embeddings and the fitted index can never describe different files.
    global_docs, doc_embeddings, nn_model = docs, embeddings, index
    return "Documents indexed successfully!"
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## π Semantic Search in Academic Papers (No FAISS)")

    # Indexing controls: pick a .txt file, then build the index from it.
    file_input = gr.File(label="Upload .txt file", file_types=[".txt"])
    upload_button = gr.Button("Upload & Index")
    upload_output = gr.Textbox(label="Status")
    upload_button.click(
        fn=upload_and_index,
        inputs=[file_input],
        outputs=[upload_output],
    )

    # Search controls: type a query and show the ranked matches.
    query_input = gr.Textbox(label="Enter your query")
    search_button = gr.Button("Search")
    search_output = gr.Textbox(label="Results")
    search_button.click(
        fn=semantic_search,
        inputs=[query_input],
        outputs=[search_output],
    )

# Start serving the interface
demo.launch()