import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors

# Load model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Global storage
global_docs = []
nn_model = None
doc_embeddings = None

# Load documents: one document per non-empty line
def load_documents(file_obj):
    docs = [line.strip() for line in file_obj if line.strip()]
    return docs

# Build index using Nearest Neighbors
def build_index(docs):
    embeddings = model.encode(docs)
    nn = NearestNeighbors(n_neighbors=min(3, len(docs)), metric='euclidean')
    nn.fit(embeddings)
    return nn, embeddings

# Search
def semantic_search(query, top_k=3):
    if nn_model is None or not global_docs:
        return "Please upload and index a file first."
    query_vec = model.encode([query])
    # Never ask for more neighbours than there are indexed documents
    k = min(top_k, len(global_docs))
    distances, indices = nn_model.kneighbors(query_vec, n_neighbors=k)
    results = [
        f"Rank {i+1}:\nDocument: {global_docs[idx]}\nDistance: {distances[0][i]:.4f}\n"
        for i, idx in enumerate(indices[0])
    ]
    return "\n".join(results)

# Upload and index
def upload_and_index(file):
    global global_docs, nn_model, doc_embeddings
    # gr.File may pass a filepath string or a tempfile wrapper with a .name attribute,
    # so resolve the path and read the file from disk instead of calling file.read()
    path = file if isinstance(file, str) else file.name
    with open(path, "r", encoding="utf-8") as f:
        global_docs = load_documents(f)
    if not global_docs:
        return "The uploaded file contains no non-empty lines."
    nn_model, doc_embeddings = build_index(global_docs)
    return "Documents indexed successfully!"

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🔍 Semantic Search in Academic Papers (No FAISS)")

    file_input = gr.File(label="Upload .txt file", file_types=[".txt"])
    upload_button = gr.Button("Upload & Index")
    upload_output = gr.Textbox(label="Status")

    query_input = gr.Textbox(label="Enter your query")
    search_button = gr.Button("Search")
    search_output = gr.Textbox(label="Results")

    upload_button.click(upload_and_index, inputs=file_input, outputs=upload_output)
    search_button.click(semantic_search, inputs=query_input, outputs=search_output)

demo.launch()
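
# --- Optional sanity check (separate snippet, not part of the app) ---
# A minimal sketch of the same embed-then-nearest-neighbour flow used above,
# run outside the Gradio UI. The sample sentences and the query are made-up
# examples, not data from the app; run this in its own Python session.
#
#     from sentence_transformers import SentenceTransformer
#     from sklearn.neighbors import NearestNeighbors
#
#     model = SentenceTransformer('all-MiniLM-L6-v2')
#
#     docs = [
#         "Transformers use self-attention to model long-range dependencies.",
#         "Convolutional networks are widely used for image classification.",
#         "BM25 is a classic lexical ranking function for text retrieval.",
#     ]
#
#     # Encode the corpus and fit a nearest-neighbour index on the embeddings.
#     embeddings = model.encode(docs)
#     nn = NearestNeighbors(n_neighbors=2, metric='euclidean')
#     nn.fit(embeddings)
#
#     # Encode the query and retrieve the two closest documents.
#     query_vec = model.encode(["How does attention work in neural networks?"])
#     distances, indices = nn.kneighbors(query_vec, n_neighbors=2)
#     for rank, (dist, idx) in enumerate(zip(distances[0], indices[0]), start=1):
#         print(f"Rank {rank}: {docs[idx]} (distance {dist:.4f})")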