"""Gradio app: semantic search over the lines of an uploaded text file.

Each non-blank line of the uploaded ``.txt`` file is treated as one passage.
Passages are embedded with a SentenceTransformer model and stored in a flat
(exact, brute-force) FAISS L2 index; queries are embedded with the same model
and matched against that index.
"""

import os

import faiss
import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the model once
model = SentenceTransformer('all-MiniLM-L6-v2')

# Global storage for documents and index.
# NOTE: these are module-level, so every browser session served by this
# process shares (and can replace) the same indexed document.
global_docs = []
global_index = None


# Load documents from uploaded file
def load_documents(file_obj):
    """Return the stripped, non-blank lines of *file_obj*.

    Args:
        file_obj: Any iterable of lines (an open text file, a list of
            strings, ...).

    Returns:
        A list with one entry per non-blank line, surrounding whitespace
        removed, in the original order.
    """
    return [line.strip() for line in file_obj if line.strip()]


# Build FAISS index
def build_index(docs):
    """Embed *docs* and store the vectors in a new flat L2 FAISS index.

    Args:
        docs: Non-empty list of passages to embed.

    Returns:
        Tuple ``(index, embeddings)``: the populated ``faiss.IndexFlatL2``
        and the float32 embedding matrix that was added to it (one row per
        passage).

    Raises:
        ValueError: If *docs* is empty; there would be no embedding
            dimension to size the index with.
    """
    if not docs:
        raise ValueError("Cannot build an index from an empty document list.")
    # FAISS expects a C-contiguous float32 matrix.
    embeddings = np.ascontiguousarray(model.encode(docs), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings


# Semantic search
def semantic_search(query, top_k=3):
    """Return the passages closest to *query* as a formatted string.

    Args:
        query: Free-text search query.
        top_k: Maximum number of passages to return.

    Returns:
        One "Rank / Document / L2 Distance" paragraph per hit (smaller
        distance means more similar), or a short hint when nothing has been
        indexed yet, the query is blank, or no hit is available.
    """
    # Snapshot the shared state so one request works on a consistent pair.
    index, docs = global_index, global_docs
    if index is None or not docs:
        return "Please upload a file first."
    if not query or not query.strip():
        return "Please enter a search query."

    # Never ask for more neighbours than there are passages: FAISS pads the
    # missing slots with id -1, which would silently index docs[-1].
    k = min(int(top_k), len(docs))
    if k < 1:
        return "No results found."

    query_embedding = np.ascontiguousarray(
        model.encode([query]), dtype=np.float32
    )
    distances, indices = index.search(query_embedding, k)

    results = []
    for doc_id, distance in zip(indices[0], distances[0]):
        if doc_id < 0:  # defensive: padded "no result" slot
            continue
        rank = len(results) + 1
        results.append(
            f"Rank {rank}:\nDocument: {docs[int(doc_id)]}\n"
            f"L2 Distance: {distance:.4f}\n"
        )
    if not results:
        return "No results found."
    return "\n".join(results)


def _read_uploaded_text(file):
    """Return the text of an upload, whichever form Gradio hands it over in.

    Depending on the Gradio version and the ``type`` of the ``gr.File``
    component, the callback may receive a path string, a temp-file wrapper
    whose ``name`` is the path, raw bytes, or a readable file-like object.
    ``utf-8-sig`` is used so a leading byte-order mark does not end up in the
    first passage; plain UTF-8 decodes identically.

    Raises:
        OSError: If the path cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    if isinstance(file, bytes):
        return file.decode("utf-8-sig")

    if isinstance(file, (str, os.PathLike)):
        path = file
    else:
        name = getattr(file, "name", None)
        path = name if isinstance(name, str) and os.path.isfile(name) else None

    if path is not None:
        with open(path, encoding="utf-8-sig") as handle:
            return handle.read()

    raw = file.read()
    return raw.decode("utf-8-sig") if isinstance(raw, bytes) else raw


# Handle file upload
def upload_and_index(file):
    """Read the uploaded file, index its lines, and return a status message.

    The module-level ``global_docs`` / ``global_index`` are replaced only
    after the new index has been built, so a failed upload leaves the
    previously indexed document searchable.

    Args:
        file: Value supplied by the ``gr.File`` component (``None`` when
            nothing was uploaded).

    Returns:
        A human-readable status string for the UI.
    """
    global global_docs, global_index

    if file is None:
        return "Please select a file to upload."
    try:
        text = _read_uploaded_text(file)
    except (OSError, UnicodeDecodeError) as exc:
        return f"Could not read the file as UTF-8 text: {exc}"

    docs = load_documents(text.splitlines())
    if not docs:
        return "The uploaded file contains no text to index."

    index, _ = build_index(docs)
    global_docs, global_index = docs, index
    return "Document indexed successfully!"


# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("## 🔍 Semantic Search in Academic Papers")
    file_input = gr.File(label="Upload Academic Paper (.txt)", file_types=['.txt'])
    upload_button = gr.Button("Upload & Index")
    upload_output = gr.Textbox(label="Status")
    query_input = gr.Textbox(label="Enter Search Query")
    search_button = gr.Button("Search")
    search_output = gr.Textbox(label="Top 3 Results")

    upload_button.click(upload_and_index, inputs=file_input, outputs=upload_output)
    search_button.click(semantic_search, inputs=query_input, outputs=search_output)

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()