"""Gradio app: upload a text file, index its lines, and run semantic search.

Each non-empty line of the uploaded file is treated as one document. Lines are
embedded with a SentenceTransformer model and searched with scikit-learn's
NearestNeighbors using Euclidean distance.
"""

import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors

# Load the sentence transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Global variables for storing documents and embeddings
global_docs = []
nn_model = None
doc_embeddings = None


# Function for semantic search
def semantic_search(query, top_k=3):
    """Return the indexed documents closest to *query* as formatted text.

    Args:
        query: Free-text query string.
        top_k: Maximum number of neighbours to return. It is clamped to the
            number of indexed documents so small files do not make the
            neighbour lookup fail.

    Returns:
        A newline-joined string with one "Rank / Document / Distance" entry
        per hit, or a short prompt when nothing is indexed yet or the query
        is blank.
    """
    if nn_model is None or not global_docs:
        return "Please upload and index a file first."
    if not query or not query.strip():
        return "Please enter a query."

    # kneighbors() rejects n_neighbors larger than the number of fitted
    # samples, so never ask for more neighbours than there are documents.
    k = min(top_k, len(global_docs))

    query_vec = model.encode([query])
    distances, indices = nn_model.kneighbors(query_vec, n_neighbors=k)

    results = [
        f"Rank {rank}:\nDocument: {global_docs[idx]}\nDistance: {dist:.4f}\n"
        for rank, (idx, dist) in enumerate(zip(indices[0], distances[0]), start=1)
    ]
    return "\n".join(results)


# Function to upload and index documents
def upload_and_index(file):
    """Read the uploaded text file and (re)build the search index.

    Args:
        file: The value supplied by the ``gr.File`` component. Either an
            object exposing the path as ``.name`` or a plain path string is
            accepted; ``None`` means nothing was uploaded.

    Returns:
        A status message for the UI. Failures are reported as text instead of
        being raised, because the result is shown directly in a Textbox.
    """
    global global_docs, nn_model, doc_embeddings

    if file is None:
        return "Error: no file uploaded."

    try:
        # Read the file content from the file path
        path = getattr(file, "name", file)
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Split content into lines and drop the blank ones
        docs = [line.strip() for line in content.splitlines() if line.strip()]
        if not docs:
            return "Error: the file contains no non-empty lines."

        # Create document embeddings
        embeddings = model.encode(docs)

        # Initialize the nearest neighbors model; the default neighbour count
        # must not exceed the number of documents.
        index = NearestNeighbors(n_neighbors=min(3, len(docs)), metric='euclidean')
        index.fit(embeddings)
    except Exception as e:
        # UI boundary: surface any failure as a status message.
        return f"Error: {str(e)}"

    # Publish the new state only after every step succeeded, so a failed
    # upload never leaves the document list out of sync with the index.
    global_docs = docs
    doc_embeddings = embeddings
    nn_model = index
    return "Documents indexed successfully!"


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🔍 Semantic Search in Academic Papers (No FAISS)")

    # File upload input
    file_input = gr.File(label="Upload .txt file", file_types=[".txt"])

    # Upload and index button
    upload_button = gr.Button("Upload & Index")
    upload_output = gr.Textbox(label="Status")

    # Query input
    query_input = gr.Textbox(label="Enter your query")

    # Search button
    search_button = gr.Button("Search")
    search_output = gr.Textbox(label="Results")

    # Attach actions to buttons
    upload_button.click(upload_and_index, inputs=file_input, outputs=upload_output)
    search_button.click(semantic_search, inputs=query_input, outputs=search_output)

# Launch the Gradio interface
demo.launch()