# kavuuuu / app.py
# Pradeepthi30's picture
# Update app.py
# 198d1c2 verified
import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors
# Module-level search state. upload_and_index() rebinds all three names;
# semantic_search() reads nn_model and global_docs.
doc_embeddings = None  # embeddings produced by model.encode() for global_docs
nn_model = None  # NearestNeighbors index; None until a file has been indexed
global_docs = []  # one stripped, non-blank line of the uploaded file per entry

# Sentence-embedding model, created once at import time and shared by the
# indexing and query functions.
model = SentenceTransformer("all-MiniLM-L6-v2")
# Function for semantic search
def semantic_search(query, top_k=3):
    """Return the indexed documents closest to ``query`` as formatted text.

    Args:
        query: Free-text search string.
        top_k: Maximum number of results to return. The value is capped at
            the number of indexed documents, because ``kneighbors`` raises
            ``ValueError`` when asked for more neighbors than were fitted.

    Returns:
        One "Rank / Document / Distance" entry per hit, joined by newlines,
        or a short prompt when nothing has been indexed yet or the query is
        blank.
    """
    # Test against None explicitly instead of relying on the truthiness of
    # the estimator object.
    if nn_model is None or not global_docs:
        return "Please upload and index a file first."
    if not query or not query.strip():
        return "Please enter a query."

    # A corpus smaller than top_k must not make the lookup fail.
    k = min(top_k, len(global_docs))
    query_vec = model.encode([query])
    distances, indices = nn_model.kneighbors(query_vec, n_neighbors=k)

    # Row 0 holds the neighbors of the single query vector.
    results = [
        f"Rank {rank}:\nDocument: {global_docs[idx]}\nDistance: {dist:.4f}\n"
        for rank, (idx, dist) in enumerate(zip(indices[0], distances[0]), start=1)
    ]
    return "\n".join(results)
# Function to upload and index documents
def upload_and_index(file):
    """Read an uploaded text file and rebuild the search index from its lines.

    Every non-blank line, stripped of surrounding whitespace, becomes one
    document. The documents are embedded with the module-level ``model`` and
    a Euclidean ``NearestNeighbors`` index is fitted on the embeddings.

    The module-level ``global_docs``, ``doc_embeddings`` and ``nn_model`` are
    replaced together, and only after every step has succeeded, so a failed
    upload never leaves the documents and the index out of sync.

    Args:
        file: The uploaded file: either an object exposing the file path as
            ``.name`` or a plain path string. ``None`` means nothing was
            uploaded.

    Returns:
        A status message. Failures are reported as an ``"Error: ..."``
        string instead of being raised, so the UI can display them.
    """
    global global_docs, nn_model, doc_embeddings

    if file is None:
        return "Error: no file was uploaded."

    # Accept both a file-like wrapper carrying the path in ``.name`` and a
    # bare path string.
    path = getattr(file, "name", file)

    try:
        with open(path, "r", encoding="utf-8") as f:
            content = f.read()

        docs = [line.strip() for line in content.splitlines() if line.strip()]
        if not docs:
            return "Error: the file contains no non-empty lines to index."

        embeddings = model.encode(docs)

        # The default neighbor count may not exceed the number of samples.
        index = NearestNeighbors(n_neighbors=min(3, len(docs)), metric="euclidean")
        index.fit(embeddings)
    except Exception as e:
        # UI boundary: surface any read/encode/fit failure as a status line.
        return f"Error: {e}"

    # Commit the new state in one step now that nothing can fail anymore.
    global_docs, doc_embeddings, nn_model = docs, embeddings, index
    return "Documents indexed successfully!"
# Gradio interface
# Two-step UI: upload a .txt file to build the index, then query it.
with gr.Blocks() as demo:
    # The emoji in this heading had been mis-decoded into mojibake; restored.
    gr.Markdown("## 🔍 Semantic Search in Academic Papers (No FAISS)")

    # Indexing controls.
    file_input = gr.File(label="Upload .txt file", file_types=[".txt"])
    upload_button = gr.Button("Upload & Index")
    upload_output = gr.Textbox(label="Status")

    # Query controls.
    query_input = gr.Textbox(label="Enter your query")
    search_button = gr.Button("Search")
    search_output = gr.Textbox(label="Results")

    # Only the query text is wired in, so semantic_search runs with its
    # default top_k.
    upload_button.click(upload_and_index, inputs=file_input, outputs=upload_output)
    search_button.click(semantic_search, inputs=query_input, outputs=search_output)

# Start the Gradio server.
demo.launch()