# Spaces status: Runtime error
# The "faiss-cpu" distribution installs its Python module under the name
# "faiss"; there is no importable module called "faiss_cpu".
import faiss
import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the embedding model once at import time so every callback reuses it.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Module-level state shared by the upload and search callbacks.
global_docs = []     # lines of the most recently indexed file
global_index = None  # FAISS index over global_docs; None until a file is indexed
# Load documents from uploaded file
def load_documents(file_obj):
    """Return the non-blank lines of *file_obj* with surrounding whitespace removed.

    Args:
        file_obj: Any iterable of lines, e.g. an open text or binary file or
            a list of ``str``/``bytes``. ``bytes`` lines are decoded as UTF-8
            (undecodable bytes are replaced) so the result is always text.

    Returns:
        A list of ``str``, one entry per non-blank line, in input order.
    """
    docs = []
    for line in file_obj:
        # Files opened in binary mode yield bytes; normalise to str so the
        # caller never receives a mix of text and bytes.
        if isinstance(line, (bytes, bytearray)):
            line = line.decode("utf-8", errors="replace")
        line = line.strip()
        if line:
            docs.append(line)
    return docs
# Build FAISS index
def build_index(docs):
    """Embed *docs* with the module-level model and index them with FAISS.

    Args:
        docs: Non-empty sequence of strings to embed.

    Returns:
        A tuple ``(index, embeddings)``: the populated ``faiss.IndexFlatL2``
        and the float32 embedding array that was added to it.

    Raises:
        ValueError: If *docs* is empty, since there would be no embedding
            dimension to size the index with.
    """
    if len(docs) == 0:
        raise ValueError("Cannot build an index from an empty document list.")
    # FAISS expects C-contiguous float32 input.
    embeddings = np.ascontiguousarray(model.encode(docs), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings
# Semantic search
def semantic_search(query, top_k=3):
    """Return the indexed documents closest to *query* as a formatted string.

    Args:
        query: Free-text search query.
        top_k: Maximum number of results to return.

    Returns:
        One "Rank / Document / L2 Distance" entry per hit, joined by newlines,
        or a short instruction message when nothing is indexed yet or the
        query is blank.
    """
    # Compare with None explicitly rather than relying on the truthiness of
    # the FAISS index object.
    if global_index is None or not global_docs:
        return "Please upload a file first."
    if not query or not query.strip():
        return "Please enter a search query."

    # Never request more neighbours than there are documents: FAISS pads
    # missing results with index -1, which Python list indexing would
    # silently resolve to the *last* document.
    k = min(top_k, len(global_docs))
    query_embedding = np.ascontiguousarray(model.encode([query]), dtype=np.float32)
    distances, indices = global_index.search(query_embedding, k)

    # NOTE(review): FAISS documents IndexFlatL2 as returning squared L2
    # distances; the label text is kept as-is here - confirm intended wording.
    results = [
        f"Rank {rank + 1}:\nDocument: {global_docs[i]}\nL2 Distance: {distances[0][rank]:.4f}\n"
        for rank, i in enumerate(indices[0])
        if i >= 0  # defensive: skip any padding entries
    ]
    return "\n".join(results)
# Handle file upload
def upload_and_index(file):
    """Read the uploaded text file, index its lines, and return a status message.

    Args:
        file: The value delivered for the ``gr.File`` input. Accepted forms:
            a filesystem path (``str``), an object exposing the path as
            ``.name``, a file-like object with ``.read()``, or ``None`` when
            nothing was uploaded.
            NOTE(review): which form Gradio passes depends on the installed
            Gradio version and the component's ``type`` setting - confirm.

    Returns:
        A human-readable status string for the "Status" textbox.
    """
    global global_docs, global_index

    if file is None:
        return "Please upload a file first."

    # Prefer reading from the path on disk: it works for both path strings
    # and temp-file wrappers, regardless of the wrapper's read position.
    path = file if isinstance(file, str) else getattr(file, "name", None)
    if isinstance(path, str):
        with open(path, "rb") as handle:
            raw = handle.read()
    else:
        raw = file.read()

    if isinstance(raw, (bytes, bytearray)):
        # Replace undecodable bytes instead of failing the whole upload.
        text = raw.decode("utf-8", errors="replace")
    else:
        text = raw

    docs = load_documents(text.splitlines())
    if not docs:
        return "No text found in the uploaded file."

    # Build first, then publish both globals together, so a failure while
    # embedding cannot leave the document list and the index out of sync.
    index, _ = build_index(docs)
    global_docs, global_index = docs, index
    return "Document indexed successfully!"
# Gradio Interface
# Layout: a file upload with its status box, then a query box with results.
with gr.Blocks() as demo:
    gr.Markdown("## π Semantic Search in Academic Papers")

    # Upload section
    file_input = gr.File(label="Upload Academic Paper (.txt)", file_types=['.txt'])
    upload_button = gr.Button("Upload & Index")
    upload_output = gr.Textbox(label="Status")

    # Search section
    query_input = gr.Textbox(label="Enter Search Query")
    search_button = gr.Button("Search")
    search_output = gr.Textbox(label="Top 3 Results")

    # Wire the buttons to their callbacks.
    upload_button.click(upload_and_index, inputs=file_input, outputs=upload_output)
    search_button.click(semantic_search, inputs=query_input, outputs=search_output)

# Start the web server once the UI has been fully declared.
demo.launch()