Spaces:
Sleeping
Sleeping
File size: 2,405 Bytes
758071d d65caf3 758071d d65caf3 758071d d65caf3 758071d d65caf3 758071d 6389fe4 198d1c2 6389fe4 d65caf3 6389fe4 d65caf3 6389fe4 d65caf3 6389fe4 d65caf3 758071d d65caf3 758071d d65caf3 758071d d65caf3 758071d d65caf3 758071d d65caf3 758071d d65caf3 758071d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors
# Sentence-embedding model used for both indexing and querying
model = SentenceTransformer('all-MiniLM-L6-v2')
# Module-level index state; starts empty and is reassigned by upload_and_index
doc_embeddings = None
nn_model = None
global_docs = []
# Function for semantic search
def semantic_search(query, top_k=3):
    """Return the indexed lines closest to *query*, formatted for display.

    Args:
        query: Free-text search string.
        top_k: Maximum number of results to return. It is clamped to the
            number of indexed documents, because ``kneighbors`` raises
            ``ValueError`` when asked for more neighbours than samples
            that were fitted.

    Returns:
        A string with one "Rank / Document / Distance" entry per hit,
        joined by newlines, or a short prompt when the query is blank or
        nothing has been indexed yet.
    """
    # Reject blank input up front: encoding an empty string would still
    # produce a vector and return meaningless "matches".
    if not query or not query.strip():
        return "Please enter a query."
    if nn_model is None or not global_docs:
        return "Please upload and index a file first."

    # A file with fewer than top_k lines would otherwise crash the search.
    k = min(top_k, len(global_docs))
    query_vec = model.encode([query])
    distances, indices = nn_model.kneighbors(query_vec, n_neighbors=k)

    results = [
        f"Rank {rank}:\nDocument: {global_docs[idx]}\nDistance: {dist:.4f}\n"
        for rank, (idx, dist) in enumerate(zip(indices[0], distances[0]), start=1)
    ]
    return "\n".join(results)
# Function to upload and index documents
def upload_and_index(file):
    """Read an uploaded text file and build the nearest-neighbour index.

    Every non-blank line of the file (stripped of surrounding whitespace)
    becomes one document. The documents are embedded with the module-level
    ``model`` and a ``NearestNeighbors`` index is fitted on the embeddings.

    Args:
        file: The uploaded file, given either as a filesystem path string
            or as an object exposing that path through ``.name``. ``None``
            is treated as "nothing uploaded".

    Returns:
        ``"Documents indexed successfully!"`` on success, otherwise a
        message starting with ``"Error: "`` describing what went wrong.
        On any failure the previously indexed state is left untouched.
    """
    global global_docs, nn_model, doc_embeddings

    if file is None:
        return "Error: no file was uploaded."

    # Accept a plain path as well as a file-like wrapper carrying .name.
    path = file if isinstance(file, str) else file.name

    # Broad catch is deliberate: this is the UI boundary and the handler's
    # contract is to report failures as a status string, not to raise.
    try:
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()

        stripped = (line.strip() for line in content.splitlines())
        docs = [line for line in stripped if line]
        if not docs:
            # Fitting on zero samples would fail with an opaque message.
            return "Error: the file contains no non-empty lines to index."

        embeddings = model.encode(docs)
        # Default neighbour count can never exceed the corpus size.
        index = NearestNeighbors(n_neighbors=min(3, len(docs)), metric='euclidean')
        index.fit(embeddings)
    except Exception as e:
        return f"Error: {str(e)}"

    # Publish all three pieces together so the documents and the index can
    # never get out of sync when a later step fails.
    global_docs, doc_embeddings, nn_model = docs, embeddings, index
    return "Documents indexed successfully!"
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(value="## 🔍 Semantic Search in Academic Papers (No FAISS)")

    # Indexing controls: file picker, trigger button, status read-out
    file_input = gr.File(file_types=[".txt"], label="Upload .txt file")
    upload_button = gr.Button(value="Upload & Index")
    upload_output = gr.Textbox(label="Status")

    # Search controls: query box, trigger button, results read-out
    query_input = gr.Textbox(label="Enter your query")
    search_button = gr.Button(value="Search")
    search_output = gr.Textbox(label="Results")

    # Wire each button to its handler
    upload_button.click(
        fn=upload_and_index,
        inputs=[file_input],
        outputs=[upload_output],
    )
    search_button.click(
        fn=semantic_search,
        inputs=[query_input],
        outputs=[search_output],
    )

# Launch the Gradio interface
demo.launch()
|