File size: 2,405 Bytes
758071d
 
 
 
 
d65caf3
758071d
d65caf3
 
758071d
 
 
 
d65caf3
758071d
 
 
 
 
 
 
 
 
 
 
d65caf3
758071d
 
6389fe4
198d1c2
 
 
 
 
6389fe4
 
d65caf3
 
6389fe4
d65caf3
 
6389fe4
 
d65caf3
6389fe4
 
 
 
d65caf3
758071d
 
d65caf3
 
758071d
d65caf3
 
758071d
 
d65caf3
 
758071d
d65caf3
 
758071d
 
d65caf3
 
758071d
 
 
d65caf3
758071d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors

# Load the sentence transformer model used for both document and query
# embeddings (384-dim MiniLM; downloads weights on first run).
model = SentenceTransformer('all-MiniLM-L6-v2')

# Global variables for storing documents and embeddings.
# These are rebound by upload_and_index() and read by semantic_search().
global_docs = []        # list[str]: one entry per non-empty line of the uploaded file
nn_model = None         # fitted sklearn NearestNeighbors, or None until a file is indexed
doc_embeddings = None   # ndarray of document embeddings aligned with global_docs

# Function for semantic search
def semantic_search(query, top_k=3):
    """Return the top_k indexed documents closest to *query*.

    Args:
        query: Free-text search string.
        top_k: Maximum number of results to return (capped at the number
            of indexed documents).

    Returns:
        A human-readable, newline-separated ranking of the nearest
        documents with their euclidean distances, or a prompt to index
        a file first when nothing has been uploaded yet.
    """
    # Estimators are always truthy, so compare against None explicitly.
    if nn_model is None or not global_docs:
        return "Please upload and index a file first."
    query_vec = model.encode([query])
    # kneighbors raises ValueError if asked for more neighbors than there
    # are indexed documents, so clamp top_k to the corpus size.
    k = min(top_k, len(global_docs))
    distances, indices = nn_model.kneighbors(query_vec, n_neighbors=k)
    results = [
        f"Rank {rank+1}:\nDocument: {global_docs[idx]}\nDistance: {distances[0][rank]:.4f}\n"
        for rank, idx in enumerate(indices[0])
    ]
    return "\n".join(results)

# Function to upload and index documents
def upload_and_index(file):
    """Read an uploaded text file and build the nearest-neighbor index.

    Each non-empty line of the file becomes one searchable document.
    Rebinds the module globals global_docs, nn_model and doc_embeddings.

    Args:
        file: Either a filesystem path (str) or a Gradio file object
            exposing the path via its .name attribute — newer Gradio
            versions pass a plain str, older ones a tempfile wrapper.

    Returns:
        A status string suitable for display in the UI; errors are
        reported as "Error: ..." rather than raised, so the app keeps
        running.
    """
    global global_docs, nn_model, doc_embeddings
    try:
        # Accept both a plain path and a file-like object with .name.
        path = file if isinstance(file, str) else file.name
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Split content into lines, dropping blank/whitespace-only ones.
        docs = [line.strip() for line in content.splitlines() if line.strip()]
        if not docs:
            # Guard: encode([]) / fit on an empty corpus would raise a
            # cryptic error; report it clearly instead.
            return "Error: the uploaded file contains no non-empty lines."
        global_docs = docs

        # Create document embeddings
        doc_embeddings = model.encode(global_docs)

        # Initialize the nearest neighbors model; n_neighbors must not
        # exceed the number of samples or fitting queries would fail.
        nn_model = NearestNeighbors(n_neighbors=min(3, len(global_docs)),
                                    metric='euclidean')
        nn_model.fit(doc_embeddings)

        return "Documents indexed successfully!"
    except Exception as e:
        # Surface the failure in the UI rather than crashing the app.
        return f"Error: {str(e)}"

# Gradio interface: one column with an upload/index section on top and a
# query/search section below, wired to the two handlers above.
with gr.Blocks() as demo:
    gr.Markdown("## 🔍 Semantic Search in Academic Papers (No FAISS)")

    # --- Indexing controls ---
    txt_file = gr.File(label="Upload .txt file", file_types=[".txt"])
    index_btn = gr.Button("Upload & Index")
    index_status = gr.Textbox(label="Status")

    # --- Search controls ---
    query_box = gr.Textbox(label="Enter your query")
    search_btn = gr.Button("Search")
    results_box = gr.Textbox(label="Results")

    # --- Event wiring ---
    index_btn.click(upload_and_index, inputs=txt_file, outputs=index_status)
    search_btn.click(semantic_search, inputs=query_box, outputs=results_box)

# Launch the Gradio interface
demo.launch()