Pradeepthi30 committed on
Commit
a28f198
·
verified ·
1 Parent(s): 572f455

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -27
app.py CHANGED
@@ -1,58 +1,59 @@
1
  import gradio as gr
2
- import faiss_cpu as faiss
3
  import numpy as np
4
  from sentence_transformers import SentenceTransformer
 
5
 
6
- # Load the model once
7
  model = SentenceTransformer('all-MiniLM-L6-v2')
8
 
9
- # Global storage for documents and index
10
  global_docs = []
11
- global_index = None
 
12
 
13
- # Load documents from uploaded file
14
  def load_documents(file_obj):
15
  docs = [line.strip() for line in file_obj if line.strip()]
16
  return docs
17
 
18
- # Build FAISS index
19
  def build_index(docs):
20
- embeddings = model.encode(docs).astype(np.float32)
21
- index = faiss.IndexFlatL2(embeddings.shape[1])
22
- index.add(embeddings)
23
- return index, embeddings
24
 
25
- # Semantic search
26
  def semantic_search(query, top_k=3):
27
- if not global_index or not global_docs:
28
- return "Please upload a file first."
29
- query_embedding = model.encode([query]).astype(np.float32)
30
- distances, indices = global_index.search(query_embedding, top_k)
31
  results = [
32
- f"Rank {rank + 1}:\nDocument: {global_docs[i]}\nL2 Distance: {distances[0][rank]:.4f}\n"
33
- for rank, i in enumerate(indices[0])
34
  ]
35
  return "\n".join(results)
36
 
37
- # Handle file upload
38
  def upload_and_index(file):
39
- global global_docs, global_index
40
  contents = file.read().decode("utf-8").splitlines()
41
  global_docs = [line.strip() for line in contents if line.strip()]
42
- global_index, _ = build_index(global_docs)
43
- return "Document indexed successfully!"
44
 
45
- # Gradio Interface
46
  with gr.Blocks() as demo:
47
- gr.Markdown("## πŸ” Semantic Search in Academic Papers")
48
-
49
- file_input = gr.File(label="Upload Academic Paper (.txt)", file_types=['.txt'])
50
  upload_button = gr.Button("Upload & Index")
51
  upload_output = gr.Textbox(label="Status")
52
 
53
- query_input = gr.Textbox(label="Enter Search Query")
54
  search_button = gr.Button("Search")
55
- search_output = gr.Textbox(label="Top 3 Results")
56
 
57
  upload_button.click(upload_and_index, inputs=file_input, outputs=upload_output)
58
  search_button.click(semantic_search, inputs=query_input, outputs=search_output)
 
1
  import gradio as gr
 
2
  import numpy as np
3
  from sentence_transformers import SentenceTransformer
4
+ from sklearn.neighbors import NearestNeighbors
5
 
6
+ # Load model
7
  model = SentenceTransformer('all-MiniLM-L6-v2')
8
 
9
+ # Global storage
10
  global_docs = []
11
+ nn_model = None
12
+ doc_embeddings = None
13
 
14
# Load documents
def load_documents(file_obj):
    """Return the non-blank lines of ``file_obj`` with whitespace trimmed.

    Args:
        file_obj: Any iterable of strings, e.g. an open text file or a list
            of lines.

    Returns:
        list[str]: Each line stripped of leading/trailing whitespace, in the
        original order, with empty and whitespace-only lines dropped.
    """
    # Strip once per line, then filter on the already-stripped value.
    stripped_lines = (line.strip() for line in file_obj)
    return [line for line in stripped_lines if line]
18
 
19
# Build index using Nearest Neighbors
def build_index(docs, n_neighbors=3):
    """Embed ``docs`` and fit a Euclidean nearest-neighbour index over them.

    Args:
        docs: Non-empty list of document strings to embed with the
            module-level sentence-transformer ``model``.
        n_neighbors: Default number of neighbours the fitted index returns.
            It is capped at ``len(docs)`` so that a default ``kneighbors()``
            call also works on very small corpora.

    Returns:
        tuple: ``(nn, embeddings)`` where ``nn`` is the fitted
        ``NearestNeighbors`` instance and ``embeddings`` is whatever
        ``model.encode(docs)`` returned.

    Raises:
        ValueError: If ``docs`` is empty; there is nothing to index.
    """
    if not docs:
        raise ValueError("Cannot build an index from an empty document list.")

    embeddings = model.encode(docs)
    nn = NearestNeighbors(
        n_neighbors=min(n_neighbors, len(docs)),
        metric='euclidean',
    )
    nn.fit(embeddings)
    return nn, embeddings
25
 
26
# Search
def semantic_search(query, top_k=3):
    """Return the indexed documents closest to ``query`` as display text.

    Args:
        query: Free-text search string.
        top_k: Maximum number of results to return. Fewer are returned when
            the index holds fewer documents.

    Returns:
        str: One "Rank / Document / Distance" entry per hit, joined by blank
        lines, or a short instruction message when the query is blank or no
        file has been indexed yet.
    """
    if query is None or not query.strip():
        return "Please enter a search query."
    if nn_model is None or not global_docs:
        return "Please upload and index a file first."

    # kneighbors() raises ValueError when asked for more neighbours than
    # there are fitted samples, so never request more than we indexed.
    k = min(top_k, len(global_docs))

    query_vec = model.encode([query])
    distances, indices = nn_model.kneighbors(query_vec, n_neighbors=k)

    results = [
        f"Rank {rank}:\nDocument: {global_docs[doc_idx]}\nDistance: {dist:.4f}\n"
        for rank, (doc_idx, dist) in enumerate(
            zip(indices[0], distances[0]), start=1
        )
    ]
    return "\n".join(results)
37
 
38
# Upload and index
def _read_uploaded_text(file):
    """Return the UTF-8 text of an upload given as a path, a named temp file, or a readable object."""
    import os

    # Depending on the Gradio version/configuration the callback receives
    # either a path string or a temp-file wrapper exposing ``.name``; prefer
    # reading from the path when one is available.
    path = file if isinstance(file, (str, os.PathLike)) else getattr(file, "name", None)
    if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
        with open(path, encoding="utf-8") as handle:
            return handle.read()

    # Fall back to a file-like object (the behaviour the original code assumed).
    data = file.read()
    if isinstance(data, (bytes, bytearray)):
        return data.decode("utf-8")
    return data


def upload_and_index(file):
    """Read the uploaded text file, index its non-blank lines, and report status.

    On success the module-level ``global_docs``, ``nn_model`` and
    ``doc_embeddings`` are replaced together. On any failure they are left
    untouched, so a previously built index stays usable and consistent.

    Args:
        file: The value supplied by the file-upload component: ``None`` when
            nothing was selected, otherwise a path or a file-like object.

    Returns:
        str: A status message for the UI.
    """
    global global_docs, nn_model, doc_embeddings

    if file is None:
        return "Please choose a .txt file to upload."

    try:
        text = _read_uploaded_text(file)
    except UnicodeDecodeError:
        return "Could not read the file: it is not valid UTF-8 text."

    docs = [line.strip() for line in text.splitlines() if line.strip()]
    if not docs:
        return "The uploaded file contains no text to index."

    # Build first, assign afterwards: the documents and the index must never
    # get out of sync if embedding/fitting raises.
    index, embeddings = build_index(docs)
    global_docs, nn_model, doc_embeddings = docs, index, embeddings
    return "Documents indexed successfully!"
45
 
46
# Gradio UI
# Two-step layout: upload a text file to build the index, then query it.
with gr.Blocks() as demo:
    gr.Markdown("## 🔍 Semantic Search in Academic Papers (No FAISS)")

    # Step 1: upload and index
    file_input = gr.File(label="Upload .txt file", file_types=[".txt"])
    upload_button = gr.Button("Upload & Index")
    upload_output = gr.Textbox(label="Status")

    # Step 2: search the indexed lines
    query_input = gr.Textbox(label="Enter your query")
    search_button = gr.Button("Search")
    search_output = gr.Textbox(label="Results")

    # Event wiring: each button feeds one input component to its handler and
    # writes the returned string into the matching output textbox.
    upload_button.click(upload_and_index, inputs=file_input, outputs=upload_output)
    search_button.click(semantic_search, inputs=query_input, outputs=search_output)

# Nothing in the visible file starts the server, so launch it when the module
# is executed as a script; importing the module stays side-effect free.
if __name__ == "__main__":
    demo.launch()