saving changes for summary gen
.gitignore
CHANGED
@@ -4,3 +4,6 @@ tools/__pycache__/retriever_tool.cpython-312.pyc
 tools/__pycache__/search_tool.cpython-312.pyc
 __pycache__/agent.cpython-312.pyc
 data/validation_set_surgery_with_web.numbers
+__pycache__/
+*.pyc
+debug.log
__pycache__/agent.cpython-312.pyc
DELETED
Binary file (12.2 kB)

tools/__pycache__/retriever_tool.cpython-312.pyc
DELETED
Binary file (7.48 kB)
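The two DELETED .pyc entries complete what the new ignore rules alone would not: adding __pycache__/ and *.pyc to .gitignore only stops git from tracking new files, so byte-code caches that were already committed still have to be removed from the index (for any that remain, something like `git rm -r --cached __pycache__/` would untrack them without deleting the local files).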
tools/retriever_tool.py
CHANGED
@@ -83,6 +83,36 @@ class DocumentRetriever:
             faiss.normalize_L2(batch_embeddings)
             self.index.add(np.array(batch_embeddings))
 
+    # def query(self, question, include_metadata=True):
+    #     try:
+    #         q_embedding = self.model.encode([question])
+    #         faiss.normalize_L2(q_embedding)
+
+    #         k = min(self.top_k * 2, len(self.texts))
+    #         scores, indices = self.index.search(np.array(q_embedding), k)
+
+    #         results = []
+    #         for i, (score, idx) in enumerate(zip(scores[0], indices[0])):
+    #             if idx != -1 and score >= self.similarity_threshold and i < self.top_k:
+    #                 doc_text = self.texts[idx]
+
+    #                 if include_metadata and idx < len(self.metadata):
+    #                     meta = self.metadata[idx]
+    #                     doc_info = f"[Document {i+1}] (Score: {score:.2f}, Specialty: {meta.get('medical_specialty', 'Unknown')}, Sample: {meta.get('sample_name', 'Unknown')})\n\n{doc_text}"
+    #                 else:
+    #                     doc_info = f"[Document {i+1}] (Score: {score:.2f})\n\n{doc_text}"
+
+    #                 results.append(doc_info)
+
+    #         gc.collect()
+
+    #         if not results:
+    #             return "No relevant documents found for this query."
+
+    #         return "\n\n" + "-"*80 + "\n\n".join(results)
+    #     except Exception as e:
+    #         return f"Error during retrieval: {str(e)}"
+
     def query(self, question, include_metadata=True):
         try:
             q_embedding = self.model.encode([question])
@@ -98,7 +128,9 @@ class DocumentRetriever:
 
                 if include_metadata and idx < len(self.metadata):
                     meta = self.metadata[idx]
-                    doc_info = f"[Document {i+1}] (Score: {score:.2f}, Specialty: {meta.get('medical_specialty', 'Unknown')}, Sample: {meta.get('sample_name', 'Unknown')})\n\n{doc_text}"
+                    # Add description to the output
+                    description = meta.get('description', 'No description available')
+                    doc_info = f"[Document {i+1}] (Score: {score:.2f})\nSpecialty: {meta.get('medical_specialty', 'Unknown')}\nSample: {meta.get('sample_name', 'Unknown')}\nDescription: {description}\n\n{doc_text}"
                 else:
                     doc_info = f"[Document {i+1}] (Score: {score:.2f})\n\n{doc_text}"
 
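The functional change in query is the richer per-document rendering when metadata is available: specialty, sample name, and the newly added description each get their own line instead of being folded into the score parenthesis. A minimal standalone sketch of that formatting follows; the meta record, doc_text, i, and score are made-up stand-ins for what DocumentRetriever.query pulls from self.metadata[idx], self.texts[idx], and the FAISS search loop.

# Sketch of the new per-document rendering introduced in this commit.
# All values below are illustrative; the real ones come from the retriever's
# metadata store and the FAISS search results.
meta = {
    "medical_specialty": "Surgery",
    "sample_name": "Sample Operative Note",
    "description": "Example description taken from document metadata.",
}
doc_text = "PREOPERATIVE DIAGNOSIS: ..."
i, score = 0, 0.87

description = meta.get("description", "No description available")
doc_info = (
    f"[Document {i+1}] (Score: {score:.2f})\n"
    f"Specialty: {meta.get('medical_specialty', 'Unknown')}\n"
    f"Sample: {meta.get('sample_name', 'Unknown')}\n"
    f"Description: {description}\n\n"
    f"{doc_text}"
)
print(doc_info)

Printing doc_info shows the metadata header block followed by the document text, which is the string each retrieved document now contributes to the tool's output.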