atrmkj committed on
Commit
c27cd83
·
1 Parent(s): a5ddb69

saving changes for summary gen

Browse files
.gitignore CHANGED
@@ -4,3 +4,6 @@ tools/__pycache__/retriever_tool.cpython-312.pyc
4
  tools/__pycache__/search_tool.cpython-312.pyc
5
  __pycache__/agent.cpython-312.pyc
6
  data/validation_set_surgery_with_web.numbers
 
 
 
 
4
  tools/__pycache__/search_tool.cpython-312.pyc
5
  __pycache__/agent.cpython-312.pyc
6
  data/validation_set_surgery_with_web.numbers
7
+ __pycache__/
8
+ *.pyc
9
+ debug.log
__pycache__/agent.cpython-312.pyc DELETED
Binary file (12.2 kB)
 
tools/__pycache__/retriever_tool.cpython-312.pyc DELETED
Binary file (7.48 kB)
 
tools/retriever_tool.py CHANGED
@@ -83,6 +83,36 @@ class DocumentRetriever:
83
  faiss.normalize_L2(batch_embeddings)
84
  self.index.add(np.array(batch_embeddings))
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  def query(self, question, include_metadata=True):
87
  try:
88
  q_embedding = self.model.encode([question])
@@ -98,7 +128,9 @@ class DocumentRetriever:
98
 
99
  if include_metadata and idx < len(self.metadata):
100
  meta = self.metadata[idx]
101
- doc_info = f"[Document {i+1}] (Score: {score:.2f}, Specialty: {meta.get('medical_specialty', 'Unknown')}, Sample: {meta.get('sample_name', 'Unknown')})\n\n{doc_text}"
 
 
102
  else:
103
  doc_info = f"[Document {i+1}] (Score: {score:.2f})\n\n{doc_text}"
104
 
 
83
  faiss.normalize_L2(batch_embeddings)
84
  self.index.add(np.array(batch_embeddings))
85
 
86
+ # def query(self, question, include_metadata=True):
87
+ # try:
88
+ # q_embedding = self.model.encode([question])
89
+ # faiss.normalize_L2(q_embedding)
90
+
91
+ # k = min(self.top_k * 2, len(self.texts))
92
+ # scores, indices = self.index.search(np.array(q_embedding), k)
93
+
94
+ # results = []
95
+ # for i, (score, idx) in enumerate(zip(scores[0], indices[0])):
96
+ # if idx != -1 and score >= self.similarity_threshold and i < self.top_k:
97
+ # doc_text = self.texts[idx]
98
+
99
+ # if include_metadata and idx < len(self.metadata):
100
+ # meta = self.metadata[idx]
101
+ # doc_info = f"[Document {i+1}] (Score: {score:.2f}, Specialty: {meta.get('medical_specialty', 'Unknown')}, Sample: {meta.get('sample_name', 'Unknown')})\n\n{doc_text}"
102
+ # else:
103
+ # doc_info = f"[Document {i+1}] (Score: {score:.2f})\n\n{doc_text}"
104
+
105
+ # results.append(doc_info)
106
+
107
+ # gc.collect()
108
+
109
+ # if not results:
110
+ # return "No relevant documents found for this query."
111
+
112
+ # return "\n\n" + "-"*80 + "\n\n".join(results)
113
+ # except Exception as e:
114
+ # return f"Error during retrieval: {str(e)}"
115
+
116
  def query(self, question, include_metadata=True):
117
  try:
118
  q_embedding = self.model.encode([question])
 
128
 
129
  if include_metadata and idx < len(self.metadata):
130
  meta = self.metadata[idx]
131
+ # Add description to the output
132
+ description = meta.get('description', 'No description available')
133
+ doc_info = f"[Document {i+1}] (Score: {score:.2f})\nSpecialty: {meta.get('medical_specialty', 'Unknown')}\nSample: {meta.get('sample_name', 'Unknown')}\nDescription: {description}\n\n{doc_text}"
134
  else:
135
  doc_info = f"[Document {i+1}] (Score: {score:.2f})\n\n{doc_text}"
136