Ali2206 committed on
Commit
ad85a12
·
verified ·
1 Parent(s): 6e39ead

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -112
app.py CHANGED
@@ -9,7 +9,6 @@ import shutil
9
  import re
10
  from datetime import datetime
11
  import time
12
- from collections import defaultdict
13
 
14
  # Configuration and setup
15
  persistent_dir = "/data/hf_cache"
@@ -52,88 +51,58 @@ def estimate_tokens(text: str) -> int:
52
  return len(text) // 3.5
53
 
54
 
55
def _cell_text(value: Any) -> str:
    """Return *value* as text, mapping missing cells (NaN/NaT/None) to ''."""
    return "" if pd.isna(value) else str(value)


def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
    """Index the rows of a patient export by booking, category and time.

    Args:
        df: Table with the columns 'Booking Number', 'Interview Date',
            'Interviewer', 'Form Name', 'Form Item', 'Item Response' and
            'Description'.

    Returns:
        A dict with:
          * 'bookings': booking number -> entries, keys in order of first
            appearance once rows are sorted by 'Interview Date';
          * 'medications' / 'diagnoses' / 'tests' / 'procedures':
            form item -> entries, bucketed by keywords in the form name
            (first matching bucket wins);
          * 'doctors': set of non-empty interviewer names;
          * 'timeline': every entry, ordered by 'Interview Date'.

    Raises:
        KeyError: If one of the expected columns is missing.
    """
    data = {
        'bookings': defaultdict(list),
        'medications': defaultdict(list),
        'diagnoses': defaultdict(list),
        'tests': defaultdict(list),
        'procedures': defaultdict(list),
        'doctors': set(),
        'timeline': []
    }

    # Keyword groups are checked in this order; an entry lands in at most one.
    form_buckets = (
        (('medication', 'drug'), 'medications'),
        (('diagnosis', 'condition'), 'diagnoses'),
        (('test', 'lab', 'result'), 'tests'),
        (('procedure', 'surgery'), 'procedures'),
    )

    # Stable sort keeps the file order of rows that share a date.
    df = df.sort_values('Interview Date', kind='stable')

    # Walk the rows once in date order so 'timeline' is chronological across
    # bookings; iterating per groupby group would order it by booking number.
    for _, row in df.iterrows():
        booking = row['Booking Number']
        if pd.isna(booking):
            # groupby() drops rows without a key; keep excluding them.
            continue

        entry = {
            'booking': booking,
            'date': _cell_text(row['Interview Date']),
            'doctor': _cell_text(row['Interviewer']),
            'form': _cell_text(row['Form Name']),
            'item': _cell_text(row['Form Item']),
            'response': _cell_text(row['Item Response']),
            'notes': _cell_text(row['Description'])
        }

        data['bookings'][booking].append(entry)
        data['timeline'].append(entry)
        if entry['doctor']:
            data['doctors'].add(entry['doctor'])

        form_lower = entry['form'].lower()
        for keywords, bucket in form_buckets:
            if any(keyword in form_lower for keyword in keywords):
                data[bucket][entry['item']].append(entry)
                break

    return data
94
-
95
-
96
def generate_analysis_prompt(patient_data: Dict[str, Any], bookings: List[str]) -> str:
    """Build the clinical-reasoning prompt for a subset of bookings.

    Args:
        patient_data: Mapping with a 'timeline' list of entry dicts and a
            'medications' mapping of medication name -> entry dicts. Each
            entry provides 'booking', 'date', 'form', 'item', 'response'
            and 'doctor'.
        bookings: Booking identifiers whose entries should be included.

    Returns:
        The prompt text: task instructions, the timeline lines for the
        selected bookings, the medication history for those bookings, and
        the requested response structure.
    """
    # Set membership keeps the per-entry filter O(1).
    selected = set(bookings)

    prompt_lines = [
        "### Patient Clinical Reasoning Task",
        "",
        "**Instructions for the AI model:**",
        "You are a clinical assistant reviewing the complete timeline of a single patient.",
        "Use the following structured timeline and medication history to identify:",
        "- Missed diagnoses",
        "- Medication errors or inconsistencies",
        "- Lack of follow-up",
        "- Inconsistencies between providers",
        "- Any signs doctors may have overlooked",
        "",
        "**Patient History Timeline:**"
    ]

    prompt_lines.extend(
        f"- [{entry['date']}] {entry['form']}: {entry['item']} → {entry['response']} ({entry['doctor']})"
        for entry in patient_data['timeline']
        if entry['booking'] in selected
    )

    prompt_lines.append("\n**Medication History:**")
    for med, entries in patient_data['medications'].items():
        steps = [
            f"[{e['date']}] {e['response']}" for e in entries if e['booking'] in selected
        ]
        if not steps:
            # Medication never recorded in the selected bookings: omit it
            # rather than emit a "- med: " line with an empty history.
            continue
        prompt_lines.append(f"- {med}: {' → '.join(steps)}")

    prompt_lines.append("\n**Instructions:**")
    prompt_lines.append("Analyze this data to generate clinical insights.")
    prompt_lines.append("Structure your response as follows:\n")
    prompt_lines.extend([
        "### Diagnostic Patterns",
        "### Medication Analysis",
        "### Missed Opportunities",
        "### Inconsistencies",
        "### Recommendations"
    ])

    return "\n".join(prompt_lines)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
 
139
  def init_agent():
@@ -187,48 +156,24 @@ def analyze(file):
187
  raise gr.Error("Please upload a file")
188
 
189
  try:
190
- df = pd.read_excel(file.name)
191
- patient_data = process_patient_data(df)
192
- all_bookings = list(patient_data['bookings'].keys())
193
-
194
- # Chunking logic based on estimated token limits
195
- chunks = []
196
- current_chunk = []
197
- current_size = 0
198
-
199
- for booking in all_bookings:
200
- booking_entries = patient_data['bookings'][booking]
201
- booking_prompt = generate_analysis_prompt(patient_data, [booking])
202
- token_count = estimate_tokens(booking_prompt)
203
- if current_size + token_count > MAX_TOKENS:
204
- if current_chunk:
205
- chunks.append(current_chunk)
206
- current_chunk = [booking]
207
- current_size = token_count
208
- else:
209
- current_chunk.append(booking)
210
- current_size += token_count
211
-
212
- if current_chunk:
213
- chunks.append(current_chunk)
214
 
215
  chunk_responses = []
216
  for chunk in chunks:
217
- prompt = generate_analysis_prompt(patient_data, chunk) + "\n\n" + "\n".join([
218
- "**Please analyze this part of the patient history.**",
219
- "Focus on identifying patterns, issues, and possible missed opportunities."
220
- ])
221
  chunk_responses.append(analyze_with_agent(agent, prompt))
222
 
223
- final_prompt = "\n\n".join(chunk_responses) + "\n\nSummarize the key insights, missed diagnoses, medication issues, inconsistencies and follow-up recommendations in a clear and structured way."
224
  final_response = analyze_with_agent(agent, final_prompt)
225
- full_report = f"# \U0001f9e0 Full Patient History Analysis\n\n{final_response}"
 
226
 
227
  report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
228
  with open(report_path, 'w') as f:
229
  f.write(full_report)
230
 
231
- return [("user", "[Excel Uploaded: Processing Analysis...]"), ("assistant", full_report)], report_path
232
 
233
  except Exception as e:
234
  raise gr.Error(f"Error: {str(e)}")
 
9
  import re
10
  from datetime import datetime
11
  import time
 
12
 
13
  # Configuration and setup
14
  persistent_dir = "/data/hf_cache"
 
51
  return len(text) // 3.5
52
 
53
 
54
def extract_text_from_excel(file_path: str) -> str:
    """Flatten every sheet of an Excel workbook into newline-separated text.

    Each data row becomes one line of the form ``[sheet] v1 | v2 | ...``.
    Column headers are not emitted, and missing cells become empty strings.

    Args:
        file_path: Path to the workbook, passed to ``pd.ExcelFile``.

    Returns:
        The row lines of all sheets, in workbook order, joined with newlines.
        The result is an empty string when no sheet contains data rows.
    """
    all_text = []
    # The context manager closes the workbook handle even if a sheet fails
    # to parse.
    with pd.ExcelFile(file_path) as xls:
        for sheet_name in xls.sheet_names:
            df = xls.parse(sheet_name)
            # Blank missing cells using the null mask of the parsed frame:
            # astype(str) alone turns NaN/NaT into the text "nan"/"NaT",
            # which a later fillna("") can no longer detect.
            text_df = df.astype(str).where(df.notna(), "")
            # Iterate rows directly; DataFrame.apply(axis=1) on a sheet with
            # no rows returns a DataFrame, whose iteration yields the column
            # names instead of row text.
            all_text.extend(
                f"[{sheet_name}] {' | '.join(row)}"
                for row in text_df.itertuples(index=False, name=None)
            )
    return "\n".join(all_text)
64
+
65
+
66
def split_text_into_chunks(text: str, max_tokens: int = MAX_TOKENS) -> List[str]:
    """Group the lines of *text* into chunks that fit a token budget.

    Lines are never split. They are accumulated in order until adding the
    next line would push the chunk's estimated token count (per
    ``estimate_tokens``) above ``max_tokens``, at which point a new chunk
    is started.

    Args:
        text: Newline-separated text to split.
        max_tokens: Estimated-token budget per chunk.

    Returns:
        The chunks in order, each a newline-joined run of whole lines. A
        single line that alone exceeds the budget is returned as its own
        (over-budget) chunk.
    """
    chunks = []
    current_chunk = []
    current_tokens = 0

    for line in text.split("\n"):
        tokens = estimate_tokens(line)
        # Flush only a non-empty chunk: if the very first line is already
        # over budget there is nothing to flush, and emitting "" would send
        # an empty chunk downstream.
        if current_chunk and current_tokens + tokens > max_tokens:
            chunks.append("\n".join(current_chunk))
            current_chunk = []
            current_tokens = 0
        current_chunk.append(line)
        current_tokens += tokens

    if current_chunk:
        chunks.append("\n".join(current_chunk))
    return chunks
85
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
def build_prompt_from_text(chunk: str) -> str:
    """Wrap one extracted text chunk in the clinical-review prompt template.

    Args:
        chunk: Extracted record text to embed verbatim in the prompt.

    Returns:
        The prompt: a heading, the reviewer instructions, the chunk, and
        the list of requested analysis sections. It starts and ends with a
        newline.
    """
    # Empty strings stand for the blank lines of the template; the leading
    # and trailing ones give the prompt its opening and closing newline.
    template_lines = [
        "",
        "### Unstructured Clinical Records",
        "",
        "You are reviewing unstructured, mixed-format clinical documentation from various forms, tables, and sheets.",
        "",
        "**Objective:** Identify patterns, missed diagnoses, inconsistencies, and follow-up gaps.",
        "",
        "Here is the extracted content chunk:",
        "",
        chunk,
        "",
        "Please analyze the above and provide:",
        "- Diagnostic Patterns",
        "- Medication Issues",
        "- Missed Opportunities",
        "- Inconsistencies",
        "- Follow-up Recommendations",
        "",
    ]
    return "\n".join(template_lines)
106
 
107
 
108
  def init_agent():
 
156
  raise gr.Error("Please upload a file")
157
 
158
  try:
159
+ extracted_text = extract_text_from_excel(file.name)
160
+ chunks = split_text_into_chunks(extracted_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
  chunk_responses = []
163
  for chunk in chunks:
164
+ prompt = build_prompt_from_text(chunk)
 
 
 
165
  chunk_responses.append(analyze_with_agent(agent, prompt))
166
 
167
+ final_prompt = "\n\n".join(chunk_responses) + "\n\nSummarize the key findings above."
168
  final_response = analyze_with_agent(agent, final_prompt)
169
+
170
+ full_report = f"# \U0001f9e0 Final Patient Report\n\n{final_response}"
171
 
172
  report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
173
  with open(report_path, 'w') as f:
174
  f.write(full_report)
175
 
176
+ return [("user", f"[Excel Uploaded: {file.name}]"), ("assistant", full_report)], report_path
177
 
178
  except Exception as e:
179
  raise gr.Error(f"Error: {str(e)}")