CPS-Test-Mobile

Paused

App Files Files Community

Ali2206 committed 24 days ago

Commit

ad85a12

verified ·

1 Parent(s): 6e39ead

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -112

app.py CHANGED Viewed

@@ -9,7 +9,6 @@ import shutil
 import re
 from datetime import datetime
 import time
-from collections import defaultdict
 # Configuration and setup
 persistent_dir = "/data/hf_cache"
@@ -52,88 +51,58 @@ def estimate_tokens(text: str) -> int:
     return len(text) // 3.5
-def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
-    data = {
-        'bookings': defaultdict(list),
-        'medications': defaultdict(list),
-        'diagnoses': defaultdict(list),
-        'tests': defaultdict(list),
-        'procedures': defaultdict(list),
-        'doctors': set(),
-        'timeline': []
-    }
-    df = df.sort_values('Interview Date')
-    for booking, group in df.groupby('Booking Number'):
-        for _, row in group.iterrows():
-            entry = {
-                'booking': booking,
-                'date': str(row['Interview Date']),
-                'doctor': str(row['Interviewer']),
-                'form': str(row['Form Name']),
-                'item': str(row['Form Item']),
-                'response': str(row['Item Response']),
-                'notes': str(row['Description'])
-            }
-            data['bookings'][booking].append(entry)
-            data['timeline'].append(entry)
-            data['doctors'].add(entry['doctor'])
-            form_lower = entry['form'].lower()
-            if 'medication' in form_lower or 'drug' in form_lower:
-                data['medications'][entry['item']].append(entry)
-            elif 'diagnosis' in form_lower or 'condition' in form_lower:
-                data['diagnoses'][entry['item']].append(entry)
-            elif 'test' in form_lower or 'lab' in form_lower or 'result' in form_lower:
-                data['tests'][entry['item']].append(entry)
-            elif 'procedure' in form_lower or 'surgery' in form_lower:
-                data['procedures'][entry['item']].append(entry)
-    return data
-def generate_analysis_prompt(patient_data: Dict[str, Any], bookings: List[str]) -> str:
-    prompt_lines = [
-        "### Patient Clinical Reasoning Task",
-        "",
-        "**Instructions for the AI model:**",
-        "You are a clinical assistant reviewing the complete timeline of a single patient.",
-        "Use the following structured timeline and medication history to identify:",
-        "- Missed diagnoses",
-        "- Medication errors or inconsistencies",
-        "- Lack of follow-up",
-        "- Inconsistencies between providers",
-        "- Any signs doctors may have overlooked",
-        "",
-        "**Patient History Timeline:**"
-    ]
-    for entry in patient_data['timeline']:
-        if entry['booking'] in bookings:
-            prompt_lines.append(
-                f"- [{entry['date']}] {entry['form']}: {entry['item']} → {entry['response']} ({entry['doctor']})"
-            )
-    prompt_lines.append("\n**Medication History:**")
-    for med, entries in patient_data['medications'].items():
-        history = " → ".join(
-            f"[{e['date']}] {e['response']}" for e in entries if e['booking'] in bookings
-        )
-        prompt_lines.append(f"- {med}: {history}")
-    prompt_lines.append("\n**Instructions:**")
-    prompt_lines.append("Analyze this data to generate clinical insights.")
-    prompt_lines.append("Structure your response as follows:\n")
-    prompt_lines.extend([
-        "### Diagnostic Patterns",
-        "### Medication Analysis",
-        "### Missed Opportunities",
-        "### Inconsistencies",
-        "### Recommendations"
-    ])
-    return "\n".join(prompt_lines)
 def init_agent():
@@ -187,48 +156,24 @@ def analyze(file):
         raise gr.Error("Please upload a file")
     try:
-        df = pd.read_excel(file.name)
-        patient_data = process_patient_data(df)
-        all_bookings = list(patient_data['bookings'].keys())
-        # Chunking logic based on estimated token limits
-        chunks = []
-        current_chunk = []
-        current_size = 0
-        for booking in all_bookings:
-            booking_entries = patient_data['bookings'][booking]
-            booking_prompt = generate_analysis_prompt(patient_data, [booking])
-            token_count = estimate_tokens(booking_prompt)
-            if current_size + token_count > MAX_TOKENS:
-                if current_chunk:
-                    chunks.append(current_chunk)
-                current_chunk = [booking]
-                current_size = token_count
-            else:
-                current_chunk.append(booking)
-                current_size += token_count
-        if current_chunk:
-            chunks.append(current_chunk)
         chunk_responses = []
         for chunk in chunks:
-            prompt = generate_analysis_prompt(patient_data, chunk) + "\n\n" + "\n".join([
-                "**Please analyze this part of the patient history.**",
-                "Focus on identifying patterns, issues, and possible missed opportunities."
-            ])
             chunk_responses.append(analyze_with_agent(agent, prompt))
-        final_prompt = "\n\n".join(chunk_responses) + "\n\nSummarize the key insights, missed diagnoses, medication issues, inconsistencies and follow-up recommendations in a clear and structured way."
         final_response = analyze_with_agent(agent, final_prompt)
-        full_report = f"# \U0001f9e0 Full Patient History Analysis\n\n{final_response}"
         report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
         with open(report_path, 'w') as f:
             f.write(full_report)
-        return [("user", "[Excel Uploaded: Processing Analysis...]"), ("assistant", full_report)], report_path
     except Exception as e:
         raise gr.Error(f"Error: {str(e)}")

 import re
 from datetime import datetime
 import time
 # Configuration and setup
 persistent_dir = "/data/hf_cache"
     return len(text) // 3.5
+def extract_text_from_excel(file_path: str) -> str:
+    """Flatten every sheet of an Excel workbook into one newline-joined string.
+
+    Each data row becomes one line of the form ``[<sheet name>] v1 | v2 | ...``
+    with every cell rendered as text and missing cells rendered as an empty
+    string. Column labels are not included in the output.
+
+    Args:
+        file_path: Path of the workbook to read.
+
+    Returns:
+        The rows of all sheets, in workbook order, separated by newlines.
+    """
+    all_text = []
+    # The context manager releases the workbook handle even if a sheet fails to parse.
+    with pd.ExcelFile(file_path) as xls:
+        for sheet_name in xls.sheet_names:
+            df = xls.parse(sheet_name)
+            # astype(str) turns a missing cell into the literal "nan", so a
+            # fillna("") applied afterwards has nothing left to fill. Blank the
+            # cells using the null mask of the original frame instead.
+            cells = df.astype(str).where(df.notna(), "")
+            # Walk the rows as plain tuples rather than through a row-wise
+            # apply(), which for a sheet without data rows can hand back the
+            # empty frame itself (iterating that yields column labels).
+            all_text.extend(
+                f"[{sheet_name}] {' | '.join(values)}"
+                for values in cells.itertuples(index=False, name=None)
+            )
+    return "\n".join(all_text)
+def split_text_into_chunks(text: str, max_tokens: int = MAX_TOKENS) -> List[str]:
+    """Group the lines of ``text`` into chunks that fit an estimated token budget.
+
+    Lines are never split: consecutive lines are packed into a chunk until
+    adding the next one would push the running ``estimate_tokens`` total past
+    ``max_tokens``, at which point a new chunk is started. A single line that
+    is over budget on its own is kept whole as its own chunk. The newline
+    separators are not counted towards the estimate.
+
+    Args:
+        text: Newline-separated text to split.
+        max_tokens: Estimated-token budget per chunk.
+
+    Returns:
+        The chunks in input order, each with its lines re-joined by newlines.
+    """
+    chunks = []
+    current_chunk = []
+    current_tokens = 0
+    for line in text.split("\n"):
+        tokens = estimate_tokens(line)
+        if current_tokens + tokens > max_tokens:
+            # Flush only when something is pending: if the very first line is
+            # already over budget the pending chunk is still empty, and
+            # flushing it would emit an empty string as a chunk.
+            if current_chunk:
+                chunks.append("\n".join(current_chunk))
+            current_chunk = [line]
+            current_tokens = tokens
+        else:
+            current_chunk.append(line)
+            current_tokens += tokens
+    if current_chunk:
+        chunks.append("\n".join(current_chunk))
+    return chunks
+def build_prompt_from_text(chunk: str) -> str:
+    """Wrap one extracted text chunk in the clinical-review prompt.
+
+    The prompt starts with a newline, introduces the records, embeds ``chunk``
+    on its own line, and ends with the list of requested analysis sections
+    followed by a trailing newline.
+
+    Args:
+        chunk: Extracted record text to embed in the prompt.
+
+    Returns:
+        The complete prompt string.
+    """
+    # The empty first and last entries produce the leading and trailing newline.
+    template = "\n".join((
+        "",
+        "### Unstructured Clinical Records",
+        "You are reviewing unstructured, mixed-format clinical documentation from various forms, tables, and sheets.",
+        "**Objective:** Identify patterns, missed diagnoses, inconsistencies, and follow-up gaps.",
+        "Here is the extracted content chunk:",
+        "{}",
+        "Please analyze the above and provide:",
+        "- Diagnostic Patterns",
+        "- Medication Issues",
+        "- Missed Opportunities",
+        "- Inconsistencies",
+        "- Follow-up Recommendations",
+        "",
+    ))
+    return template.format(chunk)
 def init_agent():
         raise gr.Error("Please upload a file")
     try:
+        extracted_text = extract_text_from_excel(file.name)
+        chunks = split_text_into_chunks(extracted_text)
         chunk_responses = []
         for chunk in chunks:
+            prompt = build_prompt_from_text(chunk)
             chunk_responses.append(analyze_with_agent(agent, prompt))
+        final_prompt = "\n\n".join(chunk_responses) + "\n\nSummarize the key findings above."
         final_response = analyze_with_agent(agent, final_prompt)
+        full_report = f"# \U0001f9e0 Final Patient Report\n\n{final_response}"
         report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
         with open(report_path, 'w') as f:
             f.write(full_report)
+        return [("user", f"[Excel Uploaded: {file.name}]"), ("assistant", full_report)], report_path
     except Exception as e:
         raise gr.Error(f"Error: {str(e)}")