Update app.py
app.py
CHANGED
@@ -1,23 +1,16 @@
  1 |   import sys
  2 |   import os
  3 | - import
  4 |   import json
  5 |   import gradio as gr
  6 | - from typing import List, Tuple
  7 |   import hashlib
  8 |   import shutil
  9 |   import re
 10 |   from datetime import datetime
 11 |   import time
 12 | - import asyncio
 13 | - import aiofiles
 14 | - import cachetools
 15 | - import logging
 16 |   import markdown
 17 | -
 18 | - # Set up logging
 19 | - logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 20 | - logger = logging.getLogger(__name__)
 21 |
 22 |   # Configuration and setup
 23 |   persistent_dir = "/data/hf_cache"
@@ -40,226 +33,185 @@ sys.path.insert(0, src_path)
 40 |
 41 |   from txagent.txagent import TxAgent
 42 |
 43 | - # Cache for processed data
 44 | - cache = cachetools.LRUCache(maxsize=100)
 45 | -
 46 |   def file_hash(path: str) -> str:
 47 | -     """Generate MD5 hash of
 48 |       with open(path, "rb") as f:
 49 |           return hashlib.md5(f.read()).hexdigest()
 50 |
 51 |   def clean_response(text: str) -> str:
 52 | -     """Clean
 53 |       try:
 54 |           text = text.encode('utf-8', 'surrogatepass').decode('utf-8')
 55 |       except UnicodeError:
 56 |           text = text.encode('utf-8', 'replace').decode('utf-8')
 57 |
 58 |       text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
 59 |       text = re.sub(r"\n{3,}", "\n\n", text)
 60 |       text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
 61 |       return text.strip()
 62 |
 63 | -
 64 | -     """
 65 | -
 66 | -
 67 | -
 68 | -
 69 | -
 70 | -
 71 | -
 72 | -
 73 | -
 74 | -
 75 | -
 76 | -
 77 | -
 78 | -
 79 | - def generate_summary(df: pl.DataFrame) -> tuple[str, dict]:
 80 | -     """Generate summary statistics and interesting fact."""
 81 | -     symptom_counts = {}
 82 | -     for desc in df["Description"]:
 83 | -         desc = desc.lower()
 84 | -         if "chest discomfort" in desc:
 85 | -             symptom_counts["Chest Discomfort"] = symptom_counts.get("Chest Discomfort", 0) + 1
 86 | -         if "headaches" in desc:
 87 | -             symptom_counts["Headaches"] = symptom_counts.get("Headaches", 0) + 1
 88 | -         if "weight loss" in desc:
 89 | -             symptom_counts["Weight Loss"] = symptom_counts.get("Weight Loss", 0) + 1
 90 | -         if "back pain" in desc:
 91 | -             symptom_counts["Chronic Back Pain"] = symptom_counts.get("Chronic Back Pain", 0) + 1
 92 | -         if "cough" in desc:
 93 | -             symptom_counts["Persistent Cough"] = symptom_counts.get("Persistent Cough", 0) + 1
 94 |
 95 | -
 96 | -
 97 | -
 98 | -
 99 | -
100 | -
101 | -
102 |
103 | -
104 | -
105 | -
106 | -         f"
107 | -
108 | -         f"### Interesting Fact\n{interesting_fact}\n"
109 |       )
110 | -     return summary, symptom_counts
111 | -
112 | - def prepare_aggregate_prompt(df: pl.DataFrame) -> str:
113 | -     """Prepare a single prompt for all patient data."""
114 | -     groups = df.group_by("Booking Number").agg([
115 | -         pl.col("Form Name"), pl.col("Form Item"),
116 | -         pl.col("Item Response"), pl.col("Interviewer"),
117 | -         pl.col("Interview Date"), pl.col("Description")
118 | -     ])
119 |
120 | -
121 | -
122 | -
123 | -
124 | -
125 | -
126 | -
127 | -         )
128 | -         records.append(clean_response(record))
129 |
130 | -     record_text = "\n".join(records)
131 |       prompt = f"""
132 | - Patient
133 |
134 | -
135 | -
136 |
137 | -
138 | -
139 |
140 |   ### Missed Diagnoses
141 | - -
142 |
143 | - ### Medication
144 | - -
145 |
146 | - ###
147 | - -
148 |
149 |   ### Urgent Follow-up
150 | - -
151 |   """
152 |       return prompt
153 |
154 |   def init_agent():
155 | -     """Initialize TxAgent with
156 |       default_tool_path = os.path.abspath("data/new_tool.json")
157 |       target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
158 |
159 |       if not os.path.exists(target_tool_path):
160 |           shutil.copy(default_tool_path, target_tool_path)
161 |
162 | -
163 | -
164 | -
165 | -
166 | -
167 | -
168 | -
169 | -
170 | -
171 | -
172 | -
173 | -
174 | -         return agent
175 | -     except Exception as e:
176 | -         logger.error(f"Failed to initialize TxAgent: {str(e)}")
177 | -         raise
178 |
179 | -
180 | -     """
181 | -
182 | -     report_path = os.path.join(report_dir, f"{file_hash_value}_report.md")
183 | -
184 | -     # Generate summary
185 | -     summary, symptom_counts = generate_summary(df)
186 | -
187 | -     # Prepare and run aggregated analysis
188 | -     prompt = prepare_aggregate_prompt(df)
189 | -     full_output = ""
190 | -
191 | -     try:
192 | -         chunk_output = ""
193 | -         for result in agent.run_gradio_chat(
194 | -             message=prompt,
195 | -             history=[],
196 | -             temperature=0.2,
197 | -             max_new_tokens=2048,
198 | -             max_token=8192,
199 | -             call_agent=False,
200 | -             conversation=[],
201 | -         ):
202 | -             if isinstance(result, list):
203 | -                 for r in result:
204 | -                     if hasattr(r, 'content') and r.content:
205 | -                         cleaned = clean_response(r.content)
206 | -                         chunk_output += cleaned + "\n"
207 | -             elif isinstance(result, str):
208 | -                 cleaned = clean_response(result)
209 | -                 chunk_output += cleaned + "\n"
210 | -             full_output = chunk_output.strip()
211 | -             yield full_output, None  # Stream partial results
212 | -
213 | -         # Filter out empty sections
214 | -         sections = ["Missed Diagnoses", "Medication Conflicts", "Incomplete Assessments", "Urgent Follow-up"]
215 | -         filtered_output = []
216 | -         current_section = None
217 | -         for line in full_output.split("\n"):
218 | -             if any(line.startswith(f"### {section}") for section in sections):
219 | -                 current_section = line
220 | -                 filtered_output.append(line)
221 | -             elif current_section and line.strip().startswith("-") and line.strip() != "- ...":
222 | -                 filtered_output.append(line)
223 | -
224 | -         # Compile final report
225 | -         final_output = summary + "## Clinical Findings\n\n"
226 | -         if filtered_output:
227 | -             final_output += "\n".join(filtered_output) + "\n\n"
228 | -         else:
229 | -             final_output += "No significant clinical findings identified.\n\n"
230 | -
231 | -         final_output += (
232 | -             "## Conclusion\n\n"
233 | -             "The analysis reveals significant gaps in patient care, including potential missed cardiovascular diagnoses "
234 | -             "due to inconsistent follow-up on chest discomfort and elevated vitals. Low medication adherence is a recurring "
235 | -             "issue, likely impacting treatment efficacy. Incomplete assessments, particularly missing vital signs, hinder "
236 | -             "comprehensive care. Urgent follow-up is recommended for patients with chest discomfort and elevated vitals to "
237 | -             "prevent adverse outcomes."
238 | -         )
239 | -
240 | -         # Save report
241 | -         async with aiofiles.open(report_path, "w") as f:
242 | -             await f.write(final_output)
243 | -
244 | -         logger.info(f"Report saved to {report_path}")
245 | -         yield final_output, report_path
246 | -
247 | -     except Exception as e:
248 | -         logger.error(f"Error generating report: {str(e)}")
249 | -         yield f"Error: {str(e)}", None
250 |
251 |   def create_ui(agent):
252 | -     """Create Gradio interface
253 | -     with gr.Blocks(
254 | -
255 | -         title="Clinical Oversight Assistant",
256 | -         css="""
257 | -         .gradio-container {max-width: 1000px; margin: auto; font-family: Arial, sans-serif;}
258 | -         #chatbot {border: 1px solid #e5e7eb; border-radius: 8px; padding: 10px; background: #f9fafb;}
259 | -         .markdown {white-space: pre-wrap;}
260 | -         """
261 | -     ) as demo:
262 | -         gr.Markdown("# 🏥 Clinical Oversight Assistant (Excel Optimized)")
263 |
264 |           with gr.Tabs():
265 |               with gr.TabItem("Analysis"):
@@ -268,7 +220,7 @@ def create_ui(agent):
268 |                       with gr.Column(scale=1):
269 |                           file_upload = gr.File(
270 |                               label="Upload Excel File",
271 | -                             file_types=[".xlsx"],
272 |                               file_count="single",
273 |                               interactive=True
274 |                           )
@@ -288,7 +240,7 @@ def create_ui(agent):
288 |                               height=600,
289 |                               bubble_full_width=False,
290 |                               show_copy_button=True,
291 | -
292 |                           )
293 |                           download_output = gr.File(
294 |                               label="Download Full Report",
@@ -301,65 +253,107 @@ def create_ui(agent):
301 |
302 |   1. **Upload Excel File**: Select your patient records Excel file
303 |   2. **Add Instructions** (Optional): Provide any specific analysis requests
304 | - 3. **Click Analyze**: The system will process
305 |   4. **Review Results**: Analysis appears in the chat window
306 | - 5. **Download Report**: Get a full
307 |
308 |   ### Excel File Requirements
309 |   Your Excel file must contain these columns:
310 | - - Booking Number
311 | - - Form Name
312 | - - Form Item
313 | - - Item Response
314 | - - Interview Date
315 | - - Interviewer
316 | - - Description
317 |
318 |   ### Analysis Includes
319 | - - Missed diagnoses
320 | - - Medication
321 | - -
322 | - - Urgent follow-up
323 |   """)
324 |
325 |       def format_message(role: str, content: str) -> Tuple[str, str]:
326 | -         """Format messages for the chatbot in (user, bot) format
327 |           if role == "user":
328 |               return (content, None)
329 |           else:
330 |               return (None, content)
331 |
332 | -
333 | -         """
334 |           if not file:
335 |               raise gr.Error("Please upload an Excel file first")
336 |
337 |           try:
338 | -             # Initialize chat history
339 |               new_history = chat_history + [format_message("user", message)]
340 |               new_history.append(format_message("assistant", "⏳ Processing Excel data..."))
341 |               yield new_history, None
342 |
343 | -
344 | -
345 |               file_hash_value = file_hash(file.name)
346 |
347 | -
348 | -             async for output, report_path in generate_report(agent, df, file_hash_value):
349 | -                 if output:
350 | -                     new_history[-1] = format_message("assistant", output)
351 | -                     yield new_history, report_path
352 | -                 else:
353 | -                     yield new_history, report_path
354 |
355 |           except Exception as e:
356 | -             logger.error(f"Analysis failed: {str(e)}")
357 |               new_history.append(format_message("assistant", f"❌ Error: {str(e)}"))
358 |               yield new_history, None
359 |               raise gr.Error(f"Analysis failed: {str(e)}")
360 |
361 |       def clear_chat():
362 | -         """Clear chat history and
363 |           return [], None
364 |
365 |       # Event handlers
@@ -367,15 +361,13 @@ def create_ui(agent):
367 |           analyze,
368 |           inputs=[msg_input, chatbot, file_upload],
369 |           outputs=[chatbot, download_output],
370 | -         api_name="analyze"
371 | -         queue=True
372 |       )
373 |
374 |       msg_input.submit(
375 |           analyze,
376 |           inputs=[msg_input, chatbot, file_upload],
377 | -         outputs=[chatbot, download_output]
378 | -         queue=True
379 |       )
380 |
381 |       clear_btn.click(
@@ -402,6 +394,5 @@ if __name__ == "__main__":
|
|
402 |
share=False
|
403 |
)
|
404 |
except Exception as e:
|
405 |
-
logger.error(f"Failed to launch application: {str(e)}")
|
406 |
print(f"Failed to launch application: {str(e)}")
|
407 |
sys.exit(1)
|
|
|
  1 |   import sys
  2 |   import os
  3 | + import pandas as pd
  4 |   import json
  5 |   import gradio as gr
  6 | + from typing import List, Tuple, Dict, Any
  7 |   import hashlib
  8 |   import shutil
  9 |   import re
 10 |   from datetime import datetime
 11 |   import time
 12 |   import markdown
 13 | + from collections import defaultdict
 14 |
 15 |   # Configuration and setup
 16 |   persistent_dir = "/data/hf_cache"
 33 |
 34 |   from txagent.txagent import TxAgent
 35 |
 36 |   def file_hash(path: str) -> str:
 37 | +     """Generate MD5 hash of file contents"""
 38 |       with open(path, "rb") as f:
 39 |           return hashlib.md5(f.read()).hexdigest()
 40 |
 41 |   def clean_response(text: str) -> str:
 42 | +     """Clean and normalize text output"""
 43 |       try:
 44 |           text = text.encode('utf-8', 'surrogatepass').decode('utf-8')
 45 |       except UnicodeError:
 46 |           text = text.encode('utf-8', 'replace').decode('utf-8')
 47 |
 48 | +     # Remove unwanted patterns and normalize whitespace
 49 |       text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
 50 |       text = re.sub(r"\n{3,}", "\n\n", text)
 51 |       text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
 52 |       return text.strip()
 53 |
 54 | + def extract_medical_data(df: pd.DataFrame) -> Dict[str, Any]:
 55 | +     """Extract and organize medical data from DataFrame"""
 56 | +     medical_data = defaultdict(list)
 57 | +
 58 | +     for _, row in df.iterrows():
 59 | +         record = {
 60 | +             'form_name': row.get('Form Name', ''),
 61 | +             'form_item': row.get('Form Item', ''),
 62 | +             'response': row.get('Item Response', ''),
 63 | +             'date': row.get('Interview Date', ''),
 64 | +             'interviewer': row.get('Interviewer', ''),
 65 | +             'description': row.get('Description', '')
 66 | +         }
 67 | +         medical_data[row['Booking Number']].append(record)
 68 | +
 69 | +     return medical_data
 70 |
 71 | + def identify_red_flags(records: List[Dict[str, Any]]) -> Dict[str, List[str]]:
 72 | +     """Identify potential red flags in medical records"""
 73 | +     red_flags = {
 74 | +         'symptoms': defaultdict(list),
 75 | +         'medications': defaultdict(list),
 76 | +         'diagnoses': defaultdict(list),
 77 | +         'vitals': defaultdict(list),
 78 | +         'labs': defaultdict(list)
 79 | +     }
 80 | +
 81 | +     for record in records:
 82 | +         form_name = record['form_name'].lower()
 83 | +         item = record['form_item'].lower()
 84 | +         response = record['response'].lower()
 85 | +
 86 | +         # Symptom patterns
 87 | +         if 'pain' in item or 'symptom' in form_name:
 88 | +             if 'severe' in response or 'chronic' in response:
 89 | +                 red_flags['symptoms'][item].append(response)
 90 | +
 91 | +         # Medication checks
 92 | +         elif 'medication' in form_name or 'drug' in form_name:
 93 | +             if 'interaction' in response or 'allergy' in response:
 94 | +                 red_flags['medications'][item].append(response)
 95 | +
 96 | +         # Diagnosis inconsistencies
 97 | +         elif 'diagnosis' in form_name:
 98 | +             if 'rule out' in response or 'possible' in response:
 99 | +                 red_flags['diagnoses'][item].append(response)
100 | +
101 | +         # Abnormal vitals
102 | +         elif 'vital' in form_name:
103 | +             try:
104 | +                 value = float(re.search(r'\d+\.?\d*', response).group())
105 | +                 if ('blood pressure' in item and value > 140) or \
106 | +                    ('heart rate' in item and (value < 50 or value > 100)) or \
107 | +                    ('temperature' in item and value > 38):
108 | +                     red_flags['vitals'][item].append(response)
109 | +             except:
110 | +                 pass
111 | +
112 | +         # Abnormal labs
113 | +         elif 'lab' in form_name or 'test' in form_name:
114 | +             if 'abnormal' in response or 'high' in response or 'low' in response:
115 | +                 red_flags['labs'][item].append(response)
116 | +
117 | +     return red_flags
118 |
119 | + def generate_analysis_prompt(booking: str, records: List[Dict[str, Any]], red_flags: Dict[str, Any]) -> str:
120 | +     """Generate structured prompt for analysis"""
121 | +     records_text = "\n".join(
122 | +         f"- {r['form_name']}: {r['form_item']} = {r['response']} ({r['date']} by {r['interviewer']})\n  {r['description']}"
123 | +         for r in records
124 |       )
125 |
126 | +     red_flags_text = "\n".join(
127 | +         f"### {category.capitalize()} Red Flags\n" + "\n".join(
128 | +             f"- {item}: {', '.join(responses)}"
129 | +             for item, responses in items.items()
130 | +         )
131 | +         for category, items in red_flags.items() if items
132 | +     )
133 |
134 |       prompt = f"""
135 | + **Patient Booking Number**: {booking}
136 | +
137 | + **Medical Records Summary**:
138 | + {records_text}
139 |
140 | + **Identified Red Flags**:
141 | + {red_flags_text if red_flags_text else "No obvious red flags detected"}
142 |
143 | + **Comprehensive Analysis Instructions**:
144 | + 1. Review all medical data and red flags above
145 | + 2. Identify any potential missed diagnoses based on symptoms, labs, and clinical findings
146 | + 3. Check for medication conflicts or inappropriate prescriptions
147 | + 4. Note any incomplete assessments or missing diagnostic workups
148 | + 5. Flag any urgent follow-up needs or critical findings
149 | + 6. Provide recommendations in clear, actionable terms
150 |
151 | + **Required Output Format**:
152 |   ### Missed Diagnoses
153 | + - [List any conditions that may have been overlooked based on the data]
154 |
155 | + ### Medication Issues
156 | + - [List any medication conflicts, inappropriate prescriptions, or missing medications]
157 |
158 | + ### Assessment Gaps
159 | + - [List any incomplete assessments or missing diagnostic tests]
160 |
161 |   ### Urgent Follow-up
162 | + - [List any findings requiring immediate attention]
163 | +
164 | + ### Clinical Recommendations
165 | + - [Provide specific recommendations for next steps]
166 |   """
167 |       return prompt
168 |
169 | + def parse_excel_to_prompts(file_path: str) -> List[Tuple[str, str]]:
170 | +     """Parse Excel file into analysis prompts with red flag detection"""
171 | +     try:
172 | +         xl = pd.ExcelFile(file_path)
173 | +         df = xl.parse(xl.sheet_names[0], header=0).fillna("")
174 | +         medical_data = extract_medical_data(df)
175 | +
176 | +         prompts = []
177 | +         for booking, records in medical_data.items():
178 | +             red_flags = identify_red_flags(records)
179 | +             prompt = generate_analysis_prompt(booking, records, red_flags)
180 | +             prompts.append((booking, prompt))
181 | +
182 | +         return prompts
183 | +     except Exception as e:
184 | +         raise ValueError(f"Error parsing Excel file: {str(e)}")
185 | +
186 |   def init_agent():
187 | +     """Initialize the TxAgent with appropriate settings"""
188 |       default_tool_path = os.path.abspath("data/new_tool.json")
189 |       target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
190 |
191 |       if not os.path.exists(target_tool_path):
192 |           shutil.copy(default_tool_path, target_tool_path)
193 |
194 | +     agent = TxAgent(
195 | +         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
196 | +         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
197 | +         tool_files_dict={"new_tool": target_tool_path},
198 | +         force_finish=True,
199 | +         enable_checker=True,
200 | +         step_rag_num=4,
201 | +         seed=100,
202 | +         additional_default_tools=[],
203 | +     )
204 | +     agent.init_model()
205 | +     return agent
206 |
207 | + def format_markdown(text: str) -> str:
208 | +     """Convert markdown text to HTML for better display"""
209 | +     return markdown.markdown(text, extensions=['fenced_code', 'tables'])
210 |
211 |   def create_ui(agent):
212 | +     """Create Gradio UI interface"""
213 | +     with gr.Blocks(theme=gr.themes.Soft(), title="Clinical Oversight Assistant") as demo:
214 | +         gr.Markdown("# 🏥 Clinical Oversight Assistant (Missed Diagnosis Detection)")
215 |
216 |           with gr.Tabs():
217 |               with gr.TabItem("Analysis"):
220 |                       with gr.Column(scale=1):
221 |                           file_upload = gr.File(
222 |                               label="Upload Excel File",
223 | +                             file_types=[".xlsx"],
224 |                               file_count="single",
225 |                               interactive=True
226 |                           )
240 |                               height=600,
241 |                               bubble_full_width=False,
242 |                               show_copy_button=True,
243 | +                             render_markdown=True
244 |                           )
245 |                           download_output = gr.File(
246 |                               label="Download Full Report",
253 |
254 |   1. **Upload Excel File**: Select your patient records Excel file
255 |   2. **Add Instructions** (Optional): Provide any specific analysis requests
256 | + 3. **Click Analyze**: The system will process each patient record
257 |   4. **Review Results**: Analysis appears in the chat window
258 | + 5. **Download Report**: Get a full text report of all findings
259 |
260 |   ### Excel File Requirements
261 |   Your Excel file must contain these columns:
262 | + - Booking Number (patient identifier)
263 | + - Form Name (type of medical form)
264 | + - Form Item (specific field name)
265 | + - Item Response (patient response or value)
266 | + - Interview Date (date of recording)
267 | + - Interviewer (who recorded the data)
268 | + - Description (additional notes)
269 |
270 |   ### Analysis Includes
271 | + - **Missed diagnoses**: Potential conditions not identified
272 | + - **Medication issues**: Conflicts, side effects, inappropriate prescriptions
273 | + - **Assessment gaps**: Missing tests or incomplete evaluations
274 | + - **Urgent follow-up**: Critical findings needing immediate attention
275 | + - **Clinical recommendations**: Actionable next steps
276 |   """)
277 |
278 |       def format_message(role: str, content: str) -> Tuple[str, str]:
279 | +         """Format messages for the chatbot in (user, bot) format"""
280 |           if role == "user":
281 |               return (content, None)
282 |           else:
283 |               return (None, content)
284 |
285 | +     def analyze(message: str, chat_history: List[Tuple[str, str]], file) -> Tuple[List[Tuple[str, str]], str]:
286 | +         """Main analysis function"""
287 |           if not file:
288 |               raise gr.Error("Please upload an Excel file first")
289 |
290 |           try:
291 | +             # Initialize chat history with user message
292 |               new_history = chat_history + [format_message("user", message)]
293 |               new_history.append(format_message("assistant", "⏳ Processing Excel data..."))
294 |               yield new_history, None
295 |
296 | +             prompts = parse_excel_to_prompts(file.name)
297 | +             full_output = ""
298 | +
299 | +             for idx, (booking, prompt) in enumerate(prompts, 1):
300 | +                 chunk_output = ""
301 | +                 try:
302 | +                     for result in agent.run_gradio_chat(
303 | +                         message=prompt,
304 | +                         history=[],
305 | +                         temperature=0.2,
306 | +                         max_new_tokens=1024,
307 | +                         max_token=4096,
308 | +                         call_agent=False,
309 | +                         conversation=[],
310 | +                     ):
311 | +                         if isinstance(result, list):
312 | +                             for r in result:
313 | +                                 if hasattr(r, 'content') and r.content:
314 | +                                     cleaned = clean_response(r.content)
315 | +                                     chunk_output += cleaned + "\n"
316 | +                         elif isinstance(result, str):
317 | +                             cleaned = clean_response(result)
318 | +                             chunk_output += cleaned + "\n"
319 | +
320 | +                     if chunk_output:
321 | +                         output = f"## Patient Booking: {booking}\n{chunk_output.strip()}\n"
322 | +                         new_history[-1] = format_message("assistant", output)
323 | +                         yield new_history, None
324 | +
325 | +                 except Exception as e:
326 | +                     error_msg = f"⚠️ Error processing booking {booking}: {str(e)}"
327 | +                     new_history.append(format_message("assistant", error_msg))
328 | +                     yield new_history, None
329 | +                     continue
330 | +
331 | +                 if chunk_output:
332 | +                     output = f"## Patient Booking: {booking}\n{chunk_output.strip()}\n"
333 | +                     new_history.append(format_message("assistant", output))
334 | +                     full_output += output + "\n"
335 | +                     yield new_history, None
336 | +
337 | +             # Save report
338 |               file_hash_value = file_hash(file.name)
339 | +             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
340 | +             report_path = os.path.join(report_dir, f"{file_hash_value}_{timestamp}_report.md")
341 | +
342 | +             with open(report_path, "w", encoding="utf-8") as f:
343 | +                 f.write("# Clinical Oversight Analysis Report\n\n")
344 | +                 f.write(f"**Generated on**: {timestamp}\n\n")
345 | +                 f.write(f"**Source file**: {file.name}\n\n")
346 | +                 f.write(full_output)
347 |
348 | +             yield new_history, report_path if os.path.exists(report_path) else None
349 |
350 |           except Exception as e:
351 |               new_history.append(format_message("assistant", f"❌ Error: {str(e)}"))
352 |               yield new_history, None
353 |               raise gr.Error(f"Analysis failed: {str(e)}")
354 |
355 |       def clear_chat():
356 | +         """Clear chat history and outputs"""
357 |           return [], None
358 |
359 |       # Event handlers
361 |           analyze,
362 |           inputs=[msg_input, chatbot, file_upload],
363 |           outputs=[chatbot, download_output],
364 | +         api_name="analyze"
365 |       )
366 |
367 |       msg_input.submit(
368 |           analyze,
369 |           inputs=[msg_input, chatbot, file_upload],
370 | +         outputs=[chatbot, download_output]
371 |       )
372 |
373 |       clear_btn.click(
394 |               share=False
395 |           )
396 |       except Exception as e:
397 |           print(f"Failed to launch application: {str(e)}")
398 |           sys.exit(1)
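For context, below is a minimal, hypothetical smoke test of the helper pipeline this commit introduces (extract_medical_data, identify_red_flags, generate_analysis_prompt). It is a sketch only: it assumes app.py imports cleanly in your environment (its module-level /data/hf_cache setup and the txagent import must resolve), and the two sample rows are made up, using the column names the UI text above lists as required.

# Hypothetical usage sketch for the new helpers (not part of the commit).
import pandas as pd

from app import extract_medical_data, identify_red_flags, generate_analysis_prompt

# Two toy rows using the required columns documented in the UI.
df = pd.DataFrame([
    {
        "Booking Number": "BK-001",
        "Form Name": "Vitals",
        "Form Item": "Blood Pressure",
        "Item Response": "152/90",
        "Interview Date": "2024-01-10",
        "Interviewer": "Nurse A",
        "Description": "Patient reports chest discomfort",
    },
    {
        "Booking Number": "BK-001",
        "Form Name": "Symptom Review",
        "Form Item": "Chest pain",
        "Item Response": "severe, worse on exertion",
        "Interview Date": "2024-01-10",
        "Interviewer": "Nurse A",
        "Description": "Needs follow-up",
    },
])

medical_data = extract_medical_data(df)        # {"BK-001": [record, record]}
for booking, records in medical_data.items():
    red_flags = identify_red_flags(records)    # flags the 152 systolic value and the "severe" pain response
    prompt = generate_analysis_prompt(booking, records, red_flags)
    print(prompt[:400])                        # this is the prompt analyze() feeds to agent.run_gradio_chat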