CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed 16 days ago

Commit

a1a096d

verified ·

1 Parent(s): c5da27e

Update app.py

Browse files

Files changed (1) hide show

app.py +150 -198

app.py CHANGED Viewed

@@ -1,27 +1,26 @@
-import sys
-import os
 import pandas as pd
-import json
-import gradio as gr
-from typing import List, Tuple, Dict, Any, Union
-import hashlib
-import shutil
-import re
 from datetime import datetime
-import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
-# Configuration and setup
-persistent_dir = "/data/hf_cache"
-os.makedirs(persistent_dir, exist_ok=True)
 model_cache_dir = os.path.join(persistent_dir, "txagent_models")
 tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
 file_cache_dir = os.path.join(persistent_dir, "cache")
 report_dir = os.path.join(persistent_dir, "reports")
-for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
-    os.makedirs(directory, exist_ok=True)
 os.environ["HF_HOME"] = model_cache_dir
 os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
@@ -32,62 +31,47 @@ sys.path.insert(0, src_path)
 from txagent.txagent import TxAgent
-MAX_MODEL_TOKENS = 32768
-MAX_CHUNK_TOKENS = 8192
-MAX_NEW_TOKENS = 2048
-PROMPT_OVERHEAD = 500
 def clean_response(text: str) -> str:
-    try:
-        text = text.encode('utf-8', 'surrogatepass').decode('utf-8')
-    except UnicodeError:
-        text = text.encode('utf-8', 'replace').decode('utf-8')
     text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text)
     text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
     return text.strip()
-def estimate_tokens(text: str) -> int:
-    return len(text) // 3.5 + 1
-def extract_text_from_excel(file_path: str) -> str:
     all_text = []
     try:
-        xls = pd.ExcelFile(file_path)
-        for sheet_name in xls.sheet_names:
-            df = xls.parse(sheet_name)
-            df = df.astype(str).fillna("")
             rows = df.apply(lambda row: " | ".join(row), axis=1)
-            sheet_text = [f"[{sheet_name}] {line}" for line in rows]
-            all_text.extend(sheet_text)
     except Exception as e:
-        raise ValueError(f"Failed to extract text from Excel file: {str(e)}")
     return "\n".join(all_text)
-def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
-    effective_max_tokens = max_tokens - PROMPT_OVERHEAD
-    if effective_max_tokens <= 0:
-        raise ValueError("Effective max tokens must be positive.")
-    lines = text.split("\n")
-    chunks, current_chunk, current_tokens = [], [], 0
-    for line in lines:
-        line_tokens = estimate_tokens(line)
-        if current_tokens + line_tokens > effective_max_tokens:
-            if current_chunk:
-                chunks.append("\n".join(current_chunk))
-            current_chunk, current_tokens = [line], line_tokens
         else:
-            current_chunk.append(line)
-            current_tokens += line_tokens
-    if current_chunk:
-        chunks.append("\n".join(current_chunk))
     return chunks
-def build_prompt_from_text(chunk: str) -> str:
-    return f"""
-### Unstructured Clinical Records
-Analyze the following clinical notes and provide a detailed, concise summary focusing on:
 - Diagnostic Patterns
 - Medication Issues
 - Missed Opportunities
@@ -99,179 +83,147 @@ Analyze the following clinical notes and provide a detailed, concise summary foc
 {chunk}
 ---
-Respond in well-structured bullet points with medical reasoning.
-"""
-def init_agent():
-    default_tool_path = os.path.abspath("data/new_tool.json")
-    target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
-    if not os.path.exists(target_tool_path):
-        shutil.copy(default_tool_path, target_tool_path)
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
-        tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
         enable_checker=True,
         step_rag_num=4,
-        seed=100,
-        additional_default_tools=[]
     )
     agent.init_model()
     return agent
-def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
-    messages = chatbot_state if chatbot_state else []
-    report_path = None
-    if file is None or not hasattr(file, "name"):
-        messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file before analyzing."})
-        return messages, report_path
-    try:
-        messages.append({"role": "user", "content": f"Processing Excel file: {os.path.basename(file.name)}"})
-        extracted_text = extract_text_from_excel(file.name)
-        chunks = split_text_into_chunks(extracted_text)
-        chunk_responses = [None] * len(chunks)
-        def analyze_chunk(index: int, chunk: str) -> Tuple[int, str]:
-            prompt = build_prompt_from_text(chunk)
-            prompt_tokens = estimate_tokens(prompt)
-            if prompt_tokens > MAX_MODEL_TOKENS:
-                return index, f"❌ Chunk {index+1} prompt too long. Skipping..."
             response = ""
-            try:
-                for result in agent.run_gradio_chat(
-                    message=prompt,
-                    history=[],
-                    temperature=0.2,
-                    max_new_tokens=MAX_NEW_TOKENS,
-                    max_token=MAX_MODEL_TOKENS,
-                    call_agent=False,
-                    conversation=[],
-                ):
-                    if isinstance(result, str):
-                        response += result
-                    elif isinstance(result, list):
-                        for r in result:
-                            if hasattr(r, "content"):
-                                response += r.content
-                    elif hasattr(result, "content"):
-                        response += result.content
-            except Exception as e:
-                return index, f"❌ Error analyzing chunk {index+1}: {str(e)}"
-            return index, clean_response(response)
-        with ThreadPoolExecutor(max_workers=1) as executor:
-            futures = [executor.submit(analyze_chunk, i, chunk) for i, chunk in enumerate(chunks)]
-            for future in as_completed(futures):
-                i, result = future.result()
-                chunk_responses[i] = result
-                if result.startswith("❌"):
-                    messages.append({"role": "assistant", "content": result})
-        valid_responses = [res for res in chunk_responses if not res.startswith("❌")]
-        if not valid_responses:
-            messages.append({"role": "assistant", "content": "❌ No valid chunk responses to summarize."})
-            return messages, report_path
-        summary = "\n\n".join(valid_responses)
-        final_prompt = f"Provide a structured, consolidated clinical analysis from these results:\n\n{summary}"
-        messages.append({"role": "assistant", "content": "📊 Generating final report..."})
-        final_report_text = ""
-        for result in agent.run_gradio_chat(
-            message=final_prompt,
-            history=[],
-            temperature=0.2,
-            max_new_tokens=MAX_NEW_TOKENS,
-            max_token=MAX_MODEL_TOKENS,
-            call_agent=False,
-            conversation=[],
-        ):
-            if isinstance(result, str):
-                final_report_text += result
-            elif isinstance(result, list):
-                for r in result:
-                    if hasattr(r, "content"):
-                        final_report_text += r.content
-            elif hasattr(result, "content"):
-                final_report_text += result.content
-        cleaned = clean_response(final_report_text)
         report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
-        with open(report_path, 'w') as f:
-            f.write(f"# 🧠 Final Patient Report\n\n{cleaned}")
-        messages.append({"role": "assistant", "content": f"📊 Final Report:\n\n{cleaned}"})
-        messages.append({"role": "assistant", "content": f"✅ Report generated and saved: {os.path.basename(report_path)}"})
     except Exception as e:
-        messages.append({"role": "assistant", "content": f"❌ Error processing file: {str(e)}"})
-    return messages, report_path
 def create_ui(agent):
-    with gr.Blocks(css="""
-        html, body, .gradio-container {
-            height: 100vh;
-            width: 100vw;
-            padding: 0;
-            margin: 0;
-            font-family: 'Inter', sans-serif;
-            background: #ffffff;
-        }
-        .gr-button.primary {
-            background: #1e88e5;
-            color: #fff;
-            border: none;
-            border-radius: 6px;
-            font-weight: 600;
-        }
-        .gr-button.primary:hover {
-            background: #1565c0;
-        }
-        .gr-chatbot {
-            border: 1px solid #e0e0e0;
-            background: #f9f9f9;
-            border-radius: 10px;
-            padding: 1rem;
-            font-size: 15px;
-        }
-        .gr-markdown, .gr-file-upload {
-            background: #ffffff;
-            border-radius: 8px;
-            box-shadow: 0 1px 3px rgba(0,0,0,0.08);
-        }
-    """) as demo:
-        gr.Markdown("""
-        <h2 style='color:#1e88e5'>🩺 Patient History AI Assistant</h2>
-        <p>Upload a clinical Excel file and receive an advanced diagnostic summary.</p>
-        """)
         with gr.Row():
             with gr.Column(scale=3):
-                chatbot = gr.Chatbot(label="Clinical Assistant", height=700, type="messages")
             with gr.Column(scale=1):
-                file_upload = gr.File(label="Upload Excel File", file_types=[".xlsx"])
-                analyze_btn = gr.Button("🧠 Analyze", variant="primary")
-                report_output = gr.File(label="Download Report", visible=False, interactive=False)
-        chatbot_state = gr.State(value=[])
-        def update_ui(file, current_state):
-            messages, report_path = process_final_report(agent, file, current_state)
-            return messages, gr.update(visible=report_path is not None, value=report_path), messages
-        analyze_btn.click(fn=update_ui, inputs=[file_upload, chatbot_state], outputs=[chatbot, report_output, chatbot_state])
     return demo
 if __name__ == "__main__":
     try:
         agent = init_agent()
-        demo = create_ui(agent)
-        demo.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)
-    except Exception as e:
-        print(f"Error: {str(e)}")
         sys.exit(1)

+import sys, os, json, shutil, re, time, gc, hashlib
 import pandas as pd
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import List, Tuple, Dict, Union
+import gradio as gr
+# Constants
+MAX_MODEL_TOKENS = 131072
+MAX_NEW_TOKENS = 4096
+MAX_CHUNK_TOKENS = 8192
+PROMPT_OVERHEAD = 300
+# Paths
+persistent_dir = "/data/hf_cache"
 model_cache_dir = os.path.join(persistent_dir, "txagent_models")
 tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
 file_cache_dir = os.path.join(persistent_dir, "cache")
 report_dir = os.path.join(persistent_dir, "reports")
+for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
+    os.makedirs(d, exist_ok=True)
 os.environ["HF_HOME"] = model_cache_dir
 os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
 from txagent.txagent import TxAgent
+def estimate_tokens(text: str) -> int:
+    return len(text) // 4 + 1
 def clean_response(text: str) -> str:
     text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text)
     text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
     return text.strip()
+def extract_text_from_excel(path: str) -> str:
     all_text = []
     try:
+        xls = pd.ExcelFile(path)
+        for sheet in xls.sheet_names:
+            df = xls.parse(sheet).astype(str).fillna("")
             rows = df.apply(lambda row: " | ".join(row), axis=1)
+            all_text += [f"[{sheet}] {line}" for line in rows]
     except Exception as e:
+        raise ValueError(f"Error reading Excel file: {str(e)}")
     return "\n".join(all_text)
+def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
+    effective_limit = max_tokens - PROMPT_OVERHEAD
+    chunks, current, current_tokens = [], [], 0
+    for line in text.split("\n"):
+        tokens = estimate_tokens(line)
+        if current_tokens + tokens > effective_limit:
+            if current:
+                chunks.append("\n".join(current))
+            current, current_tokens = [line], tokens
         else:
+            current.append(line)
+            current_tokens += tokens
+    if current:
+        chunks.append("\n".join(current))
     return chunks
+def build_prompt(chunk: str) -> str:
+    return f"""### Unstructured Clinical Records
+Analyze the clinical notes below and summarize with:
 - Diagnostic Patterns
 - Medication Issues
 - Missed Opportunities
 {chunk}
 ---
+Respond concisely in bullet points with clinical reasoning."""
+def init_agent() -> TxAgent:
+    tool_path = os.path.join(tool_cache_dir, "new_tool.json")
+    if not os.path.exists(tool_path):
+        shutil.copy(os.path.abspath("data/new_tool.json"), tool_path)
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
+        tool_files_dict={"new_tool": tool_path},
         force_finish=True,
         enable_checker=True,
         step_rag_num=4,
+        seed=100
     )
     agent.init_model()
     return agent
+def analyze_chunks_parallel(agent, chunks: List[str]) -> List[str]:
+    results = [None] * len(chunks)
+    def analyze(i, chunk):
+        prompt = build_prompt(chunk)
+        try:
+            if estimate_tokens(prompt) > MAX_MODEL_TOKENS:
+                return i, f"❌ Chunk {i+1} too long. Skipped."
             response = ""
+            for r in agent.run_gradio_chat(
+                message=prompt,
+                history=[],
+                temperature=0.2,
+                max_new_tokens=MAX_NEW_TOKENS,
+                max_token=MAX_MODEL_TOKENS,
+                call_agent=False,
+                conversation=[]
+            ):
+                if isinstance(r, str):
+                    response += r
+                elif isinstance(r, list):
+                    for m in r:
+                        if hasattr(m, "content"):
+                            response += m.content
+                elif hasattr(r, "content"):
+                    response += r.content
+            gc.collect()
+            return i, clean_response(response)
+        except Exception as e:
+            return i, f"❌ Error in chunk {i+1}: {str(e)}"
+    with ThreadPoolExecutor(max_workers=4) as executor:
+        futures = [executor.submit(analyze, i, chunk) for i, chunk in enumerate(chunks)]
+        for future in as_completed(futures):
+            i, res = future.result()
+            results[i] = res
+    return results
+def generate_final_summary(agent, combined: str) -> str:
+    final_prompt = f"""Provide a structured medical report based on the following summaries:
+{combined}
+Respond in detailed medical bullet points."""
+    full_report = ""
+    for r in agent.run_gradio_chat(
+        message=final_prompt,
+        history=[],
+        temperature=0.2,
+        max_new_tokens=MAX_NEW_TOKENS,
+        max_token=MAX_MODEL_TOKENS,
+        call_agent=False,
+        conversation=[]
+    ):
+        if isinstance(r, str):
+            full_report += r
+        elif isinstance(r, list):
+            for m in r:
+                if hasattr(m, "content"):
+                    full_report += m.content
+        elif hasattr(r, "content"):
+            full_report += r.content
+    return clean_response(full_report)
+def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
+    if not file or not hasattr(file, "name"):
+        messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file."})
+        return messages, None
+    messages.append({"role": "user", "content": f"📂 Processing file: {os.path.basename(file.name)}"})
+    try:
+        extracted = extract_text_from_excel(file.name)
+        chunks = split_text(extracted)
+        messages.append({"role": "assistant", "content": f"🔍 Split into {len(chunks)} chunks. Analyzing..."})
+        chunk_results = analyze_chunks_parallel(agent, chunks)
+        valid = [res for res in chunk_results if not res.startswith("❌")]
+        if not valid:
+            messages.append({"role": "assistant", "content": "❌ No valid chunk outputs."})
+            return messages, None
+        summary = generate_final_summary(agent, "\n\n".join(valid))
         report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
+        with open(report_path, 'w', encoding='utf-8') as f:
+            f.write(f"# 🧠 Final Medical Report\n\n{summary}")
+        messages.append({"role": "assistant", "content": f"📊 Final Report:\n\n{summary}"})
+        messages.append({"role": "assistant", "content": f"✅ Report saved: {os.path.basename(report_path)}"})
+        return messages, report_path
     except Exception as e:
+        messages.append({"role": "assistant", "content": f"❌ Error: {str(e)}"})
+        return messages, None
 def create_ui(agent):
+    with gr.Blocks() as demo:
+        gr.Markdown("<h2 style='color:#1e88e5'>🩺 Patient AI Assistant</h2><p>Upload a clinical Excel file and receive a diagnostic summary.</p>")
         with gr.Row():
             with gr.Column(scale=3):
+                chatbot = gr.Chatbot(label="Assistant", height=700, type="messages")
             with gr.Column(scale=1):
+                upload = gr.File(label="Upload Excel", file_types=[".xlsx"])
+                analyze = gr.Button("🧠 Analyze", variant="primary")
+                download = gr.File(label="Download Report", visible=False, interactive=False)
+        state = gr.State(value=[])
+        def handle_analysis(file, chat):
+            messages, report_path = process_report(agent, file, chat)
+            return messages, gr.update(visible=bool(report_path), value=report_path), messages
+        analyze.click(fn=handle_analysis, inputs=[upload, state], outputs=[chatbot, download, state])
     return demo
 if __name__ == "__main__":
     try:
         agent = init_agent()
+        ui = create_ui(agent)
+        ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)
+    except Exception as err:
+        print(f"Startup failed: {err}")
         sys.exit(1)