CPS-Test-Mobile

Paused

App Files Files Community

Ali2206 commited on 22 days ago

Commit

7e55ae2

verified ·

1 Parent(s): 0152260

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -125

app.py CHANGED Viewed

@@ -1,18 +1,17 @@
-import sys, os, json, shutil, re, time, gc
 import pandas as pd
-from datetime import datetime
-from typing import List, Tuple, Dict, Union
 import gradio as gr
 from concurrent.futures import ThreadPoolExecutor
-# Constants
-MAX_MODEL_TOKENS = 131072
-MAX_NEW_TOKENS = 4096
-MAX_CHUNK_TOKENS = 8192
-PROMPT_OVERHEAD = 300
-BATCH_SIZE = 2  # NEW: batch 2 prompts together for faster processing
-# Paths
 persistent_dir = "/data/hf_cache"
 model_cache_dir = os.path.join(persistent_dir, "txagent_models")
 tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
@@ -25,59 +24,66 @@ for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
 os.environ["HF_HOME"] = model_cache_dir
 os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_path = os.path.abspath(os.path.join(current_dir, "src"))
 sys.path.insert(0, src_path)
 from txagent.txagent import TxAgent
 def estimate_tokens(text: str) -> int:
     return len(text) // 4 + 1
-def clean_response(text: str) -> str:
-    text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
-    text = re.sub(r"\n{3,}", "\n\n", text)
-    text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
-    return text.strip()
 def extract_text_from_excel(path: str) -> str:
     all_text = []
     xls = pd.ExcelFile(path)
-    for sheet_name in xls.sheet_names:
         try:
-            df = xls.parse(sheet_name).astype(str).fillna("")
         except Exception:
             continue
-        for idx, row in df.iterrows():
             non_empty = [cell.strip() for cell in row if cell.strip()]
             if len(non_empty) >= 2:
-                text_line = " | ".join(non_empty)
-                if len(text_line) > 15:
-                    all_text.append(f"[{sheet_name}] {text_line}")
     return "\n".join(all_text)
-def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
     effective_limit = max_tokens - PROMPT_OVERHEAD
-    chunks, current, current_tokens = [], [], 0
     for line in text.split("\n"):
-        tokens = estimate_tokens(line)
-        if current_tokens + tokens > effective_limit:
             if current:
                 chunks.append("\n".join(current))
-            current, current_tokens = [line], tokens
         else:
             current.append(line)
-            current_tokens += tokens
     if current:
         chunks.append("\n".join(current))
     return chunks
-def batch_chunks(chunks: List[str], batch_size: int = 2) -> List[List[str]]:
-    return [chunks[i:i+batch_size] for i in range(0, len(chunks), batch_size)]
 def build_prompt(chunk: str) -> str:
     return f"""### Unstructured Clinical Records\n\nAnalyze the clinical notes below and summarize with:\n- Diagnostic Patterns\n- Medication Issues\n- Missed Opportunities\n- Inconsistencies\n- Follow-up Recommendations\n\n---\n\n{chunk}\n\n---\nRespond concisely in bullet points with clinical reasoning."""
 def init_agent() -> TxAgent:
     tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(tool_path):
@@ -97,9 +103,9 @@ def init_agent() -> TxAgent:
 def analyze_batches(agent, batches: List[List[str]]) -> List[str]:
     results = []
     for batch in batches:
-        prompt = "\n\n".join(build_prompt(chunk) for chunk in batch)
-        response = ""
         try:
             for r in agent.run_gradio_chat(
                 message=prompt,
                 history=[],
@@ -119,14 +125,14 @@ def analyze_batches(agent, batches: List[List[str]]) -> List[str]:
                     response += r.content
             results.append(clean_response(response))
         except Exception as e:
-            results.append(f"❌ Error in batch: {str(e)}")
-    torch.cuda.empty_cache()
-    gc.collect()
     return results
 def generate_final_summary(agent, combined: str) -> str:
-    final_prompt = f"""Provide a structured medical report based on the following summaries:\n\n{combined}\n\nRespond in detailed medical bullet points."""
-    full_report = ""
     for r in agent.run_gradio_chat(
         message=final_prompt,
         history=[],
@@ -137,41 +143,43 @@ def generate_final_summary(agent, combined: str) -> str:
         conversation=[]
     ):
         if isinstance(r, str):
-            full_report += r
         elif isinstance(r, list):
             for m in r:
                 if hasattr(m, "content"):
-                    full_report += m.content
         elif hasattr(r, "content"):
-            full_report += r.content
-    return clean_response(full_report)
-def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
     if not file or not hasattr(file, "name"):
         messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file."})
         return messages, None
-    messages.append({"role": "user", "content": f"📂 Processing file: {os.path.basename(file.name)}"})
     try:
-        extracted = extract_text_from_excel(file.name)
-        chunks = split_text(extracted)
-        batches = batch_chunks(chunks, batch_size=BATCH_SIZE)
         messages.append({"role": "assistant", "content": f"🔍 Split into {len(batches)} batches. Analyzing..."})
-        batch_results = analyze_batches(agent, batches)
-        valid = [res for res in batch_results if not res.startswith("❌")]
-        if not valid:
             messages.append({"role": "assistant", "content": "❌ No valid batch outputs."})
             return messages, None
-        summary = generate_final_summary(agent, "\n\n".join(valid))
         report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
-        with open(report_path, 'w', encoding='utf-8') as f:
             f.write(f"# 🧠 Final Medical Report\n\n{summary}")
         messages.append({"role": "assistant", "content": f"📊 Final Report:\n\n{summary}"})
-        messages.append({"role": "assistant", "content": f"✅ Report saved: {os.path.basename(report_path)}"})
         return messages, report_path
     except Exception as e:
@@ -180,84 +188,27 @@ def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Di
 def create_ui(agent):
     with gr.Blocks(css="""
-    html, body, .gradio-container {
-        background-color: #0e1621;
-        color: #e0e0e0;
-        font-family: 'Inter', sans-serif;
-        padding: 0;
-        margin: 0;
-    }
-    h2, h3, h4 {
-        color: #89b4fa;
-        font-weight: 600;
-    }
-    button.gr-button-primary {
-        background-color: #007bff !important;
-        color: white !important;
-        font-weight: bold;
-        border-radius: 8px !important;
-        padding: 0.65em 1.2em !important;
-        font-size: 16px !important;
-        border: none;
-    }
-    button.gr-button-primary:hover {
-        background-color: #0056b3 !important;
-    }
-    .gr-chatbot, .gr-markdown, .gr-file-upload {
-        border-radius: 16px;
-        background-color: #1b2533;
-        border: 1px solid #2a2f45;
-        padding: 10px;
-    }
-    .gr-chatbot .message {
-        font-size: 16px;
-        padding: 12px 16px;
-        border-radius: 18px;
-        margin: 8px 0;
-        max-width: 80%;
-        word-break: break-word;
-        white-space: pre-wrap;
-    }
-    .gr-chatbot .message.user {
-        background-color: #334155;
-        align-self: flex-end;
-        margin-left: auto;
-    }
-    .gr-chatbot .message.assistant {
-        background-color: #1e293b;
-        align-self: flex-start;
-        margin-right: auto;
-    }
-    .gr-file-upload .file-name {
-        font-size: 14px;
-        color: #89b4fa;
-    }
     """) as demo:
-        gr.Markdown("""
-        <h2>📄 CPS: Clinical Patient Support System</h2>
-        <p>CPS Assistant helps you analyze and summarize unstructured medical files using AI.</p>
-        """)
-        with gr.Column():
-            chatbot = gr.Chatbot(label="CPS Assistant", height=700, type="messages")
-            upload = gr.File(label="Upload Medical File", file_types=[".xlsx"])
-            analyze = gr.Button("🧠 Analyze", variant="primary")
-            download = gr.File(label="Download Report", visible=False, interactive=False)
-        state = gr.State(value=[])
-        def handle_analysis(file, chat):
-            messages, report_path = process_report(agent, file, chat)
             return messages, gr.update(visible=bool(report_path), value=report_path), messages
-        analyze.click(fn=handle_analysis, inputs=[upload, state], outputs=[chatbot, download, state])
     return demo
 if __name__ == "__main__":
-    try:
-        agent = init_agent()
-        ui = create_ui(agent)
-        ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)
-    except Exception as err:
-        print(f"Startup failed: {err}")
-        sys.exit(1)

+import sys
+import os
 import pandas as pd
+import json
 import gradio as gr
+from datetime import datetime
+import shutil
+import gc
+import re
+import torch
+from typing import List, Tuple, Dict
 from concurrent.futures import ThreadPoolExecutor
+# Directories
 persistent_dir = "/data/hf_cache"
 model_cache_dir = os.path.join(persistent_dir, "txagent_models")
 tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
 os.environ["HF_HOME"] = model_cache_dir
 os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
+# Paths
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_path = os.path.abspath(os.path.join(current_dir, "src"))
 sys.path.insert(0, src_path)
 from txagent.txagent import TxAgent
+# Constants
+MAX_MODEL_TOKENS = 131072
+MAX_NEW_TOKENS = 4096
+MAX_CHUNK_TOKENS = 8192
+PROMPT_OVERHEAD = 300
+BATCH_SIZE = 2
 def estimate_tokens(text: str) -> int:
     return len(text) // 4 + 1
 def extract_text_from_excel(path: str) -> str:
     all_text = []
     xls = pd.ExcelFile(path)
+    for sheet in xls.sheet_names:
         try:
+            df = xls.parse(sheet).astype(str).fillna("")
         except Exception:
             continue
+        for _, row in df.iterrows():
             non_empty = [cell.strip() for cell in row if cell.strip()]
             if len(non_empty) >= 2:
+                line = " | ".join(non_empty)
+                if len(line) > 15:
+                    all_text.append(f"[{sheet}] {line}")
     return "\n".join(all_text)
+def split_text(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
     effective_limit = max_tokens - PROMPT_OVERHEAD
+    chunks, current, tokens = [], [], 0
     for line in text.split("\n"):
+        tks = estimate_tokens(line)
+        if tokens + tks > effective_limit:
             if current:
                 chunks.append("\n".join(current))
+            current, tokens = [line], tks
         else:
             current.append(line)
+            tokens += tks
     if current:
         chunks.append("\n".join(current))
     return chunks
+def batch_chunks(chunks: List[str], batch_size: int = BATCH_SIZE) -> List[List[str]]:
+    return [chunks[i:i + batch_size] for i in range(0, len(chunks), batch_size)]
 def build_prompt(chunk: str) -> str:
     return f"""### Unstructured Clinical Records\n\nAnalyze the clinical notes below and summarize with:\n- Diagnostic Patterns\n- Medication Issues\n- Missed Opportunities\n- Inconsistencies\n- Follow-up Recommendations\n\n---\n\n{chunk}\n\n---\nRespond concisely in bullet points with clinical reasoning."""
+def clean_response(text: str) -> str:
+    text = re.sub(r"\[.*?\]", "", text, flags=re.DOTALL)
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    return text.strip()
 def init_agent() -> TxAgent:
     tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(tool_path):
 def analyze_batches(agent, batches: List[List[str]]) -> List[str]:
     results = []
     for batch in batches:
+        prompt = "\n\n".join(build_prompt(c) for c in batch)
         try:
+            response = ""
             for r in agent.run_gradio_chat(
                 message=prompt,
                 history=[],
                     response += r.content
             results.append(clean_response(response))
         except Exception as e:
+            results.append(f"❌ Error: {str(e)}")
+        torch.cuda.empty_cache()
+        gc.collect()
     return results
 def generate_final_summary(agent, combined: str) -> str:
+    final_prompt = f"""Summarize the following clinical summaries into a final medical report:\n\n{combined}"""
+    response = ""
     for r in agent.run_gradio_chat(
         message=final_prompt,
         history=[],
         conversation=[]
     ):
         if isinstance(r, str):
+            response += r
         elif isinstance(r, list):
             for m in r:
                 if hasattr(m, "content"):
+                    response += m.content
         elif hasattr(r, "content"):
+            response += r.content
+    return clean_response(response)
+def process_file(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], str]:
     if not file or not hasattr(file, "name"):
         messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file."})
         return messages, None
+    messages.append({"role": "user", "content": f"📂 Processing file: {file.name}"})
     try:
+        extracted_text = extract_text_from_excel(file.name)
+        chunks = split_text(extracted_text)
+        batches = batch_chunks(chunks)
         messages.append({"role": "assistant", "content": f"🔍 Split into {len(batches)} batches. Analyzing..."})
+        batch_outputs = analyze_batches(agent, batches)
+        valid_outputs = [res for res in batch_outputs if not res.startswith("❌")]
+        if not valid_outputs:
             messages.append({"role": "assistant", "content": "❌ No valid batch outputs."})
             return messages, None
+        summary = generate_final_summary(agent, "\n\n".join(valid_outputs))
         report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
+        with open(report_path, "w", encoding="utf-8") as f:
             f.write(f"# 🧠 Final Medical Report\n\n{summary}")
         messages.append({"role": "assistant", "content": f"📊 Final Report:\n\n{summary}"})
+        messages.append({"role": "assistant", "content": f"✅ Saved report: {os.path.basename(report_path)}"})
         return messages, report_path
     except Exception as e:
 def create_ui(agent):
     with gr.Blocks(css="""
+    html, body { background-color: #0e1621; color: #e0e0e0; }
+    button { background: #007bff; color: white; border-radius: 8px; padding: 8px 16px; }
+    .gr-chatbot { background: #1b2533; border: 1px solid #2a2f45; border-radius: 16px; padding: 10px; }
     """) as demo:
+        gr.Markdown("""## 🧠 CPS: Clinical Patient Support Assistant""")
+        chatbot = gr.Chatbot(label="CPS Assistant", height=700, type="messages")
+        upload = gr.File(label="Upload Excel File", file_types=[".xlsx"])
+        analyze_btn = gr.Button("🧠 Analyze File")
+        download = gr.File(label="Download Report", visible=False)
+        state = gr.State([])
+        def handle_analyze(file, chat_state):
+            messages, report_path = process_file(agent, file, chat_state)
             return messages, gr.update(visible=bool(report_path), value=report_path), messages
+        analyze_btn.click(fn=handle_analyze, inputs=[upload, state], outputs=[chatbot, download, state])
     return demo
 if __name__ == "__main__":
+    agent = init_agent()
+    ui = create_ui(agent)
+    ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)