CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed on Apr 26

Commit

1dd5b3f

verified ·

1 Parent(s): 095998d

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -98

app.py CHANGED Viewed

@@ -1,17 +1,26 @@
 import sys
 import os
-import pandas as pd
 import json
-import gradio as gr
-from datetime import datetime
 import shutil
-import gc
 import re
 import torch
-from typing import List, Tuple, Dict
 from concurrent.futures import ThreadPoolExecutor, as_completed
-# Directories
 persistent_dir = "/data/hf_cache"
 model_cache_dir = os.path.join(persistent_dir, "txagent_models")
 tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
@@ -24,29 +33,27 @@ for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
 os.environ["HF_HOME"] = model_cache_dir
 os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
-# Paths
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_path = os.path.abspath(os.path.join(current_dir, "src"))
 sys.path.insert(0, src_path)
 from txagent.txagent import TxAgent
-# Constants
-MAX_MODEL_TOKENS = 131072
-MAX_NEW_TOKENS = 4096
-MAX_CHUNK_TOKENS = 8192
-PROMPT_OVERHEAD = 300
-BATCH_SIZE = 2
 def estimate_tokens(text: str) -> int:
     return len(text) // 4 + 1
 def extract_text_from_excel(path: str) -> str:
     all_text = []
     xls = pd.ExcelFile(path)
-    for sheet in xls.sheet_names:
         try:
-            df = xls.parse(sheet).astype(str).fillna("")
         except Exception:
             continue
         for _, row in df.iterrows():
@@ -54,36 +61,31 @@ def extract_text_from_excel(path: str) -> str:
             if len(non_empty) >= 2:
                 line = " | ".join(non_empty)
                 if len(line) > 15:
-                    all_text.append(f"[{sheet}] {line}")
     return "\n".join(all_text)
-def split_text(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
     effective_limit = max_tokens - PROMPT_OVERHEAD
-    chunks, current, tokens = [], [], 0
     for line in text.split("\n"):
-        tks = estimate_tokens(line)
-        if tokens + tks > effective_limit:
             if current:
                 chunks.append("\n".join(current))
-            current, tokens = [line], tks
         else:
             current.append(line)
-            tokens += tks
     if current:
         chunks.append("\n".join(current))
     return chunks
-def batch_chunks(chunks: List[str], batch_size: int = BATCH_SIZE) -> List[List[str]]:
-    return [chunks[i:i + batch_size] for i in range(0, len(chunks), batch_size)]
 def build_prompt(chunk: str) -> str:
     return f"""### Unstructured Clinical Records\n\nAnalyze the clinical notes below and summarize with:\n- Diagnostic Patterns\n- Medication Issues\n- Missed Opportunities\n- Inconsistencies\n- Follow-up Recommendations\n\n---\n\n{chunk}\n\n---\nRespond concisely in bullet points with clinical reasoning."""
-def clean_response(text: str) -> str:
-    text = re.sub(r"\[.*?\]", "", text, flags=re.DOTALL)
-    text = re.sub(r"\n{3,}", "\n\n", text)
-    return text.strip()
 def init_agent() -> TxAgent:
     tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(tool_path):
@@ -100,46 +102,45 @@ def init_agent() -> TxAgent:
     agent.init_model()
     return agent
-def analyze_batches(agent, batches: List[List[str]], max_workers: int = 3) -> List[str]:
     results = []
-    def process_single_batch(batch):
-        prompt = "\n\n".join(build_prompt(chunk) for chunk in batch)
-        response = ""
-        try:
-            for r in agent.run_gradio_chat(
-                message=prompt,
-                history=[],
-                temperature=0.0,
-                max_new_tokens=4096,
-                max_token=131072,
-                call_agent=False,
-                conversation=[]
-            ):
-                if isinstance(r, str):
-                    response += r
-                elif isinstance(r, list):
-                    for m in r:
-                        if hasattr(m, "content"):
-                            response += m.content
-                elif hasattr(r, "content"):
-                    response += r.content
-        except Exception as e:
-            response = f"❌ Error: {str(e)}"
-        return clean_response(response)
-    with ThreadPoolExecutor(max_workers=max_workers) as executor:
-        futures = [executor.submit(process_single_batch, batch) for batch in batches]
         for future in as_completed(futures):
             results.append(future.result())
     return results
 def generate_final_summary(agent, combined: str) -> str:
-    final_prompt = f"""Summarize the following clinical summaries into a final medical report:\n\n{combined}"""
-    response = ""
     for r in agent.run_gradio_chat(
         message=final_prompt,
         history=[],
@@ -150,43 +151,41 @@ def generate_final_summary(agent, combined: str) -> str:
         conversation=[]
     ):
         if isinstance(r, str):
-            response += r
         elif isinstance(r, list):
             for m in r:
                 if hasattr(m, "content"):
-                    response += m.content
         elif hasattr(r, "content"):
-            response += r.content
-    return clean_response(response)
-def process_file(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], str]:
     if not file or not hasattr(file, "name"):
         messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file."})
         return messages, None
-    messages.append({"role": "user", "content": f"📂 Processing file: {file.name}"})
     try:
-        extracted_text = extract_text_from_excel(file.name)
-        chunks = split_text(extracted_text)
-        batches = batch_chunks(chunks)
-        messages.append({"role": "assistant", "content": f"🔍 Split into {len(batches)} batches. Analyzing..."})
-        batch_outputs = analyze_batches(agent, batches)
-        valid_outputs = [res for res in batch_outputs if not res.startswith("❌")]
-        if not valid_outputs:
             messages.append({"role": "assistant", "content": "❌ No valid batch outputs."})
             return messages, None
-        summary = generate_final_summary(agent, "\n\n".join(valid_outputs))
         report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
-        with open(report_path, "w", encoding="utf-8") as f:
             f.write(f"# 🧠 Final Medical Report\n\n{summary}")
         messages.append({"role": "assistant", "content": f"📊 Final Report:\n\n{summary}"})
-        messages.append({"role": "assistant", "content": f"✅ Saved report: {os.path.basename(report_path)}"})
         return messages, report_path
     except Exception as e:
@@ -195,27 +194,35 @@ def process_file(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Dict
 def create_ui(agent):
     with gr.Blocks(css="""
-    html, body { background-color: #0e1621; color: #e0e0e0; }
-    button { background: #007bff; color: white; border-radius: 8px; padding: 8px 16px; }
-    .gr-chatbot { background: #1b2533; border: 1px solid #2a2f45; border-radius: 16px; padding: 10px; }
     """) as demo:
-        gr.Markdown("""## 🧠 CPS: Clinical Patient Support Assistant""")
-        chatbot = gr.Chatbot(label="CPS Assistant", height=700, type="messages")
-        upload = gr.File(label="Upload Excel File", file_types=[".xlsx"])
-        analyze_btn = gr.Button("🧠 Analyze File")
-        download = gr.File(label="Download Report", visible=False)
-        state = gr.State([])
-        def handle_analyze(file, chat_state):
-            messages, report_path = process_file(agent, file, chat_state)
             return messages, gr.update(visible=bool(report_path), value=report_path), messages
-        analyze_btn.click(fn=handle_analyze, inputs=[upload, state], outputs=[chatbot, download, state])
     return demo
 if __name__ == "__main__":
-    agent = init_agent()
-    ui = create_ui(agent)
-    ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)

 import sys
 import os
+import gc
 import json
 import shutil
 import re
+import time
+import pandas as pd
+import gradio as gr
 import torch
+from typing import List, Tuple, Dict, Union
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
+# Constants
+MAX_MODEL_TOKENS = 131072
+MAX_NEW_TOKENS = 4096
+MAX_CHUNK_TOKENS = 8192
+PROMPT_OVERHEAD = 300
+BATCH_SIZE = 4  # 4 chunks per batch
+MAX_WORKERS = 6  # 6 parallel batches
+# Paths
 persistent_dir = "/data/hf_cache"
 model_cache_dir = os.path.join(persistent_dir, "txagent_models")
 tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
 os.environ["HF_HOME"] = model_cache_dir
 os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_path = os.path.abspath(os.path.join(current_dir, "src"))
 sys.path.insert(0, src_path)
 from txagent.txagent import TxAgent
 def estimate_tokens(text: str) -> int:
     return len(text) // 4 + 1
+def clean_response(text: str) -> str:
+    text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
+    return text.strip()
 def extract_text_from_excel(path: str) -> str:
     all_text = []
     xls = pd.ExcelFile(path)
+    for sheet_name in xls.sheet_names:
         try:
+            df = xls.parse(sheet_name).astype(str).fillna("")
         except Exception:
             continue
         for _, row in df.iterrows():
             if len(non_empty) >= 2:
                 line = " | ".join(non_empty)
                 if len(line) > 15:
+                    all_text.append(f"[{sheet_name}] {line}")
     return "\n".join(all_text)
+def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
     effective_limit = max_tokens - PROMPT_OVERHEAD
+    chunks, current, current_tokens = [], [], 0
     for line in text.split("\n"):
+        tokens = estimate_tokens(line)
+        if current_tokens + tokens > effective_limit:
             if current:
                 chunks.append("\n".join(current))
+            current, current_tokens = [line], tokens
         else:
             current.append(line)
+            current_tokens += tokens
     if current:
         chunks.append("\n".join(current))
     return chunks
+def batch_chunks(chunks: List[str], batch_size: int = 4) -> List[List[str]]:
+    return [chunks[i:i+batch_size] for i in range(0, len(chunks), batch_size)]
 def build_prompt(chunk: str) -> str:
     return f"""### Unstructured Clinical Records\n\nAnalyze the clinical notes below and summarize with:\n- Diagnostic Patterns\n- Medication Issues\n- Missed Opportunities\n- Inconsistencies\n- Follow-up Recommendations\n\n---\n\n{chunk}\n\n---\nRespond concisely in bullet points with clinical reasoning."""
 def init_agent() -> TxAgent:
     tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(tool_path):
     agent.init_model()
     return agent
+def analyze_batch(agent, batch: List[str]) -> str:
+    prompt = "\n\n".join(build_prompt(chunk) for chunk in batch)
+    response = ""
+    try:
+        for r in agent.run_gradio_chat(
+            message=prompt,
+            history=[],
+            temperature=0.0,
+            max_new_tokens=MAX_NEW_TOKENS,
+            max_token=MAX_MODEL_TOKENS,
+            call_agent=False,
+            conversation=[]
+        ):
+            if isinstance(r, str):
+                response += r
+            elif isinstance(r, list):
+                for m in r:
+                    if hasattr(m, "content"):
+                        response += m.content
+            elif hasattr(r, "content"):
+                response += r.content
+    except Exception as e:
+        return f"❌ Error in batch: {str(e)}"
+    finally:
+        torch.cuda.empty_cache()
+        gc.collect()
+    return clean_response(response)
+def analyze_batches_parallel(agent, batches: List[List[str]]) -> List[str]:
     results = []
+    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
+        futures = [executor.submit(analyze_batch, agent, batch) for batch in batches]
         for future in as_completed(futures):
             results.append(future.result())
     return results
 def generate_final_summary(agent, combined: str) -> str:
+    final_prompt = f"""Provide a structured medical report based on the following summaries:\n\n{combined}\n\nRespond in detailed medical bullet points."""
+    full_report = ""
     for r in agent.run_gradio_chat(
         message=final_prompt,
         history=[],
         conversation=[]
     ):
         if isinstance(r, str):
+            full_report += r
         elif isinstance(r, list):
             for m in r:
                 if hasattr(m, "content"):
+                    full_report += m.content
         elif hasattr(r, "content"):
+            full_report += r.content
+    return clean_response(full_report)
+def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
     if not file or not hasattr(file, "name"):
         messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file."})
         return messages, None
+    messages.append({"role": "user", "content": f"📂 Processing file: {os.path.basename(file.name)}"})
     try:
+        extracted = extract_text_from_excel(file.name)
+        chunks = split_text(extracted)
+        batches = batch_chunks(chunks, batch_size=BATCH_SIZE)
+        messages.append({"role": "assistant", "content": f"🔍 Split into {len(batches)} batches. Analyzing in parallel..."})
+        batch_results = analyze_batches_parallel(agent, batches)
+        valid = [res for res in batch_results if not res.startswith("❌")]
+        if not valid:
             messages.append({"role": "assistant", "content": "❌ No valid batch outputs."})
             return messages, None
+        summary = generate_final_summary(agent, "\n\n".join(valid))
         report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
+        with open(report_path, 'w', encoding='utf-8') as f:
             f.write(f"# 🧠 Final Medical Report\n\n{summary}")
         messages.append({"role": "assistant", "content": f"📊 Final Report:\n\n{summary}"})
+        messages.append({"role": "assistant", "content": f"✅ Report saved: {os.path.basename(report_path)}"})
         return messages, report_path
     except Exception as e:
 def create_ui(agent):
     with gr.Blocks(css="""
+    html, body, .gradio-container {background-color: #0e1621; color: #e0e0e0; font-family: 'Inter', sans-serif;}
+    h2, h3, h4 {color: #89b4fa; font-weight: 600;}
+    button.gr-button-primary {background-color: #007bff !important; color: white !important;}
+    .gr-chatbot, .gr-markdown, .gr-file-upload {border-radius: 16px; background-color: #1b2533;}
+    .gr-chatbot .message {font-size: 16px; padding: 12px; border-radius: 18px;}
+    .gr-chatbot .message.user {background-color: #334155;}
+    .gr-chatbot .message.assistant {background-color: #1e293b;}
     """) as demo:
+        gr.Markdown("""<h2>📄 CPS: Clinical Patient Support System</h2><p>Upload a file and analyze medical notes.</p>""")
+        with gr.Column():
+            chatbot = gr.Chatbot(label="CPS Assistant", height=700, type="messages")
+            upload = gr.File(label="Upload Medical File", file_types=[".xlsx"])
+            analyze = gr.Button("🧠 Analyze", variant="primary")
+            download = gr.File(label="Download Report", visible=False, interactive=False)
+        state = gr.State(value=[])
+        def handle_analysis(file, chat):
+            messages, report_path = process_report(agent, file, chat)
             return messages, gr.update(visible=bool(report_path), value=report_path), messages
+        analyze.click(fn=handle_analysis, inputs=[upload, state], outputs=[chatbot, download, state])
     return demo
 if __name__ == "__main__":
+    try:
+        agent = init_agent()
+        ui = create_ui(agent)
+        ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)
+    except Exception as err:
+        print(f"Startup failed: {err}")
+        sys.exit(1)