Ali2206 committed on
Commit 3ed8d49 · verified · 1 Parent(s): 6287195

Update app.py

Files changed (1):
  1. app.py +87 -256

app.py CHANGED
@@ -3,15 +3,14 @@ import os
 import pandas as pd
 import json
 import gradio as gr
-from typing import List, Tuple, Dict, Any, Union
 import hashlib
 import shutil
 import re
 from datetime import datetime
-import time
 from concurrent.futures import ThreadPoolExecutor, as_completed

-# Configuration and setup
 persistent_dir = "/data/hf_cache"
 os.makedirs(persistent_dir, exist_ok=True)

@@ -20,29 +19,21 @@ tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
 file_cache_dir = os.path.join(persistent_dir, "cache")
 report_dir = os.path.join(persistent_dir, "reports")

-for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
-    os.makedirs(directory, exist_ok=True)

 os.environ["HF_HOME"] = model_cache_dir
 os.environ["TRANSFORMERS_CACHE"] = model_cache_dir

-current_dir = os.path.dirname(os.path.abspath(__file__))
-src_path = os.path.abspath(os.path.join(current_dir, "src"))
-sys.path.insert(0, src_path)
-
 from txagent.txagent import TxAgent

-# Constants
 MAX_MODEL_TOKENS = 32768
 MAX_CHUNK_TOKENS = 8192
 MAX_NEW_TOKENS = 2048
 PROMPT_OVERHEAD = 500

 def clean_response(text: str) -> str:
-    try:
-        text = text.encode('utf-8', 'surrogatepass').decode('utf-8')
-    except UnicodeError:
-        text = text.encode('utf-8', 'replace').decode('utf-8')
     text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text)
     text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
@@ -51,286 +42,126 @@ def clean_response(text: str) -> str:
 def estimate_tokens(text: str) -> int:
     return len(text) // 3.5 + 1

-def extract_text_from_excel(file_path: str) -> str:
     all_text = []
     try:
-        xls = pd.ExcelFile(file_path)
-        for sheet_name in xls.sheet_names:
-            df = xls.parse(sheet_name)
-            df = df.astype(str).fillna("")
-            rows = df.apply(lambda row: " | ".join(row), axis=1)
-            sheet_text = [f"[{sheet_name}] {line}" for line in rows]
-            all_text.extend(sheet_text)
     except Exception as e:
-        raise ValueError(f"Failed to extract text from Excel file: {str(e)}")
     return "\n".join(all_text)

-def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
-    effective_max_tokens = max_tokens - PROMPT_OVERHEAD
-    if effective_max_tokens <= 0:
-        raise ValueError(f"Effective max tokens ({effective_max_tokens}) must be positive.")
-    lines = text.split("\n")
-    chunks, current_chunk, current_tokens = [], [], 0
     for line in lines:
-        line_tokens = estimate_tokens(line)
-        if current_tokens + line_tokens > effective_max_tokens:
-            if current_chunk:
-                chunks.append("\n".join(current_chunk))
-            current_chunk, current_tokens = [line], line_tokens
         else:
-            current_chunk.append(line)
-            current_tokens += line_tokens
-    if current_chunk:
-        chunks.append("\n".join(current_chunk))
     return chunks

 def build_prompt_from_text(chunk: str) -> str:
     return f"""
 ### Unstructured Clinical Records

-You are reviewing unstructured, mixed-format clinical documentation from various forms, tables, and sheets.
-
-**Objective:** Identify patterns, missed diagnoses, inconsistencies, and follow-up gaps.
-
-Here is the extracted content chunk:
-
-{chunk}
-
-Please analyze the above and provide:
 - Diagnostic Patterns
 - Medication Issues
 - Missed Opportunities
 - Inconsistencies
 - Follow-up Recommendations
 """

 def init_agent():
-    default_tool_path = os.path.abspath("data/new_tool.json")
-    target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
-    if not os.path.exists(target_tool_path):
-        shutil.copy(default_tool_path, target_tool_path)
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
-        tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
         enable_checker=True,
         step_rag_num=4,
-        seed=100,
-        additional_default_tools=[]
     )
     agent.init_model()
     return agent

-def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
-    messages = chatbot_state if chatbot_state else []
-    report_path = None
-
-    if file is None or not hasattr(file, "name"):
-        messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file before analyzing."})
-        return messages, report_path
-
-    try:
-        messages.append({"role": "user", "content": f"Processing Excel file: {os.path.basename(file.name)}"})
-        messages.append({"role": "assistant", "content": "⏳ Extracting and analyzing data..."})
-        extracted_text = extract_text_from_excel(file.name)
-        chunks = split_text_into_chunks(extracted_text)
-        chunk_responses = [None] * len(chunks)
-
-        def analyze_chunk(index: int, chunk: str) -> Tuple[int, str]:
-            prompt = build_prompt_from_text(chunk)
-            prompt_tokens = estimate_tokens(prompt)
-            if prompt_tokens > MAX_MODEL_TOKENS:
-                return index, f"❌ Chunk {index+1} prompt too long ({prompt_tokens} tokens). Skipping..."
-            response = ""
-            try:
-                for result in agent.run_gradio_chat(
-                    message=prompt,
-                    history=[],
-                    temperature=0.2,
-                    max_new_tokens=MAX_NEW_TOKENS,
-                    max_token=MAX_MODEL_TOKENS,
-                    call_agent=False,
-                    conversation=[],
-                ):
-                    if isinstance(result, str):
-                        response += result
-                    elif hasattr(result, "content"):
-                        response += result.content
-                    elif isinstance(result, list):
-                        for r in result:
-                            if hasattr(r, "content"):
-                                response += r.content
-            except Exception as e:
-                return index, f"❌ Error analyzing chunk {index+1}: {str(e)}"
-            return index, clean_response(response)
-
-        with ThreadPoolExecutor(max_workers=1) as executor:
-            futures = [executor.submit(analyze_chunk, i, chunk) for i, chunk in enumerate(chunks)]
-            for future in as_completed(futures):
-                i, result = future.result()
-                chunk_responses[i] = result
-                if not result.startswith("❌"):
-                    messages.append({"role": "assistant", "content": f"✅ Chunk {i+1} analysis complete"})
-                else:
-                    messages.append({"role": "assistant", "content": result})
-
-        valid_responses = [res for res in chunk_responses if not res.startswith("❌")]
-        if not valid_responses:
-            messages.append({"role": "assistant", "content": "❌ No valid chunk responses to summarize."})
-            return messages, report_path
-
-        summary = ""
-        current_summary_tokens = 0
-        for i, response in enumerate(valid_responses):
-            response_tokens = estimate_tokens(response)
-            if current_summary_tokens + response_tokens > MAX_MODEL_TOKENS - PROMPT_OVERHEAD - MAX_NEW_TOKENS:
-                summary_prompt = f"Summarize the following analysis:\n\n{summary}\n\nProvide a concise summary."
-                summary_response = ""
-                try:
-                    for result in agent.run_gradio_chat(
-                        message=summary_prompt,
-                        history=[],
-                        temperature=0.2,
-                        max_new_tokens=MAX_NEW_TOKENS,
-                        max_token=MAX_MODEL_TOKENS,
-                        call_agent=False,
-                        conversation=[],
-                    ):
-                        if isinstance(result, str):
-                            summary_response += result
-                        elif hasattr(result, "content"):
-                            summary_response += result.content
-                        elif isinstance(result, list):
-                            for r in result:
-                                if hasattr(r, "content"):
-                                    summary_response += r.content
-                    summary = clean_response(summary_response)
-                    current_summary_tokens = estimate_tokens(summary)
-                except Exception as e:
-                    messages.append({"role": "assistant", "content": f"❌ Error summarizing intermediate results: {str(e)}"})
-                    return messages, report_path
-            summary += f"\n\n### Chunk {i+1} Analysis\n{response}"
-            current_summary_tokens += response_tokens
-
-        final_prompt = f"Summarize the key findings from the following analyses:\n\n{summary}"
-        messages.append({"role": "assistant", "content": "📊 Generating final report..."})
-
-        final_report_text = ""
-        try:
-            for result in agent.run_gradio_chat(
-                message=final_prompt,
-                history=[],
-                temperature=0.2,
-                max_new_tokens=MAX_NEW_TOKENS,
-                max_token=MAX_MODEL_TOKENS,
-                call_agent=False,
-                conversation=[],
-            ):
-                if isinstance(result, str):
-                    final_report_text += result
-                elif hasattr(result, "content"):
-                    final_report_text += result.content
-                elif isinstance(result, list):
-                    for r in result:
-                        if hasattr(r, "content"):
-                            final_report_text += r.content
-        except Exception as e:
-            messages.append({"role": "assistant", "content": f"❌ Error generating final report: {str(e)}"})
-            return messages, report_path
-
-        final_report = f"# 🧠 Final Patient Report\n\n{clean_response(final_report_text)}"
-        messages[-1]["content"] = f"📊 Final Report:\n\n{clean_response(final_report_text)}"
-
-        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
-        report_path = os.path.join(report_dir, f"report_{timestamp}.md")
-
-        with open(report_path, 'w') as f:
-            f.write(final_report)
-
-        messages.append({"role": "assistant", "content": f"✅ Report generated and saved: report_{timestamp}.md"})
-
-    except Exception as e:
-        messages.append({"role": "assistant", "content": f"❌ Error processing file: {str(e)}"})
-
-    return messages, report_path
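
The guard in the summarization loop above keeps the running summary inside a fixed budget: with the module constants, 32768 - 500 - 2048 = 30220 estimated tokens. A small sketch of the same arithmetic (fits_in_budget is a hypothetical helper, not part of the commit):

    # Token budget for the running summary, using the module's constants.
    MAX_MODEL_TOKENS = 32768   # total context window
    PROMPT_OVERHEAD = 500      # reserved for prompt scaffolding
    MAX_NEW_TOKENS = 2048      # reserved for the model's reply

    SUMMARY_BUDGET = MAX_MODEL_TOKENS - PROMPT_OVERHEAD - MAX_NEW_TOKENS  # 30220

    def fits_in_budget(current_tokens: int, next_tokens: int) -> bool:
        # Mirrors the guard in process_final_report: re-summarize before overflowing.
        return current_tokens + next_tokens <= SUMMARY_BUDGET

    print(fits_in_budget(29000, 1000))  # True
    print(fits_in_budget(29000, 2000))  # False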
 
 def create_ui(agent):
-    with gr.Blocks(
-        title="Patient History Chat",
-        css="""
-        .gradio-container {
-            max-width: 900px !important;
-            margin: auto;
-            font-family: 'Segoe UI', sans-serif;
-            background-color: #f8f9fa;
-        }
-        .gr-button.primary {
-            background: linear-gradient(to right, #4b6cb7, #182848);
             color: white;
             border: none;
             border-radius: 8px;
         }
-        .gr-button.primary:hover {
-            background: linear-gradient(to right, #3552a3, #101a3e);
         }
-        .gr-file-upload, .gr-chatbot, .gr-markdown {
-            background-color: white;
-            border-radius: 10px;
-            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-            padding: 1rem;
-        }
-        .gr-chatbot {
-            border-left: 4px solid #4b6cb7;
-        }
-        .gr-file-upload input {
-            font-size: 0.95rem;
-        }
-        .chat-message-content p {
-            margin: 0.3em 0;
-        }
-        .chat-message-content ul {
-            padding-left: 1.2em;
-            margin: 0.4em 0;
-        }
-        """
-    ) as demo:
-        gr.Markdown("""
-        <h2 style='color:#182848'>🏥 Patient History Analysis Tool</h2>
-        <p style='color:#444;'>Upload an Excel file containing clinical data. The assistant will analyze it for patterns, inconsistencies, and recommendations.</p>
-        """)
-
-        with gr.Row():
-            with gr.Column(scale=3):
-                chatbot = gr.Chatbot(
-                    label="Clinical Assistant",
-                    show_copy_button=True,
-                    height=600,
-                    type="messages",
-                    avatar_images=(None, "https://i.imgur.com/6wX7Zb4.png"),
-                    render_markdown=True
-                )
-            with gr.Column(scale=1):
-                file_upload = gr.File(label="Upload Excel File", file_types=[".xlsx"], height=100)
-                analyze_btn = gr.Button("🧠 Analyze Patient History", variant="primary", elem_classes="primary")
-                report_output = gr.File(label="Download Report", visible=False, interactive=False)
-
-        chatbot_state = gr.State(value=[])
-
-        def update_ui(file, current_state):
-            messages, report_path = process_final_report(agent, file, current_state)
-            formatted_messages = []
-            for msg in messages:
-                role = msg.get("role")
-                content = msg.get("content", "")
-                if role == "assistant":
-                    content = content.replace("- ", "\n- ")
-                    content = f"<div class='chat-message-content'>{content}</div>"
-                formatted_messages.append({"role": role, "content": content})
-            report_update = gr.update(visible=report_path is not None, value=report_path)
-            return formatted_messages, report_update, formatted_messages
-
-        analyze_btn.click(fn=update_ui, inputs=[file_upload, chatbot_state], outputs=[chatbot, report_output, chatbot_state], api_name="analyze")

     return demo

@@ -338,7 +169,7 @@ if __name__ == "__main__":
     try:
         agent = init_agent()
         demo = create_ui(agent)
-        demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, allowed_paths=["/data/hf_cache/reports"], share=False)
     except Exception as e:
         print(f"Error: {str(e)}")
-        sys.exit(1)

 import pandas as pd
 import json
 import gradio as gr
+from typing import List, Tuple, Union, Generator
 import hashlib
 import shutil
 import re
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed

+# Setup directories
 persistent_dir = "/data/hf_cache"
 os.makedirs(persistent_dir, exist_ok=True)

 file_cache_dir = os.path.join(persistent_dir, "cache")
 report_dir = os.path.join(persistent_dir, "reports")

+for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
+    os.makedirs(d, exist_ok=True)

 os.environ["HF_HOME"] = model_cache_dir
 os.environ["TRANSFORMERS_CACHE"] = model_cache_dir

+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "src")))

 from txagent.txagent import TxAgent

 MAX_MODEL_TOKENS = 32768
 MAX_CHUNK_TOKENS = 8192
 MAX_NEW_TOKENS = 2048
 PROMPT_OVERHEAD = 500

 def clean_response(text: str) -> str:
     text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text)
     text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)

 def estimate_tokens(text: str) -> int:
     return len(text) // 3.5 + 1
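
One subtlety in the context lines above: len(text) // 3.5 floor-divides by a float, so estimate_tokens actually returns a float (e.g. 89.0) and callers rely on float comparisons against integer budgets. A variant returning a true int, as a sketch only, not part of the commit:

    def estimate_tokens_int(text: str) -> int:
        # Same ~3.5-characters-per-token heuristic, cast so the result
        # is an int rather than the float that // with 3.5 produces.
        return int(len(text) / 3.5) + 1

    assert isinstance(estimate_tokens_int("example clinical note"), int)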
 
+def extract_text_from_excel(file_obj: Union[str, os.PathLike]) -> str:
     all_text = []
     try:
+        xls = pd.ExcelFile(file_obj)
     except Exception as e:
+        raise ValueError(f"Error reading Excel file: {e}")
+    for sheet_name in xls.sheet_names:
+        df = xls.parse(sheet_name).astype(str).fillna("")
+        rows = df.apply(lambda row: " | ".join([cell for cell in row if cell.strip()]), axis=1)
+        sheet_text = [f"[{sheet_name}] {line}" for line in rows if line.strip()]
+        all_text.extend(sheet_text)
     return "\n".join(all_text)
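
The new extractor flattens every sheet into "[sheet] cell | cell" lines, skipping empty cells and rows. A hypothetical round-trip showing the output format (assumes openpyxl is installed; buf stands in for an uploaded file):

    import io
    import pandas as pd

    buf = io.BytesIO()
    df = pd.DataFrame({"date": ["2024-01-02"], "note": ["BP elevated"]})
    with pd.ExcelWriter(buf, engine="openpyxl") as writer:
        df.to_excel(writer, sheet_name="Visits", index=False)
    buf.seek(0)

    print(extract_text_from_excel(buf))
    # [Visits] 2024-01-02 | BP elevated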
 
+def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_chunks: int = 30) -> List[str]:
+    effective_max = max_tokens - PROMPT_OVERHEAD
+    lines, chunks, curr_chunk, curr_tokens = text.split("\n"), [], [], 0
     for line in lines:
+        t = estimate_tokens(line)
+        if curr_tokens + t > effective_max:
+            if curr_chunk:
+                chunks.append("\n".join(curr_chunk))
+                if len(chunks) >= max_chunks:
+                    break
+            curr_chunk, curr_tokens = [line], t
         else:
+            curr_chunk.append(line)
+            curr_tokens += t
+    if curr_chunk and len(chunks) < max_chunks:
+        chunks.append("\n".join(curr_chunk))
     return chunks
75
 
76
  def build_prompt_from_text(chunk: str) -> str:
77
  return f"""
78
  ### Unstructured Clinical Records
79
 
80
+ Analyze the following clinical notes and provide a detailed, concise summary focusing on:
 
 
 
 
 
 
 
 
81
  - Diagnostic Patterns
82
  - Medication Issues
83
  - Missed Opportunities
84
  - Inconsistencies
85
  - Follow-up Recommendations
86
+
87
+ ---
88
+
89
+ {chunk}
90
+
91
+ ---
92
+ Respond in well-structured bullet points with medical reasoning.
93
  """
94
 
 def init_agent():
+    tool_path = os.path.join(tool_cache_dir, "new_tool.json")
+    if not os.path.exists(tool_path):
+        shutil.copy(os.path.abspath("data/new_tool.json"), tool_path)
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
+        tool_files_dict={"new_tool": tool_path},
         force_finish=True,
         enable_checker=True,
         step_rag_num=4,
+        seed=100
     )
     agent.init_model()
     return agent
 
+def stream_report(agent, file: Union[str, os.PathLike], full_output: str) -> Generator[Tuple[str, Union[str, None], str], None, None]:
+    yield from stream_report_wrapper(agent)(file, full_output)
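
stream_report delegates to stream_report_wrapper, whose definition is not visible in this view of the diff. Purely as a hedged sketch of what such a wrapper could look like, using only helpers that appear elsewhere in this file (the committed implementation may differ):

    def stream_report_wrapper(agent):
        # Hypothetical sketch; the real body is not shown in this diff view.
        def _stream(file, full_output):
            if file is None:
                yield "❌ Please upload a valid Excel file.", None, full_output
                return
            text = extract_text_from_excel(file.name if hasattr(file, "name") else file)
            chunks = split_text_into_chunks(text)
            accumulated = ""
            for i, chunk in enumerate(chunks, 1):
                response = ""
                for result in agent.run_gradio_chat(
                    message=build_prompt_from_text(chunk), history=[], temperature=0.2,
                    max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS,
                    call_agent=False, conversation=[],
                ):
                    if isinstance(result, str):
                        response += result
                    elif hasattr(result, "content"):
                        response += result.content
                accumulated += f"\n\n### Chunk {i}\n{clean_response(response)}"
                yield accumulated, None, accumulated  # stream progress to the UI
            # Persist the final report; the last yield exposes the file path
            # (the UI may additionally need a gr.update to unhide the file box).
            path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
            with open(path, "w") as f:
                f.write(accumulated)
            yield accumulated, path, accumulated
        return _stream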
 

 def create_ui(agent):
+    with gr.Blocks(css="""
+    body {
+        background: #10141f;
+        color: #ffffff;
+        font-family: 'Inter', sans-serif;
+        margin: 0;
+        padding: 0;
+    }
     .gradio-container {
+        padding: 30px;
+        width: 100vw;
+        max-width: 100%;
+        border-radius: 0;
+        background-color: #1a1f2e;
+    }
+    .output-markdown {
+        background-color: #131720;
+        border-radius: 12px;
+        padding: 20px;
+        min-height: 600px;
+        overflow-y: auto;
+        border: 1px solid #2c3344;
     }
+    .gr-button {
+        background: linear-gradient(135deg, #4b4ced, #37b6e9);
         color: white;
+        font-weight: 500;
         border: none;
+        padding: 10px 20px;
         border-radius: 8px;
+        transition: background 0.3s ease;
     }
+    .gr-button:hover {
+        background: linear-gradient(135deg, #37b6e9, #4b4ced);
     }
+    """) as demo:
+        gr.Markdown("""# 🧠 Clinical Reasoning Assistant
+Upload clinical Excel records below and click **Analyze** to generate a medical summary.
+""")
+        file_upload = gr.File(label="Upload Excel File", file_types=[".xlsx"])
+        analyze_btn = gr.Button("Analyze")
+        report_output_markdown = gr.Markdown(elem_classes="output-markdown")
+        report_file = gr.File(label="Download Report", visible=False)
+        full_output = gr.State(value="")
+
+        analyze_btn.click(
+            fn=stream_report,
+            inputs=[file_upload, full_output],
+            outputs=[report_output_markdown, report_file, full_output]
+        )

     return demo
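
Because stream_report is a generator, Gradio streams each yielded (markdown, file, state) tuple into the three outputs as it arrives. Note the wiring passes only [file_upload, full_output], so stream_report's leading agent parameter presumably gets bound elsewhere; functools.partial(stream_report, agent) would make that binding explicit. A minimal, self-contained illustration of the streaming pattern (independent of this app):

    import time
    import gradio as gr

    def count_up(n):
        # A generator as an event handler: each yield re-renders the output.
        shown = ""
        for i in range(1, int(n) + 1):
            shown += f"{i} "
            time.sleep(0.1)
            yield shown

    with gr.Blocks() as streaming_demo:
        n_box = gr.Number(value=5, label="Count to")
        out = gr.Markdown()
        gr.Button("Go").click(fn=count_up, inputs=n_box, outputs=out)

    # streaming_demo.launch()  # uncomment to try the streaming behavior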
 
 
 if __name__ == "__main__":
     try:
         agent = init_agent()
         demo = create_ui(agent)
+        demo.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=True)
     except Exception as e:
         print(f"Error: {str(e)}")
+        sys.exit(1)