CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed 27 days ago

Commit

a135a34

verified ·

1 Parent(s): 8955687

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -153

app.py CHANGED Viewed

@@ -60,162 +60,29 @@ def remove_duplicate_paragraphs(text: str) -> str:
             seen.add(clean_p)
     return "\n\n".join(unique_paragraphs)
-def extract_text(file_path: str) -> str:
-    if file_path.endswith(".xlsx"):
-        return pd.read_excel(file_path).astype(str).fillna("").to_string(index=False)
-    elif file_path.endswith(".csv"):
-        return pd.read_csv(file_path).astype(str).fillna("").to_string(index=False)
-    elif file_path.endswith(".pdf"):
-        try:
-            with pdfplumber.open(file_path) as pdf:
-                return "\n".join(page.extract_text() or '' for page in pdf.pages)
-        except Exception:
-            return ""
-    else:
-        return ""
-def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
-    effective_limit = max_tokens - PROMPT_OVERHEAD
-    chunks, current, current_tokens = [], [], 0
-    for line in text.split("\n"):
-        tokens = estimate_tokens(line)
-        if current_tokens + tokens > effective_limit:
-            if current:
-                chunks.append("\n".join(current))
-            current, current_tokens = [line], tokens
-        else:
-            current.append(line)
-            current_tokens += tokens
-    if current:
-        chunks.append("\n".join(current))
-    return chunks
-def batch_chunks(chunks: List[str], batch_size: int = BATCH_SIZE) -> List[List[str]]:
-    return [chunks[i:i+batch_size] for i in range(0, len(chunks), batch_size)]
-def build_prompt(chunk: str) -> str:
-    return f"""### Unstructured Clinical Records\n\nAnalyze the clinical notes below and summarize with:\n- Diagnostic Patterns\n- Medication Issues\n- Missed Opportunities\n- Inconsistencies\n- Follow-up Recommendations\n\n---\n\n{chunk}\n\n---\nRespond concisely in bullet points with clinical reasoning."""
-def remove_non_ascii(text):
-    return ''.join(c for c in text if ord(c) < 256)
-def init_agent() -> TxAgent:
-    tool_path = os.path.join(tool_cache_dir, "new_tool.json")
-    if not os.path.exists(tool_path):
-        shutil.copy(os.path.abspath("data/new_tool.json"), tool_path)
-    agent = TxAgent(
-        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
-        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
-        tool_files_dict={"new_tool": tool_path},
-        force_finish=True,
-        enable_checker=True,
-        step_rag_num=4,
-        seed=100
-    )
-    agent.init_model()
-    return agent
-def analyze_batches(agent, batches: List[List[str]]) -> List[str]:
-    results = []
-    for batch in batches:
-        prompt = "\n\n".join(build_prompt(chunk) for chunk in batch)
-        try:
-            batch_response = ""
-            for r in agent.run_gradio_chat(
-                message=prompt,
-                history=[],
-                temperature=0.0,
-                max_new_tokens=MAX_NEW_TOKENS,
-                max_token=MAX_MODEL_TOKENS,
-                call_agent=False,
-                conversation=[]
-            ):
-                if isinstance(r, str):
-                    batch_response += r
-                elif isinstance(r, list):
-                    for m in r:
-                        if hasattr(m, "content"):
-                            batch_response += m.content
-                elif hasattr(r, "content"):
-                    batch_response += r.content
-            results.append(clean_response(batch_response))
-            time.sleep(SAFE_SLEEP)
-        except Exception as e:
-            results.append(f"❌ Batch failed: {str(e)}")
-            time.sleep(SAFE_SLEEP * 2)
-    torch.cuda.empty_cache()
-    gc.collect()
-    return results
-def generate_final_summary(agent, combined: str) -> str:
-    combined = remove_duplicate_paragraphs(combined)
-    final_prompt = f"""
-You are an expert clinical summarizer. Analyze the following summaries carefully and generate a **single final concise structured medical report**, avoiding any repetition or redundancy.
-Summaries:
-{combined}
-Respond with:
-- Diagnostic Patterns
-- Medication Issues
-- Missed Opportunities
-- Inconsistencies
-- Follow-up Recommendations
-Avoid repeating the same points multiple times.
-""".strip()
-    final_response = ""
-    for r in agent.run_gradio_chat(
-        message=final_prompt,
-        history=[],
-        temperature=0.0,
-        max_new_tokens=MAX_NEW_TOKENS,
-        max_token=MAX_MODEL_TOKENS,
-        call_agent=False,
-        conversation=[]
-    ):
-        if isinstance(r, str):
-            final_response += r
-        elif isinstance(r, list):
-            for m in r:
-                if hasattr(m, "content"):
-                    final_response += m.content
-        elif hasattr(r, "content"):
-            final_response += r.content
-    final_response = clean_response(final_response)
-    final_response = remove_duplicate_paragraphs(final_response)
-    return final_response
-def handle_analysis(file):
     messages = []
-    if not file or not hasattr(file, "name"):
-        return "❌ Please upload a valid file.", None
-    try:
-        extracted = extract_text(file.name)
-        if not extracted:
-            return "❌ Could not extract text.", None
-        chunks = split_text(extracted)
-        batches = batch_chunks(chunks, batch_size=BATCH_SIZE)
-        batch_results = analyze_batches(agent, batches)
-        valid = [res for res in batch_results if not res.startswith("❌")]
-        if not valid:
-            return "❌ No valid batch outputs.", None
-        summary = generate_final_summary(agent, "\n\n".join(valid))
-        report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt")
-        with open(report_path, 'w', encoding='utf-8') as f:
-            f.write(summary)
-        return summary, report_path
-    except Exception as e:
-        return f"❌ Error: {str(e)}", None
 if __name__ == "__main__":
     agent = init_agent()
-    gr.Interface(
-        fn=handle_analysis,
-        inputs=gr.File(file_types=[".pdf", ".csv", ".xlsx"]),
-        outputs=[gr.Textbox(label="Summary"), gr.File(label="Download Report")]
-    ).queue().launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)

             seen.add(clean_p)
     return "\n\n".join(unique_paragraphs)
+# === FastAPI for mobile API endpoint ===
+from fastapi import FastAPI, UploadFile, File
+from fastapi.responses import JSONResponse
+import uvicorn
+app = FastAPI()
+@app.post("/analyze")
+async def analyze_file_api(file: UploadFile = File(...)):
+    agent = init_agent()
+    temp_file_path = os.path.join(file_cache_dir, file.filename)
+    with open(temp_file_path, "wb") as f:
+        f.write(await file.read())
     messages = []
+    messages, pdf_path = process_report(agent, open(temp_file_path, "rb"), messages)
+    if pdf_path:
+        return JSONResponse(content={"summary": messages[-2]['content'], "pdf": pdf_path})
+    return JSONResponse(content={"error": "Processing failed."}, status_code=400)
+# === Original Gradio UI launch preserved ===
 if __name__ == "__main__":
     agent = init_agent()
+    ui = create_ui(agent)
+    import threading
+    threading.Thread(target=lambda: ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)).start()
+    uvicorn.run(app, host="0.0.0.0", port=8000)