CPS-Test-Mobile

Paused

App Files Files Community

Ali2206 committed on Apr 19

Commit

65e7d58

verified ·

1 Parent(s): 422a1e2

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -55

app.py CHANGED Viewed

@@ -96,16 +96,12 @@ def extract_all_pages(file_path: str, progress_callback=None) -> str:
         return f"PDF processing error: {str(e)}"
 def excel_to_json(file_path: str) -> List[Dict]:
-    """Convert Excel file to JSON with optimized processing"""
     try:
-        # First try with openpyxl (faster for xlsx)
         try:
             df = pd.read_excel(file_path, engine='openpyxl', header=None, dtype=str)
         except Exception:
-            # Fall back to xlrd if needed
             df = pd.read_excel(file_path, engine='xlrd', header=None, dtype=str)
-        # Convert to list of lists with null handling
         content = df.where(pd.notnull(df), "").astype(str).values.tolist()
         return [{
@@ -118,9 +114,7 @@ def excel_to_json(file_path: str) -> List[Dict]:
         return [{"error": f"Error processing Excel file: {str(e)}"}]
 def csv_to_json(file_path: str) -> List[Dict]:
-    """Convert CSV file to JSON with optimized processing"""
     try:
-        # Read CSV in chunks if large
         chunks = []
         for chunk in pd.read_csv(
             file_path,
@@ -145,7 +139,6 @@ def csv_to_json(file_path: str) -> List[Dict]:
         return [{"error": f"Error processing CSV file: {str(e)}"}]
 def process_file(file_path: str, file_type: str) -> List[Dict]:
-    """Process file based on type and return JSON data"""
     try:
         if file_type == "pdf":
             text = extract_all_pages(file_path)
@@ -166,7 +159,6 @@ def process_file(file_path: str, file_type: str) -> List[Dict]:
         return [{"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"}]
 def tokenize_and_chunk(text: str, max_tokens: int = 1800) -> List[str]:
-    """Split text into chunks based on token count"""
     tokens = tokenizer.encode(text)
     chunks = []
     for i in range(0, len(tokens), max_tokens):
@@ -281,7 +273,6 @@ def init_agent():
     return agent
 def process_response_stream(prompt: str, history: List[dict]) -> Generator[dict, None, None]:
-    """Process a single prompt and stream the response"""
     full_response = ""
     for chunk_output in agent.run_gradio_chat(prompt, [], 0.2, 512, 2048, False, []):
         if chunk_output is None:
@@ -302,20 +293,17 @@ def process_response_stream(prompt: str, history: List[dict]) -> Generator[dict,
     return full_response
-def analyze(message: str, history: List[dict], files: List) -> Generator[Dict[str, Any], None, None]:
     # Initialize outputs
-    outputs = {
-        "chatbot": history.copy(),
-        "download_output": None,
-        "final_summary": "",
-        "progress_text": {"value": "Starting analysis...", "visible": True}
-    }
     try:
         # Start with user message
-        history.append({"role": "user", "content": message})
-        outputs["chatbot"] = history
-        yield outputs
         extracted = []
         file_hash_value = ""
@@ -331,19 +319,16 @@ def analyze(message: str, history: List[dict], files: List) -> Generator[Dict[st
                 for i, future in enumerate(as_completed(futures), 1):
                     try:
                         extracted.extend(future.result())
-                        outputs["progress_text"] = update_progress(i, len(files), "Processing files")
-                        yield outputs
                     except Exception as e:
                         logger.error(f"File processing error: {e}")
                         extracted.append({"error": f"Error processing file: {str(e)}"})
             file_hash_value = file_hash(files[0].name) if files else ""
-            history.append({"role": "assistant", "content": "✅ File processing complete"})
-            outputs.update({
-                "chatbot": history,
-                "progress_text": update_progress(len(files), len(files), "Files processed")
-            })
-            yield outputs
         # Convert extracted data to JSON text
         text_content = "\n".join(json.dumps(item) for item in extracted)
@@ -361,23 +346,17 @@ Patient Record Excerpt (Chunk {chunk_idx} of {len(chunks)}):
 """
             # Create a placeholder message
-            history.append({"role": "assistant", "content": ""})
-            outputs.update({
-                "chatbot": history,
-                "progress_text": update_progress(chunk_idx, len(chunks), "Analyzing")
-            })
-            yield outputs
             # Process and stream the response
             chunk_response = ""
-            for update in process_response_stream(prompt, history):
-                history[-1] = update
                 chunk_response = update["content"]
-                outputs.update({
-                    "chatbot": history,
-                    "progress_text": update_progress(chunk_idx, len(chunks), "Analyzing")
-                })
-                yield outputs
             combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
@@ -386,28 +365,22 @@ Patient Record Excerpt (Chunk {chunk_idx} of {len(chunks)}):
             gc.collect()
         # Generate final summary
-        summary = summarize_findings(combined_response)
         report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
         if report_path:
             with open(report_path, "w", encoding="utf-8") as f:
-                f.write(combined_response + "\n\n" + summary)
-        outputs.update({
-            "download_output": report_path if report_path and os.path.exists(report_path) else None,
-            "final_summary": summary,
-            "progress_text": {"visible": False}
-        })
-        yield outputs
     except Exception as e:
         logger.error("Analysis error: %s", e)
-        history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
-        outputs.update({
-            "chatbot": history,
-            "final_summary": f"Error occurred during analysis: {str(e)}",
-            "progress_text": {"visible": False}
-        })
-        yield outputs
 def clear_and_start():
     return [
@@ -433,6 +406,7 @@ def create_ui(agent):
                         "assets/user.png",
                         "assets/assistant.png"
                     ) if os.path.exists("assets/user.png") else None,
                     render=False
                 )
             with gr.Column(scale=1):

         return f"PDF processing error: {str(e)}"
 def excel_to_json(file_path: str) -> List[Dict]:
     try:
         try:
             df = pd.read_excel(file_path, engine='openpyxl', header=None, dtype=str)
         except Exception:
             df = pd.read_excel(file_path, engine='xlrd', header=None, dtype=str)
         content = df.where(pd.notnull(df), "").astype(str).values.tolist()
         return [{
         return [{"error": f"Error processing Excel file: {str(e)}"}]
 def csv_to_json(file_path: str) -> List[Dict]:
     try:
         chunks = []
         for chunk in pd.read_csv(
             file_path,
         return [{"error": f"Error processing CSV file: {str(e)}"}]
 def process_file(file_path: str, file_type: str) -> List[Dict]:
     try:
         if file_type == "pdf":
             text = extract_all_pages(file_path)
         return [{"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"}]
 def tokenize_and_chunk(text: str, max_tokens: int = 1800) -> List[str]:
     tokens = tokenizer.encode(text)
     chunks = []
     for i in range(0, len(tokens), max_tokens):
     return agent
 def process_response_stream(prompt: str, history: List[dict]) -> Generator[dict, None, None]:
     full_response = ""
     for chunk_output in agent.run_gradio_chat(prompt, [], 0.2, 512, 2048, False, []):
         if chunk_output is None:
     return full_response
+def analyze(message: str, history: List[dict], files: List) -> Generator[tuple, None, None]:
     # Initialize outputs
+    chatbot_output = history.copy()
+    download_output = None
+    final_summary = ""
+    progress_text = {"value": "Starting analysis...", "visible": True}
     try:
         # Start with user message
+        chatbot_output.append({"role": "user", "content": message})
+        yield (chatbot_output, download_output, final_summary, progress_text)
         extracted = []
         file_hash_value = ""
                 for i, future in enumerate(as_completed(futures), 1):
                     try:
                         extracted.extend(future.result())
+                        progress_text = update_progress(i, len(files), "Processing files")
+                        yield (chatbot_output, download_output, final_summary, progress_text)
                     except Exception as e:
                         logger.error(f"File processing error: {e}")
                         extracted.append({"error": f"Error processing file: {str(e)}"})
             file_hash_value = file_hash(files[0].name) if files else ""
+            chatbot_output.append({"role": "assistant", "content": "✅ File processing complete"})
+            progress_text = update_progress(len(files), len(files), "Files processed")
+            yield (chatbot_output, download_output, final_summary, progress_text)
         # Convert extracted data to JSON text
         text_content = "\n".join(json.dumps(item) for item in extracted)
 """
             # Create a placeholder message
+            chatbot_output.append({"role": "assistant", "content": ""})
+            progress_text = update_progress(chunk_idx, len(chunks), "Analyzing")
+            yield (chatbot_output, download_output, final_summary, progress_text)
             # Process and stream the response
             chunk_response = ""
+            for update in process_response_stream(prompt, chatbot_output):
+                chatbot_output[-1] = update
                 chunk_response = update["content"]
+                progress_text = update_progress(chunk_idx, len(chunks), "Analyzing")
+                yield (chatbot_output, download_output, final_summary, progress_text)
             combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
             gc.collect()
         # Generate final summary
+        final_summary = summarize_findings(combined_response)
         report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
         if report_path:
             with open(report_path, "w", encoding="utf-8") as f:
+                f.write(combined_response + "\n\n" + final_summary)
+        download_output = report_path if report_path and os.path.exists(report_path) else None
+        progress_text = {"visible": False}
+        yield (chatbot_output, download_output, final_summary, progress_text)
     except Exception as e:
         logger.error("Analysis error: %s", e)
+        chatbot_output.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
+        final_summary = f"Error occurred during analysis: {str(e)}"
+        progress_text = {"visible": False}
+        yield (chatbot_output, download_output, final_summary, progress_text)
 def clear_and_start():
     return [
                         "assets/user.png",
                         "assets/assistant.png"
                     ) if os.path.exists("assets/user.png") else None,
+                    type="messages",  # Use openai-style messages
                     render=False
                 )
             with gr.Column(scale=1):