Update src/txagent/txagent.py

src/txagent/txagent.py (CHANGED, +53 -23)
@@ -13,6 +13,7 @@ from gradio import ChatMessage
 from .toolrag import ToolRAGModel
 import torch
 import logging
+from difflib import SequenceMatcher

 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -26,15 +27,15 @@ class TxAgent:
                  enable_finish=True,
                  enable_rag=True,
                  enable_summary=False,
-                 init_rag_num=2,
-                 step_rag_num=4,
+                 init_rag_num=2,
+                 step_rag_num=4,
                  summary_mode='step',
                  summary_skip_last_k=0,
                  summary_context_length=None,
                  force_finish=True,
                  avoid_repeat=True,
                  seed=None,
-                 enable_checker=False,
+                 enable_checker=False,
                  enable_chat=False,
                  additional_default_tools=None):
         self.model_name = model_name
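For reference, a hypothetical construction call exercising the flags this hunk touches; the import path and model id are assumptions, not shown in the diff:

```python
# Hypothetical usage sketch; import path and model id are placeholders.
from txagent.txagent import TxAgent

agent = TxAgent(
    model_name="path/to/model",
    enable_rag=True,
    init_rag_num=2,        # tools retrieved up front
    step_rag_num=4,        # tools retrieved per Tool_RAG step
    enable_checker=False,  # reasoning-trace checker stays opt-in
)
```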
@@ -78,7 +79,7 @@ class TxAgent:
         if model_name:
             self.model_name = model_name

-        self.model = LLM(model=self.model_name, dtype="float16")
+        self.model = LLM(model=self.model_name, dtype="float16")
         self.chat_template = Template(self.model.get_tokenizer().chat_template)
         self.tokenizer = self.model.get_tokenizer()
         logger.info("Model %s loaded successfully", self.model_name)
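The load path pins `dtype="float16"`. A minimal sketch of the same sequence outside the class, assuming vLLM's `LLM` and a Jinja2 `Template`, which is what the calls shown here suggest:

```python
# Minimal sketch, assuming vLLM and jinja2 as the calls in this hunk suggest.
# float16 halves weight memory vs. float32; bfloat16 is a common alternative on newer GPUs.
from vllm import LLM
from jinja2 import Template

model = LLM(model="path/to/model", dtype="float16")  # placeholder path
tokenizer = model.get_tokenizer()
chat_template = Template(tokenizer.chat_template)
```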
@@ -101,16 +102,17 @@ class TxAgent:

     def initialize_tools_prompt(self, call_agent, call_agent_level, message):
         picked_tools_prompt = []
-
-
-
-
-
-
-
-
-
-
+        # Only add Finish tool unless prompt explicitly requires Tool_RAG or CallAgent
+        if "use external tools" not in message.lower():
+            picked_tools_prompt = self.add_special_tools(picked_tools_prompt, call_agent=False)
+        else:
+            picked_tools_prompt = self.add_special_tools(picked_tools_prompt, call_agent=call_agent)
+        if call_agent:
+            call_agent_level += 1
+            if call_agent_level >= 2:
+                call_agent = False
+        if self.enable_rag:
+            picked_tools_prompt += self.tool_RAG(message=message, rag_num=self.init_rag_num)
         return picked_tools_prompt, call_agent_level

     def initialize_conversation(self, message, conversation=None, history=None):
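The rebuilt `initialize_tools_prompt` keys everything off a literal phrase in the user message. The gate in isolation, with TxAgent internals stubbed out:

```python
# Standalone sketch of the message gate added above.
def wants_external_tools(message: str) -> bool:
    return "use external tools" in message.lower()

print(wants_external_tools("Summarize this report for me"))        # False -> Finish tool only
print(wants_external_tools("Use external tools to check dosing"))  # True  -> CallAgent/Tool_RAG allowed
```

Note the gate is an exact substring match, so paraphrases like "look this up" will not enable the tools.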
@@ -129,7 +131,7 @@

     def tool_RAG(self, message=None, picked_tool_names=None,
                  existing_tools_prompt=None, rag_num=4, return_call_result=False):
-        extra_factor = 10
+        extra_factor = 10
         if picked_tool_names is None:
             picked_tool_names = self.rag_infer(message, top_k=rag_num * extra_factor)

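`tool_RAG` over-retrieves by `extra_factor` and then trims. The pattern separated from the class; `rank_tools` is a hypothetical stand-in for `self.rag_infer` plus the filtering the method performs:

```python
# Sketch of the over-retrieval pattern; rank_tools is a hypothetical stand-in.
def pick_tools(message, rank_tools, rag_num=4, extra_factor=10, excluded=()):
    candidates = rank_tools(message, top_k=rag_num * extra_factor)  # retrieve wide...
    kept = [t for t in candidates if t not in excluded]             # ...filter...
    return kept[:rag_num]                                           # ...keep the top rag_num

ranked = lambda msg, top_k: [f"tool_{i}" for i in range(top_k)]
print(pick_tools("find drug interactions", ranked, excluded={"tool_0"}))
# -> ['tool_1', 'tool_2', 'tool_3', 'tool_4']
```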
@@ -148,10 +150,10 @@
         if self.enable_finish:
             tools.append(self.tooluniverse.get_one_tool_by_one_name('Finish', return_prompt=True))
             logger.debug("Finish tool added")
-        if call_agent:
+        if call_agent and "use external tools" in self.prompt_multi_step.lower():
             tools.append(self.tooluniverse.get_one_tool_by_one_name('CallAgent', return_prompt=True))
             logger.debug("CallAgent tool added")
-        elif self.enable_rag:
+        elif self.enable_rag and "use external tools" in self.prompt_multi_step.lower():
             tools.append(self.tooluniverse.get_one_tool_by_one_name('Tool_RAG', return_prompt=True))
             logger.debug("Tool_RAG tool added")
         if self.additional_default_tools:
@@ -301,7 +303,7 @@
         return output

     def run_multistep_agent(self, message: str, temperature: float, max_new_tokens: int,
-                            max_token: int, max_round: int =
+                            max_token: int, max_round: int = 3, call_agent=False, call_agent_level=0):
         logger.debug("Starting multistep agent for message: %s", message[:100])
         picked_tools_prompt, call_agent_level = self.initialize_tools_prompt(
             call_agent, call_agent_level, message)
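With the updated signature, a hypothetical call looks like this; the sampling values are placeholders:

```python
# Hypothetical call; argument values are placeholders, names follow the new signature.
answer = agent.run_multistep_agent(
    "Use external tools to check interactions for warfarin.",
    temperature=0.3,
    max_new_tokens=1024,
    max_token=8192,
    # max_round defaults to 3; call_agent/call_agent_level default to False/0
)
```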
@@ -317,6 +319,10 @@
         if self.enable_checker:
             checker = ReasoningTraceChecker(message, conversation)

+        # Check if message contains clinical findings
+        clinical_keywords = ['medication', 'symptom', 'evaluation', 'diagnosis']
+        has_clinical_data = any(keyword in message.lower() for keyword in clinical_keywords)
+
         while next_round and current_round < max_round:
             current_round += 1
             if last_outputs:
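The clinical gate is a plain keyword scan. In isolation, with its blind spots visible:

```python
# The keyword gate in isolation; the list mirrors the diff and will miss
# synonyms ("Rx", "dx") and misspellings: it is a plain substring check.
clinical_keywords = ['medication', 'symptom', 'evaluation', 'diagnosis']

def has_clinical_data(message: str) -> bool:
    return any(keyword in message.lower() for keyword in clinical_keywords)

print(has_clinical_data("Patient reports a new symptom after the dose change"))  # True
print(has_clinical_data("What does this tool return?"))                          # False
```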
@@ -349,9 +355,11 @@
                     logger.warning("Checker error: %s", wrong_info)
                     break

+            # Skip tool calls if clinical data is present
+            tools = [] if has_clinical_data else picked_tools_prompt
             last_outputs = []
             last_outputs_str, token_overflow = self.llm_infer(
-                messages=conversation, temperature=temperature, tools=
+                messages=conversation, temperature=temperature, tools=tools,
                 max_new_tokens=max_new_tokens, max_token=max_token, check_token_status=True)
             if last_outputs_str is None:
                 if self.force_finish:
@@ -374,7 +382,22 @@
                 m['content'] for m in messages[-3:] if m['role'] == 'assistant'
             ][:2]
             forbidden_ids = [tokenizer.encode(msg, add_special_tokens=False) for msg in assistant_messages]
-
+            # Enhance deduplication with similarity check
+            unique_sentences = []
+            for msg in assistant_messages:
+                sentences = msg.split('. ')
+                for s in sentences:
+                    if not s:
+                        continue
+                    is_unique = True
+                    for seen_s in unique_sentences:
+                        if SequenceMatcher(None, s.lower(), seen_s.lower()).ratio() > 0.9:
+                            is_unique = False
+                            break
+                    if is_unique:
+                        unique_sentences.append(s)
+            forbidden_ids = [tokenizer.encode(s, add_special_tokens=False) for s in unique_sentences]
+            return [NoRepeatSentenceProcessor(forbidden_ids, 10)]  # Increased penalty
         return None

     def llm_infer(self, messages, temperature=0.1, tools=None, output_begin_string=None,
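The new dedup pass is worth testing on its own. A self-contained version with the same `'. '` split and 0.9 ratio threshold:

```python
# Self-contained version of the near-duplicate filter above. Splitting on '. ' is
# crude (it breaks on abbreviations like "e.g."), and ratio() > 0.9 only catches
# near-identical sentences.
from difflib import SequenceMatcher

def unique_sentences(messages, threshold=0.9):
    kept = []
    for msg in messages:
        for s in msg.split('. '):
            if s and all(SequenceMatcher(None, s.lower(), k.lower()).ratio() <= threshold
                         for k in kept):
                kept.append(s)
    return kept

print(unique_sentences([
    "The dose was increased. Monitor liver enzymes.",
    "Monitor liver enzymes. Check renal function.",
]))
# -> ['The dose was increased', 'Monitor liver enzymes.', 'Check renal function.']
```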
@@ -407,7 +430,7 @@
         output = model.generate(prompt, sampling_params=sampling_params)
         output = output[0].outputs[0].text
         logger.debug("Inference output: %s", output[:100])
-        torch.cuda.empty_cache()
+        torch.cuda.empty_cache()
         if check_token_status:
             return output, False
         return output
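Calling `torch.cuda.empty_cache()` after each generate returns unused cached blocks to the driver; it does not free live tensors and adds a small synchronization cost. The pattern, guarded for CPU-only runs (a sketch; `model`, `prompt`, and `sampling_params` are placeholders):

```python
# Sketch of the cleanup pattern; model/prompt/sampling_params are placeholders.
import torch

def generate_once(model, prompt, sampling_params):
    text = model.generate(prompt, sampling_params=sampling_params)[0].outputs[0].text
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # return unused cached memory to the driver
    return text
```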
@@ -544,7 +567,7 @@ Summarize the function responses in one sentence with all necessary information.

     def run_gradio_chat(self, message: str, history: list, temperature: float,
                         max_new_tokens: int, max_token: int, call_agent: bool,
-                        conversation: gr.State, max_round: int =
+                        conversation: gr.State, max_round: int = 3, seed: int = None,
                         call_agent_level: int = 0, sub_agent_task: str = None,
                         uploaded_files: list = None):
         logger.debug("Chat started, message: %s", message[:100])
@@ -555,6 +578,11 @@ Summarize the function responses in one sentence with all necessary information.
         if message.startswith("[\U0001f9f0 Tool_RAG") or message.startswith("⚒️"):
             return

+        # Check if message contains clinical findings
+        clinical_keywords = ['medication', 'symptom', 'evaluation', 'diagnosis']
+        has_clinical_data = any(keyword in message.lower() for keyword in clinical_keywords)
+        call_agent = call_agent and not has_clinical_data  # Disable CallAgent for clinical data
+
         picked_tools_prompt, call_agent_level = self.initialize_tools_prompt(
             call_agent, call_agent_level, message)
         conversation = self.initialize_conversation(
@@ -612,8 +640,10 @@ Summarize the function responses in one sentence with all necessary information.
                     logger.warning("Checker error: %s", wrong_info)
                     break

+            # Skip tool calls if clinical data is present
+            tools = [] if has_clinical_data else picked_tools_prompt
             last_outputs_str, token_overflow = self.llm_infer(
-                messages=conversation, temperature=temperature, tools=
+                messages=conversation, temperature=temperature, tools=tools,
                 max_new_tokens=max_new_tokens, max_token=max_token, seed=seed, check_token_status=True)

             if last_outputs_str is None:
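End to end, a hypothetical Gradio-side call with the updated signature, assuming the method is a generator of chat updates, as Gradio streaming handlers usually are:

```python
# Hypothetical driver; parameter values are placeholders, names follow the new signature.
for update in agent.run_gradio_chat(
    message="Use external tools to check ibuprofen-lisinopril interactions.",
    history=[], temperature=0.3, max_new_tokens=1024, max_token=8192,
    call_agent=False, conversation=None, max_round=3, seed=42,
):
    pass  # each update would be streamed back to the UI
```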