Final_Assignment_Submission

Running

App Files Files Community

mhattingpete committed 1 day ago

Commit

652eb00

1 Parent(s): fb8728b

added reasoning

Browse files

Files changed (3) hide show

agent.py +17 -8
requirements.txt +1 -0
src/tools/reasoning.py +195 -0

agent.py CHANGED Viewed

@@ -8,13 +8,17 @@ from smolagents import (
     CodeAgent,
     GoogleSearchTool,
     PythonInterpreterTool,
     VisitWebpageTool,
 )
 from src.file_handler.parse import parse_file
-from src.tools import reverse_question
 load_dotenv()
 class Agent:
@@ -25,10 +29,13 @@ class Agent:
             api_key=os.getenv("AZURE_OPENAI_API_KEY"),
             api_version=os.getenv("OPENAI_API_VERSION"),
         )
         tools = [
             GoogleSearchTool(provider="serper"),
             VisitWebpageTool(),
             PythonInterpreterTool(),
             reverse_question,
         ]
         self.agent = CodeAgent(
@@ -37,12 +44,15 @@ class Agent:
         )
         self.user_prompt = """
         I will ask you a question.
-        Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
         YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
         If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
         If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
         If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
         Question: {question}
         Attached content: {content}
@@ -56,7 +66,7 @@ class Agent:
             f"Agent received question (first 50 chars): {question[:50]}..."
         )
         images = None
-        prompt = self.user_prompt.format(question=question)
         if file_name:
             content = parse_file(task_id, file_name, api_url)
@@ -66,13 +76,12 @@ class Agent:
                 ):  # Parse content as image
                     images = [content]
                 else:  # Append content to question
-                    prompt = prompt.format(content=content)
                     logger.info(f"Question with content: {question}")
-        else:
-            prompt = prompt.format(content="")
         answer = self.agent.run(prompt, images=images)
-        answer = answer.replace("FINAL ANSWER:", "").strip()
         logger.info(f"Agent returning answer: {answer}")
         return answer
@@ -93,4 +102,4 @@ if __name__ == "__main__":
         f"Task ID: {task_id}\nQuestion: {question}\nFile Name: {file_name}\n\n"
     )
-    answer = agent(question, file_name)

     CodeAgent,
     GoogleSearchTool,
     PythonInterpreterTool,
+    SpeechToTextTool,
     VisitWebpageTool,
 )
 from src.file_handler.parse import parse_file
+from src.tools.reasoning import ReasoningToolkit
+from src.tools.reverse_question import reverse_question
+from src.tracing import add_tracing
 load_dotenv()
+add_tracing()
 class Agent:
             api_key=os.getenv("AZURE_OPENAI_API_KEY"),
             api_version=os.getenv("OPENAI_API_VERSION"),
         )
+        reasoning_toolkit = ReasoningToolkit()
         tools = [
             GoogleSearchTool(provider="serper"),
             VisitWebpageTool(),
             PythonInterpreterTool(),
+            SpeechToTextTool(),
+            *reasoning_toolkit.tools,
             reverse_question,
         ]
         self.agent = CodeAgent(
         )
         self.user_prompt = """
         I will ask you a question.
         YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
         If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
         If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
         If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+        You MUST use the following tools:
+        - think, used before all other tool call and before the final answer
+        - analyze, used after all other tool call and before the final answer
         Question: {question}
         Attached content: {content}
             f"Agent received question (first 50 chars): {question[:50]}..."
         )
         images = None
+        content = ""
         if file_name:
             content = parse_file(task_id, file_name, api_url)
                 ):  # Parse content as image
                     images = [content]
                 else:  # Append content to question
                     logger.info(f"Question with content: {question}")
+        prompt = self.user_prompt.format(question=question, content=content)
         answer = self.agent.run(prompt, images=images)
+        answer = str(answer).replace("FINAL ANSWER:", "").strip()
         logger.info(f"Agent returning answer: {answer}")
         return answer
         f"Task ID: {task_id}\nQuestion: {question}\nFile Name: {file_name}\n\n"
     )
+    answer = agent(question, task_id, file_name, api_url)

requirements.txt CHANGED Viewed

@@ -11,3 +11,4 @@ pillow
 python-docx
 requests
 smolagents[openai]

 python-docx
 requests
 smolagents[openai]
+transformers

src/tools/reasoning.py ADDED Viewed

	@@ -0,0 +1,195 @@

+from textwrap import dedent
+from typing import Any, Dict, List, Optional
+from smolagents.tools import Tool  # SmolAgents base class
+# ---------------------------------------------------------------------
+# Helper enum – kept as str literals so we avoid any Agno dependency.
+# ---------------------------------------------------------------------
+class NextAction:
+    """String constants naming the permitted ``next_action`` values.
+
+    ``AnalyzeTool.forward`` validates its ``next_action`` argument against
+    these three values.
+    """
+    CONTINUE = "continue"
+    VALIDATE = "validate"
+    FINAL_ANSWER = "final_answer"
+# ---------------------------------------------------------------------
+# THINK TOOL -----------------------------------------------------------
+# ---------------------------------------------------------------------
+class ThinkTool(Tool):
+    name = "think"
+    description = (
+        "Internal scratch‑pad. Use this to reason step‑by‑step before "
+        "calling other tools or replying to the user."
+    )
+    inputs = {
+        "title": {"type": "string", "description": "Concise title"},
+        "thought": {"type": "string", "description": "Detailed reasoning"},
+        "action": {
+            "type": "string",
+            "description": "Intended next action",
+            "nullable": True,
+        },
+        "confidence": {
+            "type": "number",
+            "description": "Confidence 0–1",
+            "nullable": True,
+        },
+        "run_id": {
+            "type": "string",
+            "description": "Execution identifier",
+            "nullable": True,
+        },
+    }
+    output_type = "string"
+    def __init__(self):
+        super().__init__()
+        self._history: Dict[str, List[Dict[str, Any]]] = {}
+    def forward(  # noqa: N802  (SmolAgents allows camelCase here)
+        self,
+        title: str,
+        thought: str,
+        action: Optional[str] = None,
+        confidence: float = 0.8,
+        run_id: str = "default",
+    ) -> str:
+        """Store and pretty‑print reasoning history."""
+        step = {
+            "title": title,
+            "reasoning": thought,
+            "action": action,
+            "confidence": confidence,
+        }
+        self._history.setdefault(run_id, []).append(step)
+        # Pretty print full chain so the LLM can “see” prior steps
+        formatted = ""
+        for idx, s in enumerate(self._history[run_id], 1):
+            formatted += (
+                dedent(
+                    f"""\
+                Step {idx}:
+                Title: {s["title"]}
+                Reasoning: {s["reasoning"]}
+                Action: {s["action"]}
+                Confidence: {s["confidence"]}
+                """
+                )
+                + "\n"
+            )
+        return formatted.strip()
+# ---------------------------------------------------------------------
+# ANALYZE TOOL ---------------------------------------------------------
+# ---------------------------------------------------------------------
+class AnalyzeTool(Tool):
+    name = "analyze"
+    description = (
+        "Evaluate the result of previous actions and decide whether to "
+        "continue, validate, or provide a final answer. "
+    )
+    inputs = {
+        "title": {"type": "string", "description": "Concise title"},
+        "result": {"type": "string", "description": "Outcome being analysed"},
+        "analysis": {"type": "string", "description": "Your analysis"},
+        "next_action": {
+            "type": "string",
+            "description": "'continue' | 'validate' | 'final_answer'",
+            "nullable": True,
+        },
+        "confidence": {
+            "type": "number",
+            "description": "Confidence 0–1",
+            "nullable": True,
+        },
+        "run_id": {
+            "type": "string",
+            "description": "Execution identifier",
+            "nullable": True,
+        },
+    }
+    output_type = "string"
+    def __init__(self):
+        super().__init__()
+        self._history: Dict[str, List[Dict[str, Any]]] = {}
+    def forward(
+        self,
+        title: str,
+        result: str,
+        analysis: str,
+        next_action: str = NextAction.CONTINUE,
+        confidence: float = 0.8,
+        run_id: str = "default",
+    ) -> str:
+        if next_action not in {
+            NextAction.CONTINUE,
+            NextAction.VALIDATE,
+            NextAction.FINAL_ANSWER,
+        }:
+            raise ValueError(
+                f"next_action must be one of "
+                f"{NextAction.CONTINUE}, {NextAction.VALIDATE}, "
+                f"{NextAction.FINAL_ANSWER}"
+            )
+        step = {
+            "title": title,
+            "result": result,
+            "reasoning": analysis,
+            "next_action": next_action,
+            "confidence": confidence,
+        }
+        self._history.setdefault(run_id, []).append(step)
+        formatted = ""
+        for idx, s in enumerate(self._history[run_id], 1):
+            formatted += (
+                dedent(
+                    f"""\
+                Step {idx}:
+                Title: {s["title"]}
+                Result: {s.get("result")}
+                Reasoning: {s["reasoning"]}
+                Next action: {s.get("next_action")}
+                Confidence: {s["confidence"]}
+                """
+                )
+                + "\n"
+            )
+        return formatted.strip()
+# ---------------------------------------------------------------------
+# TOOLKIT WRAPPER ------------------------------------------------------
+# ---------------------------------------------------------------------
+class ReasoningToolkit:
+    """
+    Convenience wrapper so you can write:
+        from reasoning_tools import ReasoningToolkit
+        toolkit = ReasoningToolkit()
+        agent = CodeAgent(tools=toolkit.tools, model=...)
+    """
+    DEFAULT_INSTRUCTIONS = dedent(
+        """\
+        You have access to two internal tools – **think** and **analyze** –
+        for chain‑of‑thought reasoning. **Always** call `think` before
+        external tool calls or final answers, then call `analyze` to
+        decide whether to continue, validate, or finish."""
+    )
+    def __init__(self, think: bool = True, analyze: bool = True):
+        self.tools: List[Tool] = []
+        if think:
+            self.tools.append(ThinkTool())
+        if analyze:
+            self.tools.append(AnalyzeTool())
+    def with_instructions(self, extra: str | None = None) -> str:
+        return self.DEFAULT_INSTRUCTIONS + ("\n" + extra if extra else "")