mhattingpete committed on
Commit
652eb00
·
1 Parent(s): fb8728b

added reasoning

Browse files
Files changed (3) hide show
  1. agent.py +17 -8
  2. requirements.txt +1 -0
  3. src/tools/reasoning.py +195 -0
agent.py CHANGED
@@ -8,13 +8,17 @@ from smolagents import (
8
  CodeAgent,
9
  GoogleSearchTool,
10
  PythonInterpreterTool,
 
11
  VisitWebpageTool,
12
  )
13
 
14
  from src.file_handler.parse import parse_file
15
- from src.tools import reverse_question
 
 
16
 
17
  load_dotenv()
 
18
 
19
 
20
  class Agent:
@@ -25,10 +29,13 @@ class Agent:
25
  api_key=os.getenv("AZURE_OPENAI_API_KEY"),
26
  api_version=os.getenv("OPENAI_API_VERSION"),
27
  )
 
28
  tools = [
29
  GoogleSearchTool(provider="serper"),
30
  VisitWebpageTool(),
31
  PythonInterpreterTool(),
 
 
32
  reverse_question,
33
  ]
34
  self.agent = CodeAgent(
@@ -37,12 +44,15 @@ class Agent:
37
  )
38
  self.user_prompt = """
39
  I will ask you a question.
40
- Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
41
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
42
  If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
43
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
44
  If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
45
 
 
 
 
 
46
  Question: {question}
47
 
48
  Attached content: {content}
@@ -56,7 +66,7 @@ class Agent:
56
  f"Agent received question (first 50 chars): {question[:50]}..."
57
  )
58
  images = None
59
- prompt = self.user_prompt.format(question=question)
60
 
61
  if file_name:
62
  content = parse_file(task_id, file_name, api_url)
@@ -66,13 +76,12 @@ class Agent:
66
  ): # Parse content as image
67
  images = [content]
68
  else: # Append content to question
69
- prompt = prompt.format(content=content)
70
  logger.info(f"Question with content: {question}")
71
- else:
72
- prompt = prompt.format(content="")
73
 
74
  answer = self.agent.run(prompt, images=images)
75
- answer = answer.replace("FINAL ANSWER:", "").strip()
76
  logger.info(f"Agent returning answer: {answer}")
77
  return answer
78
 
@@ -93,4 +102,4 @@ if __name__ == "__main__":
93
  f"Task ID: {task_id}\nQuestion: {question}\nFile Name: {file_name}\n\n"
94
  )
95
 
96
- answer = agent(question, file_name)
 
8
  CodeAgent,
9
  GoogleSearchTool,
10
  PythonInterpreterTool,
11
+ SpeechToTextTool,
12
  VisitWebpageTool,
13
  )
14
 
15
  from src.file_handler.parse import parse_file
16
+ from src.tools.reasoning import ReasoningToolkit
17
+ from src.tools.reverse_question import reverse_question
18
+ from src.tracing import add_tracing
19
 
20
  load_dotenv()
21
+ add_tracing()
22
 
23
 
24
  class Agent:
 
29
  api_key=os.getenv("AZURE_OPENAI_API_KEY"),
30
  api_version=os.getenv("OPENAI_API_VERSION"),
31
  )
32
+ reasoning_toolkit = ReasoningToolkit()
33
  tools = [
34
  GoogleSearchTool(provider="serper"),
35
  VisitWebpageTool(),
36
  PythonInterpreterTool(),
37
+ SpeechToTextTool(),
38
+ *reasoning_toolkit.tools,
39
  reverse_question,
40
  ]
41
  self.agent = CodeAgent(
 
44
  )
45
  self.user_prompt = """
46
  I will ask you a question.
 
47
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
48
  If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
49
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
50
  If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
51
 
52
+ You MUST use the following tools:
53
+ - think, used before all other tool call and before the final answer
54
+ - analyze, used after all other tool call and before the final answer
55
+
56
  Question: {question}
57
 
58
  Attached content: {content}
 
66
  f"Agent received question (first 50 chars): {question[:50]}..."
67
  )
68
  images = None
69
+ content = ""
70
 
71
  if file_name:
72
  content = parse_file(task_id, file_name, api_url)
 
76
  ): # Parse content as image
77
  images = [content]
78
  else: # Append content to question
 
79
  logger.info(f"Question with content: {question}")
80
+
81
+ prompt = self.user_prompt.format(question=question, content=content)
82
 
83
  answer = self.agent.run(prompt, images=images)
84
+ answer = str(answer).replace("FINAL ANSWER:", "").strip()
85
  logger.info(f"Agent returning answer: {answer}")
86
  return answer
87
 
 
102
  f"Task ID: {task_id}\nQuestion: {question}\nFile Name: {file_name}\n\n"
103
  )
104
 
105
+ answer = agent(question, task_id, file_name, api_url)
requirements.txt CHANGED
@@ -11,3 +11,4 @@ pillow
11
  python-docx
12
  requests
13
  smolagents[openai]
 
 
11
  python-docx
12
  requests
13
  smolagents[openai]
14
+ transformers
src/tools/reasoning.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from textwrap import dedent
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ from smolagents.tools import Tool # SmolAgents base class
5
+
6
+
7
+ # ---------------------------------------------------------------------
8
+ # Helper enum – kept as str literals so we avoid any Agno dependency.
9
+ # ---------------------------------------------------------------------
10
class NextAction:
    """Namespace of the plain-string values accepted as a "next action".

    The members are ordinary ``str`` constants rather than ``enum`` members,
    so they compare equal to (and format as) the raw strings.
    """

    # Keep working on the problem.
    CONTINUE = "continue"
    # Double-check the current result before answering.
    VALIDATE = "validate"
    # Stop and give the final answer.
    FINAL_ANSWER = "final_answer"
14
+
15
+
16
+ # ---------------------------------------------------------------------
17
+ # THINK TOOL -----------------------------------------------------------
18
+ # ---------------------------------------------------------------------
19
class ThinkTool(Tool):
    """Scratch-pad tool that records reasoning steps and echoes them back.

    Steps are kept in memory, grouped by ``run_id``. Every call appends one
    step and returns the whole chain recorded so far for that run, so the
    caller can see its earlier steps.
    """

    name = "think"
    description = (
        "Internal scratch‑pad. Use this to reason step‑by‑step before "
        "calling other tools or replying to the user."
    )
    inputs = {
        "title": {"type": "string", "description": "Concise title"},
        "thought": {"type": "string", "description": "Detailed reasoning"},
        "action": {
            "type": "string",
            "description": "Intended next action",
            "nullable": True,
        },
        "confidence": {
            "type": "number",
            "description": "Confidence 0–1",
            "nullable": True,
        },
        "run_id": {
            "type": "string",
            "description": "Execution identifier",
            "nullable": True,
        },
    }
    output_type = "string"

    def __init__(self):
        super().__init__()
        # Maps run_id -> ordered list of the steps recorded for that run.
        self._history: Dict[str, List[Dict[str, Any]]] = {}

    def forward(
        self,
        title: str,
        thought: str,
        action: Optional[str] = None,
        confidence: float = 0.8,
        run_id: str = "default",
    ) -> str:
        """Record one reasoning step and return the formatted chain so far.

        Args:
            title: Concise title of the step.
            thought: Detailed reasoning text; included verbatim in the output.
            action: Intended next action, if any.
            confidence: Confidence value; ``None`` falls back to ``0.8``.
            run_id: Key of the history to append to; ``None`` falls back to
                ``"default"``.

        Returns:
            All steps recorded under ``run_id``, numbered from 1 and separated
            by a blank line.
        """
        # The input schema declares these as nullable, so an explicit None may
        # be passed; treat it the same as "argument omitted".
        if confidence is None:
            confidence = 0.8
        if run_id is None:
            run_id = "default"

        steps = self._history.setdefault(run_id, [])
        steps.append(
            {
                "title": title,
                "reasoning": thought,
                "action": action,
                "confidence": confidence,
            }
        )

        # Build each step line by line instead of dedenting an interpolated
        # template: dedent() on already-interpolated text lets multi-line or
        # indented reasoning change the common margin and distort the output.
        blocks = [
            "\n".join(
                (
                    f"Step {idx}:",
                    f"Title: {s['title']}",
                    f"Reasoning: {s['reasoning']}",
                    f"Action: {s['action']}",
                    f"Confidence: {s['confidence']}",
                )
            )
            for idx, s in enumerate(steps, 1)
        ]
        return "\n\n".join(blocks).strip()
83
+
84
+
85
+ # ---------------------------------------------------------------------
86
+ # ANALYZE TOOL ---------------------------------------------------------
87
+ # ---------------------------------------------------------------------
88
class AnalyzeTool(Tool):
    """Tool that records an analysis of earlier results and echoes the chain.

    Steps are kept in memory, grouped by ``run_id``. Every call validates the
    requested next action, appends one step and returns the whole chain
    recorded so far for that run.
    """

    name = "analyze"
    description = (
        "Evaluate the result of previous actions and decide whether to "
        "continue, validate, or provide a final answer. "
    )
    inputs = {
        "title": {"type": "string", "description": "Concise title"},
        "result": {"type": "string", "description": "Outcome being analysed"},
        "analysis": {"type": "string", "description": "Your analysis"},
        "next_action": {
            "type": "string",
            "description": "'continue' | 'validate' | 'final_answer'",
            "nullable": True,
        },
        "confidence": {
            "type": "number",
            "description": "Confidence 0–1",
            "nullable": True,
        },
        "run_id": {
            "type": "string",
            "description": "Execution identifier",
            "nullable": True,
        },
    }
    output_type = "string"

    def __init__(self):
        super().__init__()
        # Maps run_id -> ordered list of the steps recorded for that run.
        self._history: Dict[str, List[Dict[str, Any]]] = {}

    def forward(
        self,
        title: str,
        result: str,
        analysis: str,
        next_action: str = NextAction.CONTINUE,
        confidence: float = 0.8,
        run_id: str = "default",
    ) -> str:
        """Record one analysis step and return the formatted chain so far.

        Args:
            title: Concise title of the step.
            result: Outcome being analysed; included verbatim in the output.
            analysis: Analysis text; included verbatim in the output.
            next_action: One of the ``NextAction`` values; ``None`` falls back
                to ``NextAction.CONTINUE``.
            confidence: Confidence value; ``None`` falls back to ``0.8``.
            run_id: Key of the history to append to; ``None`` falls back to
                ``"default"``.

        Returns:
            All steps recorded under ``run_id``, numbered from 1 and separated
            by a blank line.

        Raises:
            ValueError: If ``next_action`` is not one of the allowed values.
        """
        # The input schema declares these as nullable, so an explicit None may
        # be passed; treat it the same as "argument omitted" rather than
        # rejecting it or filing the step under a None key.
        if next_action is None:
            next_action = NextAction.CONTINUE
        if confidence is None:
            confidence = 0.8
        if run_id is None:
            run_id = "default"

        allowed = (
            NextAction.CONTINUE,
            NextAction.VALIDATE,
            NextAction.FINAL_ANSWER,
        )
        if next_action not in allowed:
            raise ValueError(
                f"next_action must be one of "
                f"{NextAction.CONTINUE}, {NextAction.VALIDATE}, "
                f"{NextAction.FINAL_ANSWER}"
            )

        steps = self._history.setdefault(run_id, [])
        steps.append(
            {
                "title": title,
                "result": result,
                "reasoning": analysis,
                "next_action": next_action,
                "confidence": confidence,
            }
        )

        # Build each step line by line instead of dedenting an interpolated
        # template: dedent() on already-interpolated text lets multi-line or
        # indented content change the common margin and distort the output.
        blocks = [
            "\n".join(
                (
                    f"Step {idx}:",
                    f"Title: {s['title']}",
                    f"Result: {s.get('result')}",
                    f"Reasoning: {s['reasoning']}",
                    f"Next action: {s.get('next_action')}",
                    f"Confidence: {s['confidence']}",
                )
            )
            for idx, s in enumerate(steps, 1)
        ]
        return "\n\n".join(blocks).strip()
165
+
166
+
167
+ # ---------------------------------------------------------------------
168
+ # TOOLKIT WRAPPER ------------------------------------------------------
169
+ # ---------------------------------------------------------------------
170
class ReasoningToolkit:
    """
    Convenience wrapper that bundles the reasoning tools, so you can write:

        from src.tools.reasoning import ReasoningToolkit
        toolkit = ReasoningToolkit()
        agent = CodeAgent(tools=toolkit.tools, model=...)

    Attributes:
        tools: The tool instances selected at construction time, in the
            order think, analyze.
    """

    # Base prompt text describing how the two tools are meant to be used.
    DEFAULT_INSTRUCTIONS = dedent(
        """\
        You have access to two internal tools – **think** and **analyze** –
        for chain‑of‑thought reasoning. **Always** call `think` before
        external tool calls or final answers, then call `analyze` to
        decide whether to continue, validate, or finish."""
    )

    def __init__(self, think: bool = True, analyze: bool = True):
        """Create the toolkit.

        Args:
            think: Include a ``ThinkTool`` instance in ``tools``.
            analyze: Include an ``AnalyzeTool`` instance in ``tools``.
        """
        self.tools: List[Tool] = []
        if think:
            self.tools.append(ThinkTool())
        if analyze:
            self.tools.append(AnalyzeTool())

    def with_instructions(self, extra: Optional[str] = None) -> str:
        """Return the default instructions, optionally followed by ``extra``.

        Args:
            extra: Additional text appended on a new line. ``None`` or an
                empty string leaves the default instructions unchanged.

        Returns:
            The combined instruction text.
        """
        # Optional[...] matches the typing style used elsewhere in this module
        # and, unlike ``str | None``, is valid on Python versions before 3.10.
        if not extra:
            return self.DEFAULT_INSTRUCTIONS
        return self.DEFAULT_INSTRUCTIONS + "\n" + extra