ffreemt committed
Commit 0a72279 · 1 Parent(s): 76ac5ae

Update app.py

Files changed (2):
  1. app-.py +320 -0
  2. app.py +306 -165
app-.py ADDED
@@ -0,0 +1,320 @@
+import argparse
+import json
+import os
+import threading
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
+from pathlib import Path
+from typing import List, Optional
+
+import datasets
+import pandas as pd
+from dotenv import load_dotenv
+from huggingface_hub import login
+import gradio as gr
+
+from scripts.reformulator import prepare_response
+from scripts.run_agents import (
+    get_single_file_description,
+    get_zip_description,
+)
+from scripts.text_inspector_tool import TextInspectorTool
+from scripts.text_web_browser import (
+    ArchiveSearchTool,
+    FinderTool,
+    FindNextTool,
+    PageDownTool,
+    PageUpTool,
+    SearchInformationTool,
+    SimpleTextBrowser,
+    VisitTool,
+)
+from scripts.visual_qa import visualizer
+from tqdm import tqdm
+
+from smolagents import (
+    # MANAGED_AGENT_PROMPT,
+    CodeAgent,
+    HfApiModel,
+    LiteLLMModel,
+    Model,
+    ToolCallingAgent,
+)
+from smolagents.agent_types import AgentText, AgentImage, AgentAudio, handle_agent_output_types
+from smolagents.gradio_ui import pull_messages_from_step  # , handle_agent_output_types
+
+
+AUTHORIZED_IMPORTS = [
+    "requests",
+    "zipfile",
+    "os",
+    "pandas",
+    "numpy",
+    "sympy",
+    "json",
+    "bs4",
+    "pubchempy",
+    "xml",
+    "yahoo_finance",
+    "Bio",
+    "sklearn",
+    "scipy",
+    "pydub",
+    "io",
+    "PIL",
+    "chess",
+    "PyPDF2",
+    "pptx",
+    "torch",
+    "datetime",
+    "fractions",
+    "csv",
+]
+load_dotenv(override=True)
+login(os.getenv("HF_TOKEN"))
+
+append_answer_lock = threading.Lock()
+
+SET = "validation"
+
+custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}
+
+# skip
+_ = """
+### LOAD EVALUATION DATASET
+
+eval_ds = datasets.load_dataset("gaia-benchmark/GAIA", "2023_all")[SET]
+eval_ds = eval_ds.rename_columns({"Question": "question", "Final answer": "true_answer", "Level": "task"})
+
+
+def preprocess_file_paths(row):
+    if len(row["file_name"]) > 0:
+        row["file_name"] = f"data/gaia/{SET}/" + row["file_name"]
+    return row
+
+
+eval_ds = eval_ds.map(preprocess_file_paths)
+eval_df = pd.DataFrame(eval_ds)
+print("Loaded evaluation dataset:")
+print(eval_df["task"].value_counts())
+# """
+
+user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
+
+BROWSER_CONFIG = {
+    "viewport_size": 1024 * 5,
+    "downloads_folder": "downloads_folder",
+    "request_kwargs": {
+        "headers": {"User-Agent": user_agent},
+        "timeout": 300,
+    },
+    "serpapi_key": os.getenv("SERPAPI_API_KEY"),
+}
+
+os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)
+
+model = LiteLLMModel(
+    # "gpt-4o",
+    # os.getenv("MODEL_ID", "gpt-4o-mini"),
+    os.getenv("MODEL_ID", "deepseek-ai/DeepSeek-V3"),
+    custom_role_conversions=custom_role_conversions,
+    api_base=os.getenv("OPENAI_API_BASE"),
+    api_key=os.getenv("OPENAI_API_KEY"),
+)
+
+text_limit = 20000
+ti_tool = TextInspectorTool(model, text_limit)
+
+browser = SimpleTextBrowser(**BROWSER_CONFIG)
+
+WEB_TOOLS = [
+    SearchInformationTool(browser),
+    VisitTool(browser),
+    PageUpTool(browser),
+    PageDownTool(browser),
+    FinderTool(browser),
+    FindNextTool(browser),
+    ArchiveSearchTool(browser),
+    TextInspectorTool(model, text_limit),
+]
+
+agent = CodeAgent(
+    model=model,
+    tools=[visualizer] + WEB_TOOLS,
+    max_steps=5,
+    verbosity_level=2,
+    additional_authorized_imports=AUTHORIZED_IMPORTS,
+    planning_interval=4,
+)
+
+document_inspection_tool = TextInspectorTool(model, 20000)
+
+
+# augmented_question = """You have one question to answer. It is paramount that you provide a correct answer.
+# Give it all you can: I know for a fact that you have access to all the relevant tools to solve it and find the correct answer (the answer does exist). Failure or 'I cannot answer' or 'None found' will not be tolerated, success will be rewarded.
+# Run verification steps if that's needed, you must make sure you find the correct answer!
+# Here is the task:
+# """ + example["question"]
+
+# if example["file_name"]:
+# prompt_use_files = "\n\nTo solve the task above, you will have to use this attached file:"
+# prompt_use_files += get_single_file_description(
+# example["file_name"], example["question"], visual_inspection_tool, document_inspection_tool
+# )
+# augmented_question += prompt_use_files
+
+
+# final_result = agent.run(augmented_question)
+
+
+def stream_to_gradio(
+    agent,
+    task: str,
+    reset_agent_memory: bool = False,
+    additional_args: Optional[dict] = None,
+):
+    """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
+    for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args):
+        for message in pull_messages_from_step(
+            step_log,
+        ):
+            yield message
+
+    final_answer = step_log  # Last log is the run's final_answer
+    final_answer = handle_agent_output_types(final_answer)
+
+    if isinstance(final_answer, AgentText):
+        yield gr.ChatMessage(
+            role="assistant",
+            content=f"**Final answer:**\n{final_answer.to_string()}\n",
+        )
+    elif isinstance(final_answer, AgentImage):
+        yield gr.ChatMessage(
+            role="assistant",
+            content={"path": final_answer.to_string(), "mime_type": "image/png"},
+        )
+    elif isinstance(final_answer, AgentAudio):
+        yield gr.ChatMessage(
+            role="assistant",
+            content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
+        )
+    else:
+        yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
+
+
+class GradioUI:
+    """A one-line interface to launch your agent in Gradio"""
+
+    def __init__(self, agent, file_upload_folder: str | None = None):
+        self.agent = agent
+        self.file_upload_folder = file_upload_folder
+        if self.file_upload_folder is not None:
+            if not os.path.exists(file_upload_folder):
+                os.mkdir(file_upload_folder)
+
+    def interact_with_agent(self, prompt, messages):
+        messages.append(gr.ChatMessage(role="user", content=prompt))
+        yield messages
+        for msg in stream_to_gradio(self.agent, task=prompt, reset_agent_memory=False):
+            messages.append(msg)
+            yield messages
+        yield messages
+
+    def upload_file(
+        self,
+        file,
+        file_uploads_log,
+        allowed_file_types=[
+            "application/pdf",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "text/plain",
+        ],
+    ):
+        """
+        Handle file uploads, default allowed types are .pdf, .docx, and .txt
+        """
+        if file is None:
+            return gr.Textbox("No file uploaded", visible=True), file_uploads_log
+
+        try:
+            mime_type, _ = mimetypes.guess_type(file.name)
+        except Exception as e:
+            return gr.Textbox(f"Error: {e}", visible=True), file_uploads_log
+
+        if mime_type not in allowed_file_types:
+            return gr.Textbox("File type disallowed", visible=True), file_uploads_log
+
+        # Sanitize file name
+        original_name = os.path.basename(file.name)
+        sanitized_name = re.sub(
+            r"[^\w\-.]", "_", original_name
+        )  # Replace any non-alphanumeric, non-dash, or non-dot characters with underscores
+
+        type_to_ext = {}
+        for ext, t in mimetypes.types_map.items():
+            if t not in type_to_ext:
+                type_to_ext[t] = ext
+
+        # Ensure the extension correlates to the mime type
+        sanitized_name = sanitized_name.split(".")[:-1]
+        sanitized_name.append("" + type_to_ext[mime_type])
+        sanitized_name = "".join(sanitized_name)
+
+        # Save the uploaded file to the specified folder
+        file_path = os.path.join(self.file_upload_folder, os.path.basename(sanitized_name))
+        shutil.copy(file.name, file_path)
+
+        return gr.Textbox(f"File uploaded: {file_path}", visible=True), file_uploads_log + [file_path]
+
+    def log_user_message(self, text_input, file_uploads_log):
+        return (
+            text_input
+            + (
+                f"\nYou have been provided with these files, which might be helpful or not: {file_uploads_log}"
+                if len(file_uploads_log) > 0
+                else ""
+            ),
+            "",
+        )
+
+    def launch(self, **kwargs):
+        with gr.Blocks(theme="ocean", fill_height=True) as demo:
+            gr.Markdown("""# open Deep Research - free the AI agents!
+
+OpenAI just published [Deep Research](https://openai.com/index/introducing-deep-research/), a very nice assistant that can perform deep searches on the web to answer user questions.
+
+However, their agent has a huge downside: it's not open. So we've started a 24-hour rush to replicate and open-source it. Our resulting [open-Deep-Research agent](https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research) took the #1 rank of any open submission on the GAIA leaderboard! ✨
+
+You can try a simplified version below. 👇""")
+            stored_messages = gr.State([])
+            file_uploads_log = gr.State([])
+            chatbot = gr.Chatbot(
+                label="Open-Deep-Research",
+                type="messages",
+                avatar_images=(
+                    None,
+                    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
+                ),
+                # resizable=True, # new version
+                resizeable=True,
+                scale=1,
+            )
+            # If an upload folder is provided, enable the upload feature
+            if self.file_upload_folder is not None:
+                upload_file = gr.File(label="Upload a file")
+                upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
+                upload_file.change(
+                    self.upload_file,
+                    [upload_file, file_uploads_log],
+                    [upload_status, file_uploads_log],
+                )
+            text_input = gr.Textbox(lines=1, label="Your request")
+            text_input.submit(
+                self.log_user_message,
+                [text_input, file_uploads_log],
+                [stored_messages, text_input],
+            ).then(self.interact_with_agent, [stored_messages, chatbot], [chatbot])
+
+        demo.launch(debug=True, share=True, **kwargs)
+
+GradioUI(agent).launch()
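
Note: app-.py reads all of its configuration from environment variables via load_dotenv(). A minimal sketch of the variables the code above looks up (the variable names come from the os.getenv calls; the values are placeholders, not from the commit):

import os

# Hypothetical placeholder values; only the variable names are taken from the script above.
os.environ.setdefault("HF_TOKEN", "hf_xxx")                         # huggingface_hub.login()
os.environ.setdefault("SERPAPI_API_KEY", "xxx")                     # BROWSER_CONFIG["serpapi_key"]
os.environ.setdefault("MODEL_ID", "deepseek-ai/DeepSeek-V3")        # LiteLLMModel model id (default shown above)
os.environ.setdefault("OPENAI_API_BASE", "https://example.com/v1")  # OpenAI-compatible endpoint for LiteLLM
os.environ.setdefault("OPENAI_API_KEY", "sk-xxx")                   # key for that endpoint
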
app.py CHANGED
@@ -1,23 +1,28 @@
-import argparse
 import json
+import mimetypes
 import os
+import re
+import shutil
 import threading
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from datetime import datetime
-from pathlib import Path
-from typing import List, Optional
+from typing import Optional

-import datasets
-import pandas as pd
+import gradio as gr
 from dotenv import load_dotenv
 from huggingface_hub import login
-import gradio as gr
-
-from scripts.reformulator import prepare_response
-from scripts.run_agents import (
-    get_single_file_description,
-    get_zip_description,
+from smolagents import (
+    CodeAgent,
+    HfApiModel,
+    Tool,
+    GoogleSearchTool
 )
+from smolagents.agent_types import (
+    AgentAudio,
+    AgentImage,
+    AgentText,
+    handle_agent_output_types,
+)
+from smolagents.gradio_ui import stream_to_gradio
+
 from scripts.text_inspector_tool import TextInspectorTool
 from scripts.text_web_browser import (
     ArchiveSearchTool,
@@ -25,29 +30,20 @@ from scripts.text_web_browser import (
     FindNextTool,
     PageDownTool,
     PageUpTool,
-    SearchInformationTool,
     SimpleTextBrowser,
     VisitTool,
 )
 from scripts.visual_qa import visualizer
-from tqdm import tqdm

-from smolagents import (
-    # MANAGED_AGENT_PROMPT,
-    CodeAgent,
-    HfApiModel,
-    LiteLLMModel,
-    Model,
-    ToolCallingAgent,
-)
-from smolagents.agent_types import AgentText, AgentImage, AgentAudio, handle_agent_output_types
-from smolagents.gradio_ui import pull_messages_from_step  # , handle_agent_output_types

+web_search = GoogleSearchTool(provider="serper")

+print(web_search(query="Donald Trump news"))
+
+# quit()
 AUTHORIZED_IMPORTS = [
     "requests",
     "zipfile",
-    "os",
     "pandas",
     "numpy",
     "sympy",
@@ -60,7 +56,6 @@ AUTHORIZED_IMPORTS = [
     "sklearn",
     "scipy",
     "pydub",
-    "io",
     "PIL",
     "chess",
     "PyPDF2",
@@ -75,30 +70,8 @@ login(os.getenv("HF_TOKEN"))

 append_answer_lock = threading.Lock()

-SET = "validation"
-
 custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}

-# skip
-_ = """
-### LOAD EVALUATION DATASET
-
-eval_ds = datasets.load_dataset("gaia-benchmark/GAIA", "2023_all")[SET]
-eval_ds = eval_ds.rename_columns({"Question": "question", "Final answer": "true_answer", "Level": "task"})
-
-
-def preprocess_file_paths(row):
-    if len(row["file_name"]) > 0:
-        row["file_name"] = f"data/gaia/{SET}/" + row["file_name"]
-    return row
-
-
-eval_ds = eval_ds.map(preprocess_file_paths)
-eval_df = pd.DataFrame(eval_ds)
-print("Loaded evaluation dataset:")
-print(eval_df["task"].value_counts())
-# """
-
 user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"

 BROWSER_CONFIG = {
@@ -113,13 +86,8 @@ BROWSER_CONFIG = {

 os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)

-model = LiteLLMModel(
-    # "gpt-4o",
-    # os.getenv("MODEL_ID", "gpt-4o-mini"),
-    os.getenv("MODEL_ID", "deepseek-ai/DeepSeek-V3"),
+model = HfApiModel(
     custom_role_conversions=custom_role_conversions,
-    api_base=os.getenv("OPENAI_API_BASE"),
-    api_key=os.getenv("OPENAI_API_KEY"),
 )

 text_limit = 20000
@@ -128,7 +96,7 @@ ti_tool = TextInspectorTool(model, text_limit)
 browser = SimpleTextBrowser(**BROWSER_CONFIG)

 WEB_TOOLS = [
-    SearchInformationTool(browser),
+    web_search,
     VisitTool(browser),
     PageUpTool(browser),
     PageDownTool(browser),
@@ -138,87 +106,57 @@ WEB_TOOLS = [
     TextInspectorTool(model, text_limit),
 ]

-agent = CodeAgent(
-    model=model,
-    tools=[visualizer] + WEB_TOOLS,
-    max_steps=5,
-    verbosity_level=2,
-    additional_authorized_imports=AUTHORIZED_IMPORTS,
-    planning_interval=4,
-)
-
-document_inspection_tool = TextInspectorTool(model, 20000)
-

-# augmented_question = """You have one question to answer. It is paramount that you provide a correct answer.
-# Give it all you can: I know for a fact that you have access to all the relevant tools to solve it and find the correct answer (the answer does exist). Failure or 'I cannot answer' or 'None found' will not be tolerated, success will be rewarded.
-# Run verification steps if that's needed, you must make sure you find the correct answer!
-# Here is the task:
-# """ + example["question"]
+# Agent creation in a factory function
+def create_agent():
+    """Creates a fresh agent instance for each session"""
+    return CodeAgent(
+        model=model,
+        tools=[visualizer] + WEB_TOOLS,
+        max_steps=10,
+        verbosity_level=1,
+        additional_authorized_imports=AUTHORIZED_IMPORTS,
+        planning_interval=4,
+    )

-# if example["file_name"]:
-# prompt_use_files = "\n\nTo solve the task above, you will have to use this attached file:"
-# prompt_use_files += get_single_file_description(
-# example["file_name"], example["question"], visual_inspection_tool, document_inspection_tool
-# )
-# augmented_question += prompt_use_files

-
-# final_result = agent.run(augmented_question)
-
-
-def stream_to_gradio(
-    agent,
-    task: str,
-    reset_agent_memory: bool = False,
-    additional_args: Optional[dict] = None,
-):
-    """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
-    for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args):
-        for message in pull_messages_from_step(
-            step_log,
-        ):
-            yield message
-
-    final_answer = step_log  # Last log is the run's final_answer
-    final_answer = handle_agent_output_types(final_answer)
-
-    if isinstance(final_answer, AgentText):
-        yield gr.ChatMessage(
-            role="assistant",
-            content=f"**Final answer:**\n{final_answer.to_string()}\n",
-        )
-    elif isinstance(final_answer, AgentImage):
-        yield gr.ChatMessage(
-            role="assistant",
-            content={"path": final_answer.to_string(), "mime_type": "image/png"},
-        )
-    elif isinstance(final_answer, AgentAudio):
-        yield gr.ChatMessage(
-            role="assistant",
-            content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
-        )
-    else:
-        yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
+document_inspection_tool = TextInspectorTool(model, 20000)


 class GradioUI:
     """A one-line interface to launch your agent in Gradio"""

-    def __init__(self, agent, file_upload_folder: str | None = None):
-        self.agent = agent
+    def __init__(self, file_upload_folder: str | None = None):
         self.file_upload_folder = file_upload_folder
         if self.file_upload_folder is not None:
             if not os.path.exists(file_upload_folder):
                 os.mkdir(file_upload_folder)

-    def interact_with_agent(self, prompt, messages):
-        messages.append(gr.ChatMessage(role="user", content=prompt))
-        yield messages
-        for msg in stream_to_gradio(self.agent, task=prompt, reset_agent_memory=False):
-            messages.append(msg)
+    def interact_with_agent(self, prompt, messages, session_state):
+        # Get or create session-specific agent
+        if "agent" not in session_state:
+            session_state["agent"] = create_agent()
+
+        # Adding monitoring
+        try:
+            # log the existence of agent memory
+            has_memory = hasattr(session_state["agent"], "memory")
+            print(f"Agent has memory: {has_memory}")
+            if has_memory:
+                print(f"Memory type: {type(session_state['agent'].memory)}")
+
+            messages.append(gr.ChatMessage(role="user", content=prompt))
             yield messages
-        yield messages
+
+            for msg in stream_to_gradio(
+                session_state["agent"], task=prompt, reset_agent_memory=False
+            ):
+                messages.append(msg)
+                yield messages
+            yield messages
+        except Exception as e:
+            print(f"Error in interaction: {str(e)}")
+            raise

     def upload_file(
         self,
@@ -261,10 +199,14 @@ class GradioUI:
         sanitized_name = "".join(sanitized_name)

         # Save the uploaded file to the specified folder
-        file_path = os.path.join(self.file_upload_folder, os.path.basename(sanitized_name))
+        file_path = os.path.join(
+            self.file_upload_folder, os.path.basename(sanitized_name)
+        )
         shutil.copy(file.name, file_path)

-        return gr.Textbox(f"File uploaded: {file_path}", visible=True), file_uploads_log + [file_path]
+        return gr.Textbox(
+            f"File uploaded: {file_path}", visible=True
+        ), file_uploads_log + [file_path]

     def log_user_message(self, text_input, file_uploads_log):
         return (
@@ -274,47 +216,246 @@ class GradioUI:
                 if len(file_uploads_log) > 0
                 else ""
             ),
-            "",
+            gr.Textbox(
+                value="",
+                interactive=False,
+                placeholder="Please wait while Steps are getting populated",
+            ),
+            gr.Button(interactive=False),
         )

+    def detect_device(self, request: gr.Request):
+        # Check whether the user device is a mobile or a computer
+
+        if not request:
+            return "Unknown device"
+        # Method 1: Check sec-ch-ua-mobile header
+        is_mobile_header = request.headers.get("sec-ch-ua-mobile")
+        if is_mobile_header:
+            return "Mobile" if "?1" in is_mobile_header else "Desktop"
+
+        # Method 2: Check user-agent string
+        user_agent = request.headers.get("user-agent", "").lower()
+        mobile_keywords = ["android", "iphone", "ipad", "mobile", "phone"]
+
+        if any(keyword in user_agent for keyword in mobile_keywords):
+            return "Mobile"
+
+        # Method 3: Check platform
+        platform = request.headers.get("sec-ch-ua-platform", "").lower()
+        if platform:
+            if platform in ['"android"', '"ios"']:
+                return "Mobile"
+            elif platform in ['"windows"', '"macos"', '"linux"']:
+                return "Desktop"
+
+        # Default case if no clear indicators
+        return "Desktop"
+
     def launch(self, **kwargs):
         with gr.Blocks(theme="ocean", fill_height=True) as demo:
-            gr.Markdown("""# open Deep Research - free the AI agents!
-
-OpenAI just published [Deep Research](https://openai.com/index/introducing-deep-research/), a very nice assistant that can perform deep searches on the web to answer user questions.
-
-However, their agent has a huge downside: it's not open. So we've started a 24-hour rush to replicate and open-source it. Our resulting [open-Deep-Research agent](https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research) took the #1 rank of any open submission on the GAIA leaderboard! ✨
-
-You can try a simplified version below. 👇""")
-            stored_messages = gr.State([])
-            file_uploads_log = gr.State([])
-            chatbot = gr.Chatbot(
-                label="Open-Deep-Research",
-                type="messages",
-                avatar_images=(
-                    None,
-                    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
-                ),
-                # resizable=True, # new version
-                resizeable=True,
-                scale=1,
-            )
-            # If an upload folder is provided, enable the upload feature
-            if self.file_upload_folder is not None:
-                upload_file = gr.File(label="Upload a file")
-                upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
-                upload_file.change(
-                    self.upload_file,
-                    [upload_file, file_uploads_log],
-                    [upload_status, file_uploads_log],
-                )
-            text_input = gr.Textbox(lines=1, label="Your request")
-            text_input.submit(
-                self.log_user_message,
-                [text_input, file_uploads_log],
-                [stored_messages, text_input],
-            ).then(self.interact_with_agent, [stored_messages, chatbot], [chatbot])
-
-        demo.launch(debug=True, share=True, **kwargs)
-
-GradioUI(agent).launch()
+            # Different layouts for mobile and computer devices
+            @gr.render()
+            def layout(request: gr.Request):
+                device = self.detect_device(request)
+                print(f"device - {device}")
+                # Render layout with sidebar
+                if device == "Desktop":
+                    with gr.Blocks(
+                        fill_height=True,
+                    ):
+                        file_uploads_log = gr.State([])
+                        with gr.Sidebar():
+                            gr.Markdown("""# open Deep Research - free the AI agents!
+
+OpenAI just published [Deep Research](https://openai.com/index/introducing-deep-research/), an amazing assistant that can perform deep searches on the web to answer user questions.
+
+However, their agent has a huge downside: it's not open. So we've started a 24-hour rush to replicate and open-source it. Our resulting [open-Deep-Research agent](https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research) took the #1 rank of any open submission on the GAIA leaderboard! ✨
+
+You can try a simplified version here that uses `Qwen-Coder-32B` instead of `o1`.<br><br>""")
+                            with gr.Group():
+                                gr.Markdown("**Your request**", container=True)
+                                text_input = gr.Textbox(
+                                    lines=3,
+                                    label="Your request",
+                                    container=False,
+                                    placeholder="Enter your prompt here and press Shift+Enter or press the button",
+                                )
+                                launch_research_btn = gr.Button(
+                                    "Run", variant="primary"
+                                )
+
+                            # If an upload folder is provided, enable the upload feature
+                            if self.file_upload_folder is not None:
+                                upload_file = gr.File(label="Upload a file")
+                                upload_status = gr.Textbox(
+                                    label="Upload Status",
+                                    interactive=False,
+                                    visible=False,
+                                )
+                                upload_file.change(
+                                    self.upload_file,
+                                    [upload_file, file_uploads_log],
+                                    [upload_status, file_uploads_log],
+                                )
+
+                            gr.HTML("<br><br><h4><center>Powered by:</center></h4>")
+                            with gr.Row():
+                                gr.HTML("""<div style="display: flex; align-items: center; gap: 8px; font-family: system-ui, -apple-system, sans-serif;">
+<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png" style="width: 32px; height: 32px; object-fit: contain;" alt="logo">
+<a target="_blank" href="https://github.com/huggingface/smolagents"><b>huggingface/smolagents</b></a>
+</div>""")
+
+                        # Add session state to store session-specific data
+                        session_state = gr.State(
+                            {}
+                        )  # Initialize empty state for each session
+                        stored_messages = gr.State([])
+                        chatbot = gr.Chatbot(
+                            label="open-Deep-Research",
+                            type="messages",
+                            avatar_images=(
+                                None,
+                                "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
+                            ),
+                            resizeable=False,
+                            scale=1,
+                            elem_id="my-chatbot",
+                        )
+
+                        text_input.submit(
+                            self.log_user_message,
+                            [text_input, file_uploads_log],
+                            [stored_messages, text_input, launch_research_btn],
+                        ).then(
+                            self.interact_with_agent,
+                            # Include session_state in function calls
+                            [stored_messages, chatbot, session_state],
+                            [chatbot],
+                        ).then(
+                            lambda: (
+                                gr.Textbox(
+                                    interactive=True,
+                                    placeholder="Enter your prompt here and press the button",
+                                ),
+                                gr.Button(interactive=True),
+                            ),
+                            None,
+                            [text_input, launch_research_btn],
+                        )
+                        launch_research_btn.click(
+                            self.log_user_message,
+                            [text_input, file_uploads_log],
+                            [stored_messages, text_input, launch_research_btn],
+                        ).then(
+                            self.interact_with_agent,
+                            # Include session_state in function calls
+                            [stored_messages, chatbot, session_state],
+                            [chatbot],
+                        ).then(
+                            lambda: (
+                                gr.Textbox(
+                                    interactive=True,
+                                    placeholder="Enter your prompt here and press the button",
+                                ),
+                                gr.Button(interactive=True),
+                            ),
+                            None,
+                            [text_input, launch_research_btn],
+                        )
+
+                # Render simple layout
+                else:
+                    with gr.Blocks(
+                        fill_height=True,
+                    ):
+                        gr.Markdown("""# open Deep Research - free the AI agents!
+_Built with [smolagents](https://github.com/huggingface/smolagents)_
+
+OpenAI just published [Deep Research](https://openai.com/index/introducing-deep-research/), a very nice assistant that can perform deep searches on the web to answer user questions.
+
+However, their agent has a huge downside: it's not open. So we've started a 24-hour rush to replicate and open-source it. Our resulting [open-Deep-Research agent](https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research) took the #1 rank of any open submission on the GAIA leaderboard! ✨
+
+You can try a simplified version below (uses `Qwen-Coder-32B` instead of `o1`, so much less powerful than the original open-Deep-Research)👇""")
+                        # Add session state to store session-specific data
+                        session_state = gr.State(
+                            {}
+                        )  # Initialize empty state for each session
+                        stored_messages = gr.State([])
+                        file_uploads_log = gr.State([])
+                        chatbot = gr.Chatbot(
+                            label="open-Deep-Research",
+                            type="messages",
+                            avatar_images=(
+                                None,
+                                "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
+                            ),
+                            resizeable=True,
+                            scale=1,
+                        )
+                        # If an upload folder is provided, enable the upload feature
+                        if self.file_upload_folder is not None:
+                            upload_file = gr.File(label="Upload a file")
+                            upload_status = gr.Textbox(
+                                label="Upload Status", interactive=False, visible=False
+                            )
+                            upload_file.change(
+                                self.upload_file,
+                                [upload_file, file_uploads_log],
+                                [upload_status, file_uploads_log],
+                            )
+                        text_input = gr.Textbox(
+                            lines=1,
+                            label="Your request",
+                            placeholder="Enter your prompt here and press the button",
+                        )
+                        launch_research_btn = gr.Button(
+                            "Run",
+                            variant="primary",
+                        )
+
+                        text_input.submit(
+                            self.log_user_message,
+                            [text_input, file_uploads_log],
+                            [stored_messages, text_input, launch_research_btn],
+                        ).then(
+                            self.interact_with_agent,
+                            # Include session_state in function calls
+                            [stored_messages, chatbot, session_state],
+                            [chatbot],
+                        ).then(
+                            lambda: (
+                                gr.Textbox(
+                                    interactive=True,
+                                    placeholder="Enter your prompt here and press the button",
+                                ),
+                                gr.Button(interactive=True),
+                            ),
+                            None,
+                            [text_input, launch_research_btn],
+                        )
+                        launch_research_btn.click(
+                            self.log_user_message,
+                            [text_input, file_uploads_log],
+                            [stored_messages, text_input, launch_research_btn],
+                        ).then(
+                            self.interact_with_agent,
+                            # Include session_state in function calls
+                            [stored_messages, chatbot, session_state],
+                            [chatbot],
+                        ).then(
+                            lambda: (
+                                gr.Textbox(
+                                    interactive=True,
+                                    placeholder="Enter your prompt here and press the button",
+                                ),
+                                gr.Button(interactive=True),
+                            ),
+                            None,
+                            [text_input, launch_research_btn],
+                        )
+
+        demo.launch(debug=True, **kwargs)
+
+
+GradioUI().launch()
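
Note: the core behavioral change in app.py is the move from one module-level agent to a per-session agent: create_agent() is a factory, and interact_with_agent lazily stores one agent per browser session inside a gr.State dict, so users no longer share memory. A minimal, self-contained sketch of that pattern (toy callback; the names respond/box are illustrative, not from the commit):

import gradio as gr

def respond(prompt, history, session_state):
    # Lazily create session-local state, mirroring session_state["agent"] above;
    # the dict is mutated in place, exactly as interact_with_agent does.
    if "memory" not in session_state:
        session_state["memory"] = []
    session_state["memory"].append(prompt)
    reply = f"Seen {len(session_state['memory'])} prompt(s) in this session."
    history = history + [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": reply},
    ]
    return history, ""

with gr.Blocks() as demo:
    session_state = gr.State({})  # a fresh dict per browser session
    chatbot = gr.Chatbot(type="messages")
    box = gr.Textbox(label="Your request")
    box.submit(respond, [box, chatbot, session_state], [chatbot, box])

# demo.launch()

Separately, GoogleSearchTool(provider="serper") expects a Serper API key in the environment (SERPER_API_KEY in current smolagents releases; check the installed version), in addition to the HF token used by login() and HfApiModel.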