Wendong-Fan committed on
Commit
ac6ba97
·
2 Parent(s): 3808745 69b2e63

Merge branch 'main' into refactor_webdemo

Browse files
.container/Dockerfile CHANGED
@@ -1,107 +1,58 @@
1
- # 使用ARG定义可配置的构建参数 | Using ARG to define configurable build parameters
2
- ARG PYTHON_VERSION=3.10
3
- ARG PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
4
- ARG PLAYWRIGHT_DOWNLOAD_HOST=https://npmmirror.com/mirrors/playwright
5
-
6
- # 第一阶段:构建依赖 | Stage 1: Build dependencies
7
- FROM python:${PYTHON_VERSION}-slim AS builder
8
-
9
- # 设置工作目录 | Set working directory
10
- WORKDIR /build
11
-
12
- # 设置pip镜像源以加速下载 | Set pip mirror to accelerate downloads
13
- ARG PIP_INDEX_URL
14
- RUN pip config set global.index-url ${PIP_INDEX_URL}
15
-
16
- # 安装构建依赖 | Install build dependencies
17
- RUN apt-get update && apt-get install -y --no-install-recommends \
18
- build-essential \
19
- && apt-get clean \
20
- && rm -rf /var/lib/apt/lists/*
21
-
22
- # 复制并安装requirements.txt | Copy and install requirements.txt
23
- COPY requirements.txt .
24
- RUN pip install --no-cache-dir --prefix=/install -r requirements.txt
25
-
26
- # 第二阶段:运行时环境 | Stage 2: Runtime environment
27
- FROM python:${PYTHON_VERSION}-slim
28
-
29
- # 添加构建信息标签 | Add build information labels
30
- ARG BUILD_DATE
31
- ARG VERSION
32
- LABEL org.opencontainers.image.created="${BUILD_DATE}" \
33
- org.opencontainers.image.version="${VERSION}" \
34
- org.opencontainers.image.title="OWL Project" \
35
- org.opencontainers.image.description="OWL Project Docker Image" \
36
- org.opencontainers.image.source="https://github.com/yourusername/owl"
37
-
38
- # 设置工作目录 | Set working directory
39
  WORKDIR /app
40
 
41
- # 设置pip镜像源以加速下载 | Set pip mirror to accelerate downloads
42
- ARG PIP_INDEX_URL
43
- RUN pip config set global.index-url ${PIP_INDEX_URL}
44
-
45
- # 从builder阶段复制已安装的Python包 | Copy installed Python packages from builder stage
46
- COPY --from=builder /install /usr/local
47
-
48
- # 优化apt安装,减少层数 | Optimize apt installation, reduce layers
49
  RUN apt-get update && apt-get install -y --no-install-recommends \
50
- curl \
51
- git \
52
- ffmpeg \
53
- libsm6 \
54
- libxext6 \
55
- # 添加xvfb和相关依赖 | Add xvfb and related dependencies
56
- xvfb \
57
- xauth \
58
- x11-utils \
59
  && apt-get clean \
60
  && rm -rf /var/lib/apt/lists/*
61
-
62
- # 安装 Playwright 依赖(使用国内镜像源) | Install Playwright dependencies (using Chinese mirror)
63
- ENV PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright
64
- ARG PLAYWRIGHT_DOWNLOAD_HOST
65
- ENV PLAYWRIGHT_DOWNLOAD_HOST=${PLAYWRIGHT_DOWNLOAD_HOST}
66
- RUN pip install --no-cache-dir playwright && \
67
- playwright install --with-deps chromium
68
-
69
- # 创建非root用户 | Create non-root user
70
- RUN groupadd -r owl && useradd -r -g owl -m owl
71
-
72
- # 复制项目文件 | Copy project files
73
  COPY owl/ ./owl/
74
  COPY licenses/ ./licenses/
75
  COPY assets/ ./assets/
76
  COPY README.md .
77
  COPY README_zh.md .
 
 
 
 
 
 
 
 
 
 
 
78
 
79
 
80
- # 创建启动脚本 | Create startup script
 
 
81
  RUN echo '#!/bin/bash\nxvfb-run --auto-servernum --server-args="-screen 0 1280x960x24" python "$@"' > /usr/local/bin/xvfb-python && \
82
  chmod +x /usr/local/bin/xvfb-python
83
 
84
- # 创建欢迎脚本 | Create welcome script
85
  RUN echo '#!/bin/bash\necho "欢迎使用OWL项目Docker环境!"\necho "Welcome to OWL Project Docker environment!"\necho ""\necho "可用的脚本 | Available scripts:"\nls -1 *.py | grep -v "__" | sed "s/^/- /"\necho ""\necho "运行示例 | Run examples:"\necho " xvfb-python run.py # 运行默认脚本 | Run default script"\necho " xvfb-python run_deepseek_example.py # 运行DeepSeek示例 | Run DeepSeek example"\necho ""\necho "或者使用自定义查询 | Or use custom query:"\necho " xvfb-python run.py \"你的问题 | Your question\""\necho ""' > /usr/local/bin/owl-welcome && \
86
  chmod +x /usr/local/bin/owl-welcome
87
 
88
- # 设置工作目录 | Set working directory
89
  WORKDIR /app/owl
90
 
91
- # 设置适当的权限 | Set appropriate permissions
92
- RUN chown -R owl:owl /app
93
- RUN mkdir -p /root/.cache && chown -R owl:owl /root/.cache
94
- RUN chmod 644 /app/owl/.env
95
-
96
-
97
- USER owl
98
- # 切换到非root用户 | Switch to non-root user
99
- # 注意:如果需要访问/dev/shm,可能仍需要root用户 | Note: If you need to access /dev/shm, you may still need root user
100
- # USER owl
101
-
102
- # 添加健康检查 | Add health check
103
  HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
104
  CMD python -c "import sys; sys.exit(0 if __import__('os').path.exists('/app/owl') else 1)"
105
 
106
- # 容器启动命令 | Container startup command
107
  CMD ["/bin/bash", "-c", "owl-welcome && /bin/bash"]
 
1
+ FROM python:3.10-slim
2
+
3
+ # 设置环境变量
4
+ ENV PYTHONDONTWRITEBYTECODE=1 \
5
+ PYTHONUNBUFFERED=1 \
6
+ PIP_NO_CACHE_DIR=0 \
7
+ PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple \
8
+ PLAYWRIGHT_DOWNLOAD_HOST=https://npmmirror.com/mirrors/playwright \
9
+ PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright \
10
+ DEBIAN_FRONTEND=noninteractive
11
+
12
+ # 设置工作目录
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  WORKDIR /app
14
 
15
+ # 安装系统依赖(合并为一个RUN命令减少层数)
 
 
 
 
 
 
 
16
  RUN apt-get update && apt-get install -y --no-install-recommends \
17
+ curl git ffmpeg libsm6 libxext6 xvfb xauth x11-utils \
18
+ gcc python3-dev \
 
 
 
 
 
 
 
19
  && apt-get clean \
20
  && rm -rf /var/lib/apt/lists/*
21
+ # 复制项目文件
 
 
 
 
 
 
 
 
 
 
 
22
  COPY owl/ ./owl/
23
  COPY licenses/ ./licenses/
24
  COPY assets/ ./assets/
25
  COPY README.md .
26
  COPY README_zh.md .
27
+ COPY pyproject.toml .
28
+
29
+ # 创建README.md文件以避免构建错误
30
+ RUN echo "# OWL Project\n\n这是OWL项目的Docker环境。" > README.md
31
+ # 安装uv工具
32
+ RUN pip install uv
33
+
34
+ # 创建虚拟环境并安装依赖
35
+ RUN uv venv .venv --python=3.10 && \
36
+ . .venv/bin/activate && \
37
+ uv pip install -e .
38
 
39
 
40
+
41
+
42
+ # 创建启动脚本
43
  RUN echo '#!/bin/bash\nxvfb-run --auto-servernum --server-args="-screen 0 1280x960x24" python "$@"' > /usr/local/bin/xvfb-python && \
44
  chmod +x /usr/local/bin/xvfb-python
45
 
46
+ # 创建欢迎脚本
47
  RUN echo '#!/bin/bash\necho "欢迎使用OWL项目Docker环境!"\necho "Welcome to OWL Project Docker environment!"\necho ""\necho "可用的脚本 | Available scripts:"\nls -1 *.py | grep -v "__" | sed "s/^/- /"\necho ""\necho "运行示例 | Run examples:"\necho " xvfb-python run.py # 运行默认脚本 | Run default script"\necho " xvfb-python run_deepseek_example.py # 运行DeepSeek示例 | Run DeepSeek example"\necho ""\necho "或者使用自定义查询 | Or use custom query:"\necho " xvfb-python run.py \"你的问题 | Your question\""\necho ""' > /usr/local/bin/owl-welcome && \
48
  chmod +x /usr/local/bin/owl-welcome
49
 
50
+ # 设置工作目录
51
  WORKDIR /app/owl
52
 
53
+ # 添加健康检查
 
 
 
 
 
 
 
 
 
 
 
54
  HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
55
  CMD python -c "import sys; sys.exit(0 if __import__('os').path.exists('/app/owl') else 1)"
56
 
57
+ # 容器启动命令
58
  CMD ["/bin/bash", "-c", "owl-welcome && /bin/bash"]
.container/docker-compose.yml CHANGED
@@ -3,47 +3,29 @@ services:
3
  build:
4
  context: ..
5
  dockerfile: .container/Dockerfile
6
- args:
7
- # 构建参数 | Build arguments
8
- BUILDKIT_INLINE_CACHE: 1
9
- # 使用BuildKit加速构建 | Use BuildKit to accelerate build
10
- cache_from:
11
- - python:3.10-slim
12
  volumes:
13
- # 挂载.env文件,方便配置API密钥 | Mount .env file for easy API key configuration
14
  - ../owl/.env:/app/owl/.env
15
- # 可选:挂载数据目录 | Optional: Mount data directory
16
  - ./data:/app/data
17
- # 挂载缓存目录,避免重复下载 | Mount cache directories to avoid repeated downloads
18
- - playwright-cache:/root/.cache/ms-playwright
19
- - pip-cache:/root/.pip/cache
20
  environment:
21
- # 可以在这里设置环境变量,覆盖.env文件中的设置 | Set environment variables here to override settings in .env file
22
  - OPENAI_API_KEY=${OPENAI_API_KEY}
23
- # 添加显示相关的环境变量 | Add display-related environment variables
24
  - DISPLAY=:99
25
- - PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright
26
- # 设置Python不生成.pyc文件,减少磁盘IO | Set Python to not generate .pyc files, reduce disk IO
27
  - PYTHONDONTWRITEBYTECODE=1
28
- # 设置Python不缓冲输出,方便查看日志 | Set Python to not buffer output for easier log viewing
29
  - PYTHONUNBUFFERED=1
30
- # 设置终端颜色 | Set terminal color
31
  - TERM=xterm-256color
32
- # 启用pip缓存 | Enable pip cache
33
- - PIP_CACHE_DIR=/root/.pip/cache
34
  ports:
35
- # 如果项目有Web界面,可以映射端口 | If the project has a web interface, map ports
36
  - "8000:8000"
37
- # 使用交互模式运行容器 | Run container in interactive mode
38
  stdin_open: true
39
  tty: true
40
- # 添加共享内存大小,提高浏览器性能 | Add shared memory size to improve browser performance
41
  shm_size: 2gb
42
- # 设置资源限制 | Set resource limits
43
  deploy:
44
  resources:
45
  limits:
46
- cpus: '2'
47
  memory: 4G
48
 
49
  # 定义持久化卷,用于缓存 | Define persistent volumes for caching
 
3
  build:
4
  context: ..
5
  dockerfile: .container/Dockerfile
 
 
 
 
 
 
6
  volumes:
7
+ # 挂载.env文件,方便配置API密钥
8
  - ../owl/.env:/app/owl/.env
9
+ # 挂载数据目录
10
  - ./data:/app/data
11
+ # 挂载缓存目录,避免重复下载
12
+ - ~/.cache/pip:/root/.pip/cache
13
+ - ~/.cache/playwright:/root/.cache/ms-playwright
14
  environment:
 
15
  - OPENAI_API_KEY=${OPENAI_API_KEY}
 
16
  - DISPLAY=:99
 
 
17
  - PYTHONDONTWRITEBYTECODE=1
 
18
  - PYTHONUNBUFFERED=1
 
19
  - TERM=xterm-256color
 
 
20
  ports:
 
21
  - "8000:8000"
 
22
  stdin_open: true
23
  tty: true
 
24
  shm_size: 2gb
25
+ # 简化资源限制
26
  deploy:
27
  resources:
28
  limits:
 
29
  memory: 4G
30
 
31
  # 定义持久化卷,用于缓存 | Define persistent volumes for caching
.container/run_in_docker.bat CHANGED
@@ -165,7 +165,10 @@ REM 在容器中运行指定的脚本,传递查询参数
165
  REM Run the specified script in container, passing query parameter
166
  echo 在Docker容器中使用!PYTHON_CMD!运行脚本...
167
  echo Running script in Docker container using !PYTHON_CMD!...
168
- %COMPOSE_CMD% exec -T !SERVICE_NAME! !PYTHON_CMD! !SCRIPT_NAME! "!QUERY!"
 
 
 
169
 
170
  if errorlevel 0 (
171
  echo 查询完成!
 
165
  REM Run the specified script in container, passing query parameter
166
  echo 在Docker容器中使用!PYTHON_CMD!运行脚本...
167
  echo Running script in Docker container using !PYTHON_CMD!...
168
+
169
+ REM 修改执行命令,按照README中的方式执行
170
+ REM Activate the virtual environment before running the script, as described in the README
171
+ %COMPOSE_CMD% exec -T !SERVICE_NAME! bash -c "cd .. && source .venv/bin/activate && cd owl && !PYTHON_CMD! !SCRIPT_NAME! \"!QUERY!\""
172
 
173
  if errorlevel 0 (
174
  echo 查询完成!
.container/run_in_docker.sh CHANGED
@@ -36,13 +36,13 @@ else
36
  fi
37
 
38
  # 检查脚本是否存在 | Check if the script exists
39
- if [ ! -f "owl/$SCRIPT_NAME" ]; then
40
- echo "错误 | Error: 脚本 | Script 'owl/$SCRIPT_NAME' 不存在 | does not exist"
41
  echo "可用的脚本有 | Available scripts:"
42
  if [[ "$OS_TYPE" == MINGW* ]] || [[ "$OS_TYPE" == CYGWIN* ]] || [[ "$OS_TYPE" == MSYS* ]]; then
43
- find owl -name "*.py" | grep -v "__" | sed 's/\\/\//g'
44
  else
45
- ls -1 owl/*.py | grep -v "__"
46
  fi
47
  exit 1
48
  fi
@@ -51,8 +51,8 @@ echo "使用脚本 | Using script: $SCRIPT_NAME"
51
  echo "查询内容 | Query content: $QUERY"
52
 
53
  # 从docker-compose.yml获取服务名称(如果文件存在) | Get service name from docker-compose.yml (if file exists)
54
- if [ -f ".container/docker-compose.yml" ]; then
55
- DETECTED_SERVICE=$(grep -E "^ [a-zA-Z0-9_-]*:" .container/docker-compose.yml | head -1 | sed 's/^ \(.*\):.*/\1/')
56
  if [ ! -z "$DETECTED_SERVICE" ]; then
57
  SERVICE_NAME="$DETECTED_SERVICE"
58
  echo "从docker-compose.yml检测到服务名称 | Detected service name from docker-compose.yml: $SERVICE_NAME"
@@ -119,11 +119,11 @@ echo "在Docker容器中使用 $PYTHON_CMD 运行脚本... | Running script in D
119
  # 根据操作系统类型执行不同的命令 | Execute different commands based on operating system type
120
  if [[ "$OS_TYPE" == MINGW* ]] || [[ "$OS_TYPE" == CYGWIN* ]] || [[ "$OS_TYPE" == MSYS* ]]; then
121
  # Windows可能需要特殊处理引号 | Windows may need special handling for quotes
122
- winpty $COMPOSE_CMD exec -T $SERVICE_NAME $PYTHON_CMD $SCRIPT_NAME "$QUERY"
123
  RESULT=$?
124
  else
125
  # macOS 或 Linux | macOS or Linux
126
- $COMPOSE_CMD exec -T $SERVICE_NAME $PYTHON_CMD $SCRIPT_NAME "$QUERY"
127
  RESULT=$?
128
  fi
129
 
 
36
  fi
37
 
38
  # 检查脚本是否存在 | Check if the script exists
39
+ if [ ! -f "../owl/$SCRIPT_NAME" ]; then
40
+ echo "错误 | Error: 脚本 | Script '../owl/$SCRIPT_NAME' 不存在 | does not exist"
41
  echo "可用的脚本有 | Available scripts:"
42
  if [[ "$OS_TYPE" == MINGW* ]] || [[ "$OS_TYPE" == CYGWIN* ]] || [[ "$OS_TYPE" == MSYS* ]]; then
43
+ find ../owl -name "*.py" | grep -v "__" | sed 's/\\/\//g'
44
  else
45
+ ls -1 ../owl/*.py | grep -v "__"
46
  fi
47
  exit 1
48
  fi
 
51
  echo "查询内容 | Query content: $QUERY"
52
 
53
  # 从docker-compose.yml获取服务名称(如果文件存在) | Get service name from docker-compose.yml (if file exists)
54
+ if [ -f "docker-compose.yml" ]; then
55
+ DETECTED_SERVICE=$(grep -E "^ [a-zA-Z0-9_-]*:" docker-compose.yml | head -1 | sed 's/^ \(.*\):.*/\1/')
56
  if [ ! -z "$DETECTED_SERVICE" ]; then
57
  SERVICE_NAME="$DETECTED_SERVICE"
58
  echo "从docker-compose.yml检测到服务名称 | Detected service name from docker-compose.yml: $SERVICE_NAME"
 
119
  # 根据操作系统类型执行不同的命令 | Execute different commands based on operating system type
120
  if [[ "$OS_TYPE" == MINGW* ]] || [[ "$OS_TYPE" == CYGWIN* ]] || [[ "$OS_TYPE" == MSYS* ]]; then
121
  # Windows可能需要特殊处理引号 | Windows may need special handling for quotes
122
+ winpty $COMPOSE_CMD exec -T $SERVICE_NAME bash -c "cd .. && source .venv/bin/activate && cd owl && $PYTHON_CMD $SCRIPT_NAME \"$QUERY\""
123
  RESULT=$?
124
  else
125
  # macOS 或 Linux | macOS or Linux
126
+ $COMPOSE_CMD exec -T $SERVICE_NAME bash -c "cd .. && source .venv/bin/activate && cd owl && $PYTHON_CMD $SCRIPT_NAME \"$QUERY\""
127
  RESULT=$?
128
  fi
129
 
README.md CHANGED
@@ -87,6 +87,7 @@ Our vision is to revolutionize how AI agents collaborate to solve real-world tas
87
 
88
  # 🔥 News
89
 
 
90
  <div align="center" style="background-color: #fffacd; padding: 15px; border-radius: 10px; border: 2px solid #ffd700; margin: 20px 0;">
91
  <h3 style="color: #d81b60; margin: 0; font-size: 1.3em;">
92
  🌟🌟🌟 <b>COMMUNITY CALL FOR USE CASES!</b> 🌟🌟🌟
@@ -109,6 +110,7 @@ Our vision is to revolutionize how AI agents collaborate to solve real-world tas
109
  - **[2025.03.07]**: We open-sourced the codebase of the 🦉 OWL project.
110
  - **[2025.03.03]**: OWL achieved the #1 position among open-source frameworks on the GAIA benchmark with a score of 58.18.
111
 
 
112
  # 🎬 Demo Video
113
 
114
  https://github.com/user-attachments/assets/2a2a825d-39ea-45c5-9ba1-f9d58efbc372
@@ -122,7 +124,9 @@ https://private-user-images.githubusercontent.com/55657767/420212194-e813fc05-13
122
  - **Browser Automation**: Utilize the Playwright framework for simulating browser interactions, including scrolling, clicking, input handling, downloading, navigation, and more.
123
  - **Document Parsing**: Extract content from Word, Excel, PDF, and PowerPoint files, converting them into text or Markdown format.
124
  - **Code Execution**: Write and execute Python code using interpreter.
125
- - **Built-in Toolkits**: Access to a comprehensive set of built-in toolkits including ArxivToolkit, AudioAnalysisToolkit, CodeExecutionToolkit, DalleToolkit, DataCommonsToolkit, ExcelToolkit, GitHubToolkit, GoogleMapsToolkit, GoogleScholarToolkit, ImageAnalysisToolkit, MathToolkit, NetworkXToolkit, NotionToolkit, OpenAPIToolkit, RedditToolkit, SearchToolkit, SemanticScholarToolkit, SymPyToolkit, VideoAnalysisToolkit, WeatherToolkit, WebToolkit, and many more for specialized tasks.
 
 
126
 
127
  # 🛠️ Installation
128
 
@@ -177,7 +181,7 @@ source .venv/bin/activate
177
  .venv\Scripts\activate
178
 
179
  # Install from requirements.txt
180
- pip install -r requirements.txt
181
  ```
182
 
183
  ## Option 3: Using conda
@@ -199,7 +203,7 @@ conda activate owl
199
  pip install -e .
200
 
201
  # Option 2: Install from requirements.txt
202
- pip install -r requirements.txt
203
 
204
  # Exit the conda environment when done
205
  conda deactivate
@@ -259,9 +263,19 @@ cp owl/.env_template owl/.env
259
 
260
  # Option 1: Using docker-compose directly
261
  cd .container
 
262
  docker-compose up -d
 
263
  # Run OWL inside the container
264
- docker-compose exec owl bash -c "xvfb-python run.py"
 
 
 
 
 
 
 
 
265
 
266
  # Option 2: Build and run using the provided scripts
267
  cd .container
@@ -275,6 +289,23 @@ For more detailed Docker usage instructions, including cross-platform support, o
275
 
276
  # 🚀 Quick Start
277
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  After installation and setting up your environment variables, you can start using OWL right away:
279
 
280
  ```bash
@@ -307,6 +338,9 @@ python owl/examples/run_deepseek_zh.py
307
  # Run with other OpenAI-compatible models
308
  python owl/examples/run_openai_compatiable_model.py
309
 
 
 
 
310
  # Run with Ollama
311
  python owl/examples/run_ollama.py
312
  ```
@@ -355,6 +389,14 @@ Here are some tasks you can try with OWL:
355
 
356
  # 🧰 Toolkits and Capabilities
357
 
 
 
 
 
 
 
 
 
358
  > **Important**: Effective use of toolkits requires models with strong tool calling capabilities. For multimodal toolkits (Web, Image, Video), models must also have multimodal understanding abilities.
359
 
360
  OWL supports various toolkits that can be customized by modifying the `tools` list in your script:
@@ -362,7 +404,7 @@ OWL supports various toolkits that can be customized by modifying the `tools` li
362
  ```python
363
  # Configure toolkits
364
  tools = [
365
- *WebToolkit(headless=False).get_tools(), # Browser automation
366
  *VideoAnalysisToolkit(model=models["video"]).get_tools(),
367
  *AudioAnalysisToolkit().get_tools(), # Requires OpenAI Key
368
  *CodeExecutionToolkit(sandbox="subprocess").get_tools(),
@@ -381,7 +423,7 @@ tools = [
381
  Key toolkits include:
382
 
383
  ### Multimodal Toolkits (Require multimodal model capabilities)
384
- - **WebToolkit**: Browser automation for web interaction and navigation
385
  - **VideoAnalysisToolkit**: Video processing and content analysis
386
  - **ImageAnalysisToolkit**: Image analysis and interpretation
387
 
@@ -399,11 +441,11 @@ To customize available tools:
399
 
400
  ```python
401
  # 1. Import toolkits
402
- from camel.toolkits import WebToolkit, SearchToolkit, CodeExecutionToolkit
403
 
404
  # 2. Configure tools list
405
  tools = [
406
- *WebToolkit(headless=True).get_tools(),
407
  SearchToolkit().search_wiki,
408
  *CodeExecutionToolkit(sandbox="subprocess").get_tools(),
409
  ]
@@ -490,10 +532,11 @@ We welcome contributions from the community! Here's how you can help:
490
  3. Submit pull requests with your improvements
491
 
492
  **Current Issues Open for Contribution:**
 
493
  - [#1770](https://github.com/camel-ai/camel/issues/1770)
494
  - [#1712](https://github.com/camel-ai/camel/issues/1712)
495
  - [#1537](https://github.com/camel-ai/camel/issues/1537)
496
- - [#1827](https://github.com/camel-ai/camel/issues/1827)
497
 
498
  To take on an issue, simply leave a comment stating your interest.
499
 
@@ -501,8 +544,8 @@ To take on an issue, simply leave a comment stating your interest.
501
  Join us ([*Discord*](https://discord.camel-ai.org/) or [*WeChat*](https://ghli.org/camel/wechat.png)) in pushing the boundaries of finding the scaling laws of agents.
502
 
503
  Join us for further discussions!
504
- ![](./assets/community.jpg)
505
- <!-- ![](./assets/meetup.jpg) -->
506
 
507
  # ❓ FAQ
508
 
 
87
 
88
  # 🔥 News
89
 
90
+
91
  <div align="center" style="background-color: #fffacd; padding: 15px; border-radius: 10px; border: 2px solid #ffd700; margin: 20px 0;">
92
  <h3 style="color: #d81b60; margin: 0; font-size: 1.3em;">
93
  🌟🌟🌟 <b>COMMUNITY CALL FOR USE CASES!</b> 🌟🌟🌟
 
110
  - **[2025.03.07]**: We open-sourced the codebase of the 🦉 OWL project.
111
  - **[2025.03.03]**: OWL achieved the #1 position among open-source frameworks on the GAIA benchmark with a score of 58.18.
112
 
113
+
114
  # 🎬 Demo Video
115
 
116
  https://github.com/user-attachments/assets/2a2a825d-39ea-45c5-9ba1-f9d58efbc372
 
124
  - **Browser Automation**: Utilize the Playwright framework for simulating browser interactions, including scrolling, clicking, input handling, downloading, navigation, and more.
125
  - **Document Parsing**: Extract content from Word, Excel, PDF, and PowerPoint files, converting them into text or Markdown format.
126
  - **Code Execution**: Write and execute Python code using interpreter.
127
+ - **Built-in Toolkits**: Access to a comprehensive set of built-in toolkits including:
128
+ - **Model Context Protocol (MCP)**: A universal protocol layer that standardizes AI model interactions with various tools and data sources
129
+ - **Core Toolkits**: ArxivToolkit, AudioAnalysisToolkit, CodeExecutionToolkit, DalleToolkit, DataCommonsToolkit, ExcelToolkit, GitHubToolkit, GoogleMapsToolkit, GoogleScholarToolkit, ImageAnalysisToolkit, MathToolkit, NetworkXToolkit, NotionToolkit, OpenAPIToolkit, RedditToolkit, SearchToolkit, SemanticScholarToolkit, SymPyToolkit, VideoAnalysisToolkit, WeatherToolkit, BrowserToolkit, and many more for specialized tasks
130
 
131
  # 🛠️ Installation
132
 
 
181
  .venv\Scripts\activate
182
 
183
  # Install from requirements.txt
184
+ pip install -r requirements.txt --use-pep517
185
  ```
186
 
187
  ## Option 3: Using conda
 
203
  pip install -e .
204
 
205
  # Option 2: Install from requirements.txt
206
+ pip install -r requirements.txt --use-pep517
207
 
208
  # Exit the conda environment when done
209
  conda deactivate
 
263
 
264
  # Option 1: Using docker-compose directly
265
  cd .container
266
+
267
  docker-compose up -d
268
+
269
  # Run OWL inside the container
270
+ docker-compose exec owl bash
271
+
272
+ # activate the virtual environment
273
+ cd .. && source .venv/bin/activate && cd owl
274
+
275
+ playwright install-deps
276
+
277
+ # Run the example demo script
278
+ xvfb-python run.py
279
 
280
  # Option 2: Build and run using the provided scripts
281
  cd .container
 
289
 
290
  # 🚀 Quick Start
291
 
292
+ ## Try MCP (Model Context Protocol) Integration
293
+
294
+ Experience the power of MCP by running our example that demonstrates multi-agent information retrieval and processing:
295
+
296
+ ```bash
297
+ # Set up MCP servers (one-time setup)
298
+ npx -y @smithery/cli install @wonderwhy-er/desktop-commander --client claude
299
+ npx @wonderwhy-er/desktop-commander setup
300
+
301
+ # Run the MCP example
302
+ python owl/run_mcp.py
303
+ ```
304
+
305
+ This example showcases how OWL agents can seamlessly interact with file systems, web automation, and information retrieval through MCP. Check out `owl/run_mcp.py` for the full implementation.
306
+
307
+ ## Basic Usage
308
+
309
  After installation and setting up your environment variables, you can start using OWL right away:
310
 
311
  ```bash
 
338
  # Run with other OpenAI-compatible models
339
  python owl/examples/run_openai_compatiable_model.py
340
 
341
+ # Run with Azure OpenAI
342
+ python owl/run_azure_openai.py
343
+
344
  # Run with Ollama
345
  python owl/examples/run_ollama.py
346
  ```
 
389
 
390
  # 🧰 Toolkits and Capabilities
391
 
392
+ ## Model Context Protocol (MCP)
393
+
394
+ OWL's MCP integration provides a standardized way for AI models to interact with various tools and data sources.
395
+
396
+ Try our comprehensive MCP example in `owl/run_mcp.py` to see these capabilities in action!
397
+
398
+ ## Available Toolkits
399
+
400
  > **Important**: Effective use of toolkits requires models with strong tool calling capabilities. For multimodal toolkits (Web, Image, Video), models must also have multimodal understanding abilities.
401
 
402
  OWL supports various toolkits that can be customized by modifying the `tools` list in your script:
 
404
  ```python
405
  # Configure toolkits
406
  tools = [
407
+ *BrowserToolkit(headless=False).get_tools(), # Browser automation
408
  *VideoAnalysisToolkit(model=models["video"]).get_tools(),
409
  *AudioAnalysisToolkit().get_tools(), # Requires OpenAI Key
410
  *CodeExecutionToolkit(sandbox="subprocess").get_tools(),
 
423
  Key toolkits include:
424
 
425
  ### Multimodal Toolkits (Require multimodal model capabilities)
426
+ - **BrowserToolkit**: Browser automation for web interaction and navigation
427
  - **VideoAnalysisToolkit**: Video processing and content analysis
428
  - **ImageAnalysisToolkit**: Image analysis and interpretation
429
 
 
441
 
442
  ```python
443
  # 1. Import toolkits
444
+ from camel.toolkits import BrowserToolkit, SearchToolkit, CodeExecutionToolkit
445
 
446
  # 2. Configure tools list
447
  tools = [
448
+ *BrowserToolkit(headless=True).get_tools(),
449
  SearchToolkit().search_wiki,
450
  *CodeExecutionToolkit(sandbox="subprocess").get_tools(),
451
  ]
 
532
  3. Submit pull requests with your improvements
533
 
534
  **Current Issues Open for Contribution:**
535
+ - [#1857](https://github.com/camel-ai/camel/issues/1857)
536
  - [#1770](https://github.com/camel-ai/camel/issues/1770)
537
  - [#1712](https://github.com/camel-ai/camel/issues/1712)
538
  - [#1537](https://github.com/camel-ai/camel/issues/1537)
539
+
540
 
541
  To take on an issue, simply leave a comment stating your interest.
542
 
 
544
  Join us ([*Discord*](https://discord.camel-ai.org/) or [*WeChat*](https://ghli.org/camel/wechat.png)) in pushing the boundaries of finding the scaling laws of agents.
545
 
546
  Join us for further discussions!
547
+ <!-- ![](./assets/community.png) -->
548
+ ![](./assets/community_8.jpg)
549
 
550
  # ❓ FAQ
551
 
README_zh.md CHANGED
@@ -105,7 +105,7 @@
105
  </div>
106
 
107
  - **[2025.03.12]**: 在SearchToolkit中添加了Bocha搜索功能,集成了火山引擎模型平台,并更新了Azure和OpenAI Compatible模型的结构化输出和工具调用能力。
108
- - **[2025.03.11]**: 我们添加了 MCPToolkit、FileWriteToolkit 和 TerminalToolkit,增强 OWL Agent的工具调用、文件写入能力和终端命令执行功能。
109
  - **[2025.03.09]**: 我们添加了基于网页的用户界面,使系统交互变得更加简便。
110
  - **[2025.03.07]**: 我们开源了 🦉 OWL 项目的代码库。
111
  - **[2025.03.03]**: OWL 在 GAIA 基准测试中取得 58.18 平均分,在开源框架中排名第一!
@@ -123,7 +123,7 @@ https://private-user-images.githubusercontent.com/55657767/420212194-e813fc05-13
123
  - **浏览器操作**:借助Playwright框架开发浏览器模拟交互,支持页面滚动、点击、输入、下载、历史回退等功能
124
  - **文件解析**:word、excel、PDF、PowerPoint信息提取,内容转文本/Markdown
125
  - **代码执行**:编写python代码,并使用解释器运行
126
- - **丰富工具包**:提供丰富的工具包,包括ArxivToolkit(学术论文检索)、AudioAnalysisToolkit(音频分析)、CodeExecutionToolkit(代码执行)、DalleToolkit(图像生成)、DataCommonsToolkit(数据共享)、ExcelToolkit(Excel处理)、GitHubToolkit(GitHub交互)、GoogleMapsToolkit(地图服务)、GoogleScholarToolkit(学术搜索)、ImageAnalysisToolkit(图像分析)、MathToolkit(数学计算)、NetworkXToolkit(图形分析)、NotionToolkit(Notion交互)、OpenAPIToolkit(API操作)、RedditToolkit(Reddit交互)、SearchToolkit(搜索服务)、SemanticScholarToolkit(语义学术搜索)、SymPyToolkit(符号计算)、VideoAnalysisToolkit(视频分析)、WeatherToolkit(天气查询)、WebToolkit(网页交互)等多种专业工具,满足各类特定任务需求。
127
 
128
  # 🛠️ 安装
129
 
@@ -176,7 +176,7 @@ source .venv/bin/activate
176
  .venv\Scripts\activate
177
 
178
  # 从 requirements.txt 安装
179
- pip install -r requirements.txt
180
  ```
181
 
182
  ## 选项3:使用 conda
@@ -198,7 +198,7 @@ conda activate owl
198
  pip install -e .
199
 
200
  # 选项2:从 requirements.txt 安装
201
- pip install -r requirements.txt
202
 
203
  # 完成后退出 conda 环境
204
  conda deactivate
@@ -257,9 +257,19 @@ cp owl/.env_template owl/.env
257
 
258
  # 选项1:直接使用docker-compose
259
  cd .container
 
260
  docker-compose up -d
 
261
  # 在容器中运行OWL
262
- docker-compose exec owl bash -c "xvfb-python run.py"
 
 
 
 
 
 
 
 
263
 
264
  # 选项2:使用提供的脚本构建和运行
265
  cd .container
@@ -272,6 +282,23 @@ chmod +x build_docker.sh
272
  更多详细的Docker使用说明,包括跨平台支持、优化配置和故障排除,请参阅 [DOCKER_README.md](.container/DOCKER_README.md)
273
 
274
  # 🚀 快速开始
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
  运行以下示例:
277
 
@@ -311,6 +338,9 @@ python owl/examples/run_deepseek_zh.py
311
  # 使用其他 OpenAI 兼容模型运行
312
  python owl/examples/run_openai_compatiable_model.py
313
 
 
 
 
314
  # 使用 Ollama 运行
315
  python owl/examples/run_ollama.py
316
  ```
@@ -349,6 +379,14 @@ OWL 将自动调用与文档相关的工具来处理文件并提取答案。
349
 
350
  # 🧰 工具包与功能
351
 
 
 
 
 
 
 
 
 
352
  > **重要提示**:有效使用工具包需要具备强大工具调用能力的模型。对于多模态工具包(Web、图像、视频),模型还必须具备多模态理解能力。
353
 
354
  OWL支持多种工具包,可通过修改脚本中的`tools`列表进行自定义:
@@ -356,7 +394,7 @@ OWL支持多种工具包,可通过修改脚本中的`tools`列表进行自定
356
  ```python
357
  # 配置工具包
358
  tools = [
359
- *WebToolkit(headless=False).get_tools(), # 浏览器自动化
360
  *VideoAnalysisToolkit(model=models["video"]).get_tools(),
361
  *AudioAnalysisToolkit().get_tools(), # 需要OpenAI API密钥
362
  *CodeExecutionToolkit(sandbox="subprocess").get_tools(),
@@ -375,7 +413,7 @@ tools = [
375
  关键工具包包括:
376
 
377
  ### 多模态工具包(需要模型具备多模态能力)
378
- - **WebToolkit**:浏览器自动化,用于网页交互和导航
379
  - **VideoAnalysisToolkit**:视频处理和内容分析
380
  - **ImageAnalysisToolkit**:图像分析和解释
381
 
@@ -393,11 +431,11 @@ tools = [
393
 
394
  ```python
395
  # 1. 导入工具包
396
- from camel.toolkits import WebToolkit, SearchToolkit, CodeExecutionToolkit
397
 
398
  # 2. 配置工具列表
399
  tools = [
400
- *WebToolkit(headless=True).get_tools(),
401
  SearchToolkit().search_wiki,
402
  *CodeExecutionToolkit(sandbox="subprocess").get_tools(),
403
  ]
@@ -481,10 +519,10 @@ python run_gaia_roleplaying.py
481
  3. 提交包含您改进的拉取请求
482
 
483
  **当前开放贡献的问题:**
 
484
  - [#1770](https://github.com/camel-ai/camel/issues/1770)
485
  - [#1712](https://github.com/camel-ai/camel/issues/1712)
486
  - [#1537](https://github.com/camel-ai/camel/issues/1537)
487
- - [#1827](https://github.com/camel-ai/camel/issues/1827)
488
 
489
  要认领一个问题,只需在该问题下留言表明您的兴趣即可。
490
 
@@ -492,7 +530,8 @@ python run_gaia_roleplaying.py
492
  加入我们的 ([*Discord*](https://discord.camel-ai.org/) 或 [*微信*](https://ghli.org/camel/wechat.png)) 社区,一起探索智能体扩展规律的边界。
493
 
494
  加入我们,参与更多讨论!
495
- ![](./assets/community.jpg)
 
496
  <!-- ![](./assets/meetup.jpg) -->
497
 
498
  # ❓ 常见问题
 
105
  </div>
106
 
107
  - **[2025.03.12]**: 在SearchToolkit中添加了Bocha搜索功能,集成了火山引擎模型平台,并更新了Azure和OpenAI Compatible模型的结构化输出和工具调用能力。
108
+ - **[2025.03.11]**: 我们添加了 MCPToolkit、FileWriteToolkit 和 TerminalToolkit,增强了 OWL Agent 的 MCP(模型上下文协议)集成、文件写入能力和终端命令执行功能。MCP 作为一个通用协议层,标准化了 AI 模型与各种数据源和工具的交互方式。
109
  - **[2025.03.09]**: 我们添加了基于网页的用户界面,使系统交互变得更加简便。
110
  - **[2025.03.07]**: 我们开源了 🦉 OWL 项目的代码库。
111
  - **[2025.03.03]**: OWL 在 GAIA 基准测试中取得 58.18 平均分,在开源框架中排名第一!
 
123
  - **浏览器操作**:借助Playwright框架开发浏览器模拟交互,支持页面滚动、点击、输入、下载、历史回退等功能
124
  - **文件解析**:word、excel、PDF、PowerPoint信息提取,内容转文本/Markdown
125
  - **代码执行**:编写python代码,并使用解释器运行
126
+ - **丰富工具包**:提供丰富的工具包,包括ArxivToolkit(学术论文检索)、AudioAnalysisToolkit(音频分析)、CodeExecutionToolkit(代码执行)、DalleToolkit(图像生成)、DataCommonsToolkit(数据共享)、ExcelToolkit(Excel处理)、GitHubToolkit(GitHub交互)、GoogleMapsToolkit(地图服务)、GoogleScholarToolkit(学术搜索)、ImageAnalysisToolkit(图像分析)、MathToolkit(数学计算)、NetworkXToolkit(图形分析)、NotionToolkit(Notion交互)、OpenAPIToolkit(API操作)、RedditToolkit(Reddit交互)、SearchToolkit(搜索服务)、SemanticScholarToolkit(语义学术搜索)、SymPyToolkit(符号计算)、VideoAnalysisToolkit(视频分析)、WeatherToolkit(天气查询)、BrowserToolkit(网页交互)等多种专业工具,满足各类特定任务需求。
127
 
128
  # 🛠️ 安装
129
 
 
176
  .venv\Scripts\activate
177
 
178
  # 从 requirements.txt 安装
179
+ pip install -r requirements.txt --use-pep517
180
  ```
181
 
182
  ## 选项3:使用 conda
 
198
  pip install -e .
199
 
200
  # 选项2:从 requirements.txt 安装
201
+ pip install -r requirements.txt --use-pep517
202
 
203
  # 完成后退出 conda 环境
204
  conda deactivate
 
257
 
258
  # 选项1:直接使用docker-compose
259
  cd .container
260
+
261
  docker-compose up -d
262
+
263
  # 在容器中运行OWL
264
+ docker-compose exec owl bash
265
+
266
+ # 激活虚拟环境
267
+ cd .. && source .venv/bin/activate && cd owl
268
+
269
+ playwright install-deps
270
+
271
+ #运行例子演示脚本
272
+ xvfb-python run.py
273
 
274
  # 选项2:使用提供的脚本构建和运行
275
  cd .container
 
282
  更多详细的Docker使用说明,包括跨平台支持、优化配置和故障排除,请参阅 [DOCKER_README.md](.container/DOCKER_README.md)
283
 
284
  # 🚀 快速开始
285
+
286
+ ## 尝试 MCP(模型上下文协议)集成
287
+
288
+ 体验 MCP 的强大功能,运行我们的示例来展示多智能体信息检索和处理:
289
+
290
+ ```bash
291
+ # 设置 MCP 服务器(仅需一次性设置)
292
+ npx -y @smithery/cli install @wonderwhy-er/desktop-commander --client claude
293
+ npx @wonderwhy-er/desktop-commander setup
294
+
295
+ # 运行 MCP 示例
296
+ python owl/run_mcp.py
297
+ ```
298
+
299
+ 这个示例展示了 OWL 智能体如何通过 MCP 协议无缝地与文件系统、网页自动化和信息检索进行交互。查看 `owl/run_mcp.py` 了解完整实现。
300
+
301
+ ## 基本用法
302
 
303
  运行以下示例:
304
 
 
338
  # 使用其他 OpenAI 兼容模型运行
339
  python owl/examples/run_openai_compatiable_model.py
340
 
341
+ # 使用 Azure OpenAI模型运行
342
+ python owl/run_azure_openai.py
343
+
344
  # 使用 Ollama 运行
345
  python owl/examples/run_ollama.py
346
  ```
 
379
 
380
  # 🧰 工具包与功能
381
 
382
+ ## 模型上下文协议(MCP)
383
+
384
+ OWL 的 MCP 集成为 AI 模型与各种工具和数据源的交互提供了标准化的方式。
385
+
386
+ 查看我们的综合示例 `owl/run_mcp.py` 来体验这些功能!
387
+
388
+ ## 可用工具包
389
+
390
  > **重要提示**:有效使用工具包需要具备强大工具调用能力的模型。对于多模态工具包(Web、图像、视频),模型还必须具备多模态理解能力。
391
 
392
  OWL支持多种工具包,可通过修改脚本中的`tools`列表进行自定义:
 
394
  ```python
395
  # 配置工具包
396
  tools = [
397
+ *BrowserToolkit(headless=False).get_tools(), # 浏览器自动化
398
  *VideoAnalysisToolkit(model=models["video"]).get_tools(),
399
  *AudioAnalysisToolkit().get_tools(), # 需要OpenAI API密钥
400
  *CodeExecutionToolkit(sandbox="subprocess").get_tools(),
 
413
  关键工具包包括:
414
 
415
  ### 多模态工具包(需要模型具备多模态能力)
416
+ - **BrowserToolkit**:浏览器自动化,用于网页交互和导航
417
  - **VideoAnalysisToolkit**:视频处理和内容分析
418
  - **ImageAnalysisToolkit**:图像分析和解释
419
 
 
431
 
432
  ```python
433
  # 1. 导入工具包
434
+ from camel.toolkits import BrowserToolkit, SearchToolkit, CodeExecutionToolkit
435
 
436
  # 2. 配置工具列表
437
  tools = [
438
+ *BrowserToolkit(headless=True).get_tools(),
439
  SearchToolkit().search_wiki,
440
  *CodeExecutionToolkit(sandbox="subprocess").get_tools(),
441
  ]
 
519
  3. 提交包含您改进的拉取请求
520
 
521
  **当前开放贡献的问题:**
522
+ - [#1857](https://github.com/camel-ai/camel/issues/1857)
523
  - [#1770](https://github.com/camel-ai/camel/issues/1770)
524
  - [#1712](https://github.com/camel-ai/camel/issues/1712)
525
  - [#1537](https://github.com/camel-ai/camel/issues/1537)
 
526
 
527
  要认领一个问题,只需在该问题下留言表明您的兴趣即可。
528
 
 
530
  加入我们的 ([*Discord*](https://discord.camel-ai.org/) 或 [*微信*](https://ghli.org/camel/wechat.png)) 社区,一起探索智能体扩展规律的边界。
531
 
532
  加入我们,参与更多讨论!
533
+ <!-- ![](./assets/community.png) -->
534
+ ![](./assets/community_8.jpg)
535
  <!-- ![](./assets/meetup.jpg) -->
536
 
537
  # ❓ 常见问题
owl/.env_template CHANGED
@@ -7,6 +7,13 @@
7
  # OPENAI_API_KEY= ""
8
  # OPENAI_API_BASE_URL=""
9
 
 
 
 
 
 
 
 
10
  # Qwen API (https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key)
11
  # QWEN_API_KEY=""
12
 
 
7
  # OPENAI_API_KEY= ""
8
  # OPENAI_API_BASE_URL=""
9
 
10
+ # Azure OpenAI API
11
+ # AZURE_OPENAI_BASE_URL=""
12
+ # AZURE_API_VERSION=""
13
+ # AZURE_OPENAI_API_KEY=""
14
+ # AZURE_DEPLOYMENT_NAME=""
+ # AZURE_OPENAI_MODEL_TYPE=""
15
+
16
+
17
  # Qwen API (https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key)
18
  # QWEN_API_KEY=""
19
 
owl/examples/run_terminal_zh.py CHANGED
@@ -12,7 +12,7 @@
12
  # limitations under the License.
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
  from dotenv import load_dotenv
15
-
16
  from camel.models import ModelFactory
17
  from camel.toolkits import (
18
  SearchToolkit,
 
12
  # limitations under the License.
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
  from dotenv import load_dotenv
15
+ import os
16
  from camel.models import ModelFactory
17
  from camel.toolkits import (
18
  SearchToolkit,
owl/mcp_servers_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mcpServers": {
3
+ "desktop-commander": {
4
+ "command": "npx",
5
+ "args": [
6
+ "-y",
7
+ "@wonderwhy-er/desktop-commander"
8
+ ]
9
+ },
10
+ "playwright": {
11
+ "command": "npx",
12
+ "args": ["-y", "@executeautomation/playwright-mcp-server"]
13
+ }
14
+ }
15
+ }
16
+
owl/run_azure_openai.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ import os
15
+ from dotenv import load_dotenv
16
+ from camel.configs import ChatGPTConfig
17
+ from camel.models import ModelFactory
18
+ from camel.toolkits import (
19
+ CodeExecutionToolkit,
20
+ ExcelToolkit,
21
+ ImageAnalysisToolkit,
22
+ SearchToolkit,
23
+ BrowserToolkit,
24
+ FileWriteToolkit,
25
+ )
26
+ from camel.types import ModelPlatformType
27
+
28
+ from utils import OwlRolePlaying, run_society
29
+
30
+ from camel.logger import set_log_level
31
+
32
+ set_log_level(level="DEBUG")
33
+
34
+ load_dotenv()
35
+
36
+
37
def construct_society(question: str) -> OwlRolePlaying:
    r"""Construct a society of agents backed by Azure OpenAI models.

    Args:
        question (str): The task or question to be addressed by the society.

    Returns:
        OwlRolePlaying: A configured society of agents ready to address the
            question.

    Raises:
        ValueError: If the ``AZURE_OPENAI_MODEL_TYPE`` environment variable
            is unset or empty.
    """
    # Fail early with an actionable message instead of handing ``None`` to
    # ``ModelFactory.create`` when the variable is missing.
    model_type = os.getenv("AZURE_OPENAI_MODEL_TYPE")
    if not model_type:
        raise ValueError(
            "AZURE_OPENAI_MODEL_TYPE is not set. Define it in your "
            "environment or .env file before running this script."
        )

    # Every role is created from the same Azure OpenAI settings.
    base_model_config = {
        "model_platform": ModelPlatformType.AZURE,
        "model_type": model_type,
        "model_config_dict": ChatGPTConfig(temperature=0.4, max_tokens=4096).as_dict(),
    }

    # One independently created model per role.
    models = {
        "user": ModelFactory.create(**base_model_config),
        "assistant": ModelFactory.create(**base_model_config),
        "web": ModelFactory.create(**base_model_config),
        "planning": ModelFactory.create(**base_model_config),
        "image": ModelFactory.create(**base_model_config),
    }

    # Configure toolkits
    tools = [
        *BrowserToolkit(
            headless=False,  # Set to True for headless mode (e.g., on remote servers)
            web_agent_model=models["web"],
            planning_agent_model=models["planning"],
        ).get_tools(),
        *CodeExecutionToolkit(sandbox="subprocess", verbose=True).get_tools(),
        *ImageAnalysisToolkit(model=models["image"]).get_tools(),
        SearchToolkit().search_duckduckgo,
        SearchToolkit().search_google,  # Comment this out if you don't have google search
        SearchToolkit().search_wiki,
        *ExcelToolkit().get_tools(),
        *FileWriteToolkit(output_dir="./").get_tools(),
    ]

    # Only the assistant agent receives the tools.
    user_agent_kwargs = {"model": models["user"]}
    assistant_agent_kwargs = {"model": models["assistant"], "tools": tools}

    # Configure task parameters
    task_kwargs = {
        "task_prompt": question,
        "with_task_specify": False,
    }

    # Create and return the society
    society = OwlRolePlaying(
        **task_kwargs,
        user_role_name="user",
        user_agent_kwargs=user_agent_kwargs,
        assistant_role_name="assistant",
        assistant_agent_kwargs=assistant_agent_kwargs,
    )

    return society
98
+
99
+
100
def main():
    r"""Run the OWL system with Azure OpenAI on a sample browsing task."""
    # Sample task handed to the agent society
    question = "Navigate to Amazon.com and identify one product that is attractive to coders. Please provide me with the product name and price. No need to verify your answer."

    # Build the society and run it; only the final answer is displayed
    result = run_society(construct_society(question))
    answer, _chat_history, _token_count = result

    # Print the answer using ANSI colour codes
    print(f"\033[94mAnswer: {answer}\033[0m")


if __name__ == "__main__":
    main()
owl/run_mcp.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ """MCP Multi-Agent System Example
15
+
16
+ This example demonstrates how to use MCP (Model Context Protocol) with CAMEL agents
17
+ for advanced information retrieval and processing tasks.
18
+
19
+ Environment Setup:
20
+ 1. Configure the required dependencies of owl library
21
+ Refer to: https://github.com/camel-ai/owl for installation guide
22
+
23
+ 2. MCP Server Setup:
24
+
25
+ 2.1 MCP Desktop Commander (File System Service):
26
+ Prerequisites: Node.js and npm
27
+ ```bash
28
+ # Install MCP service
29
+ npx -y @smithery/cli install @wonderwhy-er/desktop-commander --client claude
30
+ npx @wonderwhy-er/desktop-commander setup
31
+
32
+ # Configure in owl/mcp_servers_config.json (add this entry inside the top-level "mcpServers" object):
33
+ {
34
+ "desktop-commander": {
35
+ "command": "npx",
36
+ "args": [
37
+ "-y",
38
+ "@wonderwhy-er/desktop-commander"
39
+ ]
40
+ }
41
+ }
42
+ ```
43
+
44
+ 2.2 MCP Playwright Service:
45
+ ```bash
46
+ # Install MCP service
47
+ npm install -g @executeautomation/playwright-mcp-server
48
+ npx playwright install-deps
49
+
50
+ # Configure in mcp_servers_config.json:
51
+ {
52
+ "mcpServers": {
53
+ "playwright": {
54
+ "command": "npx",
55
+ "args": ["-y", "@executeautomation/playwright-mcp-server"]
56
+ }
57
+ }
58
+ }
59
+ ```
60
+
61
+ 2.3 MCP Fetch Service (Optional - for better retrieval):
62
+ ```bash
63
+ # Install MCP service
64
+ pip install mcp-server-fetch
65
+
66
+ # Configure in mcp_servers_config.json:
67
+ {
68
+ "mcpServers": {
69
+ "fetch": {
70
+ "command": "python",
71
+ "args": ["-m", "mcp_server_fetch"]
72
+ }
73
+ }
74
+ }
75
+ ```
76
+
77
+ Usage:
78
+ 1. Ensure all MCP servers are properly configured in mcp_servers_config.json
79
+ 2. Run this script to create a multi-agent system that can:
80
+ - Access and manipulate files through MCP Desktop Commander
81
+ - Perform web automation tasks using Playwright
82
+ - Process and generate information using GPT-4o
83
+ - Fetch web content (if fetch service is configured)
84
+ 3. The system will execute the specified task while maintaining security through
85
+ controlled access
86
+
87
+ Note:
88
+ - All file operations are restricted to configured directories
89
+ - System uses GPT-4o for both user and assistant roles
90
+ - Supports asynchronous operations for efficient processing
91
+ """
92
+
93
+ import asyncio
94
+ from pathlib import Path
95
+ from typing import List
96
+
97
+ from dotenv import load_dotenv
98
+
99
+ from camel.models import ModelFactory
100
+ from camel.toolkits import FunctionTool
101
+ from camel.types import ModelPlatformType, ModelType
102
+ from camel.logger import set_log_level
103
+ from camel.toolkits import MCPToolkit
104
+
105
+ from utils.enhanced_role_playing import OwlRolePlaying, arun_society
106
+
107
+
108
+ load_dotenv()
109
+ set_log_level(level="DEBUG")
110
+
111
+
112
async def construct_society(
    question: str,
    tools: List[FunctionTool],
) -> OwlRolePlaying:
    r"""Assemble a two-role OwlRolePlaying society around the given tools.

    Args:
        question (str): The task prompt given to the society.
        tools (List[FunctionTool]): The MCP tools handed to the assistant
            agent.

    Returns:
        OwlRolePlaying: The configured society.
    """

    def _new_gpt4o_model():
        # Both roles use the same OpenAI GPT-4o settings with temperature 0.
        return ModelFactory.create(
            model_platform=ModelPlatformType.OPENAI,
            model_type=ModelType.GPT_4O,
            model_config_dict={"temperature": 0},
        )

    # Created in the same order as before: user first, then assistant.
    user_model = _new_gpt4o_model()
    assistant_model = _new_gpt4o_model()

    # Only the assistant agent is given the tools.
    return OwlRolePlaying(
        task_prompt=question,
        with_task_specify=False,
        user_role_name="user",
        user_agent_kwargs={"model": user_model},
        assistant_role_name="assistant",
        assistant_agent_kwargs={"model": assistant_model, "tools": tools},
    )
154
+
155
+
156
async def main():
    r"""Connect to the configured MCP servers, run a sample task, and
    disconnect.

    The MCP server configuration is read from ``mcp_servers_config.json``
    located next to this script. The final answer is printed to stdout.
    """
    config_path = Path(__file__).parent / "mcp_servers_config.json"
    mcp_toolkit = MCPToolkit(config_path=str(config_path))

    try:
        await mcp_toolkit.connect()

        # Adjacent literals are concatenated verbatim, so each piece must
        # carry its own trailing space to keep sentences separated.
        question = (
            "I'd like a academic report about Andrew Ng, including his research "
            "direction, published papers (At least 3), institutions, etc. "
            "Then organize the report in Markdown format and save it to my desktop"
        )

        # Gather the tools exposed by the connected MCP toolkit
        tools = [*mcp_toolkit.get_tools()]
        society = await construct_society(question, tools)
        answer, chat_history, token_count = await arun_society(society)
        print(f"\033[94mAnswer: {answer}\033[0m")

    finally:
        # Make sure to disconnect safely after all operations are completed.
        # Disconnecting stays best-effort, but the reason for a failure is
        # reported instead of being dropped.
        try:
            await mcp_toolkit.disconnect()
        except Exception as exc:
            print(f"Disconnect failed: {exc}")


if __name__ == "__main__":
    asyncio.run(main())
owl/utils/__init__.py CHANGED
@@ -13,7 +13,12 @@
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
 
15
  from .common import extract_pattern
16
- from .enhanced_role_playing import OwlRolePlaying, OwlGAIARolePlaying, run_society
 
 
 
 
 
17
  from .gaia import GAIABenchmark
18
  from .document_toolkit import DocumentProcessingToolkit
19
 
@@ -22,6 +27,7 @@ __all__ = [
22
  "OwlRolePlaying",
23
  "OwlGAIARolePlaying",
24
  "run_society",
 
25
  "GAIABenchmark",
26
  "DocumentProcessingToolkit",
27
  ]
 
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
 
15
  from .common import extract_pattern
16
+ from .enhanced_role_playing import (
17
+ OwlRolePlaying,
18
+ OwlGAIARolePlaying,
19
+ run_society,
20
+ arun_society,
21
+ )
22
  from .gaia import GAIABenchmark
23
  from .document_toolkit import DocumentProcessingToolkit
24
 
 
27
  "OwlRolePlaying",
28
  "OwlGAIARolePlaying",
29
  "run_society",
30
+ "arun_society",
31
  "GAIABenchmark",
32
  "DocumentProcessingToolkit",
33
  ]
owl/utils/document_toolkit.py CHANGED
@@ -144,12 +144,11 @@ class DocumentProcessingToolkit(BaseToolkit):
144
  return True, extracted_text
145
  try:
146
  result = asyncio.run(self._extract_content_with_chunkr(document_path))
147
- raise ValueError("Chunkr is not available.")
148
  return True, result
149
 
150
  except Exception as e:
151
  logger.warning(
152
- f"Error occurred while using chunkr to process document: {e}"
153
  )
154
  if document_path.endswith(".pdf"):
155
  # try using pypdf to extract text from pdf
@@ -226,7 +225,7 @@ class DocumentProcessingToolkit(BaseToolkit):
226
 
227
  if result.status == "Failed":
228
  logger.error(
229
- f"Error while processing document {document_path}: {result.message}"
230
  )
231
  return f"Error while processing document: {result.message}"
232
 
 
144
  return True, extracted_text
145
  try:
146
  result = asyncio.run(self._extract_content_with_chunkr(document_path))
 
147
  return True, result
148
 
149
  except Exception as e:
150
  logger.warning(
151
+ f"Error occurred while using Chunkr to process document: {e}"
152
  )
153
  if document_path.endswith(".pdf"):
154
  # try using pypdf to extract text from pdf
 
225
 
226
  if result.status == "Failed":
227
  logger.error(
228
+ f"Error while processing document {document_path}: {result.message} using Chunkr."
229
  )
230
  return f"Error while processing document: {result.message}"
231
 
owl/utils/enhanced_role_playing.py CHANGED
@@ -152,7 +152,7 @@ Please note that the task may be very complicated. Do not attempt to solve the t
152
  Here are some tips that will help you to give more valuable instructions about our task to me:
153
  <tips>
154
  - I have various tools to use, such as search toolkit, web browser simulation toolkit, document relevant toolkit, code execution toolkit, etc. Thus, You must think how human will solve the task step-by-step, and give me instructions just like that. For example, one may first use google search to get some initial information and the target url, then retrieve the content of the url, or do some web browser interaction to find the answer.
155
- - Although the task is complex, the answer does exist. If you cant find the answer using the current scheme, try to re-plan and use other ways to find the answer, e.g. using other tools or methods that can achieve similar results.
156
  - Always remind me to verify my final answer about the overall task. This work can be done by using multiple tools(e.g., screenshots, webpage analysis, etc.), or something else.
157
  - If I have written code, please remind me to run the code and get the result.
158
  - Search results typically do not provide precise answers. It is not likely to find the answer directly using search toolkit only, the search query should be concise and focuses on finding sources rather than direct answers, as it always need to use other tools to further process the url, e.g. interact with the webpage, extract webpage content, etc.
@@ -281,6 +281,74 @@ Please note that our overall task may be very complicated. Here are some tips th
281
  ),
282
  )
283
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
 
285
  class OwlGAIARolePlaying(OwlRolePlaying):
286
  def __init__(self, **kwargs):
@@ -370,15 +438,16 @@ class OwlGAIARolePlaying(OwlRolePlaying):
370
 
371
 
372
  def run_society(
373
- society: RolePlaying, round_limit: int = 15
 
374
  ) -> Tuple[str, List[dict], dict]:
375
  overall_completion_token_count = 0
376
  overall_prompt_token_count = 0
377
 
378
  chat_history = []
379
  init_prompt = """
380
- Now please give me instructions to solve over overall task step by step. If the task requires some specific knowledge, please instruct me to use tools to complete the task.
381
- """
382
  input_msg = society.init_chat(init_prompt)
383
  for _round in range(round_limit):
384
  # Check if previous user response had TASK_DONE before getting next assistant response
@@ -392,6 +461,59 @@ Now please give me instructions to solve over overall task step by step. If the
392
  assistant_response.info["usage"]["completion_tokens"]
393
  + user_response.info["usage"]["completion_tokens"]
394
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395
  overall_prompt_token_count += (
396
  assistant_response.info["usage"]["prompt_tokens"]
397
  + user_response.info["usage"]["prompt_tokens"]
 
152
  Here are some tips that will help you to give more valuable instructions about our task to me:
153
  <tips>
154
  - I have various tools to use, such as search toolkit, web browser simulation toolkit, document relevant toolkit, code execution toolkit, etc. Thus, You must think how human will solve the task step-by-step, and give me instructions just like that. For example, one may first use google search to get some initial information and the target url, then retrieve the content of the url, or do some web browser interaction to find the answer.
155
+ - Although the task is complex, the answer does exist. If you can't find the answer using the current scheme, try to re-plan and use other ways to find the answer, e.g. using other tools or methods that can achieve similar results.
156
  - Always remind me to verify my final answer about the overall task. This work can be done by using multiple tools(e.g., screenshots, webpage analysis, etc.), or something else.
157
  - If I have written code, please remind me to run the code and get the result.
158
  - Search results typically do not provide precise answers. It is not likely to find the answer directly using search toolkit only, the search query should be concise and focuses on finding sources rather than direct answers, as it always need to use other tools to further process the url, e.g. interact with the webpage, extract webpage content, etc.
 
281
  ),
282
  )
283
 
284
async def astep(
    self, assistant_msg: BaseMessage
) -> Tuple[ChatAgentResponse, ChatAgentResponse]:
    r"""Asynchronously advance the conversation by one user/assistant round.

    The user agent is stepped with ``assistant_msg``. Its reply is augmented
    with task context (or, once it contains ``TASK_DONE``, with a request
    for the final answer) and that augmented message is what the assistant
    agent is stepped with. The assistant's reply is in turn augmented with
    the follow-up prompt for the user agent, unless the task is done.

    Args:
        assistant_msg (BaseMessage): The message passed to the user agent.

    Returns:
        Tuple[ChatAgentResponse, ChatAgentResponse]: The assistant response
            followed by the user response. The assistant response carries
            the augmented assistant message; the user response carries the
            user agent's original (un-augmented) message. If either agent
            terminates or returns no messages, the corresponding response
            has an empty ``msgs`` list.
    """
    user_response = await self.user_agent.astep(assistant_msg)
    if user_response.terminated or user_response.msgs is None:
        return (
            ChatAgentResponse(msgs=[], terminated=False, info={}),
            ChatAgentResponse(
                msgs=[],
                terminated=user_response.terminated,
                info=user_response.info,
            ),
        )
    user_msg = self._reduce_message_options(user_response.msgs)

    # Work on a copy so the user agent's original message stays untouched.
    modified_user_msg = deepcopy(user_msg)

    if "TASK_DONE" not in user_msg.content:
        modified_user_msg.content += f"""\n
Here are auxiliary information about the overall task, which may help you understand the intent of the current task:
<auxiliary_information>
{self.task_prompt}
</auxiliary_information>
If there are available tools and you want to call them, never say 'I will ...', but first call the tool and reply based on tool call's result, and tell me which tool you have called.
"""

    else:
        # The task is done, and the assistant agent need to give the final answer about the original task
        modified_user_msg.content += f"""\n
Now please make a final answer of the original task based on our conversation : <task>{self.task_prompt}</task>
"""

    # Step the assistant with the augmented message; passing the raw
    # ``user_msg`` would discard the context appended above.
    assistant_response = await self.assistant_agent.astep(modified_user_msg)
    if assistant_response.terminated or assistant_response.msgs is None:
        return (
            ChatAgentResponse(
                msgs=[],
                terminated=assistant_response.terminated,
                info=assistant_response.info,
            ),
            ChatAgentResponse(
                msgs=[user_msg], terminated=False, info=user_response.info
            ),
        )
    assistant_msg = self._reduce_message_options(assistant_response.msgs)

    modified_assistant_msg = deepcopy(assistant_msg)
    if "TASK_DONE" not in user_msg.content:
        modified_assistant_msg.content += f"""\n
Provide me with the next instruction and input (if needed) based on my response and our current task: <task>{self.task_prompt}</task>
Before producing the final answer, please check whether I have rechecked the final answer using different toolkit as much as possible. If not, please remind me to do that.
If I have written codes, remind me to run the codes.
If you think our task is done, reply with `TASK_DONE` to end our conversation.
"""

    # Return the augmented assistant message so the follow-up prompt appended
    # above is part of what the caller receives.
    return (
        ChatAgentResponse(
            msgs=[modified_assistant_msg],
            terminated=assistant_response.terminated,
            info=assistant_response.info,
        ),
        ChatAgentResponse(
            msgs=[user_msg],
            terminated=user_response.terminated,
            info=user_response.info,
        ),
    )
351
+
352
 
353
  class OwlGAIARolePlaying(OwlRolePlaying):
354
  def __init__(self, **kwargs):
 
438
 
439
 
440
  def run_society(
441
+ society: OwlRolePlaying,
442
+ round_limit: int = 15,
443
  ) -> Tuple[str, List[dict], dict]:
444
  overall_completion_token_count = 0
445
  overall_prompt_token_count = 0
446
 
447
  chat_history = []
448
  init_prompt = """
449
+ Now please give me instructions to solve over overall task step by step. If the task requires some specific knowledge, please instruct me to use tools to complete the task.
450
+ """
451
  input_msg = society.init_chat(init_prompt)
452
  for _round in range(round_limit):
453
  # Check if previous user response had TASK_DONE before getting next assistant response
 
461
  assistant_response.info["usage"]["completion_tokens"]
462
  + user_response.info["usage"]["completion_tokens"]
463
  )
464
+
465
+ # convert tool call to dict
466
+ tool_call_records: List[dict] = []
467
+ for tool_call in assistant_response.info["tool_calls"]:
468
+ tool_call_records.append(tool_call.as_dict())
469
+
470
+ _data = {
471
+ "user": user_response.msg.content,
472
+ "assistant": assistant_response.msg.content,
473
+ "tool_calls": tool_call_records,
474
+ }
475
+
476
+ chat_history.append(_data)
477
+ logger.info(f"Round #{_round} user_response:\n {user_response.msgs[0].content}")
478
+ logger.info(
479
+ f"Round #{_round} assistant_response:\n {assistant_response.msgs[0].content}"
480
+ )
481
+
482
+ if (
483
+ assistant_response.terminated
484
+ or user_response.terminated
485
+ or "TASK_DONE" in user_response.msg.content
486
+ ):
487
+ break
488
+
489
+ input_msg = assistant_response.msg
490
+
491
+ answer = chat_history[-1]["assistant"]
492
+ token_info = {
493
+ "completion_token_count": overall_completion_token_count,
494
+ "prompt_token_count": overall_prompt_token_count,
495
+ }
496
+
497
+ return answer, chat_history, token_info
498
+
499
+
500
+ async def arun_society(
501
+ society: OwlRolePlaying,
502
+ round_limit: int = 15,
503
+ ) -> Tuple[str, List[dict], dict]:
504
+ overall_completion_token_count = 0
505
+ overall_prompt_token_count = 0
506
+
507
+ chat_history = []
508
+ init_prompt = """
509
+ Now please give me instructions to solve over overall task step by step. If the task requires some specific knowledge, please instruct me to use tools to complete the task.
510
+ """
511
+ input_msg = society.init_chat(init_prompt)
512
+ for _round in range(round_limit):
513
+ assistant_response, user_response = await society.astep(input_msg)
514
+ overall_prompt_token_count += assistant_response.info["usage"][
515
+ "completion_tokens"
516
+ ]
517
  overall_prompt_token_count += (
518
  assistant_response.info["usage"]["prompt_tokens"]
519
  + user_response.info["usage"]["prompt_tokens"]
owl/utils/gaia.py CHANGED
@@ -191,15 +191,12 @@ class GAIABenchmark(BaseBenchmark):
191
  except Exception as e:
192
  logger.warning(e)
193
  # raise FileNotFoundError(f"{self.save_to} does not exist.")
194
-
 
 
 
195
  # Process tasks
196
  for task in tqdm(datas, desc="Running"):
197
- if self._check_task_completed(task["task_id"]):
198
- logger.info(
199
- f"The following task is already completed:\n task id: {task['task_id']}, question: {task['Question']}"
200
- )
201
- continue
202
-
203
  if_prepared_task, info = self._prepare_task(task)
204
  if not if_prepared_task:
205
  _result_info = {
 
191
  except Exception as e:
192
  logger.warning(e)
193
  # raise FileNotFoundError(f"{self.save_to} does not exist.")
194
+ datas = [
195
+ data for data in datas if not self._check_task_completed(data["task_id"])
196
+ ]
197
+ logger.info(f"Number of tasks to be processed: {len(datas)}")
198
  # Process tasks
199
  for task in tqdm(datas, desc="Running"):
 
 
 
 
 
 
200
  if_prepared_task, info = self._prepare_task(task)
201
  if not if_prepared_task:
202
  _result_info = {
pyproject.toml CHANGED
@@ -25,6 +25,8 @@ dependencies = [
25
  "chunkr-ai>=0.0.41",
26
  "docx2markdown>=0.1.1",
27
  "gradio>=3.50.2",
 
 
28
  ]
29
 
30
  [project.urls]
 
25
  "chunkr-ai>=0.0.41",
26
  "docx2markdown>=0.1.1",
27
  "gradio>=3.50.2",
28
+ "mcp-simple-arxiv==0.2.2",
29
+ "mcp-server-fetch==2025.1.17",
30
  ]
31
 
32
  [project.urls]
uv.lock CHANGED
@@ -2685,6 +2685,19 @@ wheels = [
2685
  { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 },
2686
  ]
2687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2688
  [[package]]
2689
  name = "markupsafe"
2690
  version = "2.1.5"
@@ -2806,6 +2819,38 @@ wheels = [
2806
  { url = "https://files.pythonhosted.org/packages/d0/d2/a9e87b506b2094f5aa9becc1af5178842701b27217fa43877353da2577e3/mcp-1.3.0-py3-none-any.whl", hash = "sha256:2829d67ce339a249f803f22eba5e90385eafcac45c94b00cab6cef7e8f217211", size = 70672 },
2807
  ]
2808
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2809
  [[package]]
2810
  name = "mdurl"
2811
  version = "0.1.2"
@@ -3571,6 +3616,8 @@ dependencies = [
3571
  { name = "chunkr-ai" },
3572
  { name = "docx2markdown" },
3573
  { name = "gradio" },
 
 
3574
  ]
3575
 
3576
  [package.metadata]
@@ -3579,6 +3626,8 @@ requires-dist = [
3579
  { name = "chunkr-ai", specifier = ">=0.0.41" },
3580
  { name = "docx2markdown", specifier = ">=0.1.1" },
3581
  { name = "gradio", specifier = ">=3.50.2" },
 
 
3582
  ]
3583
 
3584
  [[package]]
@@ -3962,6 +4011,15 @@ wheels = [
3962
  { url = "https://files.pythonhosted.org/packages/b5/35/6c4c6fc8774a9e3629cd750dc24a7a4fb090a25ccd5c3246d127b70f9e22/propcache-0.3.0-py3-none-any.whl", hash = "sha256:67dda3c7325691c2081510e92c561f465ba61b975f481735aefdfc845d2cd043", size = 12101 },
3963
  ]
3964
 
 
 
 
 
 
 
 
 
 
3965
  [[package]]
3966
  name = "proto-plus"
3967
  version = "1.26.0"
@@ -4673,6 +4731,21 @@ wheels = [
4673
  { url = "https://files.pythonhosted.org/packages/09/f6/fa777f336629aee8938f3d5c95c09df38459d4eadbdbe34642889857fb6a/rapidfuzz-3.12.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:54bb69ebe5ca0bd7527357e348f16a4c0c52fe0c2fcc8a041010467dcb8385f7", size = 1555000 },
4674
  ]
4675
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4676
  [[package]]
4677
  name = "redis"
4678
  version = "5.2.1"
 
2685
  { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 },
2686
  ]
2687
 
2688
+ [[package]]
2689
+ name = "markdownify"
2690
+ version = "1.1.0"
2691
+ source = { registry = "https://pypi.org/simple" }
2692
+ dependencies = [
2693
+ { name = "beautifulsoup4" },
2694
+ { name = "six" },
2695
+ ]
2696
+ sdist = { url = "https://files.pythonhosted.org/packages/2f/78/c48fed23c7aebc2c16049062e72de1da3220c274de59d28c942acdc9ffb2/markdownify-1.1.0.tar.gz", hash = "sha256:449c0bbbf1401c5112379619524f33b63490a8fa479456d41de9dc9e37560ebd", size = 17127 }
2697
+ wheels = [
2698
+ { url = "https://files.pythonhosted.org/packages/64/11/b751af7ad41b254a802cf52f7bc1fca7cabe2388132f2ce60a1a6b9b9622/markdownify-1.1.0-py3-none-any.whl", hash = "sha256:32a5a08e9af02c8a6528942224c91b933b4bd2c7d078f9012943776fc313eeef", size = 13901 },
2699
+ ]
2700
+
2701
  [[package]]
2702
  name = "markupsafe"
2703
  version = "2.1.5"
 
2819
  { url = "https://files.pythonhosted.org/packages/d0/d2/a9e87b506b2094f5aa9becc1af5178842701b27217fa43877353da2577e3/mcp-1.3.0-py3-none-any.whl", hash = "sha256:2829d67ce339a249f803f22eba5e90385eafcac45c94b00cab6cef7e8f217211", size = 70672 },
2820
  ]
2821
 
2822
+ [[package]]
2823
+ name = "mcp-server-fetch"
2824
+ version = "2025.1.17"
2825
+ source = { registry = "https://pypi.org/simple" }
2826
+ dependencies = [
2827
+ { name = "markdownify" },
2828
+ { name = "mcp" },
2829
+ { name = "protego" },
2830
+ { name = "pydantic" },
2831
+ { name = "readabilipy" },
2832
+ { name = "requests" },
2833
+ ]
2834
+ sdist = { url = "https://files.pythonhosted.org/packages/99/76/204ac83afe2000b1513b4741229586128361f376fab03832695e0179104d/mcp_server_fetch-2025.1.17.tar.gz", hash = "sha256:aa3a5dee358651103477bc121b98ada18a5c35840c56e4016cc3b40e7df1aa7d", size = 43468 }
2835
+ wheels = [
2836
+ { url = "https://files.pythonhosted.org/packages/d7/34/c0dce3415b627f763a9b7a0202a6a0672446b49f5ca04827340c28d75c63/mcp_server_fetch-2025.1.17-py3-none-any.whl", hash = "sha256:53c4967572464c6329824c9b05cdfa5fe214004d577ae8700fdb04203844be52", size = 7991 },
2837
+ ]
2838
+
2839
+ [[package]]
2840
+ name = "mcp-simple-arxiv"
2841
+ version = "0.2.2"
2842
+ source = { registry = "https://pypi.org/simple" }
2843
+ dependencies = [
2844
+ { name = "beautifulsoup4" },
2845
+ { name = "feedparser" },
2846
+ { name = "httpx" },
2847
+ { name = "mcp" },
2848
+ ]
2849
+ sdist = { url = "https://files.pythonhosted.org/packages/20/d3/d47bfce067ea85bc73154d8299549f84455e601f699fcff513f9d44cef0d/mcp_simple_arxiv-0.2.2.tar.gz", hash = "sha256:e27cfd58a470dcec7d733bd09b4219daddbdc3475a6d256e246a114e5b94e817", size = 12100 }
2850
+ wheels = [
2851
+ { url = "https://files.pythonhosted.org/packages/07/4e/6646a0004fc85b0c1df6e662db42f76fe5a0412179b7f65c066d7804370a/mcp_simple_arxiv-0.2.2-py3-none-any.whl", hash = "sha256:fcf607303c074ae5e88337b5bf3ea52cd781081f49ddf8fa0898eb3b8420dccb", size = 13686 },
2852
+ ]
2853
+
2854
  [[package]]
2855
  name = "mdurl"
2856
  version = "0.1.2"
 
3616
  { name = "chunkr-ai" },
3617
  { name = "docx2markdown" },
3618
  { name = "gradio" },
3619
+ { name = "mcp-server-fetch" },
3620
+ { name = "mcp-simple-arxiv" },
3621
  ]
3622
 
3623
  [package.metadata]
 
3626
  { name = "chunkr-ai", specifier = ">=0.0.41" },
3627
  { name = "docx2markdown", specifier = ">=0.1.1" },
3628
  { name = "gradio", specifier = ">=3.50.2" },
3629
+ { name = "mcp-server-fetch", specifier = "==2025.1.17" },
3630
+ { name = "mcp-simple-arxiv", specifier = "==0.2.2" },
3631
  ]
3632
 
3633
  [[package]]
 
4011
  { url = "https://files.pythonhosted.org/packages/b5/35/6c4c6fc8774a9e3629cd750dc24a7a4fb090a25ccd5c3246d127b70f9e22/propcache-0.3.0-py3-none-any.whl", hash = "sha256:67dda3c7325691c2081510e92c561f465ba61b975f481735aefdfc845d2cd043", size = 12101 },
4012
  ]
4013
 
4014
+ [[package]]
4015
+ name = "protego"
4016
+ version = "0.4.0"
4017
+ source = { registry = "https://pypi.org/simple" }
4018
+ sdist = { url = "https://files.pythonhosted.org/packages/4e/6b/84e878d0567dfc11538bad6ce2595cee7ae0c47cf6bf7293683c9ec78ef8/protego-0.4.0.tar.gz", hash = "sha256:93a5e662b61399a0e1f208a324f2c6ea95b23ee39e6cbf2c96246da4a656c2f6", size = 3246425 }
4019
+ wheels = [
4020
+ { url = "https://files.pythonhosted.org/packages/d9/fd/8d84d75832b0983cecf3aff7ae48362fe96fc8ab6ebca9dcf3cefd87e79c/Protego-0.4.0-py2.py3-none-any.whl", hash = "sha256:37640bc0ebe37572d624453a21381d05e9d86e44f89ff1e81794d185a0491666", size = 8553 },
4021
+ ]
4022
+
4023
  [[package]]
4024
  name = "proto-plus"
4025
  version = "1.26.0"
 
4731
  { url = "https://files.pythonhosted.org/packages/09/f6/fa777f336629aee8938f3d5c95c09df38459d4eadbdbe34642889857fb6a/rapidfuzz-3.12.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:54bb69ebe5ca0bd7527357e348f16a4c0c52fe0c2fcc8a041010467dcb8385f7", size = 1555000 },
4732
  ]
4733
 
4734
+ [[package]]
4735
+ name = "readabilipy"
4736
+ version = "0.3.0"
4737
+ source = { registry = "https://pypi.org/simple" }
4738
+ dependencies = [
4739
+ { name = "beautifulsoup4" },
4740
+ { name = "html5lib" },
4741
+ { name = "lxml" },
4742
+ { name = "regex" },
4743
+ ]
4744
+ sdist = { url = "https://files.pythonhosted.org/packages/b8/e4/260a202516886c2e0cc6e6ae96d1f491792d829098886d9529a2439fbe8e/readabilipy-0.3.0.tar.gz", hash = "sha256:e13313771216953935ac031db4234bdb9725413534bfb3c19dbd6caab0887ae0", size = 35491 }
4745
+ wheels = [
4746
+ { url = "https://files.pythonhosted.org/packages/dd/46/8a640c6de1a6c6af971f858b2fb178ca5e1db91f223d8ba5f40efe1491e5/readabilipy-0.3.0-py3-none-any.whl", hash = "sha256:d106da0fad11d5fdfcde21f5c5385556bfa8ff0258483037d39ea6b1d6db3943", size = 22158 },
4747
+ ]
4748
+
4749
  [[package]]
4750
  name = "redis"
4751
  version = "5.2.1"