Ruby-a07 committed
Commit bac9d55 · 1 Parent(s): f6ca5b1

add use case of virtual fitting room
community_usecase/virtual_fitting_room/readme.md ADDED
@@ -0,0 +1,23 @@
+ # Virtual Fitting Room
+ ## What's this?
+
+ This code example automatically searches your designated websites (e.g. Uniqlo) for suitable trending products and shows you realistic try-on results on different virtual models (you can also use your own photo as the model for a more intuitive try-on experience).
+
+ All with one prompt 🪄
+
+ ## Dependencies:
+
+ 1. This example depends on some modifications I made to the camel repo, so first run `git clone -b feature/virtual-try-on-toolkit-and-partial-screenshot --single-branch https://github.com/camel-ai/camel.git`.
+ 2. Fill in your KlingAI API key for virtual try-on in camel/toolkits/virtual_try_on_toolkit.py (you can get one from https://klingai.kuaishou.com/dev-center).
+ 3. Install the cloned repo with pip (e.g. `pip install -e .` from inside the cloned directory); see the sanity-check sketch below.
+
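+ After installing, a minimal sanity check (just a sketch; it only assumes the cloned branch exposes `VirtualTryOnToolkit` from `camel.toolkits`, as run_gpt4o.py below does):
+
+ ```python
+ # Verify that the try-on branch is installed and the toolkit is importable.
+ from camel.toolkits import VirtualTryOnToolkit
+
+ toolkit = VirtualTryOnToolkit()
+ # Each entry is a callable tool that an agent can invoke.
+ print(toolkit.get_tools())
+ ```
+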
+ ## How to use:
+ 1. Copy "run_gpt4o.py" to owl/examples.
+ 2. Run `python examples/run_gpt4o.py` (assuming your current directory is owl); see the sketch below for running it with a custom prompt.
+ 3. The fetched image of the clothes will be saved in tmp/clothes.
+ 4. The final try-on image will be saved in tmp/fitting_room.
+
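+ To drive the same pipeline with your own prompt instead of the one hard-coded in main(), a minimal sketch (it assumes run_gpt4o.py has been copied to owl/examples as in step 1, so it is importable as `examples.run_gpt4o`):
+
+ ```python
+ # Reuse the script's construct_society helper with a custom task prompt.
+ from examples.run_gpt4o import construct_society
+ from owl.utils import run_society
+
+ society = construct_society(
+     "Open the Uniqlo women's tops page, take a partial screenshot of one "
+     "product, and show me the virtual try-on result."
+ )
+ answer, chat_history, token_count = run_society(society)
+ print(answer)
+ ```
+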
+ ## Example Output
+ https://drive.google.com/file/d/1J3caeAL4C-_LEULPi6VOvlyJPazQeOOv/view?usp=sharing
+
+ (Click the link above to see the screen recording, which shows the full automated process, from browsing clothes on Uniqlo to generating the final try-on image.)
community_usecase/virtual_fitting_room/run_gpt4o.py ADDED
@@ -0,0 +1,201 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ import os
+ import logging
+ import json
+ import pathlib
+
+ from dotenv import load_dotenv
+
+ from camel.models import ModelFactory
+ from camel.toolkits import (
+     ExcelToolkit,
+     ImageAnalysisToolkit,
+     SearchToolkit,
+     BrowserToolkit,
+     FileWriteToolkit,
+     VirtualTryOnToolkit,
+ )
+ from camel.types import ModelPlatformType
+ from camel.societies import RolePlaying
+ from camel.logger import set_log_level, get_logger
+
+ from owl.utils import run_society
+
+ base_dir = pathlib.Path(__file__).parent.parent
+ env_path = base_dir / "owl" / ".env"
+ load_dotenv(dotenv_path=str(env_path))
+
+ # Set detailed logging for debugging.
+ set_log_level(level="DEBUG")
+ logger = get_logger(__name__)
+ file_handler = logging.FileHandler('tool_calls.log')
+ file_handler.setLevel(logging.DEBUG)
+ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ file_handler.setFormatter(formatter)
+
+ # Attach the handler to the root logger only; the module logger propagates
+ # to root, so adding it to both would write every record twice.
+ root_logger = logging.getLogger()
+ root_logger.addHandler(file_handler)
+
+ def construct_society(question: str) -> RolePlaying:
+     r"""Construct a society of agents based on the given question.
+
+     Args:
+         question (str): The task or question to be addressed by the society.
+
+     Returns:
+         RolePlaying: A configured society of agents ready to address the question.
+     """
+
+     # Create models for the different components (gpt-4o is used for all
+     # agents here, so remember to set the OpenAI key in .env).
+     models = {
+         "user": ModelFactory.create(
+             model_platform=ModelPlatformType.OPENAI_COMPATIBLE_MODEL,
+             model_type="gpt-4o",
+             api_key=os.getenv("OPENAI_API_KEY"),
+             model_config_dict={"temperature": 0.4},
+         ),
+         "assistant": ModelFactory.create(
+             model_platform=ModelPlatformType.OPENAI_COMPATIBLE_MODEL,
+             model_type="gpt-4o",
+             api_key=os.getenv("OPENAI_API_KEY"),
+             model_config_dict={"temperature": 0.4},
+         ),
+         "web": ModelFactory.create(
+             model_platform=ModelPlatformType.OPENAI_COMPATIBLE_MODEL,
+             model_type="gpt-4o",
+             api_key=os.getenv("OPENAI_API_KEY"),
+             model_config_dict={"temperature": 0.2},
+         ),
+         "planning": ModelFactory.create(
+             model_platform=ModelPlatformType.OPENAI_COMPATIBLE_MODEL,
+             model_type="gpt-4o",
+             api_key=os.getenv("OPENAI_API_KEY"),
+             model_config_dict={"temperature": 0.3},
+         ),
+         "image": ModelFactory.create(
+             model_platform=ModelPlatformType.OPENAI_COMPATIBLE_MODEL,
+             model_type="gpt-4o",
+             api_key=os.getenv("OPENAI_API_KEY"),
+             model_config_dict={"temperature": 0.4},
+         ),
+     }
+
+     # Prepare toolkits.
+     image_toolkit = ImageAnalysisToolkit(model=models["image"])
+     browser_toolkit = BrowserToolkit(
+         headless=False,
+         web_agent_model=models["web"],
+         planning_agent_model=models["planning"],
+     )
+     excel_toolkit = ExcelToolkit()
+     file_toolkit = FileWriteToolkit(output_dir="./")
+     virtual_try_on_toolkit = VirtualTryOnToolkit()
+
+     tools = [
+         *browser_toolkit.get_tools(),
+         *image_toolkit.get_tools(),
+         SearchToolkit().search_duckduckgo,
+         # SearchToolkit().search_google,
+         # SearchToolkit().search_wiki,
+         *excel_toolkit.get_tools(),
+         *file_toolkit.get_tools(),
+         *virtual_try_on_toolkit.get_tools(),
+     ]
+
+     # Configure agent roles and parameters.
+     user_agent_kwargs = {"model": models["user"]}
+     assistant_agent_kwargs = {"model": models["assistant"], "tools": tools}
+
+     # Configure task parameters.
+     task_kwargs = {
+         "task_prompt": question,
+         "with_task_specify": False,
+     }
+
+     # Create and return the society.
+     society = RolePlaying(
+         **task_kwargs,
+         user_role_name="user",
+         user_agent_kwargs=user_agent_kwargs,
+         assistant_role_name="assistant",
+         assistant_agent_kwargs=assistant_agent_kwargs,
+     )
+
+     return society
+
+
+ def main():
+     r"""Main function to run the OWL system with an example question."""
+
+     question = (
+         "open https://www.uniqlo.com/eu-at/en/women/tops?path=37608%2C84986%2C85018%2C85207 "
+         "which shows some clothes on sale. First, directly click one image of clothes, which "
+         "should be a big interactive element (don't wrongly click the small like button "
+         "overlapped on the image!), to go into its specific details page, and then get a "
+         "partial screenshot of this item of clothing. Second, only after you've got the "
+         "partial screenshot of the product, use your own virtual try-on toolkit (there is no "
+         "built-in virtual try-on button on this website, and no third-party tool is required) "
+         "to show me the virtual try-on result with the product."
+     )
+
+     # Construct and run the society.
+     society = construct_society(question)
+     answer, chat_history, token_count = run_society(society)
+
+     # Record the tool-call history (for debugging).
+     analyze_chat_history(chat_history)
+     print(f"\033[94mAnswer: {answer}\033[0m")
+
+
+
+ def analyze_chat_history(chat_history):
+     r"""Analyze the chat history and extract tool-call information."""
+     print("\n============ Tool-Call Analysis ============")
+     logger.info("========== Start analyzing tool calls in chat history ==========")
+
+     tool_calls = []
+     for i, message in enumerate(chat_history):
+         if message.get('role') == 'assistant' and 'tool_calls' in message:
+             for tool_call in message.get('tool_calls', []):
+                 if tool_call.get('type') == 'function':
+                     function = tool_call.get('function', {})
+                     tool_info = {
+                         'call_id': tool_call.get('id'),
+                         'name': function.get('name'),
+                         'arguments': function.get('arguments'),
+                         'message_index': i,
+                     }
+                     tool_calls.append(tool_info)
+                     print(f"Tool call: {function.get('name')} arguments: {function.get('arguments')}")
+                     logger.info(f"Tool call: {function.get('name')} arguments: {function.get('arguments')}")
+
+         elif message.get('role') == 'tool' and 'tool_call_id' in message:
+             # Find the tool call this result belongs to.
+             for tool_call in tool_calls:
+                 if tool_call.get('call_id') == message.get('tool_call_id'):
+                     result = message.get('content') or ''
+                     result_summary = (result[:100] + "...") if len(result) > 100 else result
+                     print(f"Tool result: {tool_call.get('name')} returned: {result_summary}")
+                     logger.info(f"Tool result: {tool_call.get('name')} returned: {result_summary}")
+
+     print(f"Found {len(tool_calls)} tool calls in total")
+     logger.info(f"Found {len(tool_calls)} tool calls in total")
+     logger.info("========== Finished analyzing tool calls in chat history ==========")
+
+     # Save the full chat history to a file.
+     with open('chat_history.json', 'w', encoding='utf-8') as f:
+         json.dump(chat_history, f, ensure_ascii=False, indent=2)
+
+     print("History saved to chat_history.json")
+     print("============ Analysis complete ============\n")
+
+
+ if __name__ == "__main__":
+     main()