File size: 3,720 Bytes
3afb4b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
049ef04
 
 
 
 
 
 
 
 
 
 
 
3afb4b6
 
 
 
 
 
049ef04
 
3afb4b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import copy
from PCAgent.api import resize_encode_image


def init_subtask_chat():
    """Build a new chat history for the subtask conversation.

    Returns:
        A freshly created list containing a single ``["system", content]``
        entry, where ``content`` is a one-element list holding the text part
        with the generic assistant system prompt.
    """
    system_part = {"type": "text", "text": "You are a helpful AI assistant."}
    return [["system", [system_part]]]


def init_action_chat():
    """Build a new chat history for the action conversation.

    Returns:
        A freshly created list containing a single ``["system", content]``
        entry, where ``content`` is a one-element list holding the text part
        with the PC-operating system prompt that asks the model to complete
        the user's instruction.
    """
    system_part = {
        "type": "text",
        "text": "You are a helpful AI PC operating assistant. You need to help me operate the PC to complete the user\'s instruction.",
    }
    return [["system", [system_part]]]


def init_reflect_chat():
    """Build a new chat history for the reflection conversation.

    Returns:
        A freshly created list containing a single ``["system", content]``
        entry, where ``content`` is a one-element list holding the text part
        with the PC-operating assistant system prompt.
    """
    system_part = {
        "type": "text",
        "text": "You are a helpful AI PC operating assistant.",
    }
    return [["system", [system_part]]]


def init_memory_chat():
    """Build a new chat history for the memory conversation.

    Returns:
        A freshly created list containing a single ``["system", content]``
        entry, where ``content`` is a one-element list holding the text part
        with the PC-operating assistant system prompt.
    """
    system_part = {
        "type": "text",
        "text": "You are a helpful AI PC operating assistant.",
    }
    return [["system", [system_part]]]


def add_response_old(role, prompt, chat_history, image=None):
    """Return a copy of ``chat_history`` with one new message appended.

    Legacy single-image variant of ``add_response``. The image, when given,
    is attached by file reference rather than as inline base64 data.

    Args:
        role: Role label stored as the first element of the new entry.
        prompt: Text stored in the message's leading ``"text"`` part.
        chat_history: Existing list of ``[role, content]`` entries. It is
            deep-copied; the caller's list is never modified.
        image: Optional image path. When truthy, an ``"image"`` part whose
            value is ``f"file://{image}"`` is added after the text part.

    Returns:
        A new chat history list ending with ``[role, content]``.
    """
    new_chat_history = copy.deepcopy(chat_history)

    content = [{"type": "text", "text": prompt}]
    if image:
        # The image is sent as a file:// reference, so it does not need to be
        # read or base64-encoded here.
        content.append({"type": "image", "image": f"file://{image}"})

    new_chat_history.append([role, content])
    return new_chat_history


def add_response(role, prompt, chat_history, image=None, use_qwen=False):
    """Return a copy of ``chat_history`` with one new message appended.

    The message content always starts with a ``"text"`` part holding
    ``prompt``, followed by one part per supplied image.

    Args:
        role: Role label stored as the first element of the new entry.
        prompt: Text stored in the message's leading ``"text"`` part.
        chat_history: Existing list of ``[role, content]`` entries. It is
            deep-copied; the caller's list is never modified.
        image: Images to attach, in order. ``None`` (the default) or an empty
            sequence attaches nothing. A single string is treated as one
            image instead of being iterated character by character.
        use_qwen: When false, each image is passed to ``resize_encode_image``
            and embedded as an ``"image_url"`` part with a
            ``data:image/png;base64,...`` URL. When true, each image is
            attached unchanged as an ``"image"`` part.

    Returns:
        A new chat history list ending with ``[role, content]``.
    """
    # Normalise the argument: a None default avoids a shared mutable default,
    # and a bare string is wrapped so it counts as exactly one image.
    if image is None:
        images = []
    elif isinstance(image, str):
        images = [image]
    else:
        images = image

    new_chat_history = copy.deepcopy(chat_history)
    content = [{"type": "text", "text": prompt}]

    for item in images:
        if use_qwen:
            content.append({"type": "image", "image": item})
        else:
            base64_image = resize_encode_image(item)
            content.append(
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image}"
                    },
                }
            )

    new_chat_history.append([role, content])
    return new_chat_history


def add_response_two_image(role, prompt, chat_history, image):
    """Return a copy of ``chat_history`` with a two-image message appended.

    Args:
        role: Role label stored as the first element of the new entry.
        prompt: Text stored in the message's leading ``"text"`` part.
        chat_history: Existing list of ``[role, content]`` entries. It is
            deep-copied; the caller's list is never modified.
        image: Indexable holding at least two images. ``image[0]`` and
            ``image[1]`` are each passed to ``resize_encode_image``, in that
            order.

    Returns:
        A new chat history list ending with ``[role, content]``, where
        ``content`` is the text part followed by two ``"image_url"`` parts
        whose URLs are labelled ``data:image/jpeg;base64``.
    """
    updated_history = copy.deepcopy(chat_history)

    # Encode strictly in index order: first image[0], then image[1].
    encoded_images = [resize_encode_image(image[idx]) for idx in (0, 1)]

    content = [{"type": "text", "text": prompt}]
    for encoded in encoded_images:
        content.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
            }
        )

    updated_history.append([role, content])
    return updated_history


def print_status(chat_history):
    """Print a readable dump of a chat history to standard output.

    The dump is framed by two lines of 100 asterisks. For every entry the
    role (``entry[0]``) is printed first, then the ``"text"`` value of the
    first content part followed by one ``<image>`` marker for each further
    content part, and finally an empty line.

    Args:
        chat_history: Iterable of ``[role, content]`` entries, where
            ``content`` is a non-empty list whose first item is a dict with
            a ``"text"`` key.
    """
    separator = "*" * 100
    print(separator)
    for entry in chat_history:
        print("role:", entry[0])
        parts = entry[1]
        text = parts[0]["text"]
        # Every part after the first is shown as an image placeholder.
        markers = "<image>" * (len(parts) - 1)
        print(text + markers + "\n")
    print(separator)