# NOTE(review): removed pasted artifact lines (a "File size" banner, git
# commit hashes, and bare line numbers) that were not valid Python and not
# part of this module.
import os
import subprocess
import sys

def install():
    """Install project dependencies from requirements.txt.

    Uses the running interpreter (``sys.executable``) so packages are
    installed into the active environment.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    # sys.executable (not os.sys.executable) is the documented way to locate
    # the current Python interpreter; os.sys only works by accident of os
    # importing sys internally.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])

# Intentional import-time side effect: install dependencies before the
# imports below (e.g. gradio) are attempted.
install()

import gradio as gr
from utils.caller.llm_client import (
    chat_completions,
    image_to_one_of_content,
    tts,
)
from utils.learner.learner import (
    get_default_system_prompt,
    DefaultTool,
)
from tempfile import NamedTemporaryFile
import re
import json

def extract_json_from_code_block(text):
    """Return every valid JSON value found inside ```json ... ``` fences.

    Fenced candidates whose contents fail to parse are silently skipped.
    """
    fenced_candidates = re.findall(r'```json\s*([\s\S]*?)\s*```', text)

    parsed = []
    for candidate in fenced_candidates:
        try:
            parsed.append(json.loads(candidate))
        except json.JSONDecodeError:
            pass  # ignore malformed JSON blocks
    return parsed

def gr_msg_to_openai_msg(gr_message, role="user"):
    """Convert a Gradio multimodal message dict into an OpenAI chat message.

    Args:
        gr_message: Gradio message with a "text" entry and an optional
            "files" list of image paths.
        role: Chat role for the resulting message (default "user").

    Returns:
        A dict of the form ``{"role": ..., "content": [...]}`` where content
        holds one text part followed by one part per successfully loaded image.
    """
    content = [{
        "type": "text",
        "text": gr_message.get("text"),
    }]
    # "files" may be absent or None; default to an empty list instead of
    # raising TypeError when iterating.
    for image_path in gr_message.get("files") or []:
        try:
            content.append(image_to_one_of_content(image_path))
        except Exception as e:
            # Best-effort: warn in the UI and keep the remaining parts.
            gr.Warning(f"fail to load {image_path}, error msg {e}", duration=5)

    return {
        "role": role,
        "content": content,
    }

async def chat_fn(gr_message, history, model, state_openai_messages:list, state_audios:list):
    """Gradio chat handler: generate a text reply, then stream TTS audio files.

    Args:
        gr_message: Multimodal user message ({"text": ..., "files": [...]}).
        history: Gradio chat history (unused; the conversation is tracked in
            state_openai_messages instead).
        model: Model name forwarded to chat_completions.
        state_openai_messages: Accumulated OpenAI-format conversation; seeded
            with the default system prompt on the first turn.
        state_audios: Audio-record state (currently passed through unchanged).

    Yields:
        (gr_response, state_openai_messages, state_audios) — once after text
        generation, then once per generated audio file.
    """
    gr_response = {
        "text":"",
        "files":[],
    }
    openai_message = gr_msg_to_openai_msg(gr_message)

    # Seed the conversation with the system prompt on the first turn.
    if not state_openai_messages:
        state_openai_messages = [
            {
                "role":"system",
                "content":get_default_system_prompt(),
            }
        ]

    state_openai_messages.append(openai_message)

    gr.Info("start text generation")
    response = chat_completions(
        messages=state_openai_messages,
        model=model,
    )

    text = response.choices[0].message.content
    state_openai_messages.append(response.choices[0].message)
    gr_response["text"] = text
    gr.Info("finish text generation")

    # First yield: show the text reply before audio generation starts.
    yield gr_response, state_openai_messages, state_audios

    # If the model emitted a ```json ...``` block, interpret the first one as
    # a DefaultTool payload and synthesize audio for each record.
    J_list = extract_json_from_code_block(text)
    if len(J_list) > 0:
        J = J_list[0]
        M = DefaultTool(**J)
        for item in M.records:
            try:
                data = item.foreign.data
                # Fixed message: this is text-to-speech (TTS), not STT.
                gr.Info(f"Start TTS ({data})")
                # delete=False: we only need a unique path — the file must
                # outlive this handle so tts() can write to it and Gradio can
                # serve it afterwards. (The old delete=True relied on GC
                # timing and could race with the write.)
                with NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
                    tmp_file_name = tmp.name
                tts(input=data).stream_to_file(tmp_file_name)
                gr_response["files"].append(tmp_file_name)
                gr.Info(f"End TTS ({data})")
            except Exception as e:
                # Best-effort per record: warn in the UI and continue.
                gr.Warning(f"Fail to generate audio. {e}")

            # Yield after each record so audio appears incrementally.
            yield gr_response, state_openai_messages, state_audios

with gr.Blocks() as demo:
    gr.Markdown("# Lang Thrower")
    # Session state threaded through chat_fn via additional_inputs/outputs.
    state_openai_messages = gr.State([]) # The state for openai usage
    state_audios = gr.State([
        # {"text":"...", "path":"..."}
    ])

    # Editable model name forwarded to chat_completions.
    model = gr.Text("gpt-4o", label="model_name",)

    # Multimodal input: free text plus zero or more image attachments.
    textbox = gr.MultimodalTextbox(
        file_types=["image"],
        file_count="multiple",
        placeholder="Please give text and image.",
    )

    # NOTE: additional_inputs order must match chat_fn's parameters after
    # (gr_message, history): model, state_openai_messages, state_audios.
    chat_interface = gr.ChatInterface(
        fn=chat_fn,
        textbox=textbox,
        additional_inputs=[model, state_openai_messages, state_audios],
        additional_outputs=[state_openai_messages, state_audios],
    )


if __name__ == "__main__":
    # debug=True: surface server-side errors in the browser console/UI.
    demo.launch(debug=True)