import subprocess
import sys


def install():
    # Install dependencies at startup (handy when the script is launched
    # directly, e.g. on a hosted Space without a separate build step).
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])


install()

import gradio as gr
from utils.caller.llm_client import (
    chat_completions,
    image_to_one_of_content,
    tts,
)
from utils.learner.learner import (
    get_default_system_prompt,
    DefaultTool,
)
from tempfile import NamedTemporaryFile
import re
import json


def extract_json_from_code_block(text):
    """Extract every ```json ... ``` block from the model output and return the parsed objects."""
    pattern = r'```json\s*([\s\S]*?)\s*```'
    matches = re.findall(pattern, text)
    json_objects = []
    for match in matches:
        try:
            json_objects.append(json.loads(match))
        except json.JSONDecodeError:
            continue  # Skip invalid JSON
    return json_objects


def gr_msg_to_openai_msg(gr_message, role="user"):
    """Convert a Gradio multimodal message into an OpenAI-style chat message (text plus image parts)."""
    content = [{
        "type": "text",
        "text": gr_message.get("text"),
    }]
    for image_path in gr_message.get("files", []):
        try:
            content.append(image_to_one_of_content(image_path))
        except Exception as e:
            gr.Warning(f"Failed to load {image_path}: {e}", duration=5)
    return {
        "role": role,
        "content": content,
    }


async def chat_fn(gr_message, history, model, state_openai_messages: list, state_audios: list):
    gr_response = {
        "text": "",
        "files": [],
    }
    openai_message = gr_msg_to_openai_msg(gr_message)

    # Seed the conversation with the system prompt on the first turn.
    if not state_openai_messages:
        state_openai_messages = [
            {
                "role": "system",
                "content": get_default_system_prompt(),
            }
        ]
    state_openai_messages.append(openai_message)

    gr.Info("Starting text generation")
    response = chat_completions(
        messages=state_openai_messages,
        model=model,
    )
    text = response.choices[0].message.content
    state_openai_messages.append(response.choices[0].message)
    gr_response["text"] = text
    gr.Info("Finished text generation")

    # Show the text reply immediately; audio files are appended below.
    yield gr_response, state_openai_messages, state_audios

    # If the reply contains a ```json ...``` block, parse it into DefaultTool
    # and synthesize audio for each record via TTS.
    J_list = extract_json_from_code_block(text)
    if len(J_list) > 0:
        J = J_list[0]
        M = DefaultTool(**J)
        for item in M.records:
            try:
                data = item.foreign.data
                gr.Info(f"Start TTS ({data})")
                # delete=False so the generated file persists for Gradio to serve.
                tmp_file_name = NamedTemporaryFile(
                    suffix=".mp3",
                    delete=False,
                ).name
                tts(input=data).stream_to_file(tmp_file_name)
                gr_response["files"].append(tmp_file_name)
                gr.Info(f"End TTS ({data})")
            except Exception as e:
                gr.Warning(f"Failed to generate audio: {e}")
    yield gr_response, state_openai_messages, state_audios


with gr.Blocks() as demo:
    gr.Markdown("# Lang Thrower")
    state_openai_messages = gr.State([])  # Conversation history in OpenAI message format
    state_audios = gr.State([
        # {"text": "...", "path": "..."}
    ])
    model = gr.Text("gpt-4o", label="model_name")
    textbox = gr.MultimodalTextbox(
        file_types=["image"],
        file_count="multiple",
        placeholder="Please provide text and images.",
    )
    chat_interface = gr.ChatInterface(
        fn=chat_fn,
        textbox=textbox,
        additional_inputs=[model, state_openai_messages, state_audios],
        additional_outputs=[state_openai_messages, state_audios],
    )

if __name__ == "__main__":
    demo.launch(debug=True)