import base64
from openai import Client
from openai.types.chat.chat_completion import ChatCompletion
from gradio_client.utils import is_http_url_like
import magic
from pydantic import BaseModel
from ..learner.learner import DefaultTool
from typing import Any
from copy import deepcopy
import json


def get_client(api_key: str | None = None, **kwargs):
    """Construct an OpenAI ``Client``.

    Args:
        api_key: Value forwarded as the ``api_key`` argument of ``Client``.
        **kwargs: Extra keyword arguments forwarded unchanged to ``Client``.

    Returns:
        The newly created ``Client`` instance.
    """
    return Client(api_key=api_key, **kwargs)


def encode_image(image_path: str):
    """Turn a local image file into a base64 ``data:`` URL string.

    The file content is base64-encoded and the MIME type reported by
    ``magic`` for the same path is placed in the URL header.

    Args:
        image_path: Path of the file to read.

    Returns:
        A string of the form ``data:<mime type>;base64,<encoded bytes>``.
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    encoded = base64.b64encode(raw_bytes).decode("utf-8")
    detected_type = magic.Magic(mime=True).from_file(image_path)
    return f"data:{detected_type};base64,{encoded}"


def image_to_one_of_content(
    image_path: str,
    detail: str = "auto",
):
    """Build an ``image_url`` content part for a chat message.

    Args:
        image_path: Either something ``is_http_url_like`` accepts (used
            as-is) or a local file path (inlined via ``encode_image``).
        detail: Value stored under the ``detail`` key.

    Returns:
        A dict with ``type`` set to ``"image_url"`` and an ``image_url``
        sub-dict holding ``url`` and ``detail``.
    """
    if is_http_url_like(image_path):
        url = image_path
    else:
        # Not a remote URL: embed the file content directly in the message.
        url = encode_image(image_path)
    image_part = {"url": url, "detail": detail}
    return {"type": "image_url", "image_url": image_part}


def audio_to_content(
    data: str,
    format: str,
):
    """Build an ``input_audio`` content part for a chat message.

    Args:
        data: Value stored under the ``data`` key.
        format: Value stored under the ``format`` key.

    Returns:
        A dict with ``type`` set to ``"input_audio"`` and an ``input_audio``
        sub-dict holding ``data`` and ``format``.
    """
    audio_part = {"data": data, "format": format}
    return {"type": "input_audio", "input_audio": audio_part}


def chat_completions(
    messages: list,
    model: str,
    *,
    client: Client | None = None,
    **kwargs,
) -> ChatCompletion:
    """Call ``client.chat.completions.create`` and return its result.

    Args:
        messages: Passed as the ``messages`` argument.
        model: Passed as the ``model`` argument.
        client: Client to use; when falsy, one is created with
            ``get_client()``.
        **kwargs: Extra keyword arguments forwarded to ``create``.

    Returns:
        Whatever ``client.chat.completions.create`` returns.
    """
    if not client:
        client = get_client()
    return client.chat.completions.create(
        model=model,
        messages=messages,
        **kwargs,
    )


def tts(
    input,
    *,
    client: Client | None = None,
    voice="fable",
    model="tts-1",
):
    """Call ``client.audio.speech.create`` and return its result.

    Args:
        input: Passed as the ``input`` argument.
        client: Client to use; when falsy, one is created with
            ``get_client()``.
        voice: Passed as the ``voice`` argument.
        model: Passed as the ``model`` argument.

    Returns:
        Whatever ``client.audio.speech.create`` returns.
    """
    if not client:
        client = get_client()
    return client.audio.speech.create(
        model=model,
        voice=voice,
        input=input,
    )