import base64
import mimetypes
import os

import gradio as gr
from openai import OpenAI

# Initialise the Tencent Hunyuan client through the OpenAI SDK, pointed at
# the Hunyuan base URL. The API key is taken from the HUNYUAN_API_KEY
# environment variable so the secret does not have to live in source control.
# When the variable is unset or empty, the original placeholder string is
# used, so setups that edit the key directly in this file keep working.
client = OpenAI(
    api_key=os.environ.get("HUNYUAN_API_KEY") or "HUNYUAN_API_KEY",
    base_url="https://api.hunyuan.cloud.tencent.com/v1",
)

def generate_caption(image_path, question):
    """Stream the vision model's answer to a question about an image.

    Reads the image file, embeds it as a base64 data URL next to the
    question text in a single user message, sends that message to the
    module-level ``client`` with streaming enabled, and yields the answer
    accumulated so far after every non-empty token.

    Args:
        image_path: Filesystem path of the image to send. ``None`` or an
            empty value is rejected.
        question: Text prompt sent together with the image.

    Yields:
        The full answer text received so far (not just the latest token).

    Raises:
        gr.Error: If no image path was supplied.
        OSError: If the image file cannot be opened or read.
    """
    if not image_path:
        # Without an uploaded image there is no path to open; fail with a
        # readable message instead of a TypeError from open(None).
        raise gr.Error("请先上传图片")

    # Encode the raw image bytes as base64 text for the data URL.
    with open(image_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode("utf-8")

    # Label the payload with the type guessed from the file name; keep the
    # previous "image/jpeg" label when the extension is unknown or non-image.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # One user message carrying both the text prompt and the inline image.
    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": question},
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{mime_type};base64,{base64_image}",
                },
            },
        ],
    }]

    # Request a streamed completion from the Hunyuan vision model.
    # extra_body forwards the two provider-specific flags unchanged.
    response = client.chat.completions.create(
        model="hunyuan-vision",
        messages=messages,
        stream=True,
        extra_body={
            "stream_moderation": True,
            "enable_enhancement": False,
        },
    )

    # Accumulate tokens and re-yield the growing text so the caller always
    # receives the complete answer so far.
    full_response = ""
    for chunk in response:
        # A streamed chunk may carry no choices at all; indexing [0] on it
        # would raise IndexError, so skip it.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        token = delta.content if delta is not None else None
        if token:
            full_response += token
            yield full_response

# Build the Gradio interface: one row holding an input column (image upload,
# question box, submit button) and, beside it, a read-only result box.
with gr.Blocks(title="腾讯混元图生文Demo") as demo:
    with gr.Row():
        with gr.Column():
            # type="filepath": the image is handed to the callback as a file
            # path, which generate_caption opens and reads itself.
            image_input = gr.Image(type="filepath", label="上传图片")
            # Pre-filled with a default prompt.
            question_input = gr.Textbox(label="输入问题", value="请描述图片内容")
            submit_btn = gr.Button("生成描述")
        # Created inside the Row but outside the Column; not user-editable.
        output = gr.Textbox(label="描述结果", interactive=False)

    # Wire the button: on click, call generate_caption with the image and the
    # question, and send what it produces to the result box.
    submit_btn.click(
        fn=generate_caption,
        inputs=[image_input, question_input],
        outputs=output
    )

if __name__ == "__main__":
    # Script entry point: configure the queue with a default concurrency
    # limit of 100, then launch the app with up to 100 threads.
    demo.queue(default_concurrency_limit=100).launch(max_threads=100)