"""Gradio demo: image description / visual Q&A backed by Tencent Hunyuan vision."""
import base64
import mimetypes
import os

import gradio as gr
from openai import OpenAI

# Initialise the Tencent Hunyuan client via its OpenAI-compatible endpoint.
# The key is taken from the HUNYUAN_API_KEY environment variable so that a real
# secret never has to be committed; when the variable is unset the original
# placeholder string is used (replace it with your own API key).
client = OpenAI(
    api_key=os.environ.get("HUNYUAN_API_KEY", "HUNYUAN_API_KEY"),
    base_url="https://api.hunyuan.cloud.tencent.com/v1",
)


def generate_caption(image_path, question):
    """Stream the model's answer to ``question`` about the image at ``image_path``.

    Args:
        image_path: Filesystem path of the uploaded image (the UI below passes
            the value of a ``gr.Image(type="filepath")`` component, which is
            ``None`` when nothing has been uploaded).
        question: Prompt text sent alongside the image.

    Yields:
        The accumulated response text so far, once per non-empty streamed
        token, so the output textbox is progressively updated.

    Raises:
        gr.Error: If no image was provided.
    """
    if not image_path:
        # Without this guard open(None) raises an opaque TypeError.
        raise gr.Error("请先上传图片")

    # Encode the image as Base64 so it can be embedded in a data URL.
    with open(image_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode("utf-8")

    # Uploaded files are not necessarily JPEG; derive the MIME type from the
    # file name and only fall back to JPEG when it cannot be determined.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # Build the multimodal message: the text prompt plus the inline image.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{base64_image}",
                    },
                },
            ],
        }
    ]

    # Call the Hunyuan vision model in streaming mode. The extra_body entries
    # are vendor-specific options forwarded as-is in the request body.
    response = client.chat.completions.create(
        model="hunyuan-vision",
        messages=messages,
        stream=True,
        extra_body={
            "stream_moderation": True,
            "enable_enhancement": False,
        },
    )

    # Consume the stream, yielding the cumulative text after each new token.
    full_response = ""
    for chunk in response:
        if not chunk.choices:
            # Some stream chunks carry no choices; indexing [0] would raise
            # IndexError on them.
            continue
        token = chunk.choices[0].delta.content
        if token:
            full_response += token
            yield full_response


# Build the Gradio interface.
with gr.Blocks(title="腾讯混元图生文Demo") as demo:
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="filepath", label="上传图片")
            question_input = gr.Textbox(label="输入问题", value="请描述图片内容")
            submit_btn = gr.Button("生成描述")
        output = gr.Textbox(label="描述结果", interactive=False)

    submit_btn.click(
        fn=generate_caption,
        inputs=[image_input, question_input],
        outputs=output,
    )

if __name__ == "__main__":
    demo.queue(default_concurrency_limit=100)
    demo.launch(max_threads=100)