import base64
import mimetypes
import os

import gradio as gr
from openai import OpenAI

# OpenAI-compatible client pointed at the Hunyuan endpoint. The API key is
# read from the HUNYUAN_API_KEY environment variable.
client = OpenAI(
    api_key=os.getenv('HUNYUAN_API_KEY'),
    base_url="https://api.hunyuan.cloud.tencent.com/v1"
)


def _image_to_data_url(image_path):
    """Return the file at *image_path* as a base64 ``data:`` URL.

    The MIME type is guessed from the file extension. When the extension is
    unknown or is not an image type, ``image/jpeg`` is used as the fallback.
    """
    guessed_type, _ = mimetypes.guess_type(image_path)
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/jpeg"

    with open(image_path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode('utf-8')
    return f"data:{mime_type};base64,{encoded}"


def generate_caption(image_path, question):
    """Stream the model's answer to *question* about the image.

    Args:
        image_path: Filesystem path of the image to send. The UI below
            supplies it from a ``gr.Image(type="filepath")`` component.
        question: Text prompt sent alongside the image.

    Yields:
        The response text accumulated so far, once per received token, so
        the output textbox can be refreshed progressively.

    Raises:
        gr.Error: If no image was provided.
    """
    # Without this guard a missing upload fails inside open() with an
    # unhelpful TypeError.
    if not image_path:
        raise gr.Error("请先上传图片")

    # Build the message: the text question plus the image as a data URL.
    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": question},
            {
                "type": "image_url",
                "image_url": {
                    "url": _image_to_data_url(image_path)
                }
            }
        ]
    }]

    # Call the Hunyuan vision model in streaming mode. The extra_body
    # entries are vendor-specific options sent along with the request.
    response = client.chat.completions.create(
        model="hunyuan-vision",
        messages=messages,
        stream=True,
        extra_body={
            "stream_moderation": True,
            "enable_enhancement": False
        }
    )

    # Consume the stream, yielding the cumulative text after each token.
    full_response = ""
    for chunk in response:
        # A chunk may carry no choices; indexing [0] would raise IndexError.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            full_response += token
            yield full_response


# Build the Gradio interface.
title = "Hunyuan-Vision图生文Demo"
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# 🖼️ {title}")
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="filepath", label="上传图片")
            question_input = gr.Textbox(label="输入问题", value="请描述图片内容")
            submit_btn = gr.Button("生成描述")
        output = gr.Textbox(label="描述结果", interactive=False)

    submit_btn.click(
        fn=generate_caption,
        inputs=[image_input, question_input],
        outputs=output
    )

if __name__ == "__main__":
    demo.queue(default_concurrency_limit=100)
    demo.launch(max_threads=100)