import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq

# Model and processor.
# NOTE(review): confirm this repository id exists on the Hugging Face Hub and
# that the checkpoint accepts a raw text prompt; many chat-tuned
# vision-language models expect the prompt to be built with the processor's
# chat template -- TODO verify against the model card.
model_id = "Qwen/Qwen1.5-VL-Chat"
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForVision2Seq.from_pretrained(
    model_id,
    device_map="auto",  # let the loader place weights on GPU or CPU automatically
    trust_remote_code=True,
    # bfloat16 when CUDA is available, full precision otherwise.
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
    disable_exllama=True,  # kept from the original: intended to avoid a load failure
).eval()


# Inference function
def chat(image, question):
    """Answer a question about an uploaded image.

    Args:
        image: The uploaded picture as delivered by the ``gr.Image`` input
            (configured with ``type="pil"``), or ``None`` when nothing has
            been uploaded.
        question: The user's question text; may be empty or ``None``.

    Returns:
        The decoded model answer with surrounding whitespace removed, or a
        prompt asking the user to supply both inputs when either is missing.
    """
    # Guard against a missing image and an empty / whitespace-only / None question.
    if image is None or not question or not question.strip():
        return "請上傳圖片並輸入問題。"

    inputs = processor(text=question, images=image, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=512)

    # Decoder-only models return the prompt tokens followed by the completion;
    # drop the prompt so the answer does not echo the question. Encoder-decoder
    # models return only the completion, so they are left untouched.
    if not getattr(model.config, "is_encoder_decoder", False) and "input_ids" in inputs:
        prompt_length = inputs["input_ids"].shape[1]
        outputs = outputs[:, prompt_length:]

    answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    return answer.strip()


# UI layout
with gr.Blocks(title="Qwen1.5-VL 圖文問答 Demo") as demo:
    gr.Markdown("## 🧠 Qwen1.5-VL 圖文問答 Demo")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="📷 請上傳圖片")
            question_input = gr.Textbox(label="請輸入問題", placeholder="例如:這是什麼地方?")
            submit_btn = gr.Button("Submit", variant="primary")
            clear_btn = gr.Button("Clear")
        with gr.Column():
            answer_output = gr.Textbox(label="💬 答案", lines=8)

    submit_btn.click(fn=chat, inputs=[image_input, question_input], outputs=answer_output)
    # Reset the image with None (an empty string is not a valid image value)
    # and the two text boxes with empty strings.
    clear_btn.click(
        lambda: (None, "", ""),
        outputs=[image_input, question_input, answer_output],
    )

# Start the server; set share=False if a public link is not wanted.
demo.launch(share=True)