import torch
from transformers import AutoTokenizer, AutoModelForVision2Seq
import gradio as gr
from PIL import Image

# Model name
model_id = "deepseek-ai/deepseek-vl-1.3b-chat"

# Load the tokenizer and model; fall back to CPU (and float32) if no GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForVision2Seq.from_pretrained(
    model_id,
    torch_dtype=dtype,
    trust_remote_code=True,
).to(device)
model.eval()

# Image-text chat function
def chat_with_image(image: Image.Image, user_input: str):
    # Build a single-turn multimodal message (image + text)
    messages = [
        {"role": "user", "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": user_input},
        ]}
    ]
    # Call the chat() helper exposed via trust_remote_code (assumed from the
    # DeepSeek source; the official deepseek_vl package instead drives the
    # model through a VLChatProcessor-based pipeline)
    with torch.no_grad():
        output = model.chat(tokenizer, messages=messages, image=image)
    return output

# Gradio interface
iface = gr.Interface(
    fn=chat_with_image,
    inputs=[
        gr.Image(type="pil", label="Upload an image"),
        gr.Textbox(label="Enter your question"),
    ],
    outputs=gr.Textbox(label="Model answer"),
    title="DeepSeek-VL-1.3B Chat Demo",
    description="Upload an image and type a question to try out the multimodal chat model.",
)

if __name__ == "__main__":
    iface.launch()
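
# --- Optional smoke test (not part of the original demo) --------------------
# A minimal sketch for exercising chat_with_image() without the web UI. The
# file path "example.jpg" is hypothetical, and this assumes the remote code
# really exposes the chat() helper called above.
def smoke_test(path: str = "example.jpg",
               question: str = "What is shown in this picture?") -> None:
    # Load a local image, convert to RGB, and print the model's answer
    img = Image.open(path).convert("RGB")
    print(chat_with_image(img, question))

# To expose the demo over a temporary public URL, Gradio also supports:
#   iface.launch(share=True)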