Output fails when following the example on the project homepage
#78
opened by YoloBird
The code follows the example on the project homepage:
```python
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info
import torch

def main():
    # Load the model and processor
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        "/root/autodl-tmp/Qwen/Qwen2-VL-2B", torch_dtype="auto", device_map="auto"
    )
    processor = AutoProcessor.from_pretrained("/root/autodl-tmp/Qwen/Qwen2-VL-2B")

    # Build a chat message containing an image and a text question
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
                },
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]

    # Render the final text prompt via the processor's chat template
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Extract the image inputs from the messages with process_vision_info
    image_inputs, video_inputs = process_vision_info(messages)

    # Call the processor to build model inputs, returned as tensors
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )

    # Move the inputs to the GPU if available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = inputs.to(device)

    # Print the shape of each input tensor
    print("input_ids shape:", inputs.input_ids.shape)
    print("attention_mask shape:", inputs.attention_mask.shape)
    print("pixel_values shape:", inputs.pixel_values.shape)

    # Optional: run one generation pass and inspect the output
    generated_ids = model.generate(**inputs, max_new_tokens=128)
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    print("Generated text:", output_text)

if __name__ == "__main__":
    main()
```
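Since `input_ids` comes back with shape `[1, 0]`, I also tried a quick sanity check (a minimal sketch against the script above) to see whether the chat template is rendering an empty prompt. Note that the official example uses `Qwen/Qwen2-VL-2B-Instruct`; my guess is that the base `Qwen2-VL-2B` checkpoint may be missing the chat template, but I have not confirmed this:

```python
# Hypothetical debug lines, added right after apply_chat_template():
# if the checkpoint ships without a chat template, `text` can come out empty,
# which would explain input_ids of shape [1, 0].
print("rendered prompt:", repr(text))
print("prompt token count:", len(processor.tokenizer(text).input_ids))
```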
The error output is below. Separately, why is the returned image tensor (`pixel_values`) two-dimensional, with no channel dimension?
```
(qwen_attack) root@autodl-container-28d246918f-350fbcec:~/HADES/Llava_test/qwen_test# python test.py
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 2.08it/s]
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
input_ids shape: torch.Size([1, 0])
attention_mask shape: torch.Size([1, 0])
pixel_values shape: torch.Size([14308, 1176])
Traceback (most recent call last):
  File "/root/HADES/Llava_test/qwen_test/test.py", line 63, in <module>
    main()
  File "/root/HADES/Llava_test/qwen_test/test.py", line 53, in main
    generated_ids = model.generate(**inputs, max_new_tokens=128)
  File "/root/miniconda3/envs/qwen_attack/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
  File "/root/miniconda3/envs/qwen_attack/lib/python3.10/site-packages/transformers/generation/utils.py", line 2326, in generate
    result = self._sample(
  File "/root/miniconda3/envs/qwen_attack/lib/python3.10/site-packages/transformers/generation/utils.py", line 3279, in _sample
    model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
  File "/root/miniconda3/envs/qwen_attack/lib/python3.10/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py", line 1792, in prepare_inputs_for_generation
    model_inputs = super().prepare_inputs_for_generation(
  File "/root/miniconda3/envs/qwen_attack/lib/python3.10/site-packages/transformers/generation/utils.py", line 419, in prepare_inputs_for_generation
    or cache_position[-1] >= input_ids.shape[1]  # Exception 3
IndexError: index -1 is out of bounds for dimension 0 with size 0
```
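As for the second question: if I understand the Qwen2-VL image processor correctly (this is my assumption from its default settings, not something I have confirmed in the docs), `pixel_values` is not an image grid but a flat list of patch vectors, so the channel dimension is folded into the last axis rather than dropped:

```python
# Assumed Qwen2-VL patchification defaults: 14x14 spatial patches,
# temporal_patch_size=2 (still images duplicated along time), 3 channels.
channels, temporal_patch_size, patch_size = 3, 2, 14
patch_dim = channels * temporal_patch_size * patch_size * patch_size
print(patch_dim)  # 1176, matching pixel_values.shape[-1]; 14308 is the patch count
```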