TEST-02 / processing_qwen_vl.py
taybeyond's picture
Upload 3 files
c132fb4 verified
raw
history blame contribute delete
1.07 kB
from transformers import CLIPImageProcessor, AutoTokenizer
class QWenVLProcessor:
    """Pair a Qwen-VL text tokenizer with a CLIP image processor.

    Mimics the Hugging Face processor interface: calling the instance with
    text and/or images returns a single dict merging the tokenizer outputs
    and the image-processor outputs.
    """

    # Vision checkpoint the original code hard-coded; now overridable
    # via from_pretrained(image_processor_id=...).
    DEFAULT_IMAGE_PROCESSOR_ID = "openai/clip-vit-large-patch14"

    def __init__(self, tokenizer, image_processor):
        # tokenizer: callable producing text model inputs (e.g. AutoTokenizer).
        # image_processor: callable producing pixel inputs (e.g. CLIPImageProcessor).
        self.tokenizer = tokenizer
        self.image_processor = image_processor

    @classmethod
    def from_pretrained(cls, model_id, image_processor_id=None, **kwargs):
        """Build a processor from pretrained checkpoints.

        Args:
            model_id: checkpoint id/path for the tokenizer.
            image_processor_id: optional checkpoint for the image processor;
                defaults to ``DEFAULT_IMAGE_PROCESSOR_ID`` (CLIP ViT-L/14),
                preserving the previous hard-coded behavior.
            **kwargs: forwarded to ``AutoTokenizer.from_pretrained``.

        Returns:
            A ``QWenVLProcessor`` wrapping the two loaded components.
        """
        tokenizer = AutoTokenizer.from_pretrained(model_id, **kwargs)
        image_processor = CLIPImageProcessor.from_pretrained(
            image_processor_id or cls.DEFAULT_IMAGE_PROCESSOR_ID
        )
        return cls(tokenizer=tokenizer, image_processor=image_processor)

    def __call__(self, text=None, images=None, return_tensors=None):
        """Tokenize ``text`` and/or preprocess ``images``.

        Either argument may be omitted; when both are None an empty dict is
        returned (original behavior). On a key collision image keys would win,
        though in practice the two key sets are disjoint.

        Args:
            text: string or list of strings to tokenize (padded as a batch).
            images: image input(s) accepted by the image processor.
            return_tensors: framework flag forwarded to both components.

        Returns:
            A dict merging text inputs (e.g. ``input_ids``) and image inputs
            (e.g. ``pixel_values``).
        """
        image_inputs = (
            self.image_processor(images, return_tensors=return_tensors)
            if images is not None
            else {}
        )
        text_inputs = (
            self.tokenizer(text, return_tensors=return_tensors, padding=True)
            if text is not None
            else {}
        )
        return {**text_inputs, **image_inputs}

    def batch_decode(self, *args, **kwargs):
        """Forward to the tokenizer's ``batch_decode``."""
        return self.tokenizer.batch_decode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        """Forward to the tokenizer's ``decode`` (parity with batch_decode,
        matching the standard HF processor API)."""
        return self.tokenizer.decode(*args, **kwargs)