from pathlib import Path
from typing import List, Tuple

import auto_gptq
import torch
from auto_gptq.modeling import BaseGPTQForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer


class QwenVLChat:
    """Wrapper around Qwen-VL-Chat for single-image description."""

    def __init__(self, device: str = "cuda:0", quantized: bool = False) -> None:
        if quantized:
            # 4-bit GPTQ checkpoint of Qwen-VL-Chat.
            self.model = AutoModelForCausalLM.from_pretrained(
                "Qwen/Qwen-VL-Chat-Int4", device_map=device, trust_remote_code=True
            ).eval()
            self.tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
        else:
            # Full checkpoint loaded in fp16 to halve GPU memory use.
            self.model = AutoModelForCausalLM.from_pretrained(
                "Qwen/Qwen-VL-Chat", device_map=device, trust_remote_code=True, fp16=True
            ).eval()
            self.tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat", trust_remote_code=True)

    def __call__(self, prompt: str, image: str) -> Tuple[str, List[Tuple[str, str]]]:
        # Qwen-VL expects the image and the text prompt packed into a single query string.
        query = self.tokenizer.from_list_format([{"image": image}, {"text": prompt}])
        response, history = self.model.chat(self.tokenizer, query=query, history=[])
        return response, history


class InternLMXComposer2QForCausalLM(BaseGPTQForCausalLM):
    # Tells auto-gptq how the InternLM-XComposer2 checkpoint is laid out: which
    # attribute holds the transformer blocks, which modules live outside those
    # blocks, and which linear layers inside each block are quantized.
    layers_block_name = "model.layers"
    outside_layer_modules = [
        "vit",
        "vision_proj",
        "model.tok_embeddings",
        "model.norm",
        "output",
    ]
    inside_layer_modules = [
        ["attention.wqkv.linear"],
        ["attention.wo.linear"],
        ["feed_forward.w1.linear", "feed_forward.w3.linear"],
        ["feed_forward.w2.linear"],
    ]


class InternLMXComposer2:
    """Wrapper around InternLM-XComposer2-VL for single-image description."""

    def __init__(self, device: str = "cuda:0", quantized: bool = True):
        if quantized:
            # Register "internlm" with auto-gptq so the custom architecture is accepted,
            # then load the 4-bit checkpoint through the wrapper class above.
            auto_gptq.modeling._base.SUPPORTED_MODELS = ["internlm"]
            self.model = InternLMXComposer2QForCausalLM.from_quantized(
                "internlm/internlm-xcomposer2-vl-7b-4bit", trust_remote_code=True, device=device
            ).eval()
            self.tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-xcomposer2-vl-7b-4bit", trust_remote_code=True)
        else:
            # Setting fp16=True does not work; see https://huggingface.co/internlm/internlm-xcomposer2-vl-7b/discussions/1.
            self.model = (
                AutoModelForCausalLM.from_pretrained(
                    "internlm/internlm-xcomposer2-vl-7b", device_map=device, trust_remote_code=True
                )
                .eval()
                .to(torch.float16)
            )
            self.tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-xcomposer2-vl-7b", trust_remote_code=True)

    def __call__(self, prompt: str, image: str):
        # The chat API expects an <ImageHere> placeholder marking where the image is inserted.
        if not prompt.startswith("<ImageHere>"):
            prompt = "<ImageHere>" + prompt
        with torch.cuda.amp.autocast(), torch.no_grad():
            response, history = self.model.chat(self.tokenizer, query=prompt, image=image, history=[], do_sample=False)
        return response, history


if __name__ == "__main__":
    # Gather the demo images to describe.
    image_folder = "demo/"
    wildcard_list = ["*.jpg", "*.png"]
    image_list = []
    for wildcard in wildcard_list:
        image_list.extend([str(image_path) for image_path in Path(image_folder).glob(wildcard)])

    # Describe every image with the 4-bit Qwen-VL-Chat model.
    qwen_vl_chat = QwenVLChat(device="cuda:0", quantized=True)
    qwen_vl_prompt = "Please describe this image in detail."
    for image in image_list:
        response, _ = qwen_vl_chat(qwen_vl_prompt, image)
        print(image, response)
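
    # Optionally free the Qwen-VL model before loading InternLM-XComposer2: both
    # target the same GPU, so releasing the first checkpoint can avoid CUDA
    # out-of-memory errors on smaller cards (a minimal sketch, assuming nothing
    # else still holds a reference to the model).
    del qwen_vl_chat
    torch.cuda.empty_cache()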

    # Describe every image with InternLM-XComposer2-VL in fp16.
    internlm2_vl = InternLMXComposer2(device="cuda:0", quantized=False)
    internlm2_vl_prompt = "Please describe this image in detail."
    for image in image_list:
        response, _ = internlm2_vl(internlm2_vl_prompt, image)
        print(image, response)