Update app.py
Browse files
app.py
CHANGED
@@ -36,12 +36,17 @@ def describe_image(image, user_question="Describe this image in great detail."):
|
|
36 |
pil_images = [Image.open(BytesIO(image_byte_arr.read()))]
|
37 |
image_byte_arr.seek(0)
|
38 |
|
39 |
-
# Prepare inputs
|
40 |
prepare_inputs = vl_chat_processor(
|
41 |
conversations=conversation,
|
42 |
images=pil_images,
|
43 |
force_batchify=True
|
44 |
-
)
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
# Load model with CPU and float32 weights
|
47 |
vl_gpt = AutoModelForCausalLM.from_pretrained(
|
@@ -55,7 +60,7 @@ def describe_image(image, user_question="Describe this image in great detail."):
|
|
55 |
# Generate response with CPU
|
56 |
outputs = vl_gpt.language_model.generate(
|
57 |
inputs_embeds=inputs_embeds,
|
58 |
-
attention_mask=prepare_inputs
|
59 |
pad_token_id=tokenizer.eos_token_id,
|
60 |
bos_token_id=tokenizer.bos_token_id,
|
61 |
eos_token_id=tokenizer.eos_token_id,
|
|
|
36 |
pil_images = [Image.open(BytesIO(image_byte_arr.read()))]
|
37 |
image_byte_arr.seek(0)
|
38 |
|
39 |
+
# Prepare inputs
|
40 |
prepare_inputs = vl_chat_processor(
|
41 |
conversations=conversation,
|
42 |
images=pil_images,
|
43 |
force_batchify=True
|
44 |
+
)
|
45 |
+
|
46 |
+
# Convert all tensors in prepare_inputs to float32 for CPU compatibility
|
47 |
+
for key in prepare_inputs:
|
48 |
+
if isinstance(prepare_inputs[key], torch.Tensor):
|
49 |
+
prepare_inputs[key] = prepare_inputs[key].to(dtype=torch.float32)
|
50 |
|
51 |
# Load model with CPU and float32 weights
|
52 |
vl_gpt = AutoModelForCausalLM.from_pretrained(
|
|
|
60 |
# Generate response with CPU
|
61 |
outputs = vl_gpt.language_model.generate(
|
62 |
inputs_embeds=inputs_embeds,
|
63 |
+
attention_mask=prepare_inputs["attention_mask"],
|
64 |
pad_token_id=tokenizer.eos_token_id,
|
65 |
bos_token_id=tokenizer.bos_token_id,
|
66 |
eos_token_id=tokenizer.eos_token_id,
|