Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ import torch
|
|
8 |
from PIL import Image
|
9 |
import uuid
|
10 |
import io
|
|
|
11 |
|
12 |
# Fine-tuned for OCR-based tasks from Qwen's [ Qwen/Qwen2-VL-2B-Instruct ]
|
13 |
MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
|
@@ -93,10 +94,19 @@ def model_inference(input_dict, history):
|
|
93 |
|
94 |
# Apply chat template and process inputs
|
95 |
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
inputs = processor(
|
97 |
text=[prompt],
|
98 |
-
images=
|
99 |
-
videos=
|
100 |
return_tensors="pt",
|
101 |
padding=True,
|
102 |
).to("cuda")
|
@@ -121,7 +131,6 @@ def model_inference(input_dict, history):
|
|
121 |
|
122 |
# Example inputs
|
123 |
examples = [
|
124 |
-
[{"text": "Describe the video.", "files": ["examples/demo.mp4"]}],
|
125 |
[{"text": "Extract JSON from the image", "files": ["example_images/document.jpg"]}],
|
126 |
[{"text": "summarize the letter", "files": ["examples/1.png"]}],
|
127 |
[{"text": "Describe the photo", "files": ["examples/3.png"]}],
|
@@ -132,6 +141,7 @@ examples = [
|
|
132 |
[{"text": "Can you describe this image?", "files": ["example_images/newyork.jpg"]}],
|
133 |
[{"text": "Can you describe this image?", "files": ["example_images/dogs.jpg"]}],
|
134 |
[{"text": "Where do the severe droughts happen according to this diagram?", "files": ["example_images/examples_weather_events.png"]}],
|
|
|
135 |
]
|
136 |
|
137 |
demo = gr.ChatInterface(
|
|
|
8 |
from PIL import Image
|
9 |
import uuid
|
10 |
import io
|
11 |
+
import os
|
12 |
|
13 |
# Fine-tuned for OCR-based tasks from Qwen's [ Qwen/Qwen2-VL-2B-Instruct ]
|
14 |
MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
|
|
|
94 |
|
95 |
# Apply chat template and process inputs
|
96 |
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
97 |
+
|
98 |
+
# Prepare inputs for the processor
|
99 |
+
image_inputs = [load_image(path) for path, media_type in zip(media_paths, media_types) if media_type == "image"]
|
100 |
+
video_inputs = [path for path, media_type in zip(media_paths, media_types) if media_type == "video"]
|
101 |
+
|
102 |
+
# Pass None instead of an empty list when no videos were supplied
|
103 |
+
if not video_inputs:
|
104 |
+
video_inputs = None
|
105 |
+
|
106 |
inputs = processor(
|
107 |
text=[prompt],
|
108 |
+
images=image_inputs if image_inputs else None,
|
109 |
+
videos=video_inputs if video_inputs else None,
|
110 |
return_tensors="pt",
|
111 |
padding=True,
|
112 |
).to("cuda")
|
|
|
131 |
|
132 |
# Example inputs
|
133 |
examples = [
|
|
|
134 |
[{"text": "Extract JSON from the image", "files": ["example_images/document.jpg"]}],
|
135 |
[{"text": "summarize the letter", "files": ["examples/1.png"]}],
|
136 |
[{"text": "Describe the photo", "files": ["examples/3.png"]}],
|
|
|
141 |
[{"text": "Can you describe this image?", "files": ["example_images/newyork.jpg"]}],
|
142 |
[{"text": "Can you describe this image?", "files": ["example_images/dogs.jpg"]}],
|
143 |
[{"text": "Where do the severe droughts happen according to this diagram?", "files": ["example_images/examples_weather_events.png"]}],
|
144 |
+
[{"text": "Describe the video.", "files": ["example_videos/sample.mp4"]}],
|
145 |
]
|
146 |
|
147 |
demo = gr.ChatInterface(
|