hugohabicht01 committed
Commit 7896f13 · Parent(s): dae4d1c

run inference using transformers and not unsloth

- app.py +16 -13
- requirements.txt +1 -1
app.py CHANGED

@@ -1,18 +1,16 @@
 import gradio as gr
 import spaces
-from …
-import …
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+from qwen_vl_utils import process_vision_info
 from PIL import Image
 import numpy as np
 import traceback
 from typing import Any, Optional
 
-# Import user-provided modules
 import utils
-from utils import …
+from utils import BoundingBox
 import blurnonymize
 
-# --- Constants ---
 MODEL_NAME = "cborg/qwen2.5VL-3b-privacydetector"
 MAX_NEW_TOKENS = 2048
 TEMPERATURE = 1.0
@@ -80,11 +78,12 @@ def build_messages(image, history: Optional[list[dict[str, Any]]] = None, prompt
 # --- Model Loading ---
 # Load model using unsloth for 4-bit quantization
 try:
-    model …
-    …
-    …
+    # default: Load the model on the available device(s)
+    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+        MODEL_NAME, torch_dtype="auto", device_map="auto"
     )
-    …
+    tokenizer = AutoProcessor.from_pretrained(MODEL_NAME)
+
     model.to("cuda").eval()  # Ensure model is on GPU and in eval mode
     print("Model loaded successfully.")
 except Exception as e:
@@ -124,12 +123,16 @@ def run_model_inference(input_image_pil: Image.Image, prompt_text: str):
     messages = build_messages(
         input_image_pil,
         prompt=prompt_text)
-    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
 
-    …
+    input_text = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    image_inputs, video_inputs = process_vision_info(messages)
     inputs = tokenizer(
-        …
-        …
+        text=[input_text],
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
         return_tensors="pt",
     ).to("cuda")
 
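For orientation, here is a minimal end-to-end sketch of the inference path this commit switches to, pieced together from the hunks above and the standard Qwen2.5-VL transformers recipe. The message structure, the example image and prompt, and the generate/decode step at the end are assumptions, since build_messages and the generation code are not part of this diff:

from PIL import Image
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info

MODEL_NAME = "cborg/qwen2.5VL-3b-privacydetector"
MAX_NEW_TOKENS = 2048

model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_NAME, torch_dtype="auto", device_map="auto"
)
tokenizer = AutoProcessor.from_pretrained(MODEL_NAME)  # the app binds the processor to this name

# Assumed shape of build_messages() output: process_vision_info() only
# collects images declared as {"type": "image", ...} content parts.
image = Image.open("example.jpg")  # hypothetical input image
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": "Find all private data in this image."},  # hypothetical prompt
        ],
    }
]

# Render the chat template to a string, then let the processor tokenize
# the text together with the image features in a single call.
input_text = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
image_inputs, video_inputs = process_vision_info(messages)
inputs = tokenizer(
    text=[input_text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
).to(model.device)

# Not shown in the diff; this is the usual Qwen2.5-VL generate/decode pattern.
generated_ids = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
print(tokenizer.batch_decode(trimmed, skip_special_tokens=True)[0])

Passing tokenize=False is what makes the two-step flow work: apply_chat_template returns the templated string with image placeholders, and the processor call then expands those placeholders to the right number of visual tokens while tokenizing.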
requirements.txt CHANGED

@@ -1,5 +1,4 @@
 gradio
-unsloth
 transformers
 torch
 pydantic
@@ -10,3 +9,4 @@ opencv-python
 spaces
 matplotlib
 sam2
+qwen-vl-utils[decord]==0.0.8
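The new qwen-vl-utils pin provides the process_vision_info helper now imported in app.py. The [decord] extra pulls in the decord video reader, which upstream recommends for faster video decoding; an image-only Space likely does not need it, but the exact ==0.0.8 pin keeps builds reproducible.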