hugohabicht01 committed
Commit 7896f13 · 1 Parent(s): dae4d1c

run inference using transformers and not unsloth

Files changed (2):
  1. app.py +16 -13
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,18 +1,16 @@
 import gradio as gr
 import spaces
-from unsloth import FastVisionModel
-import torch
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+from qwen_vl_utils import process_vision_info
 from PIL import Image
 import numpy as np
 import traceback
 from typing import Any, Optional
 
-# Import user-provided modules
 import utils
-from utils import Finding, BoundingBox  # Explicitly import needed classes
+from utils import BoundingBox
 import blurnonymize
 
-# --- Constants ---
 MODEL_NAME = "cborg/qwen2.5VL-3b-privacydetector"
 MAX_NEW_TOKENS = 2048
 TEMPERATURE = 1.0
@@ -80,11 +78,12 @@ def build_messages(image, history: Optional[list[dict[str, Any]]] = None, prompt
 # --- Model Loading ---
-# Load model using unsloth for 4-bit quantization
+# Load model with plain transformers
 try:
-    model, tokenizer = FastVisionModel.from_pretrained(
-        model_name=MODEL_NAME,
-        load_in_4bit=True,
+    # default: Load the model on the available device(s)
+    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+        MODEL_NAME, torch_dtype="auto", device_map="auto"
     )
-    FastVisionModel.for_inference(model)
+    tokenizer = AutoProcessor.from_pretrained(MODEL_NAME)
+
     model.to("cuda").eval()  # Ensure model is on GPU and in eval mode
     print("Model loaded successfully.")
 except Exception as e:
@@ -124,12 +123,16 @@ def run_model_inference(input_image_pil: Image.Image, prompt_text: str):
     messages = build_messages(
         input_image_pil,
         prompt=prompt_text)
-    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
 
-    # Prepare inputs for the model
+    input_text = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    image_inputs, video_inputs = process_vision_info(messages)
     inputs = tokenizer(
-        input_image_pil,
-        input_text,
+        text=[input_text],
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
         return_tensors="pt",
     ).to("cuda")
 
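For reference, the new code path is the stock transformers recipe for Qwen2.5-VL. The sketch below strings the two hunks together end to end, reusing the MODEL_NAME and MAX_NEW_TOKENS constants from app.py; the image path and prompt are placeholders, and the explicit .to("cuda") from app.py is dropped here because device_map="auto" already places the weights.

```python
# Minimal end-to-end sketch of the transformers-only Qwen2.5-VL path above.
# Assumptions: single-GPU machine; "example.jpg" and the prompt are hypothetical.
from PIL import Image
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info

MODEL_NAME = "cborg/qwen2.5VL-3b-privacydetector"
MAX_NEW_TOKENS = 2048

model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_NAME, torch_dtype="auto", device_map="auto"
).eval()
processor = AutoProcessor.from_pretrained(MODEL_NAME)

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": Image.open("example.jpg")},
            {"type": "text", "text": "Find all privacy-sensitive regions."},
        ],
    }
]

# Render the chat template to a prompt string, then let qwen_vl_utils
# collect the image/video payloads referenced by the messages.
text = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
).to(model.device)

# Generate, strip the prompt tokens, and decode only the newly generated part.
generated_ids = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
print(processor.batch_decode(trimmed, skip_special_tokens=True)[0])
```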
requirements.txt CHANGED
@@ -1,5 +1,4 @@
 gradio
-unsloth
 transformers
 torch
 pydantic
@@ -10,3 +9,4 @@ opencv-python
 spaces
 matplotlib
 sam2
+qwen-vl-utils[decord]==0.0.8
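The [decord] extra on the new qwen-vl-utils pin only matters for video inputs, which this app does not exercise (it passes images only). For completeness, a small sketch of the path the extra enables, assuming a hypothetical local clip video.mp4:

```python
# Sketch of the video branch that qwen-vl-utils[decord] enables.
# "video.mp4" is a hypothetical local file; fps controls frame sampling.
from qwen_vl_utils import process_vision_info

messages = [
    {
        "role": "user",
        "content": [
            {"type": "video", "video": "video.mp4", "fps": 1.0},
            {"type": "text", "text": "Describe this clip."},
        ],
    }
]

# With decord installed, frames are decoded and resampled here; the result
# would feed the processor's videos= argument, exactly as in app.py.
image_inputs, video_inputs = process_vision_info(messages)
print(video_inputs[0].shape)  # sampled frames, roughly (frames, channels, h, w)
```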