Nadav Eden committed on
Commit d26e194 · 1 Parent(s): b7a2f31

Added prefix editing and Hailo logo

Files changed (4)
  1. app.py +57 -21
  2. assets/hailo.png +0 -0
  3. assets/hailo_logo.gif +0 -0
  4. requirements.txt +1 -1
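
The prefix editing in the app.py diff below boils down to a small fallback: a prefix typed into the new Gradio "Prefix" textbox overrides the per-model default stored in the llms/vlms dictionaries. A minimal standalone sketch of that pattern (resolve_prefix is a hypothetical helper for illustration, not code from this commit):

    llms = {
        "Qwen2-1.5B": {
            "model": "Qwen/Qwen2-1.5B-Instruct",
            "prefix": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
        },
    }

    def resolve_prefix(model_id, prefix=None):
        # A prefix supplied from the UI wins; otherwise use the model's default.
        return prefix if prefix is not None else llms[model_id]["prefix"]

    print(resolve_prefix("Qwen2-1.5B"))                          # default system prompt
    print(resolve_prefix("Qwen2-1.5B", "You are a terse bot."))  # user-edited prefix
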
app.py CHANGED

@@ -1,11 +1,12 @@
 #!/usr/bin/env python3
 
 import gradio as gr
-from PIL import Image
 from transformers import AutoTokenizer, AutoModelForCausalLM, AutoProcessor, Qwen2VLForConditionalGeneration
 from utils import image_to_base64, rescale_bounding_boxes, draw_bounding_boxes, florence_draw_bboxes
 from qwen_vl_utils import process_vision_info
 import re
+import base64
+import os
 
 llms = {
     "Qwen2-1.5B": {"model": "Qwen/Qwen2-1.5B-Instruct", "prefix": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
@@ -13,7 +14,9 @@ llms = {
     "Qwen2-7B": {"model": "Qwen/Qwen2-7B-Instruct", "prefix": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
     "Qwen2.5-1.5B": {"model": "Qwen/Qwen2.5-1.5B-Instruct", "prefix": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
     "Qwen2.5-3B": {"model": "Qwen/Qwen2.5-3B-Instruct", "prefix": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
-    "DeepSeek-Coder": {"model": "DeepSeek/DeepSeek-Coder", "prefix": "You are a helpful assistant."},
+    "DeepSeek-Coder-1.3B": {"model": "deepseek-ai/deepseek-coder-1.3b-instruct", "prefix": "You are a helpful assistant."},
+    "DeepSeek-r1-Qwen-1.5B": {"model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "prefix": "You are a helpful assistant."},
+
 }
 
 vlms = {
@@ -26,14 +29,27 @@ vlms = {
 
 tasks = ["<OD>", "<OCR>", "<CAPTION>", "<OCR_WITH_REGION>"]
 
-def run_llm(text_input, model_id="Qwen2-1.5B"):
+def get_image_base64(image_path):
+    with open(image_path, "rb") as image_file:
+        encoded_string = base64.b64encode(image_file.read()).decode()
+    return encoded_string
+
+# At the top of your file, after imports
+current_dir = os.path.dirname(os.path.abspath(__file__))
+image_path = os.path.join(current_dir, "assets", "hailo_logo.gif")
+image_base64 = get_image_base64(image_path)
+
+def run_llm(text_input, model_id="Qwen2-1.5B", prefix=None):
     global messages
     tokenizer = AutoTokenizer.from_pretrained(llms[model_id]["model"], trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(llms[model_id]["model"], trust_remote_code=True)
 
+    # Use the provided prefix if available, otherwise fall back to the default
+    system_prefix = prefix if prefix is not None else llms[model_id]["prefix"]
+
     if messages is None:
         messages = [
-            {"role": "system", "content": llms[model_id]["prefix"]},
+            {"role": "system", "content": system_prefix},
            {"role": "user", "content": text_input},
         ]
     else:
@@ -61,7 +77,7 @@ def run_llm(text_input, model_id="Qwen2-1.5B"):
 
     return response
 
-def run_vlm(image, text_input, model_id="Qwen2-vl-2B", prompt = "<OD>"):
+def run_vlm(image, text_input, model_id="Qwen2-vl-2B", prompt="<OD>", custom_prefix=None):
     if "Qwen" in model_id:
         model = Qwen2VLForConditionalGeneration.from_pretrained(vlms[model_id]["model"], torch_dtype="auto", device_map="auto")
     else:
@@ -69,12 +85,15 @@ def run_vlm(image, text_input, model_id="Qwen2-vl-2B", prompt="<OD>"):
     processor = AutoProcessor.from_pretrained(vlms[model_id]["model"], trust_remote_code=True)
 
     if "Qwen" in model_id:
+        # Use custom prefix if provided, otherwise use default from vlms dictionary
+        prefix_to_use = custom_prefix if custom_prefix is not None else vlms[model_id]["prefix"]
+
         messages = [
             {
                 "role": "user",
                 "content": [
                     {"type": "image", "image": f"data:image;base64,{image_to_base64(image)}"},
-                    {"type": "text", "text": vlms[model_id]["prefix"]},
+                    {"type": "text", "text": prefix_to_use},
                     {"type": "text", "text": text_input},
                 ],
             }
@@ -138,28 +157,42 @@ def reset_conversation():
 
 def update_task_dropdown(model):
     if "Florence" in model:
-        return gr.Dropdown(visible=True)
-    return gr.Dropdown(visible=False)
+        return [gr.Dropdown(visible=True), gr.Textbox(value=vlms[model]["prefix"])]
+    elif model in vlms:
+        return [gr.Dropdown(visible=False), gr.Textbox(value=vlms[model]["prefix"])]
+    return [gr.Dropdown(visible=False), gr.Textbox(value="")]
+
+def update_prefix_llm(model):
+    if model in llms:
+        return gr.Textbox(value=llms[model]["prefix"], visible=True)
+    return gr.Textbox(visible=True)
 
 with gr.Blocks() as demo:
     gr.Markdown(
-        """
-        # LLM & VLM Demo
+        f"""
+        <div style="display: flex; align-items: center; gap: 10px;">
+            <img src="data:image/gif;base64,{image_base64}" height="40px" style="margin-right: 10px;">
+            <h1 style="margin: 0;">LLM & VLM Demo</h1>
+        </div>
+
         Use the different LLMs or VLMs to experience the different models.
+
+        <u>Note</u>: first use of any model will take more time, for the downloading of the weights.
         """)
     with gr.Tab(label="LLM"):
         with gr.Row():
             with gr.Column():
                 model_selector = gr.Dropdown(choices=list(llms.keys()), label="Model", value="Qwen2-1.5B")
                 text_input = gr.Textbox(label="User Prompt")
-                submit_btn = gr.Button(value="Submit")
-                reset_btn = gr.Button(value="Reset conversation")
+                prefix_input = gr.Textbox(label="Prefix", value=llms["Qwen2.5-1.5B"]["prefix"])
+                submit_btn = gr.Button(value="Submit", variant='primary')
+                reset_btn = gr.Button(value="Reset conversation", variant='stop')
             with gr.Column():
                 model_output_text = gr.Textbox(label="Model Output Text")
-
+        model_selector.change(update_prefix_llm, inputs=model_selector, outputs=prefix_input)
 
         submit_btn.click(run_llm,
-                         [text_input, model_selector],
+                         [text_input, model_selector, prefix_input],
                          [model_output_text])
 
         reset_btn.click(reset_conversation)
@@ -168,22 +201,25 @@ with gr.Blocks() as demo:
         # taken from https://huggingface.co/spaces/maxiw/Qwen2-VL-Detection/blob/main/app.py
         with gr.Row():
             with gr.Column():
-                input_img = gr.Image(label="Input Image", type="pil")
-                model_selector = gr.Dropdown(choices=list(vlms.keys()), label="Model", value="Florence-2-base")
+                input_img = gr.Image(label="Input Image", type="pil", scale=2, height=400)
+                model_selector = gr.Dropdown(choices=list(vlms.keys()), label="Model", value="Qwen2-vl-2B")
                 task_select = gr.Dropdown(choices=tasks, label="task", value= "<OD>")
                 text_input = gr.Textbox(label="User Prompt")
-                submit_btn = gr.Button(value="Submit")
+                prefix_input = gr.Textbox(label="Prefix")
+                submit_btn = gr.Button(value="Submit", variant='primary')
             with gr.Column():
                 model_output_text = gr.Textbox(label="Model Output Text")
                 parsed_boxes = gr.Textbox(label="Parsed Boxes")
-                annotated_image = gr.Image(label="Annotated Image")
+                annotated_image = gr.Image(label="Annotated Image", scale=2, height=400)
 
-        model_selector.change(update_task_dropdown, inputs=model_selector, outputs=task_select)
+        model_selector.change(update_task_dropdown,
+                              inputs=model_selector,
+                              outputs=[task_select, prefix_input])
 
 
         submit_btn.click(run_vlm,
-                         [input_img, text_input, model_selector, task_select],
-                         [model_output_text, parsed_boxes, annotated_image])
+                         [input_img, text_input, model_selector, task_select, prefix_input],
+                         [model_output_text, parsed_boxes, annotated_image])
 
 
 
assets/hailo.png ADDED
assets/hailo_logo.gif ADDED
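
The logo added above is displayed inline by base64-encoding the GIF and interpolating it into the gr.Markdown header as a data URI (see the get_image_base64 helper in the app.py diff). A standalone sketch of that encoding step, assuming it is run from the repository root where assets/hailo_logo.gif exists:

    import base64
    import os

    # Encode the committed GIF and build the same kind of data URI that
    # app.py embeds in its Markdown header.
    asset_path = os.path.join("assets", "hailo_logo.gif")
    with open(asset_path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode()
    data_uri = f"data:image/gif;base64,{encoded}"
    print(data_uri[:40] + "...")  # preview of the embedded image source
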
requirements.txt CHANGED

@@ -2,7 +2,7 @@ huggingface_hub==0.25.2
 torch
 torchvision
 transformers
-gradio
+gradio==5.23.3
 Pillow
 qwen_vl_utils
 accelerate>=0.26.0
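
With gradio now pinned, a quick sanity check after installing the requirements is to confirm the running version matches the pin (a minimal check, assuming gradio 5.23.3 is installed):

    import gradio as gr

    # Should print 5.23.3 when the pinned requirement is installed.
    print(gr.__version__)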