Spaces:

nadav-ed-26
/

vllm-playgroung

Running

App Files Files Community

Nadav Eden committed 19 days ago

Commit

d26e194

1 Parent(s): b7a2f31

Added prefix editing and Hailo logo

Browse files

Files changed (4) hide show

app.py +57 -21
assets/hailo.png +0 -0
assets/hailo_logo.gif +0 -0
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -1,11 +1,12 @@
 #!/usr/bin/env python3
 import gradio as gr
-from PIL import Image
 from transformers import AutoTokenizer, AutoModelForCausalLM, AutoProcessor, Qwen2VLForConditionalGeneration
 from utils import image_to_base64, rescale_bounding_boxes, draw_bounding_boxes, florence_draw_bboxes
 from qwen_vl_utils import process_vision_info
 import re
 llms = {
     "Qwen2-1.5B":     {"model": "Qwen/Qwen2-1.5B-Instruct", "prefix": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
@@ -13,7 +14,9 @@ llms = {
     "Qwen2-7B":       {"model": "Qwen/Qwen2-7B-Instruct",   "prefix": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
     "Qwen2.5-1.5B":   {"model": "Qwen/Qwen2.5-1.5B-Instruct", "prefix": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
     "Qwen2.5-3B":     {"model": "Qwen/Qwen2.5-3B-Instruct",   "prefix": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
-    "DeepSeek-Coder": {"model": "DeepSeek/DeepSeek-Coder", "prefix": "You are a helpful assistant."},
 }
 vlms = {
@@ -26,14 +29,27 @@ vlms = {
 tasks = ["<OD>", "<OCR>", "<CAPTION>", "<OCR_WITH_REGION>"]
-def run_llm(text_input, model_id="Qwen2-1.5B"):
     global messages
     tokenizer = AutoTokenizer.from_pretrained(llms[model_id]["model"], trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(llms[model_id]["model"], trust_remote_code=True)
     if messages is None:
         messages = [
-            {"role": "system", "content": llms[model_id]["prefix"]},
             {"role": "user", "content": text_input},
         ]
     else:
@@ -61,7 +77,7 @@ def run_llm(text_input, model_id="Qwen2-1.5B"):
     return response
-def run_vlm(image, text_input, model_id="Qwen2-vl-2B", prompt = "<OD>"):
     if "Qwen" in model_id:
         model = Qwen2VLForConditionalGeneration.from_pretrained(vlms[model_id]["model"], torch_dtype="auto", device_map="auto")
     else:
@@ -69,12 +85,15 @@ def run_vlm(image, text_input, model_id="Qwen2-vl-2B", prompt = "<OD>"):
     processor = AutoProcessor.from_pretrained(vlms[model_id]["model"], trust_remote_code=True)
     if "Qwen" in model_id:
         messages = [
             {
                 "role": "user",
                 "content": [
                     {"type": "image", "image": f"data:image;base64,{image_to_base64(image)}"},
-                    {"type": "text", "text": vlms[model_id]["prefix"]},
                     {"type": "text", "text": text_input},
                 ],
             }
@@ -138,28 +157,42 @@ def reset_conversation():
 def update_task_dropdown(model):
     if "Florence" in model:
-        return gr.Dropdown(visible=True)
-    return gr.Dropdown(visible=False)
 with gr.Blocks() as demo:
     gr.Markdown(
-    """
-    # LLM & VLM Demo
     Use the different LLMs or VLMs to experience the different models.
     """)
     with gr.Tab(label="LLM"):
         with gr.Row():
             with gr.Column():
                 model_selector = gr.Dropdown(choices=list(llms.keys()), label="Model", value="Qwen2-1.5B")
                 text_input = gr.Textbox(label="User Prompt")
-                submit_btn = gr.Button(value="Submit")
-                reset_btn = gr.Button(value="Reset conversation")
             with gr.Column():
                 model_output_text = gr.Textbox(label="Model Output Text")
         submit_btn.click(run_llm,
-                         [text_input, model_selector],
                          [model_output_text])
         reset_btn.click(reset_conversation)
@@ -168,22 +201,25 @@ with gr.Blocks() as demo:
     # taken from https://huggingface.co/spaces/maxiw/Qwen2-VL-Detection/blob/main/app.py
         with gr.Row():
             with gr.Column():
-               input_img = gr.Image(label="Input Image", type="pil")
-               model_selector = gr.Dropdown(choices=list(vlms.keys()), label="Model", value="Florence-2-base")
                task_select = gr.Dropdown(choices=tasks, label="task", value= "<OD>")
                text_input = gr.Textbox(label="User Prompt")
-               submit_btn = gr.Button(value="Submit")
             with gr.Column():
                 model_output_text = gr.Textbox(label="Model Output Text")
                 parsed_boxes = gr.Textbox(label="Parsed Boxes")
-                annotated_image = gr.Image(label="Annotated Image")
-            model_selector.change(update_task_dropdown, inputs=model_selector, outputs=task_select)
         submit_btn.click(run_vlm,
-                         [input_img, text_input, model_selector, task_select],
-                         [model_output_text, parsed_boxes, annotated_image])

 #!/usr/bin/env python3
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, AutoProcessor, Qwen2VLForConditionalGeneration
 from utils import image_to_base64, rescale_bounding_boxes, draw_bounding_boxes, florence_draw_bboxes
 from qwen_vl_utils import process_vision_info
 import re
+import base64
+import os
 llms = {
     "Qwen2-1.5B":     {"model": "Qwen/Qwen2-1.5B-Instruct", "prefix": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
     "Qwen2-7B":       {"model": "Qwen/Qwen2-7B-Instruct",   "prefix": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
     "Qwen2.5-1.5B":   {"model": "Qwen/Qwen2.5-1.5B-Instruct", "prefix": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
     "Qwen2.5-3B":     {"model": "Qwen/Qwen2.5-3B-Instruct",   "prefix": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
+    "DeepSeek-Coder-1.3B": {"model": "deepseek-ai/deepseek-coder-1.3b-instruct", "prefix": "You are a helpful assistant."},
+    "DeepSeek-r1-Qwen-1.5B": {"model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "prefix": "You are a helpful assistant."},
 }
 vlms = {
 tasks = ["<OD>", "<OCR>", "<CAPTION>", "<OCR_WITH_REGION>"]
+def get_image_base64(image_path):
+    with open(image_path, "rb") as image_file:
+        encoded_string = base64.b64encode(image_file.read()).decode()
+    return encoded_string
+# Read the Hailo logo once at import time so it can be embedded inline (base64) in the page header.
+current_dir = os.path.dirname(os.path.abspath(__file__))
+image_path = os.path.join(current_dir, "assets", "hailo_logo.gif")
+image_base64 = get_image_base64(image_path)
+def run_llm(text_input, model_id="Qwen2-1.5B", prefix=None):
     global messages
     tokenizer = AutoTokenizer.from_pretrained(llms[model_id]["model"], trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(llms[model_id]["model"], trust_remote_code=True)
+    # Use the provided prefix if available, otherwise fall back to the default
+    system_prefix = prefix if prefix is not None else llms[model_id]["prefix"]
     if messages is None:
         messages = [
+            {"role": "system", "content": system_prefix},
             {"role": "user", "content": text_input},
         ]
     else:
     return response
+def run_vlm(image, text_input, model_id="Qwen2-vl-2B", prompt="<OD>", custom_prefix=None):
     if "Qwen" in model_id:
         model = Qwen2VLForConditionalGeneration.from_pretrained(vlms[model_id]["model"], torch_dtype="auto", device_map="auto")
     else:
     processor = AutoProcessor.from_pretrained(vlms[model_id]["model"], trust_remote_code=True)
     if "Qwen" in model_id:
+        # Use custom prefix if provided, otherwise use default from vlms dictionary
+        prefix_to_use = custom_prefix if custom_prefix is not None else vlms[model_id]["prefix"]
         messages = [
             {
                 "role": "user",
                 "content": [
                     {"type": "image", "image": f"data:image;base64,{image_to_base64(image)}"},
+                    {"type": "text", "text": prefix_to_use},
                     {"type": "text", "text": text_input},
                 ],
             }
 def update_task_dropdown(model):
     if "Florence" in model:
+        return [gr.Dropdown(visible=True), gr.Textbox(value=vlms[model]["prefix"])]
+    elif model in vlms:
+        return [gr.Dropdown(visible=False), gr.Textbox(value=vlms[model]["prefix"])]
+    return [gr.Dropdown(visible=False), gr.Textbox(value="")]
+def update_prefix_llm(model):
+    if model in llms:
+        return gr.Textbox(value=llms[model]["prefix"], visible=True)
+    return gr.Textbox(visible=True)
 with gr.Blocks() as demo:
     gr.Markdown(
+    f"""
+    <div style="display: flex; align-items: center; gap: 10px;">
+        <img src="data:image/gif;base64,{image_base64}" height="40px" style="margin-right: 10px;">
+        <h1 style="margin: 0;">LLM & VLM Demo</h1>
+    </div>
     Use the different LLMs or VLMs to experience the different models.
+    <u>Note</u>: first use of any model will take more time, for the downloading of the weights.
     """)
     with gr.Tab(label="LLM"):
         with gr.Row():
             with gr.Column():
                 model_selector = gr.Dropdown(choices=list(llms.keys()), label="Model", value="Qwen2-1.5B")
                 text_input = gr.Textbox(label="User Prompt")
+                prefix_input = gr.Textbox(label="Prefix", value=llms["Qwen2.5-1.5B"]["prefix"])
+                submit_btn = gr.Button(value="Submit", variant='primary')
+                reset_btn = gr.Button(value="Reset conversation", variant='stop')
             with gr.Column():
                 model_output_text = gr.Textbox(label="Model Output Text")
+            model_selector.change(update_prefix_llm, inputs=model_selector, outputs=prefix_input)
         submit_btn.click(run_llm,
+                         [text_input, model_selector, prefix_input],
                          [model_output_text])
         reset_btn.click(reset_conversation)
     # taken from https://huggingface.co/spaces/maxiw/Qwen2-VL-Detection/blob/main/app.py
         with gr.Row():
             with gr.Column():
+               input_img = gr.Image(label="Input Image", type="pil", scale=2, height=400)
+               model_selector = gr.Dropdown(choices=list(vlms.keys()), label="Model", value="Qwen2-vl-2B")
                task_select = gr.Dropdown(choices=tasks, label="task", value= "<OD>")
                text_input = gr.Textbox(label="User Prompt")
+               prefix_input = gr.Textbox(label="Prefix")
+               submit_btn = gr.Button(value="Submit", variant='primary')
             with gr.Column():
                 model_output_text = gr.Textbox(label="Model Output Text")
                 parsed_boxes = gr.Textbox(label="Parsed Boxes")
+                annotated_image = gr.Image(label="Annotated Image", scale=2, height=400)
+            model_selector.change(update_task_dropdown,
+                                inputs=model_selector,
+                                outputs=[task_select, prefix_input])
         submit_btn.click(run_vlm,
+                        [input_img, text_input, model_selector, task_select, prefix_input],
+                        [model_output_text, parsed_boxes, annotated_image])

assets/hailo.png ADDED Viewed

assets/hailo_logo.gif ADDED Viewed

requirements.txt CHANGED Viewed

@@ -2,7 +2,7 @@ huggingface_hub==0.25.2
 torch
 torchvision
 transformers
-gradio
 Pillow
 qwen_vl_utils
 accelerate>=0.26.0

 torch
 torchvision
 transformers
+gradio==5.23.3
 Pillow
 qwen_vl_utils
 accelerate>=0.26.0