Spaces:
Running
on
Zero
Running
on
Zero
hugohabicht01
committed on
Commit
·
b2fb4ce
1
Parent(s):
3de588c
make it possible to edit system prompt
Browse files
app.py
CHANGED
@@ -20,9 +20,7 @@ MAX_NEW_TOKENS = 2048
|
|
20 |
TEMPERATURE = 1.0
|
21 |
MIN_P = 0.1
|
22 |
SYSTEM_PROMPT = """You are a helpful assistant for privacy analysis of images. Please always answer in English. Please obey the users instructions and follow the provided format."""
|
23 |
-
DEFAULT_PROMPT = """
|
24 |
-
|
25 |
-
You are an expert at pixel perfect image analysis and in privacy. Your task is to find all private data in the image and report its position, as well as explanations as to why it is private data. Private data is all data that relates to a unique person and can be used to identify them.
|
26 |
|
27 |
First write down your thoughts within a <think> block.
|
28 |
Please go through all objects in the image and consider whether they are private data or not.
|
@@ -52,9 +50,11 @@ Here is the image to analyse, start your analysis directly after:
|
|
52 |
"""
|
53 |
|
54 |
|
55 |
-
def build_messages(image, history: Optional[list[dict[str, Any]]] = None, prompt: Optional[str] = None):
|
56 |
if not prompt:
|
57 |
prompt = DEFAULT_PROMPT
|
|
|
|
|
58 |
|
59 |
if history:
|
60 |
return [
|
@@ -68,7 +68,7 @@ def build_messages(image, history: Optional[list[dict[str, Any]]] = None, prompt
|
|
68 |
"content": [
|
69 |
{
|
70 |
"type": "text",
|
71 |
-
"text":
|
72 |
}
|
73 |
],
|
74 |
},
|
@@ -118,7 +118,7 @@ def anonymise_image(input_image_np: np.ndarray, boxes: list[BoundingBox]):
|
|
118 |
raise gr.Error(f"Failed to initialize Blurnonymizer. Check logs. Error: {e}")
|
119 |
|
120 |
|
121 |
-
def run_model_inference(input_image_pil: Image.Image, prompt_text: str):
|
122 |
"""
|
123 |
Runs model inference on the input image and prompt.
|
124 |
"""
|
@@ -127,7 +127,9 @@ def run_model_inference(input_image_pil: Image.Image, prompt_text: str):
|
|
127 |
print("Running model inference...")
|
128 |
messages = build_messages(
|
129 |
input_image_pil,
|
130 |
-
prompt=prompt_text
|
|
|
|
|
131 |
|
132 |
input_text = tokenizer.apply_chat_template(
|
133 |
messages, tokenize=False, add_generation_prompt=True
|
@@ -176,7 +178,7 @@ def run_model_inference(input_image_pil: Image.Image, prompt_text: str):
|
|
176 |
|
177 |
|
178 |
@spaces.GPU(duration=90) # Request GPU for this function, allow up to 120 seconds
|
179 |
-
def analyze_image(input_image_pil: Image.Image, prompt_text: str):
|
180 |
"""
|
181 |
Analyzes the input image using the VLM, visualizes findings, and anonymizes.
|
182 |
"""
|
@@ -184,9 +186,11 @@ def analyze_image(input_image_pil: Image.Image, prompt_text: str):
|
|
184 |
raise gr.Error("Please upload an image.")
|
185 |
if not prompt_text:
|
186 |
raise gr.Error("Please provide a prompt.")
|
|
|
|
|
187 |
|
188 |
try:
|
189 |
-
raw_model_output, image_height, image_width = run_model_inference(input_image_pil, prompt_text)
|
190 |
except Exception as e:
|
191 |
print(f"Error during model inference: {e}")
|
192 |
print(traceback.format_exc())
|
@@ -196,6 +200,7 @@ def analyze_image(input_image_pil: Image.Image, prompt_text: str):
|
|
196 |
|
197 |
return raw_model_output, visualized_image_np, anonymized_image_np
|
198 |
|
|
|
199 |
@spaces.GPU(duration=90)
|
200 |
def perform_anonymisation(input_image_pil: Image.Image, raw_model_output: str) -> tuple[np.ndarray, np.ndarray]:
|
201 |
original_image_np = np.array(input_image_pil)
|
@@ -276,6 +281,7 @@ with gr.Blocks() as demo:
|
|
276 |
with gr.Row():
|
277 |
with gr.Column(scale=1):
|
278 |
input_image = gr.Image(type="pil", label="Upload Image")
|
|
|
279 |
prompt_textbox = gr.Textbox(
|
280 |
label="Analysis Prompt", value=DEFAULT_PROMPT, lines=4
|
281 |
)
|
@@ -295,7 +301,7 @@ with gr.Blocks() as demo:
|
|
295 |
|
296 |
analyze_button.click(
|
297 |
fn=analyze_image,
|
298 |
-
inputs=[input_image, prompt_textbox],
|
299 |
outputs=[raw_output, output_visualized, output_anonymized],
|
300 |
)
|
301 |
|
|
|
20 |
TEMPERATURE = 1.0
|
21 |
MIN_P = 0.1
|
22 |
SYSTEM_PROMPT = """You are a helpful assistant for privacy analysis of images. Please always answer in English. Please obey the users instructions and follow the provided format."""
|
23 |
+
DEFAULT_PROMPT = """You are an expert at pixel perfect image analysis and in privacy. Your task is to find all private data in the image and report its position, as well as explanations as to why it is private data. Private data is all data that relates to a unique person and can be used to identify them.
|
|
|
|
|
24 |
|
25 |
First write down your thoughts within a <think> block.
|
26 |
Please go through all objects in the image and consider whether they are private data or not.
|
|
|
50 |
"""
|
51 |
|
52 |
|
53 |
+
def build_messages(image, history: Optional[list[dict[str, Any]]] = None, prompt: Optional[str] = None, system_prompt_text: Optional[str] = None):
|
54 |
if not prompt:
|
55 |
prompt = DEFAULT_PROMPT
|
56 |
+
if not system_prompt_text:
|
57 |
+
system_prompt_text = SYSTEM_PROMPT # Fallback if not provided
|
58 |
|
59 |
if history:
|
60 |
return [
|
|
|
68 |
"content": [
|
69 |
{
|
70 |
"type": "text",
|
71 |
+
"text": system_prompt_text, # Use the passed system prompt
|
72 |
}
|
73 |
],
|
74 |
},
|
|
|
118 |
raise gr.Error(f"Failed to initialize Blurnonymizer. Check logs. Error: {e}")
|
119 |
|
120 |
|
121 |
+
def run_model_inference(input_image_pil: Image.Image, prompt_text: str, system_prompt_text: str):
|
122 |
"""
|
123 |
Runs model inference on the input image and prompt.
|
124 |
"""
|
|
|
127 |
print("Running model inference...")
|
128 |
messages = build_messages(
|
129 |
input_image_pil,
|
130 |
+
prompt=prompt_text,
|
131 |
+
system_prompt_text=system_prompt_text # Pass system prompt here
|
132 |
+
)
|
133 |
|
134 |
input_text = tokenizer.apply_chat_template(
|
135 |
messages, tokenize=False, add_generation_prompt=True
|
|
|
178 |
|
179 |
|
180 |
@spaces.GPU(duration=90) # Request GPU for this function, allow up to 120 seconds
|
181 |
+
def analyze_image(input_image_pil: Image.Image, prompt_text: str, system_prompt_text: str):
|
182 |
"""
|
183 |
Analyzes the input image using the VLM, visualizes findings, and anonymizes.
|
184 |
"""
|
|
|
186 |
raise gr.Error("Please upload an image.")
|
187 |
if not prompt_text:
|
188 |
raise gr.Error("Please provide a prompt.")
|
189 |
+
if not system_prompt_text:
|
190 |
+
raise gr.Error("Please provide a system prompt.") # Added check
|
191 |
|
192 |
try:
|
193 |
+
raw_model_output, image_height, image_width = run_model_inference(input_image_pil, prompt_text, system_prompt_text) # Pass system prompt
|
194 |
except Exception as e:
|
195 |
print(f"Error during model inference: {e}")
|
196 |
print(traceback.format_exc())
|
|
|
200 |
|
201 |
return raw_model_output, visualized_image_np, anonymized_image_np
|
202 |
|
203 |
+
|
204 |
@spaces.GPU(duration=90)
|
205 |
def perform_anonymisation(input_image_pil: Image.Image, raw_model_output: str) -> tuple[np.ndarray, np.ndarray]:
|
206 |
original_image_np = np.array(input_image_pil)
|
|
|
281 |
with gr.Row():
|
282 |
with gr.Column(scale=1):
|
283 |
input_image = gr.Image(type="pil", label="Upload Image")
|
284 |
+
system_prompt_input = gr.Textbox(label="System Prompt", value=SYSTEM_PROMPT, lines=5, interactive=True) # New system prompt input
|
285 |
prompt_textbox = gr.Textbox(
|
286 |
label="Analysis Prompt", value=DEFAULT_PROMPT, lines=4
|
287 |
)
|
|
|
301 |
|
302 |
analyze_button.click(
|
303 |
fn=analyze_image,
|
304 |
+
inputs=[input_image, prompt_textbox, system_prompt_input], # Add system_prompt_input here
|
305 |
outputs=[raw_output, output_visualized, output_anonymized],
|
306 |
)
|
307 |
|
utils.py
CHANGED
@@ -361,7 +361,7 @@ def visualize_boxes_annotated(image: np.ndarray | Image.Image, boxes: list[Bound
|
|
361 |
ax.add_patch(rect)
|
362 |
|
363 |
# Add label text above the box
|
364 |
-
ax.text(x_min, y_min-5, label, color=color, fontsize=
|
365 |
bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'))
|
366 |
|
367 |
# Instead of displaying, save to numpy array
|
|
|
361 |
ax.add_patch(rect)
|
362 |
|
363 |
# Add label text above the box
|
364 |
+
ax.text(x_min, y_min-5, label, color=color, fontsize=4,
|
365 |
bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'))
|
366 |
|
367 |
# Instead of displaying, save to numpy array
|