Lohia, Aditya committed
Commit 7bf4f09 · 1 Parent(s): 643a5ff
Files changed (1)
  1. app.py +53 -11
app.py CHANGED
@@ -4,7 +4,6 @@ import re
 import gradio as gr
 import base64
 import io
-import json
 from PIL import Image
 from typing import Iterator
 
@@ -49,6 +48,8 @@ def validate_media(message: str, chat_history: list = None) -> bool:
         bool: True if the number of image files is less than or equal to MAX_NUM_IMAGES, False otherwise
     """
     image_count = sum(1 for path in message["files"])
+    # Count any <image> tags in the prompt toward the image total
+    image_count += message["text"].count("<image>")
 
     if image_count > MAX_NUM_IMAGES:
         gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images at a time.")
@@ -59,6 +60,7 @@ def validate_media(message: str, chat_history: list = None) -> bool:
         file.lower().endswith((".png", ".jpg", ".jpeg")) for file in message["files"]
     ):
         gr.Warning("Only images are allowed. Format available: PNG, JPG, JPEG")
+        return False
     return True
 
 
@@ -129,6 +131,20 @@ def process_images(message: list) -> list[dict]:
     return content
 
 
+def extract_image_urls_from_tags(message):
+    """Extract image URLs from the <image> tags in the message text.
+    Args:
+        message (str): message text containing <image> tags
+    Returns:
+        list[str]: list of image URLs extracted from the <image> tags
+    """
+    # Extract the contents of all <image>...</image> tags using a regex
+    image_urls = re.findall(r"<image>(.*?)</image>", message, re.IGNORECASE | re.DOTALL)
+    # Basic cleanup: strip whitespace from the extracted URLs
+    image_urls = [url.strip() for url in image_urls]
+    return image_urls
+
+
 def process_new_user_message(message: dict) -> list[dict]:
     """Process the new user message and return a list of dictionaries containing text and image content.
     Args:
@@ -140,18 +156,32 @@ def process_new_user_message(message: dict) -> list[dict]:
     messages = []
 
     if message["text"]:
-        # Append the text part to the content list
-        messages.append({"type": "text", "text": message["text"]})
-
-        if not message["files"]:
-            # If there are no files, return the text part only
-            return messages
-        else:
+        # Remove the <image> tags from the message text
+        prompt = re.sub(
+            r"<image>.*?</image>", "", message["text"], flags=re.DOTALL | re.IGNORECASE
+        ).strip()
+        # If the message text is empty after removing <image> tags, warn and return an empty list
+        if not prompt:
+            gr.Warning("Please insert a prompt.")
+            return []
+        # Otherwise, append the cleaned prompt to the content list
+        messages.append({"type": "text", "text": prompt})
+
+        # Process image URLs supplied within <image> tags
+        image_urls = extract_image_urls_from_tags(message["text"])
+        for url in image_urls:
+            if not url or not url.lower().startswith(("http://", "https://")):
+                continue
+            # Append the image URL to the content list
+            messages.append({"type": "image_url", "image_url": {"url": url}})
+
+        if message["files"]:
             # If there are files, process the images
             image_content = process_images(message["files"])
             # Append the image content to the messages list
             messages.extend(image_content)
-        return messages
+
+        return messages
     else:
         # If there are no text parts, throw a gr.Warning to insert prompt and return nothing
         gr.Warning("Please insert a prompt.")
@@ -300,6 +330,17 @@ examples = [
     ],
 ]
 
+description = f"""
+<div style="text-align: center;">
+    <p style="font-size: 1.1em;">
+        This Space is an Alpha release that demonstrates the <a href="https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" target="_blank">Llama-4-Maverick</a> model running on AMD MI300 infrastructure. The Space is built under the Meta Llama 4 <a href="https://www.llama.com/llama4/license/" target="_blank">License</a>. Feel free to play with it!
+    </p>
+    <p style="font-size:0.9em; font-style: italic; margin-top: 0; margin-bottom:0;">
+        You can interleave up to {MAX_NUM_IMAGES} images using the upload button or by entering URLs within <code>&lt;image&gt;URL&lt;/image&gt;</code> tags in the prompt.
+    </p>
+</div>
+"""
+
 
 demo = gr.ChatInterface(
     fn=run,
@@ -309,6 +350,7 @@ demo = gr.ChatInterface(
         file_types=["image"],
         file_count="single" if MAX_NUM_IMAGES == 1 else "multiple",
         autofocus=True,
+        placeholder="Type a message, drop PNG/JPEG images, or use <image>URL</image>...",
     ),
     multimodal=True,
     additional_inputs=[
@@ -348,8 +390,8 @@ demo = gr.ChatInterface(
         ),
     ],
     stop_btn=False,
-    title="Llama-4 Scout Instruct",
-    description="This Space is an Alpha release that demonstrates [Llama-4-Scout](https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E) model running on AMD MI300 infrastructure. The space is built with Meta Llama 4 [License](https://www.llama.com/llama4/license/). Feel free to play with it!",
+    title="Llama-4 Maverick Instruct",
+    description=description,
     fill_height=True,
     run_examples_on_click=False,
     examples=examples,
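
As a quick sanity check of the new <image> tag flow, the following is a minimal standalone sketch that mirrors the tag extraction and message assembly introduced by this commit, outside of Gradio. The MAX_NUM_IMAGES value, the sample_message dict, and the use of plain asserts/prints instead of gr.Warning are illustrative assumptions, not part of app.py.

import re

MAX_NUM_IMAGES = 5  # assumption for illustration; app.py defines its own limit


def extract_image_urls_from_tags(message: str) -> list[str]:
    # Same regex as in the diff: capture whatever sits between <image> and </image>
    image_urls = re.findall(r"<image>(.*?)</image>", message, re.IGNORECASE | re.DOTALL)
    return [url.strip() for url in image_urls]


def build_content(message: dict) -> list[dict]:
    # Mirrors the updated process_new_user_message logic, minus the Gradio warnings
    prompt = re.sub(
        r"<image>.*?</image>", "", message["text"], flags=re.DOTALL | re.IGNORECASE
    ).strip()
    if not prompt:
        return []
    content = [{"type": "text", "text": prompt}]
    for url in extract_image_urls_from_tags(message["text"]):
        if url.lower().startswith(("http://", "https://")):
            content.append({"type": "image_url", "image_url": {"url": url}})
    return content


# Hypothetical user turn mixing a prompt with one inline image URL and no uploads
sample_message = {
    "text": "What is shown here? <image> https://example.com/cat.png </image>",
    "files": [],
}

# The <image> tag also counts toward the image limit, as in the updated validate_media
image_count = len(sample_message["files"]) + sample_message["text"].count("<image>")
assert image_count <= MAX_NUM_IMAGES

print(build_content(sample_message))
# [{'type': 'text', 'text': 'What is shown here?'},
#  {'type': 'image_url', 'image_url': {'url': 'https://example.com/cat.png'}}]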