Lohia, Aditya committed
Commit 7bf4f09 · 1 Parent(s): 643a5ff
Files changed (1)
  1. app.py +53 -11
app.py CHANGED
@@ -4,7 +4,6 @@ import re
 import gradio as gr
 import base64
 import io
-import json
 from PIL import Image
 from typing import Iterator
 
@@ -49,6 +48,8 @@ def validate_media(message: str, chat_history: list = None) -> bool:
         bool: True if the number of image files is less than or equal to MAX_NUM_IMAGES, False otherwise
     """
     image_count = sum(1 for path in message["files"])
+    # Count any <image> tags in the prompt toward the image total
+    image_count += message["text"].count("<image>")
 
     if image_count > MAX_NUM_IMAGES:
         gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images at a time.")
@@ -59,6 +60,7 @@ def validate_media(message: str, chat_history: list = None) -> bool:
         file.lower().endswith((".png", ".jpg", ".jpeg")) for file in message["files"]
     ):
         gr.Warning("Only images are allowed. Format available: PNG, JPG, JPEG")
+        return False
     return True
 
 
@@ -129,6 +131,20 @@ def process_images(message: list) -> list[dict]:
     return content
 
 
+def extract_image_urls_from_tags(message):
+    """Extract image URLs from the <image> tags in the message text.
+    Args:
+        message (str): message text containing <image> tags
+    Returns:
+        list[str]: list of image URLs extracted from the <image> tags
+    """
+    # Extract the contents of all <image>...</image> tags using a regex
+    image_urls = re.findall(r"<image>(.*?)</image>", message, re.IGNORECASE | re.DOTALL)
+    # Basic cleanup: strip whitespace from the extracted URLs
+    image_urls = [url.strip() for url in image_urls]
+    return image_urls
+
+
 def process_new_user_message(message: dict) -> list[dict]:
     """Process the new user message and return a list of dictionaries containing text and image content.
     Args:
@@ -140,18 +156,32 @@ def process_new_user_message(message: dict) -> list[dict]:
     messages = []
 
     if message["text"]:
-        # Append the text part to the content list
-        messages.append({"type": "text", "text": message["text"]})
-
-        if not message["files"]:
-            # If there are no files, return the text part only
-            return messages
-        else:
+        # Remove the <image> tags from the message text
+        prompt = re.sub(
+            r"<image>.*?</image>", "", message["text"], flags=re.DOTALL | re.IGNORECASE
+        ).strip()
+        # If the message text is empty after removing <image> tags, warn and return an empty list
+        if not prompt:
+            gr.Warning("Please insert a prompt.")
+            return []
+        # Otherwise, append the cleaned prompt to the content list
+        messages.append({"type": "text", "text": prompt})
+
+        # Process image URLs supplied within <image> tags
+        image_urls = extract_image_urls_from_tags(message["text"])
+        for url in image_urls:
+            if not url or not url.lower().startswith(("http://", "https://")):
+                continue
+            # Append the image URL to the content list
+            messages.append({"type": "image_url", "image_url": {"url": url}})
+
+        if message["files"]:
             # If there are files, process the images
             image_content = process_images(message["files"])
             # Append the image content to the messages list
             messages.extend(image_content)
-        return messages
+
+        return messages
     else:
         # If there are no text parts, throw a gr.Warning to insert prompt and return nothing
         gr.Warning("Please insert a prompt.")
@@ -300,6 +330,17 @@ examples = [
     ],
 ]
 
+description = f"""
+<div style="text-align: center;">
+    <p style="font-size: 1.1em;">
+        This Space is an Alpha release that demonstrates the <a href="https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" target="_blank">Llama-4-Maverick</a> model running on AMD MI300 infrastructure. The Space is built under the Meta Llama 4 <a href="https://www.llama.com/llama4/license/" target="_blank">License</a>. Feel free to play with it!
+    </p>
+    <p style="font-size:0.9em; font-style: italic; margin-top: 0; margin-bottom:0;">
+        You can interleave up to {MAX_NUM_IMAGES} images using the upload button or by entering URLs within <code>&lt;image&gt;URL&lt;/image&gt;</code> tags in the prompt.
+    </p>
+</div>
+"""
+
 
 demo = gr.ChatInterface(
     fn=run,
@@ -309,6 +350,7 @@ demo = gr.ChatInterface(
         file_types=["image"],
         file_count="single" if MAX_NUM_IMAGES == 1 else "multiple",
         autofocus=True,
+        placeholder="Type a message, drop PNG/JPEG images, or use <image>URL</image>...",
     ),
     multimodal=True,
     additional_inputs=[
@@ -348,8 +390,8 @@ demo = gr.ChatInterface(
         ),
     ],
     stop_btn=False,
-    title="Llama-4 Scout Instruct",
-    description="This Space is an Alpha release that demonstrates [Llama-4-Scout](https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E) model running on AMD MI300 infrastructure. The space is built with Meta Llama 4 [License](https://www.llama.com/llama4/license/). Feel free to play with it!",
+    title="Llama-4 Maverick Instruct",
+    description=description,
     fill_height=True,
     run_examples_on_click=False,
     examples=examples,
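
As a quick sanity check of the new <image> tag flow, the following is a minimal standalone sketch that mirrors the tag extraction and message assembly introduced by this commit, outside of Gradio. The MAX_NUM_IMAGES value, the sample_message dict, and the use of plain asserts/prints instead of gr.Warning are illustrative assumptions, not part of app.py.

import re

MAX_NUM_IMAGES = 5  # assumption for illustration; app.py defines its own limit


def extract_image_urls_from_tags(message: str) -> list[str]:
    # Same regex as in the diff: capture whatever sits between <image> and </image>
    image_urls = re.findall(r"<image>(.*?)</image>", message, re.IGNORECASE | re.DOTALL)
    return [url.strip() for url in image_urls]


def build_content(message: dict) -> list[dict]:
    # Mirrors the updated process_new_user_message logic, minus the Gradio warnings
    prompt = re.sub(
        r"<image>.*?</image>", "", message["text"], flags=re.DOTALL | re.IGNORECASE
    ).strip()
    if not prompt:
        return []
    content = [{"type": "text", "text": prompt}]
    for url in extract_image_urls_from_tags(message["text"]):
        if url.lower().startswith(("http://", "https://")):
            content.append({"type": "image_url", "image_url": {"url": url}})
    return content


# Hypothetical user turn mixing a prompt with one inline image URL and no uploads
sample_message = {
    "text": "What is shown here? <image> https://example.com/cat.png </image>",
    "files": [],
}

# The <image> tag also counts toward the image limit, as in the updated validate_media
image_count = len(sample_message["files"]) + sample_message["text"].count("<image>")
assert image_count <= MAX_NUM_IMAGES

print(build_content(sample_message))
# [{'type': 'text', 'text': 'What is shown here?'},
#  {'type': 'image_url', 'image_url': {'url': 'https://example.com/cat.png'}}]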