liuguilin committed
Commit 4b798bc · 1 Parent(s): 9fde336
Files changed (1)
  1. eagle_vl/serve/inference.py +11 -4
eagle_vl/serve/inference.py CHANGED
@@ -109,6 +109,8 @@ You understand you are Eagle 2, and may refer to yourself as such when asked."""
     # converstion = get_conv_template(sft_format)
     # only use the last 3 round of messages
     # latest_messages = messages[-3:]
+
+    all_images_num = 0
     for mid, message in enumerate(messages):
         if message["role"] == "user":
             record = {
@@ -118,21 +120,24 @@ You understand you are Eagle 2, and may refer to yourself as such when asked."""
             if "images" in message:
                 per_round_images = message["images"]
                 for image in per_round_images:
-                    if isinstance(image, Image.Image):
+
+                    if isinstance(image, Image.Image) and all_images_num < 128:
                         record["content"].append(
                             {
                                 "type": "image",
                                 "image": image,
                             }
                         )
-                    elif isinstance(image, str) and image.endswith((".jpeg", ".jpg", ".png", ".gif")):
+                        all_images_num+=1
+                    elif isinstance(image, str) and image.endswith((".jpeg", ".jpg", ".png", ".gif")) and all_images_num < 128:
                         record["content"].append(
                             {
                                 "type": "image",
                                 "image": image,
                             }
                         )
-                    elif isinstance(image, str) and image.endswith((".mp4", ".mov", ".avi", ".webm")):
+                        all_images_num+=1
+                    elif isinstance(image, str) and image.endswith((".mp4", ".mov", ".avi", ".webm")) and all_images_num < 128-video_nframes:
                         record["content"].append(
                             {
                                 "type": "video",
@@ -140,6 +145,7 @@ You understand you are Eagle 2, and may refer to yourself as such when asked."""
                                 "nframes": video_nframes,
                             }
                         )
+                        all_images_num+=video_nframes
             if 'content' in message:
                 record["content"].append(
                     {
@@ -171,7 +177,8 @@ You understand you are Eagle 2, and may refer to yourself as such when asked."""
            assert (
                formatted_answer.count(processor.image_token) == 0
            ), f"there should be no {processor.image_token} in the assistant's reply, but got {messages}"
-
+
+
    # print(f"messages = {results}")
    text = processor.apply_chat_template(results, add_generation_prompt=False)
    # print(f"raw text = {text}")