Spaces:

lbw18601752667
/

IDMR-demo

Sleeping

App Files Files Community

liubangwei committed on Mar 25

Commit

a72a7d4

1 Parent(s): 94a930c

init IDMR demo

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

app.py +194 -0
image_embeddings.pkl +3 -0
imgs/candidates/000000007574.jpg +0 -0
imgs/candidates/000000009448.jpg +0 -0
imgs/candidates/000000014007.jpg +0 -0
imgs/candidates/000000021839.jpg +0 -0
imgs/candidates/000000022892.jpg +0 -0
imgs/candidates/000000024610.jpg +0 -0
imgs/candidates/000000025593.jpg +0 -0
imgs/candidates/000000044068.jpg +0 -0
imgs/candidates/000000084362.jpg +0 -0
imgs/candidates/000000098839.jpg +0 -0
imgs/candidates/000000107339.jpg +0 -0
imgs/candidates/000000144333.jpg +0 -0
imgs/candidates/000000159791.jpg +0 -0
imgs/candidates/000000168593.jpg +0 -0
imgs/candidates/000000182155.jpg +0 -0
imgs/candidates/000000186449.jpg +0 -0
imgs/candidates/000000191845.jpg +0 -0
imgs/candidates/000000210299.jpg +0 -0
imgs/candidates/000000221708.jpg +0 -0
imgs/candidates/000000223747.jpg +0 -0
imgs/candidates/000000226111.jpg +0 -0
imgs/candidates/000000226984.jpg +0 -0
imgs/candidates/000000252294.jpg +0 -0
imgs/candidates/000000256941.jpg +0 -0
imgs/candidates/000000280710.jpg +0 -0
imgs/candidates/000000281179.jpg +0 -0
imgs/candidates/000000283717.jpg +0 -0
imgs/candidates/000000284445.jpg +0 -0
imgs/candidates/000000287649.jpg +0 -0
imgs/candidates/000000289343.jpg +0 -0
imgs/candidates/000000295809.jpg +0 -0
imgs/candidates/000000334371.jpg +0 -0
imgs/candidates/000000350054.jpg +0 -0
imgs/candidates/000000361621.jpg +0 -0
imgs/candidates/000000369503.jpg +0 -0
imgs/candidates/000000384661.jpg +0 -0
imgs/candidates/000000385997.jpg +0 -0
imgs/candidates/000000398377.jpg +0 -0
imgs/candidates/000000402473.jpg +0 -0
imgs/candidates/000000426166.jpg +0 -0
imgs/candidates/000000441247.jpg +0 -0
imgs/candidates/000000455157.jpg +0 -0
imgs/candidates/000000492077.jpg +0 -0
imgs/candidates/000000496854.jpg +0 -0
imgs/candidates/000000501523.jpg +0 -0
imgs/candidates/000000530099.jpg +0 -0
imgs/candidates/000000530162.jpg +0 -0
imgs/candidates/000000530836.jpg +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,194 @@

+import gradio as gr
+import os
+from PIL import Image
+import numpy as np
+import torch
+import pickle
+from transformers import AutoProcessor
+from src.model import MMEBModel
+from src.arguments import ModelArguments
+# 假设图片库存储在本地文件夹中
+QUERY_DIR = "imgs/queries"
+IMAGE_DIR = "imgs/candidates"
+# IMAGE_DIR = "imgs"
+image_paths = [os.path.join(IMAGE_DIR, f) for f in os.listdir(IMAGE_DIR) if f.endswith((".jpg", ".png"))]
+global IMAGE_TOKEN, TOP_N
+IMAGE_TOKEN = "<|image_1|>"
+TOP_N = 5
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"device: {device}")
+# 模型加载和初始化
+def load_model():
+    global IMAGE_TOKEN
+    # 模型参数
+    model_args = ModelArguments(
+        # model_name="/fs-computility/ai-shen/kilab-shared/liubangwei/ckpt/IDMR/IDMR_InternVL2_5-2B",  # 替换为你的模型名称
+        model_name="/fs-computility/ai-shen/kilab-shared/liubangwei/ckpt/my_hf/IDMR-2B",
+        model_backbone="internvl_2_5",       # 替换为你的模型 backbone
+    )
+    # 加载处理器
+    if model_args.model_backbone == "phi35v":
+        processor = AutoProcessor.from_pretrained(
+            model_args.model_name,
+            trust_remote_code=True,
+            num_crops=model_args.num_crops,
+        )
+        processor.tokenizer.padding_side = "right"
+    elif model_args.model_backbone == "internvl_2_5":
+        from src.vlm_backbone.intern_vl import InternVLProcessor
+        from transformers import AutoTokenizer, AutoImageProcessor
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_args.model_name,
+            trust_remote_code=True
+        )
+        image_processor = AutoImageProcessor.from_pretrained(
+            model_args.model_name,
+            trust_remote_code=True,
+            use_fast=False
+        )
+        processor = InternVLProcessor(
+            image_processor=image_processor,
+            tokenizer=tokenizer
+        )
+        IMAGE_TOKEN = "<image>"
+    # 加载模型
+    model = MMEBModel.load(model_args)
+    model = model.to(device, dtype=torch.bfloat16)
+    model.eval()
+    return model, processor
+# Load the model and processor (runs once, when the module is executed).
+model, processor = load_model()
+def get_inputs(processor, text, image_path=None, image=None):
+    if image_path:
+        image = Image.open(image_path)
+    if image is None:
+        text = text.replace(IMAGE_TOKEN, "")
+    inputs = processor(
+        text=text,
+        images=[image] if image else None,
+        return_tensors="pt",
+        max_length=1024,
+        truncation=True
+    )
+    inputs = {key: value.to(device) for key, value in inputs.items()}
+    inputs["image_flags"] = torch.tensor([1 if image else 0], dtype=torch.long).to(device)
+    if image is None:
+        del inputs['pixel_values']
+    return inputs
+# 将图片库中的图像编码为 embedding
+def encode_image_library(image_paths):
+    embeddings = []
+    for img_path in image_paths:
+        text = f"{IMAGE_TOKEN}\n Represent the given image."
+        print(f"text: {text}")
+        inputs = get_inputs(processor, text, image_path=img_path)
+        with torch.no_grad(), torch.autocast(device_type=device, dtype=torch.bfloat16):
+            output = model(tgt=inputs)
+        embeddings.append(output["tgt_reps"].float().cpu().numpy())
+    return np.stack(embeddings)
+# 保存 embedding 到文件
+def save_embeddings(embeddings, file_path="image_embeddings.pkl"):
+    with open(file_path, "wb") as f:
+        pickle.dump(embeddings, f)
+# 加载 embedding 从文件
+def load_embeddings(file_path="image_embeddings.pkl"):
+    with open(file_path, "rb") as f:
+        return pickle.load(f)
+# 计算相似度（余弦相似度）
+def cosine_similarity(query_embedding, embeddings):
+    similarity = np.sum(query_embedding * embeddings, axis=-1)
+    return similarity
+# 检索逻辑
+def retrieve_images(query_text, query_image, top_n=TOP_N):
+    if query_text:
+        query_text = f"{IMAGE_TOKEN}\n {query_text}"
+    else:
+        query_text = f"{IMAGE_TOKEN}\n Represent the given image."
+    if query_image is not None:
+        image = Image.fromarray(query_image)
+    else:
+        image = None
+    inputs = get_inputs(processor, query_text, image=image)
+    print(f"inputs: {inputs}")
+    # with torch.no_grad():
+    with torch.no_grad(), torch.autocast(device_type=device, dtype=torch.bfloat16):
+        query_embedding = model(qry=inputs)["qry_reps"].float().cpu().numpy()
+    # 加载图片库的 embedding
+    embeddings = load_embeddings()
+    # 计算相似度
+    similarity = cosine_similarity(query_embedding, embeddings)
+    similarity = similarity.T
+    print(f"cosine_similarity: {similarity}")
+    top_indices = np.argsort(-similarity).squeeze(0)[:top_n]
+    print(f"top_indices: {top_indices}")
+    # similarity = model.compute_similarity(np.expand_dims(query_embedding.squeeze(0), axis=1), embeddings.squeeze(1))
+    # print(f"model.compute_similarity: {similarity}")
+    return [image_paths[i] for i in top_indices]
+# 界面逻辑
+def demo(query_text, query_image):
+    # 执行检索
+    # print(f"query_text: {query_text}, query_image: {query_image}, type(query_image): {type(query_image)}, image shape: {query_image.shape if query_image is not None else 'None'}")
+    retrieved_images = retrieve_images(query_text, query_image)
+    # 返回检索结果（图片列表）
+    return [Image.open(img) for img in retrieved_images]
+# 预置示例
+def load_examples():
+    examples = []
+    # 获取QUERY_DIR中的所有图片文件
+    image_files = [f for f in os.listdir(QUERY_DIR) if f.endswith((".jpg", ".png"))]
+    for img_file in image_files:
+        # 构建图片完整路径
+        img_path = os.path.join(QUERY_DIR, img_file)
+        # 获取对应的txt文件名（将图片扩展名替换为.txt）
+        txt_file = os.path.splitext(img_file)[0] + ".txt"
+        txt_path = os.path.join(QUERY_DIR, txt_file)
+        # 如果存在对应的txt文件，读取查询文本
+        if os.path.exists(txt_path):
+            with open(txt_path, 'r', encoding='utf-8') as f:
+                query_text = f.read().strip().replace("<|image_1|>\n", "")
+            examples.append([query_text, img_path])
+    return examples
+# Build the Gradio interface.
+iface = gr.Interface(
+    fn=demo,
+    inputs=["text", "image"],
+    outputs=gr.Gallery(label=f"Retrieved Images (Top {TOP_N})"),
+    examples=load_examples(),  # use the dynamically loaded examples
+    title="Multimodal Retrieval Demo",
+    description="Enter a query and upload an image to retrieve relevant images from the library. You can click on the example below to use it as a query"
+)
+# On startup, encode the image library and save the embeddings
+# (only when no cached embedding file exists yet).
+if not os.path.exists("image_embeddings.pkl"):
+    embeddings = encode_image_library(image_paths)
+    save_embeddings(embeddings)
+# Launch the Gradio app.
+iface.launch()

image_embeddings.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8dcedaab4e3bcc555795f56b15a7d830b74ffc707260c3b0152ba8d99a992bd
+size 409764

imgs/candidates/000000007574.jpg ADDED Viewed

imgs/candidates/000000009448.jpg ADDED Viewed

imgs/candidates/000000014007.jpg ADDED Viewed

imgs/candidates/000000021839.jpg ADDED Viewed

imgs/candidates/000000022892.jpg ADDED Viewed

imgs/candidates/000000024610.jpg ADDED Viewed

imgs/candidates/000000025593.jpg ADDED Viewed

imgs/candidates/000000044068.jpg ADDED Viewed

imgs/candidates/000000084362.jpg ADDED Viewed

imgs/candidates/000000098839.jpg ADDED Viewed

imgs/candidates/000000107339.jpg ADDED Viewed

imgs/candidates/000000144333.jpg ADDED Viewed

imgs/candidates/000000159791.jpg ADDED Viewed

imgs/candidates/000000168593.jpg ADDED Viewed

imgs/candidates/000000182155.jpg ADDED Viewed

imgs/candidates/000000186449.jpg ADDED Viewed

imgs/candidates/000000191845.jpg ADDED Viewed

imgs/candidates/000000210299.jpg ADDED Viewed

imgs/candidates/000000221708.jpg ADDED Viewed

imgs/candidates/000000223747.jpg ADDED Viewed

imgs/candidates/000000226111.jpg ADDED Viewed

imgs/candidates/000000226984.jpg ADDED Viewed

imgs/candidates/000000252294.jpg ADDED Viewed

imgs/candidates/000000256941.jpg ADDED Viewed

imgs/candidates/000000280710.jpg ADDED Viewed

imgs/candidates/000000281179.jpg ADDED Viewed

imgs/candidates/000000283717.jpg ADDED Viewed

imgs/candidates/000000284445.jpg ADDED Viewed

imgs/candidates/000000287649.jpg ADDED Viewed

imgs/candidates/000000289343.jpg ADDED Viewed

imgs/candidates/000000295809.jpg ADDED Viewed

imgs/candidates/000000334371.jpg ADDED Viewed

imgs/candidates/000000350054.jpg ADDED Viewed

imgs/candidates/000000361621.jpg ADDED Viewed

imgs/candidates/000000369503.jpg ADDED Viewed

imgs/candidates/000000384661.jpg ADDED Viewed

imgs/candidates/000000385997.jpg ADDED Viewed

imgs/candidates/000000398377.jpg ADDED Viewed

imgs/candidates/000000402473.jpg ADDED Viewed

imgs/candidates/000000426166.jpg ADDED Viewed

imgs/candidates/000000441247.jpg ADDED Viewed

imgs/candidates/000000455157.jpg ADDED Viewed

imgs/candidates/000000492077.jpg ADDED Viewed

imgs/candidates/000000496854.jpg ADDED Viewed

imgs/candidates/000000501523.jpg ADDED Viewed

imgs/candidates/000000530099.jpg ADDED Viewed

imgs/candidates/000000530162.jpg ADDED Viewed

imgs/candidates/000000530836.jpg ADDED Viewed