multimodalart (HF Staff) committed a8ca9e4 (verified) · 1 parent: 2ff63f0

Update app.py

Files changed (1): app.py (+66 -5)
app.py CHANGED
@@ -111,6 +111,55 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
 def change_rank_default(concept_name):
     return RANKS_MAP.get(concept_name, 30)
 
+@spaces.GPU
+def match_image_to_concept(image):
+    """
+    Match an uploaded image to the closest concept type using CLIP embeddings
+    """
+    if image is None:
+        return None
+
+    # Get image embeddings
+    img_pil = Image.fromarray(image).convert("RGB")
+    img_embed = get_image_embeds(img_pil, clip_model, preprocess, device)
+
+    # Calculate similarity to each concept
+    similarities = {}
+    for concept_name, concept_file in CONCEPTS_MAP.items():
+        try:
+            # Load concept embeddings
+            embeds_path = f"./IP_Composer/text_embeddings/{concept_file}"
+            with open(embeds_path, "rb") as f:
+                concept_embeds = np.load(f)
+
+            # Calculate similarity to each text embedding
+            sim_scores = []
+            for embed in concept_embeds:
+                # Normalize both embeddings
+                img_embed_norm = img_embed / np.linalg.norm(img_embed)
+                text_embed_norm = embed / np.linalg.norm(embed)
+
+                # Calculate cosine similarity
+                similarity = np.dot(img_embed_norm.flatten(), text_embed_norm.flatten())
+                sim_scores.append(similarity)
+
+            # Use the average of top 5 similarities for better matching
+            sim_scores.sort(reverse=True)
+            top_similarities = sim_scores[:min(5, len(sim_scores))]
+            avg_similarity = sum(top_similarities) / len(top_similarities)
+
+            similarities[concept_name] = avg_similarity
+        except Exception as e:
+            print(f"Error processing concept {concept_name}: {e}")
+
+    # Return the concept with highest similarity
+    if similarities:
+        matched_concept = max(similarities.items(), key=lambda x: x[1])[0]
+        # Display a notification to the user
+        gr.Info(f"Image automatically matched to concept: {matched_concept}")
+        return matched_concept
+    return None
+
 @spaces.GPU
 def get_image_embeds(pil_image, model=clip_model, preproc=preprocess, dev=device):
     """Get CLIP image embeddings for a given PIL image"""
@@ -464,9 +513,21 @@ Following the algorithm proposed in IP-Composer: Semantic Composition of Visual
         inputs=[concept_name3],
         outputs=[rank3]
     )
-
+    concept_image1.upload(
+        fn=match_image_to_concept,
+        inputs=[concept_image1],
+        outputs=[concept_name1]
+    )
+    concept_image2.upload(
+        fn=match_image_to_concept,
+        inputs=[concept_image2],
+        outputs=[concept_name2]
+    )
+    concept_image3.upload(
+        fn=match_image_to_concept,
+        inputs=[concept_image3],
+        outputs=[concept_name3]
+    )
+
 if __name__ == "__main__":
-    demo.launch()
-
-
-
+    demo.launch()
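
Note on the matching loop: img_embed is renormalized on every pass through the inner loop even though it never changes, and each cosine similarity is a separate np.dot call. The same top-5 scoring can be expressed with one normalization and a single matrix product. A minimal sketch under the same assumptions as the diff (img_embed of shape (1, D) or (D,), concept_embeds of shape (N, D)); the helper name top_k_mean_similarity is hypothetical and not part of app.py:

import numpy as np

def top_k_mean_similarity(img_embed, concept_embeds, k=5):
    """Hypothetical helper: mean cosine similarity of the k best-matching
    text embeddings, mirroring the intent of the per-embedding loop in
    match_image_to_concept above."""
    img = img_embed.reshape(-1)
    img = img / np.linalg.norm(img)  # normalize the image embedding once
    # Normalize each concept text embedding along its feature axis
    text = concept_embeds / np.linalg.norm(concept_embeds, axis=1, keepdims=True)
    sims = text @ img                # (N,) cosine similarities in one matmul
    top_k = np.sort(sims)[-min(k, sims.size):]  # k largest scores
    return float(top_k.mean())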
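The three .upload() registrations follow Gradio's standard event wiring: when a file finishes uploading to a gr.Image, the callback runs and its return value becomes the new value of the output component, here the concept dropdown. A self-contained sketch of the pattern, not the Space's actual UI; the component labels and the two concept choices are placeholders:

import gradio as gr

def match_image_to_concept(image):
    # Stand-in for the real CLIP-based matcher: returns a fixed concept.
    return "age" if image is not None else None

with gr.Blocks() as demo:
    concept_image1 = gr.Image(label="Concept image")
    concept_name1 = gr.Dropdown(choices=["age", "emotion"], label="Concept")
    # .upload() fires after a file upload completes; the callback's return
    # value is written into the output component (the dropdown).
    concept_image1.upload(
        fn=match_image_to_concept,
        inputs=[concept_image1],
        outputs=[concept_name1],
    )

if __name__ == "__main__":
    demo.launch()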