vidore
/

colqwen2-v1.0

Visual Document Retrieval

vidore-experimental

Model card Files and versions Community

manu committed on Mar 22

Commit

4988933

·

verified ·

1 Parent(s): 7db3305

Update handler.py

Files changed (1) hide show

handler.py +4 -3

handler.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import Dict, List, Any
 from colpali_engine.models import ColQwen2, ColQwen2Processor
 import torch
@@ -8,7 +9,7 @@ class EndpointHandler():
             path,
             torch_dtype=torch.bfloat16,
             device_map="cuda:0",  # or "mps" if on Apple Silicon
-            # attn_implementation="flash_attention_2", # should work on A100
             ).eval()
         self.processor = ColQwen2Processor.from_pretrained(path)
@@ -24,7 +25,7 @@ class EndpointHandler():
         batch_images = self.processor.process_images([images]).to(self.model.device)
         # Forward pass
         with torch.no_grad():
-            image_embeddings = self.model(**batch_images)
-        return {"embeddings": image_embeddings.tolist()}

 from typing import Dict, List, Any
+from transformers.utils.import_utils import is_flash_attn_2_available
 from colpali_engine.models import ColQwen2, ColQwen2Processor
 import torch
             path,
             torch_dtype=torch.bfloat16,
             device_map="cuda:0",  # or "mps" if on Apple Silicon
+            attn_implementation="flash_attention_2" if is_flash_attn_2_available() else None, # should work on A100
             ).eval()
         self.processor = ColQwen2Processor.from_pretrained(path)
         batch_images = self.processor.process_images([images]).to(self.model.device)
         # Forward pass
         with torch.no_grad():
+            image_embeddings = self.model(**batch_images).tolist()
+        return {"embeddings": image_embeddings}