Visual Document Retrieval
ColPali
Safetensors
English
vidore-experimental
vidore
manuel committed on
Commit
f47f1d6
·
1 Parent(s): 98543a7
Files changed (1) hide show
  1. handler.py +12 -19
handler.py CHANGED
@@ -20,7 +20,7 @@ class EndpointHandler():
20
 
21
  def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
22
  """
23
- Expects data in one of the following formats:
24
  {
25
  "images": [
26
  "base64_encoded_image1",
@@ -28,16 +28,9 @@ class EndpointHandler():
28
  ...
29
  ]
30
  }
31
- or
32
  {
33
- "processed_images": [
34
- [...], # preprocessed image tensors
35
- [...]
36
- ]
37
- }
38
- or
39
- {
40
- "text": [
41
  "text1",
42
  "text2",
43
  ...
@@ -48,9 +41,9 @@ class EndpointHandler():
48
  """
49
  # Input validation
50
  data = data.get("inputs", [])
51
- input_keys = [key for key in ["images", "processed_images", "text"] if key in data]
52
  if len(input_keys) != 1:
53
- return {"error": "Exactly one of 'images', 'processed_images', or 'text' must be provided"}
54
 
55
  input_type = input_keys[0]
56
  inputs = data[input_type]
@@ -76,18 +69,18 @@ class EndpointHandler():
76
  # Process the images using the processor
77
  batch = self.processor.process_images(decoded_images).to(self.model.device)
78
 
79
- elif input_type == "processed_images":
80
- try:
81
- buffer = io.BytesIO(base64.b64decode(inputs))
82
- batch = torch.load(buffer, map_location=self.model.device)
83
- except Exception as e:
84
- return {"error": f"Error processing preprocessed images: {str(e)}"}
85
 
86
  else: # text
87
  if not isinstance(inputs, list):
88
  inputs = [inputs]
89
  try:
90
- batch = self.processor.process_text(inputs).to(self.model.device)
91
  except Exception as e:
92
  return {"error": f"Error processing text: {str(e)}"}
93
 
 
20
 
21
  def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
22
  """
23
+ Expects data in one of the following formats in the "inputs" key:
24
  {
25
  "images": [
26
  "base64_encoded_image1",
 
28
  ...
29
  ]
30
  }
31
+ xor
32
  {
33
+ "queries": [
 
 
 
 
 
 
 
34
  "text1",
35
  "text2",
36
  ...
 
41
  """
42
  # Input validation
43
  data = data.get("inputs", [])
44
+ input_keys = [key for key in ["images", "queries"] if key in data]
45
  if len(input_keys) != 1:
46
+ return {"error": "Exactly one of 'images', 'queries' must be provided"}
47
 
48
  input_type = input_keys[0]
49
  inputs = data[input_type]
 
69
  # Process the images using the processor
70
  batch = self.processor.process_images(decoded_images).to(self.model.device)
71
 
72
+ # elif input_type == "processed_images":
73
+ # try:
74
+ # buffer = io.BytesIO(base64.b64decode(inputs))
75
+ # batch = torch.load(buffer, map_location=self.model.device)
76
+ # except Exception as e:
77
+ # return {"error": f"Error processing preprocessed images: {str(e)}"}
78
 
79
  else: # text
80
  if not isinstance(inputs, list):
81
  inputs = [inputs]
82
  try:
83
+ batch = self.processor.process_queries(inputs).to(self.model.device)
84
  except Exception as e:
85
  return {"error": f"Error processing text: {str(e)}"}
86