Uddipan Basu Bir committed on
Commit
93dce4d
·
1 Parent(s): 0cfc73f

Download checkpoint from HF hub in OcrReorderPipeline

Browse files
Files changed (1) hide show
  1. inference.py +6 -8
inference.py CHANGED
@@ -5,7 +5,7 @@ import base64
5
  from io import BytesIO
6
  from huggingface_hub import hf_hub_download
7
 
8
- # point at your HF model repo
9
  HF_MODEL_REPO = "Uddipan107/ocr-layoutlmv3-base-t5-small"
10
 
11
  class OcrReorderPipeline(Pipeline):
@@ -18,10 +18,11 @@ class OcrReorderPipeline(Pipeline):
18
  ckpt = torch.load(ckpt_path, map_location="cpu")
19
  proj_state= ckpt["projection"]
20
 
21
- # ── Rebuild & load your projection head ────────────────────────────
 
22
  self.projection = torch.nn.Sequential(
23
- torch.nn.Linear(768, model.config.d_model),
24
- torch.nn.LayerNorm(model.config.d_model),
25
  torch.nn.GELU()
26
  )
27
  self.projection.load_state_dict(proj_state)
@@ -41,20 +42,17 @@ class OcrReorderPipeline(Pipeline):
41
  def _forward(self, model_inputs):
42
  pv, ids, mask, bbox = (
43
  model_inputs[k].to(self.device)
44
- for k in ("pixel_values", "input_ids", "attention_mask", "bbox")
45
  )
46
-
47
  vision_out = self.model.vision_model(
48
  pixel_values=pv,
49
  input_ids=ids,
50
  attention_mask=mask,
51
  bbox=bbox
52
  )
53
-
54
  seq_len = ids.size(1)
55
  text_feats = vision_out.last_hidden_state[:, :seq_len, :]
56
  proj_feats = self.projection(text_feats)
57
-
58
  gen_ids = self.model.text_model.generate(
59
  inputs_embeds=proj_feats,
60
  attention_mask=mask,
 
5
  from io import BytesIO
6
  from huggingface_hub import hf_hub_download
7
 
8
+ # HF model repo containing pytorch_model.bin with 'projection' state
9
  HF_MODEL_REPO = "Uddipan107/ocr-layoutlmv3-base-t5-small"
10
 
11
  class OcrReorderPipeline(Pipeline):
 
18
  ckpt = torch.load(ckpt_path, map_location="cpu")
19
  proj_state= ckpt["projection"]
20
 
21
+ # ── Rebuild & load your projection head (T5-small hidden size = 512) ─
22
+ d_model = 512
23
  self.projection = torch.nn.Sequential(
24
+ torch.nn.Linear(768, d_model),
25
+ torch.nn.LayerNorm(d_model),
26
  torch.nn.GELU()
27
  )
28
  self.projection.load_state_dict(proj_state)
 
42
  def _forward(self, model_inputs):
43
  pv, ids, mask, bbox = (
44
  model_inputs[k].to(self.device)
45
+ for k in ("pixel_values","input_ids","attention_mask","bbox")
46
  )
 
47
  vision_out = self.model.vision_model(
48
  pixel_values=pv,
49
  input_ids=ids,
50
  attention_mask=mask,
51
  bbox=bbox
52
  )
 
53
  seq_len = ids.size(1)
54
  text_feats = vision_out.last_hidden_state[:, :seq_len, :]
55
  proj_feats = self.projection(text_feats)
 
56
  gen_ids = self.model.text_model.generate(
57
  inputs_embeds=proj_feats,
58
  attention_mask=mask,