Spaces:

darpanaswal
/

Patent_Retrieval

Configuration error

darpanaswal committed on Apr 11

Commit

3a53014

verified ·

1 Parent(s): 4202987

Update cross_encoder_reranking_train.py

Files changed (1) hide show

cross_encoder_reranking_train.py CHANGED Viewed

@@ -226,13 +226,6 @@ def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
         batch_size = last_hidden_states.shape[0]
         return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
-def cls_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
-    """Extract [CLS] token representations, accounting for left padding."""
-    # Get the index of the first non-padding token in each sequence
-    cls_indices = attention_mask.float().argmax(dim=1)
-    batch_size = last_hidden_states.size(0)
-    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), cls_indices]
 def get_detailed_instruct(task_description: str, query: str) -> str:
     """Create an instruction-formatted query"""
     return f'Instruct: {task_description}\nQuery: {query}'
@@ -274,7 +267,7 @@ def cross_encoder_reranking(query_text, doc_texts, model, tokenizer, batch_size=
             # Get embeddings
             outputs = model(**batch_dict)
-            embeddings = cls_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
             # Normalize embeddings
             embeddings = F.normalize(embeddings, p=2, dim=1)

         batch_size = last_hidden_states.shape[0]
         return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
 def get_detailed_instruct(task_description: str, query: str) -> str:
     """Create an instruction-formatted query"""
     return f'Instruct: {task_description}\nQuery: {query}'
             # Get embeddings
             outputs = model(**batch_dict)
+            embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
             # Normalize embeddings
             embeddings = F.normalize(embeddings, p=2, dim=1)