rbao2018 committed
Commit 2c3e6be · 1 Parent(s): 7440363
Files changed (1)
  1. modeling_bailing_moe.py +4 -13
modeling_bailing_moe.py CHANGED
@@ -1438,21 +1438,12 @@ class BailingMoeForCausalLM(BailingMoePreTrainedModel):
 
     def compute_logit(self, hidden_states):
         if self.norm_head:
-            if self.training:
-                norm_weight = (
-                    self.lm_head.weight / (torch.norm(self.lm_head.weight, p=2, dim=0, keepdim=True) + 1e-7).detach()
-                )
-                logits = F.linear(hidden_states, norm_weight, None)
-            else:
-                self.lm_head.weight.data = (
-                    self.lm_head.weight.data.float()
-                    / (torch.norm(self.lm_head.weight.data.float(), p=2, dim=0, keepdim=True) + 1e-7)
-                ).to(hidden_states.dtype)
-                logits = F.linear(hidden_states, self.lm_head.weight.data, None)
-                self.norm_head = False
+            weight_float = self.lm_head.weight.float()
+            norm = torch.norm(weight_float, p=2, dim=0, keepdim=True).clamp(min=1e-7)
+            norm_weight = (weight_float / norm).to(hidden_states.dtype)
+            logits = F.linear(hidden_states, norm_weight, None)
         else:
             logits = self.lm_head(hidden_states)
-        return logits
 
     @add_start_docstrings_to_model_forward(BAILINGMOE_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=MoeCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
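
For reference, below is a minimal, self-contained sketch of the simplified norm-head logit computation this commit introduces. The `NormHeadLM` wrapper, its constructor arguments, and the usage snippet are illustrative assumptions, not the actual BailingMoe class; only the body of the `if self.norm_head:` branch mirrors the committed code. The new version replaces the old training/eval split (which added 1e-7 to the norm and, in eval, mutated `lm_head.weight.data` in place) with a single branch-free path that floors the norm via `clamp(min=1e-7)`. Note the hunk above also removes the `return logits` line; the sketch keeps it so the method returns a value.

import torch
import torch.nn as nn
import torch.nn.functional as F

class NormHeadLM(nn.Module):
    """Illustrative wrapper (assumed names/shapes), not the BailingMoe class."""

    def __init__(self, hidden_size: int, vocab_size: int, norm_head: bool = True):
        super().__init__()
        # lm_head.weight has shape [vocab_size, hidden_size].
        self.lm_head = nn.Linear(hidden_size, vocab_size, bias=False)
        self.norm_head = norm_head

    def compute_logit(self, hidden_states: torch.Tensor) -> torch.Tensor:
        if self.norm_head:
            # Work in float32 for numerical stability, as the commit does.
            weight_float = self.lm_head.weight.float()
            # dim=0 spans the vocab axis of the [vocab_size, hidden_size]
            # weight, so each hidden-dimension column is scaled to unit L2
            # norm; clamp floors the norm at 1e-7 instead of adding an
            # epsilon as the old code did.
            norm = torch.norm(weight_float, p=2, dim=0, keepdim=True).clamp(min=1e-7)
            norm_weight = (weight_float / norm).to(hidden_states.dtype)
            logits = F.linear(hidden_states, norm_weight, None)
        else:
            logits = self.lm_head(hidden_states)
        return logits  # kept in this sketch; the committed method drops it

# Hypothetical usage:
head = NormHeadLM(hidden_size=16, vocab_size=32)
h = torch.randn(2, 5, 16)
logits = head.compute_logit(h)  # shape [2, 5, 32]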