voxmenthe committed
Commit e37651e · 1 Parent(s): 105a9fa

Update device setting for inference
Files changed (1)
  1. inference.py +11 -1
inference.py CHANGED
@@ -16,6 +16,15 @@ class SentimentInference:
         model_yaml_cfg = config_data.get('model', {})
         inference_yaml_cfg = config_data.get('inference', {})
 
+        # Determine device early
+        if torch.cuda.is_available():
+            self.device = torch.device("cuda")
+        elif torch.backends.mps.is_available():  # Check for MPS (Apple Silicon GPU)
+            self.device = torch.device("mps")
+        else:
+            self.device = torch.device("cpu")
+        print(f"[INFERENCE_LOG] Using device: {self.device}")
+
         model_hf_repo_id = model_yaml_cfg.get('name_or_path')
         tokenizer_hf_repo_id = model_yaml_cfg.get('tokenizer_name_or_path', model_hf_repo_id)
         local_model_weights_path = inference_yaml_cfg.get('model_path')  # Path for local .pt file
@@ -127,12 +136,13 @@ class SentimentInference:
             print(f"[INFERENCE_LOG] HUB_LOAD_FALLBACK: Critical error during fallback load: {e_fallback}")
             raise e_fallback  # Re-raise if fallback also fails catastrophically
 
+        self.model.to(self.device)  # Move model to the determined device
         self.model.eval()
 
     def predict(self, text: str) -> Dict[str, Any]:
         inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=self.max_length, padding=True)
         with torch.no_grad():
-            outputs = self.model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
+            outputs = self.model(input_ids=inputs['input_ids'].to(self.device), attention_mask=inputs['attention_mask'].to(self.device))
         logits = outputs.get("logits")  # Use .get for safety
         if logits is None:
             raise ValueError("Model output did not contain 'logits'. Check model's forward pass.")
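
The device-selection pattern this commit introduces can be exercised on its own. Below is a minimal, self-contained sketch of the same cuda → mps → cpu fallback and of moving a model and its inputs to the selected device; the nn.Linear stand-in model and the random input tensor are illustrative placeholders, not part of inference.py.

import torch
import torch.nn as nn

# Same fallback order as the commit: CUDA first, then Apple Silicon MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

model = nn.Linear(4, 2).to(device)  # placeholder model; stands in for self.model
model.eval()

x = torch.randn(1, 4).to(device)  # inputs must live on the same device as the model
with torch.no_grad():
    logits = model(x)
print(logits.cpu())

Note that both sides of the change matter: once the model lives on CUDA or MPS, feeding it CPU tensors raises a device-mismatch RuntimeError, which is why predict() gains the matching .to(self.device) calls on input_ids and attention_mask.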