Update inference.py
inference.py  (+8, -6)
@@ -69,7 +69,7 @@ class InferenceRecipe:
         """Load and preprocess audio."""
         try:
             # Convert to tensor
-            wav = torch.from_numpy(audio_array).float().unsqueeze(0)
+            wav = torch.from_numpy(audio_array).float().unsqueeze(0).to(self.device)

             # Resample if needed
             if sample_rate != self.sample_rate:
@@ -93,15 +93,15 @@ class InferenceRecipe:
             raise

     def _pad_codes(self, all_codes, time_seconds=30):
-        """Pad codes to minimum length if needed."""
         try:
             min_frames = int(time_seconds * self.frame_rate)
             frame_size = int(self.sample_rate / self.frame_rate)
-
+
             if len(all_codes) < min_frames:
                 frames_to_add = min_frames - len(all_codes)
                 logger.info(f"Padding {frames_to_add} frames to reach minimum length")
                 with torch.no_grad(), self.mimi.streaming(batch_size=1):
+                    # Create tensor on the correct device
                     chunk = torch.zeros(1, 1, frame_size, dtype=torch.float32, device=self.device)
                     for _ in range(frames_to_add):
                         additional_code = self.mimi.encode(chunk)
@@ -137,15 +137,17 @@ class InferenceRecipe:
         """Run a warmup pass."""
         try:
             frame_size = int(self.sample_rate / self.frame_rate)
-
-
+            # Create tensor on the correct device from the start
+            chunk = torch.zeros(1, 1, frame_size, dtype=torch.float32, device=self.device)

             with torch.no_grad(), self.lm_gen.streaming(1), self.mimi.streaming(1):
+                codes = self.mimi.encode(chunk)  # chunk already on correct device
                 tokens = self.lm_gen.step(codes[:, :, 0:1])
                 if tokens is not None:
                     _ = self.mimi.decode(tokens[:, 1:])

-
+            if self.device.type == 'cuda':
+                torch.cuda.synchronize()
             logger.info("Warmup pass completed")

         except Exception as e:
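The common thread in these changes is a single pattern: build tensors on `self.device` at the moment they are created, rather than allocating them on the CPU and relying on a later copy, and synchronize CUDA once the warmup work has finished. The following is a minimal, self-contained sketch of that pattern; the function names and the 24 kHz / 12.5 Hz defaults are illustrative assumptions and are not taken from inference.py.

    # Sketch of the device-placement pattern applied in this commit.
    # Names and default rates below are hypothetical, for illustration only.
    import numpy as np
    import torch

    def make_warmup_chunk(device: torch.device,
                          sample_rate: int = 24000,
                          frame_rate: float = 12.5) -> torch.Tensor:
        # Allocate the silent warmup frame directly on the target device,
        # so no later .to(device) copy (or device-mismatch error) is needed.
        frame_size = int(sample_rate / frame_rate)
        return torch.zeros(1, 1, frame_size, dtype=torch.float32, device=device)

    def load_audio(audio_array: np.ndarray, device: torch.device) -> torch.Tensor:
        # Same idea for loaded audio: move it to the device as soon as it
        # becomes a tensor, mirroring the .to(self.device) change above.
        return torch.from_numpy(audio_array).float().unsqueeze(0).to(device)

    if __name__ == "__main__":
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        chunk = make_warmup_chunk(device)
        wav = load_audio(np.zeros(24000, dtype=np.float32), device)
        # After GPU warmup work, synchronize so startup timing is deterministic.
        if device.type == "cuda":
            torch.cuda.synchronize()
        print(chunk.device, wav.shape)

Constructing tensors with an explicit `device=` argument, as the diff does for the warmup and padding chunks, avoids both an extra host-to-device copy and the mismatch errors that occur when a CPU tensor is fed to a model that already lives on the GPU.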