Update handler.py
handler.py  CHANGED  (+20 -6)
import os
from pyannote.audio import Pipeline, Audio
import torch


class EndpointHandler:
    def __init__(self, path=""):
        # Get the Hugging Face authentication token from the environment variable
        auth_token = os.getenv("MY_KEY")
        if not auth_token:
            raise ValueError("Hugging Face authentication token (MY_KEY) is missing.")

        # Initialize pretrained pipeline with the token
        self._pipeline = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1", use_auth_token=auth_token
        )

        # Send pipeline to GPU if available
        if torch.cuda.is_available():
            self._pipeline.to(torch.device("cuda"))

        # Initialize audio reader
        self._io = Audio()

    def __call__(self, data):
        # Extract inputs from request data
        inputs = data.pop("inputs", data)
        waveform, sample_rate = self._io(inputs)

        # Extract pipeline parameters if provided
        parameters = data.pop("parameters", dict())

        # Run speaker diarization
        diarization = self._pipeline(
            {"waveform": waveform, "sample_rate": sample_rate}, **parameters
        )

        # Process diarization results
        processed_diarization = [
            {
                "speaker": speaker,
                "start": f"{turn.start:.3f}",
                "end": f"{turn.end:.3f}",
            }
            for turn, _, speaker in diarization.itertracks(yield_label=True)
        ]

        # Return results as JSON
        return {"diarization": processed_diarization}
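
For a quick local sanity check, the handler can be invoked directly as a callable. A minimal sketch, assuming a local audio file sample.wav and a valid Hugging Face token exported as MY_KEY (both the file name and the token value are placeholders):

import json
from handler import EndpointHandler

# EndpointHandler.__init__ reads the token from the MY_KEY environment
# variable, so export it first, e.g.: export MY_KEY=hf_xxx (placeholder)
handler = EndpointHandler()

# "inputs" can be anything pyannote's Audio() accepts, such as a path to a WAV file
result = handler({"inputs": "sample.wav"})
print(json.dumps(result, indent=2))
# Expected shape:
# {"diarization": [{"speaker": "SPEAKER_00", "start": "0.497", "end": "1.353"}, ...]}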