Spaces:

mangoesai
/

Pyannote_diarization

Running on T4

App Files Files Community

Y-Mangoes committed 10 days ago

Commit

c54eecb

verified ·

1 Parent(s): 14caf39

Create app.py

Browse files

Files changed (1) hide show

app.py +64 -0

app.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import gradio as gr
+import torch
+from pyannote.audio import Pipeline
+from pyannote.core import Segment, Annotation
+import os
+from huggingface_hub import login
+import tempfile
+# Authenticate with Hugging Face
+HF_TOKEN = os.getenv("HF_TOKEN")
+if HF_TOKEN:
+    login(token=HF_TOKEN)
+else:
+    raise ValueError("HF_TOKEN environment variable not set. Please set it in Hugging Face Space settings.")
+# Initialize the pyannote pipeline with GPU support
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+pipeline = Pipeline.from_pretrained(
+    "pyannote/speaker-diarization-3.1",
+    use_auth_token=HF_TOKEN
+).to(device)
+def diarize_audio(audio_file):
+    try:
+        # Verify audio file format
+        if not audio_file.endswith('.wav'):
+            return "Error: Please upload a WAV file."
+        # Process the audio file
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+            temp_file.write(open(audio_file, 'rb').read())
+            temp_file_path = temp_file.name
+        # Perform diarization
+        diarization = pipeline(temp_file_path)
+        # Format the output
+        output = []
+        for turn, _, speaker in diarization.itertracks(yield_label=True):
+            start = turn.start
+            end = turn.end
+            output.append(f"Speaker {speaker}: {start:.1f}s - {end:.1f}s")
+        # Clean up temporary file
+        os.unlink(temp_file_path)
+        # Return formatted results
+        return "\n".join(output) if output else "No speakers detected."
+    except Exception as e:
+        return f"Error processing audio: {str(e)}"
+# Create Gradio interface
+iface = gr.Interface(
+    fn=diarize_audio,
+    inputs=gr.Audio(type="filepath", label="Upload WAV Audio File"),
+    outputs=gr.Textbox(label="Diarization Results"),
+    title="Speaker Diarization with pyannote.audio 3.1",
+    description="Upload a WAV audio file to perform speaker diarization. Results show speaker segments with timestamps."
+)
+# Launch the interface
+if __name__ == "__main__":
+    iface.launch()