Y-Mangoes committed on
Commit
c54eecb
·
verified ·
1 Parent(s): 14caf39

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from pyannote.audio import Pipeline
4
+ from pyannote.core import Segment, Annotation
5
+ import os
6
+ from huggingface_hub import login
7
+ import tempfile
8
+
9
# Hugging Face authentication: the access token is read from the environment
# and startup is aborted when it is missing or empty.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN environment variable not set. Please set it in Hugging Face Space settings.")
login(token=HF_TOKEN)
15
+
16
# Initialize the pyannote pipeline, on the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=HF_TOKEN,
)
# NOTE: pyannote.audio 3.x reports a failed download (private or gated model
# whose user conditions were not accepted) by returning None instead of
# raising, so check before calling .to() to fail with an actionable message.
if pipeline is None:
    raise RuntimeError(
        "Could not load 'pyannote/speaker-diarization-3.1'. Check that HF_TOKEN "
        "is valid and that the model's user conditions have been accepted on "
        "Hugging Face."
    )
pipeline = pipeline.to(device)
22
+
23
def diarize_audio(audio_file):
    """Run speaker diarization on a WAV file and return a text report.

    Args:
        audio_file: Filesystem path of the uploaded audio. May be ``None`` or
            empty when nothing was uploaded.

    Returns:
        A string with one ``"Speaker <label>: <start>s - <end>s"`` line per
        speaker turn, ``"No speakers detected."`` when the pipeline yields no
        turns, or an ``"Error ..."`` message. Failures are reported in the
        returned text rather than raised.
    """
    temp_file_path = None
    try:
        # Nothing uploaded: report it directly instead of failing on None.
        if not audio_file:
            return "Error: Please upload a WAV file."

        # Accept any capitalisation of the extension (".wav", ".WAV", ...).
        if not os.fspath(audio_file).lower().endswith(".wav"):
            return "Error: Please upload a WAV file."

        # Copy the upload to a private temporary file. The source is opened
        # first so that no temporary file is created when it cannot be read,
        # and both handles are closed before the pipeline opens the copy.
        with open(audio_file, "rb") as source:
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                temp_file_path = temp_file.name
                temp_file.write(source.read())

        # Perform diarization
        diarization = pipeline(temp_file_path)

        # Format one line per speaker turn
        segments = [
            f"Speaker {speaker}: {turn.start:.1f}s - {turn.end:.1f}s"
            for turn, _, speaker in diarization.itertracks(yield_label=True)
        ]
        return "\n".join(segments) if segments else "No speakers detected."

    except Exception as e:
        return f"Error processing audio: {str(e)}"

    finally:
        # Always remove the temporary copy, including when diarization failed.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError:
                # Best-effort cleanup: a failed delete must not replace the
                # result (or error message) being returned.
                pass
52
+
53
# Build the Gradio UI: a single audio upload feeding diarize_audio, with the
# report shown in a text box.
iface = gr.Interface(
    title="Speaker Diarization with pyannote.audio 3.1",
    description="Upload a WAV audio file to perform speaker diarization. Results show speaker segments with timestamps.",
    fn=diarize_audio,
    inputs=gr.Audio(label="Upload WAV Audio File", type="filepath"),
    outputs=gr.Textbox(label="Diarization Results"),
)

# Start the web server only when this file is executed as a script
if __name__ == "__main__":
    iface.launch()