Ben Wiley committed
Commit 4739174 · 1 Parent(s): a7ab6a9

Adding HF Auth attempt

Files changed (1): app.py (+61 -33)
app.py CHANGED
@@ -4,14 +4,26 @@ from pyannote.audio import Pipeline
 from pyannote.audio.pipelines.utils.hook import ProgressHook
 import scipy.io.wavfile
 import os
+from huggingface_hub import HfApi
+
+# Global variable to store the user's token
+HUGGINGFACE_ACCESS_TOKEN = None
 
 
 def perform_separation(audio_file_path: str):
+    global HUGGINGFACE_ACCESS_TOKEN
+
+    if not HUGGINGFACE_ACCESS_TOKEN:
+        return [], "Please log in with your HuggingFace account first."
+
     # Instantiate the pipeline
-    pipeline = Pipeline.from_pretrained(
-        "pyannote/speech-separation-ami-1.0",
-        use_auth_token=HUGGINGFACE_ACCESS_TOKEN,
-    )
+    try:
+        pipeline = Pipeline.from_pretrained(
+            "pyannote/speech-separation-ami-1.0",
+            use_auth_token=HUGGINGFACE_ACCESS_TOKEN,
+        )
+    except Exception as e:
+        return [], f"Error loading pipeline: {str(e)}"
 
     waveform, sample_rate = torchaudio.load(audio_file_path)
 
@@ -40,33 +52,49 @@ def perform_separation(audio_file_path: str):
     return output_file_paths, rttm_content
 
 
-def gradio_wrapper(audio_file_path: str):
+def gradio_wrapper(audio_file_path: str, request: gr.Request):
+    global HUGGINGFACE_ACCESS_TOKEN
+
+    if not HUGGINGFACE_ACCESS_TOKEN:
+        return [""] * 10 + ["Please log in with your HuggingFace account first."]
+
     output_file_paths, rttm_content = perform_separation(audio_file_path)
-    return output_file_paths + [rttm_content]
-
-
-inputs = gr.inputs.Audio(label="Input Audio", type="filepath")
-
-# Dynamic output for audio files
-outputs = []
-max_speakers = 10  # Set a reasonable maximum number of speakers
-for i in range(max_speakers):
-    outputs.append(gr.outputs.Audio(label=f"Speaker {i+1}", type="filepath"))
-
-# Add RTTM output
-outputs.append(gr.outputs.Textbox(label="RTTM Output"))
-
-title = "Speech Separation and Diarization"
-description = "Gradio demo for Speech Separation and Diarization using Pyannote's pyannote/speech-separation-ami-1.0. To use it, simply upload your audio, or click one of the examples to load them. The app will output separated audio for each speaker and the RTTM file content."
-article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2403.02288' target='_blank'>PixIT: Joint Training of Speaker Diarization and Speech Separation from Real-world Multi-speaker Recordings</a> | <a href='https://huggingface.co/pyannote/speech-separation-ami-1.0' target='_blank'>HuggingFace Pipeline</a></p>"
-examples = [["samples_audio_samples_test_mixture.wav"]]
-
-gr.Interface(
-    gradio_wrapper,
-    inputs,
-    outputs,
-    title=title,
-    description=description,
-    article=article,
-    examples=examples,
-).launch()
+    return output_file_paths + [""] * (10 - len(output_file_paths)) + [rttm_content]
+
+
+def login(request: gr.Request):
+    global HUGGINGFACE_ACCESS_TOKEN
+
+    if request.username:
+        # User is authenticated
+        HUGGINGFACE_ACCESS_TOKEN = request.auth
+        return f"Welcome, {request.username}! You are now logged in."
+    else:
+        return "Please log in with your HuggingFace account to use this app."
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("## Speech Separation and Diarization")
+    gr.Markdown("Please log in with your HuggingFace account to use this app.")
+
+    login_status = gr.Markdown()
+
+    with gr.Row():
+        input_audio = gr.Audio(label="Input Audio", type="filepath")
+
+    with gr.Row():
+        submit_button = gr.Button("Process Audio")
+
+    outputs = []
+    max_speakers = 10
+    for i in range(max_speakers):
+        outputs.append(gr.Audio(label=f"Speaker {i+1}", type="filepath"))
+
+    rttm_output = gr.Textbox(label="RTTM Output")
+
+    demo.load(login, inputs=None, outputs=login_status)
+    submit_button.click(
+        gradio_wrapper, inputs=[input_audio], outputs=outputs + [rttm_output]
+    )
+
+demo.launch(auth={"hf_oauth": True})
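
A note on the final line: in the Gradio releases I'm aware of, launch(auth=...) accepts only a (username, password) tuple, a list of such tuples, or a callable, so auth={"hf_oauth": True} would raise an error rather than enable OAuth. Hugging Face OAuth on a Space is instead switched on by adding hf_oauth: true to the Space's README.md metadata, after which Gradio injects gr.OAuthToken / gr.OAuthProfile arguments into event handlers and gr.LoginButton renders the sign-in flow. A minimal sketch of that pattern, for contrast; the handler name, messages, and return value are illustrative, not the committed code:

import gradio as gr
from pyannote.audio import Pipeline

def run(audio_file_path: str, oauth_token: gr.OAuthToken | None):
    # Gradio fills this parameter automatically because of the
    # gr.OAuthToken annotation; it is None when nobody is logged in.
    if oauth_token is None:
        return "Please log in with your Hugging Face account first."
    pipeline = Pipeline.from_pretrained(
        "pyannote/speech-separation-ami-1.0",
        use_auth_token=oauth_token.token,  # the raw access token string
    )
    return "Pipeline loaded; separation would run here."

with gr.Blocks() as demo:
    gr.LoginButton()  # renders the "Sign in with Hugging Face" button
    input_audio = gr.Audio(label="Input Audio", type="filepath")
    status = gr.Markdown()
    gr.Button("Process Audio").click(run, inputs=[input_audio], outputs=status)

demo.launch()  # no auth= argument; OAuth comes from the Space metadata

Whether the injected token can actually load a gated model depends on the scopes requested via hf_oauth_scopes in the same metadata block. Per-request injection also sidesteps a pitfall of the module-level HUGGINGFACE_ACCESS_TOKEN above: a single global token is shared by all concurrent users, so each request would run with whichever token was stored last.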