qqwjq1981 committed on
Commit
6e5afce
·
verified ·
1 Parent(s): 54c4dc6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -25
app.py CHANGED
@@ -128,32 +128,33 @@ def handle_feedback(feedback):
128
 
129
  def segment_background_audio(audio_path, output_path="background_segments.wav"):
130
  # Step 2: Initialize pyannote voice activity detection pipeline (you need Hugging Face token)
131
- pipeline = Pipeline.from_pretrained(
132
- "pyannote/voice-activity-detection",
133
- use_auth_token=hf_api_key
134
- )
135
- # Step 3: Run VAD to get speech segments
136
- vad_result = pipeline(audio_path)
137
- print(f"Detected speech segments: {vad_result}")
138
-
139
- # Step 4: Load full audio and subtract speech segments
140
- full_audio = AudioSegment.from_wav(audio_path)
141
- background_audio = AudioSegment.silent(duration=len(full_audio))
142
-
143
- for segment in vad_result.itersegments():
144
- start_ms = int(segment.start * 1000)
145
- end_ms = int(segment.end * 1000)
146
- # Remove speech by muting that portion
147
- background_audio = background_audio.overlay(AudioSegment.silent(duration=end_ms - start_ms), position=start_ms)
148
-
149
- # Step 5: Subtract background_audio from full_audio
150
- result_audio = full_audio.overlay(background_audio)
151
-
152
- # Step 6: Export non-speech segments
153
- result_audio.export(output_path, format="wav")
154
- print(f"Saved non-speech (background) audio to: {output_path}")
155
-
156
  return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
  def transcribe_video_with_speakers(video_path):
159
  # Extract audio from video
 
128
 
129
  def segment_background_audio(audio_path, output_path="background_segments.wav"):
130
  # Step 2: Initialize pyannote voice activity detection pipeline (you need Hugging Face token)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  return True
132
+
133
+ # pipeline = Pipeline.from_pretrained(
134
+ # "pyannote/voice-activity-detection",
135
+ # use_auth_token=hf_api_key
136
+ # )
137
+ # # Step 3: Run VAD to get speech segments
138
+ # vad_result = pipeline(audio_path)
139
+ # print(f"Detected speech segments: {vad_result}")
140
+
141
+ # # Step 4: Load full audio and subtract speech segments
142
+ # full_audio = AudioSegment.from_wav(audio_path)
143
+ # background_audio = AudioSegment.silent(duration=len(full_audio))
144
+
145
+ # for segment in vad_result.itersegments():
146
+ # start_ms = int(segment.start * 1000)
147
+ # end_ms = int(segment.end * 1000)
148
+ # # Remove speech by muting that portion
149
+ # background_audio = background_audio.overlay(AudioSegment.silent(duration=end_ms - start_ms), position=start_ms)
150
+
151
+ # # Step 5: Subtract background_audio from full_audio
152
+ # result_audio = full_audio.overlay(background_audio)
153
+
154
+ # # Step 6: Export non-speech segments
155
+ # result_audio.export(output_path, format="wav")
156
+ # print(f"Saved non-speech (background) audio to: {output_path}")
157
+
158
 
159
  def transcribe_video_with_speakers(video_path):
160
  # Extract audio from video