bunyaminergen committed on
Commit
b6462d6
·
1 Parent(s): 3c2ab21
Files changed (1) hide show
  1. app.py +37 -18
app.py CHANGED
@@ -1,7 +1,9 @@
1
  # Standard library imports
2
  import os
 
3
 
4
  # Related third-party imports
 
5
  from omegaconf import OmegaConf
6
  from nemo.collections.asr.models.msdd_models import NeuralDiarizer
7
 
@@ -17,7 +19,7 @@ from src.audio.analysis import WordSpeakerMapper, SentenceSpeakerMapper, Audio
17
  from src.audio.processing import AudioProcessor, Transcriber, PunctuationRestorer
18
  from src.text.utils import Annotator
19
  from src.text.llm import LLMOrchestrator, LLMResultHandler
20
- from src.utils.utils import Cleaner, Watcher
21
  from src.db.manager import Database
22
 
23
 
@@ -74,7 +76,7 @@ async def main(audio_file_path: str):
74
  # Step 1: Detect Dialogue
75
  has_dialogue = dialogue_detector.process(audio_file_path)
76
  if not has_dialogue:
77
- return {"error": "No dialogue detected in the audio file."}
78
 
79
  # Step 2: Speech Enhancement
80
  audio_path = enhancer.enhance_audio(
@@ -197,7 +199,7 @@ async def main(audio_file_path: str):
197
  final_output = annotator.finalize()
198
 
199
  # Step 16: Total Silence Calculation
200
- stats = SilenceStats.from_segments(final_output['ssm'])
201
  t_std = stats.threshold_std(factor=0.99)
202
  final_output["silence"] = t_std
203
 
@@ -272,24 +274,41 @@ async def main(audio_file_path: str):
272
 
273
  return final_output
274
 
275
- async def process(path: str):
276
- """
277
- Asynchronous callback function that is triggered when a new audio file is detected.
278
 
279
- Parameters
280
- ----------
281
- path : str
282
- The path to the newly created audio file.
283
 
284
- Returns
285
- -------
286
- None
287
  """
288
- print(f"Processing new audio file: {path}")
289
- await main(path)
 
 
290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
  if __name__ == "__main__":
293
- directory_to_watch = ".data/input"
294
- # Watcher.start_watcher(directory_to_watch, process)
295
- pass
 
1
  # Standard library imports
2
  import os
3
+ import asyncio
4
 
5
  # Related third-party imports
6
+ import gradio as gr
7
  from omegaconf import OmegaConf
8
  from nemo.collections.asr.models.msdd_models import NeuralDiarizer
9
 
 
19
  from src.audio.processing import AudioProcessor, Transcriber, PunctuationRestorer
20
  from src.text.utils import Annotator
21
  from src.text.llm import LLMOrchestrator, LLMResultHandler
22
+ from src.utils.utils import Cleaner
23
  from src.db.manager import Database
24
 
25
 
 
76
  # Step 1: Detect Dialogue
77
  has_dialogue = dialogue_detector.process(audio_file_path)
78
  if not has_dialogue:
79
+ return {"error": "No dialogue detected in this audio."}
80
 
81
  # Step 2: Speech Enhancement
82
  audio_path = enhancer.enhance_audio(
 
199
  final_output = annotator.finalize()
200
 
201
  # Step 16: Total Silence Calculation
202
+ stats = SilenceStats.from_segments(final_output["ssm"])
203
  t_std = stats.threshold_std(factor=0.99)
204
  final_output["silence"] = t_std
205
 
 
274
 
275
  return final_output
276
 
 
 
 
277
 
278
def process_audio(uploaded_audio):
    """
    Synchronous wrapper that lets Gradio drive the async `main` pipeline.

    Parameters
    ----------
    uploaded_audio : str or None
        Value delivered by the audio input component. The component is
        declared with ``type="filepath"``, so this is passed to `main`
        unchanged as the audio file path; no copy or temporary file is
        created here.

    Returns
    -------
    dict
        The value returned by `main` on success, or a dictionary of the form
        ``{"error": <message>}`` when no audio was supplied or the pipeline
        raised.
    """
    # Function-scope import so this block does not depend on a file-level
    # `import logging` being present.
    import logging

    # Treat an empty path the same as a missing upload instead of pushing it
    # into the pipeline and surfacing an unrelated downstream error.
    if not uploaded_audio:
        return {"error": "No audio provided."}

    try:
        # `main` is a coroutine function; run it to completion on a fresh
        # event loop and hand its result straight back to the caller.
        return asyncio.run(main(uploaded_audio))
    except Exception as e:
        # UI boundary: keep the full traceback in the server log, and return
        # only the message so the JSON output can display it.
        logging.getLogger(__name__).exception(
            "Audio pipeline failed for %s", uploaded_audio
        )
        return {"error": str(e)}
296
+
297
+
298
# Gradio front end: one audio input, one JSON panel for the pipeline result,
# and a button that feeds the uploaded file path to `process_audio`.
# NOTE(review): the nesting below was reconstructed from a flattened diff view;
# confirm that the button and its click handler sit directly under `gr.Blocks`
# rather than inside the row.
with gr.Blocks() as demo:
    gr.Markdown("## Diarization, Transcription & Analysis")

    with gr.Row():
        # type="filepath" makes the callback receive a path string.
        audio_input = gr.Audio(label="Upload your audio", type="filepath")
        output_display = gr.JSON(label="Final Output")

    submit_btn = gr.Button(value="Process")
    submit_btn.click(process_audio, inputs=audio_input, outputs=output_display)


if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_port=7860, server_name="0.0.0.0")