Commit b6462d6 (parent: 3c2ab21): Initial
app.py CHANGED

@@ -1,7 +1,9 @@
 # Standard library imports
 import os
+import asyncio
 
 # Related third-party imports
+import gradio as gr
 from omegaconf import OmegaConf
 from nemo.collections.asr.models.msdd_models import NeuralDiarizer
 
@@ -17,7 +19,7 @@ from src.audio.analysis import WordSpeakerMapper, SentenceSpeakerMapper, Audio
 from src.audio.processing import AudioProcessor, Transcriber, PunctuationRestorer
 from src.text.utils import Annotator
 from src.text.llm import LLMOrchestrator, LLMResultHandler
-from src.utils.utils import Cleaner
+from src.utils.utils import Cleaner
 from src.db.manager import Database
 
 
@@ -74,7 +76,7 @@ async def main(audio_file_path: str):
     # Step 1: Detect Dialogue
     has_dialogue = dialogue_detector.process(audio_file_path)
     if not has_dialogue:
-        return {"error": "No dialogue detected in
+        return {"error": "No dialogue detected in this audio."}
 
     # Step 2: Speech Enhancement
     audio_path = enhancer.enhance_audio(
@@ -197,7 +199,7 @@ async def main(audio_file_path: str):
     final_output = annotator.finalize()
 
     # Step 16: Total Silence Calculation
-    stats = SilenceStats.from_segments(final_output[
+    stats = SilenceStats.from_segments(final_output["ssm"])
     t_std = stats.threshold_std(factor=0.99)
     final_output["silence"] = t_std
 
@@ -272,24 +274,41 @@ async def main(audio_file_path: str):
 
     return final_output
 
-async def process(path: str):
-    """
-    Asynchronous callback function that is triggered when a new audio file is detected.
 
-
-
-
-        The path to the newly created audio file.
+def process_audio(uploaded_audio):
+    """
+    Synchronous wrapper for Gradio.
 
-
-
-
+    1. Save the incoming audio to a temporary file.
+    2. Run the `main` pipeline (async) via `asyncio.run`.
+    3. Return the result so Gradio can display it.
     """
-
-
+    if uploaded_audio is None:
+        return {"error": "No audio provided."}
+
+    in_file_path = uploaded_audio
 
+    try:
+        result = asyncio.run(main(in_file_path))
+        return result
+    except Exception as e:
+        return {"error": str(e)}
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("## Diarization, Transcription & Analysis")
+
+    with gr.Row():
+        audio_input = gr.Audio(type="filepath", label="Upload your audio")
+        output_display = gr.JSON(label="Final Output")
+
+    submit_btn = gr.Button("Process")
+
+    submit_btn.click(
+        fn=process_audio,
+        inputs=audio_input,
+        outputs=output_display
+    )
 
 if __name__ == "__main__":
-
-    # Watcher.start_watcher(directory_to_watch, process)
-    pass
+    demo.launch(server_name="0.0.0.0", server_port=7860)
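
For reference (not part of this commit): a minimal standalone sketch of the pattern the change introduces, namely a synchronous Gradio callback that drives an async pipeline with asyncio.run(). The `fake_pipeline` coroutine is a hypothetical stand-in for app.py's real `main(audio_file_path)`, so this snippet runs without NeMo, OmegaConf, or the project's src package.

import asyncio
import os

import gradio as gr


async def fake_pipeline(audio_file_path: str) -> dict:
    # Hypothetical stand-in for app.py's async main(audio_file_path).
    await asyncio.sleep(0.1)
    return {"file": os.path.basename(audio_file_path), "status": "processed"}


def process_audio(uploaded_audio):
    # With gr.Audio(type="filepath"), Gradio hands the callback a path to a
    # temporary copy of the upload, so no manual file handling is needed here.
    if uploaded_audio is None:
        return {"error": "No audio provided."}
    try:
        # asyncio.run() starts a fresh event loop for each request; this works
        # because the synchronous callback itself runs outside any event loop.
        return asyncio.run(fake_pipeline(uploaded_audio))
    except Exception as e:
        return {"error": str(e)}


with gr.Blocks() as demo:
    gr.Markdown("## Wrapper-pattern demo")
    audio_input = gr.Audio(type="filepath", label="Upload your audio")
    output_display = gr.JSON(label="Final Output")
    gr.Button("Process").click(fn=process_audio, inputs=audio_input, outputs=output_display)

if __name__ == "__main__":
    # 0.0.0.0:7860 matches the host/port a Hugging Face Space expects.
    demo.launch(server_name="0.0.0.0", server_port=7860)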