Commit b6462d6 (parent: 3c2ab21): Initial
app.py CHANGED

@@ -1,7 +1,9 @@
 # Standard library imports
 import os
+import asyncio
 
 # Related third-party imports
+import gradio as gr
 from omegaconf import OmegaConf
 from nemo.collections.asr.models.msdd_models import NeuralDiarizer
 
@@ -17,7 +19,7 @@ from src.audio.analysis import WordSpeakerMapper, SentenceSpeakerMapper, Audio
 from src.audio.processing import AudioProcessor, Transcriber, PunctuationRestorer
 from src.text.utils import Annotator
 from src.text.llm import LLMOrchestrator, LLMResultHandler
-from src.utils.utils import Cleaner
+from src.utils.utils import Cleaner
 from src.db.manager import Database
 
 
@@ -74,7 +76,7 @@ async def main(audio_file_path: str):
     # Step 1: Detect Dialogue
     has_dialogue = dialogue_detector.process(audio_file_path)
     if not has_dialogue:
-        return {"error": "No dialogue detected in
+        return {"error": "No dialogue detected in this audio."}
 
     # Step 2: Speech Enhancement
     audio_path = enhancer.enhance_audio(
@@ -197,7 +199,7 @@ async def main(audio_file_path: str):
     final_output = annotator.finalize()
 
     # Step 16: Total Silence Calculation
-    stats = SilenceStats.from_segments(final_output[
+    stats = SilenceStats.from_segments(final_output["ssm"])
     t_std = stats.threshold_std(factor=0.99)
     final_output["silence"] = t_std
 
@@ -272,24 +274,41 @@ async def main(audio_file_path: str):
 
     return final_output
 
-async def process(path: str):
-    """
-    Asynchronous callback function that is triggered when a new audio file is detected.
 
-
-
-
-        The path to the newly created audio file.
+def process_audio(uploaded_audio):
+    """
+    Synchronous wrapper for Gradio.
 
-
-
-
+    1. Save the incoming audio to a temporary file.
+    2. Run the `main` pipeline (async) via `asyncio.run`.
+    3. Return the result so Gradio can display it.
     """
-
-
+    if uploaded_audio is None:
+        return {"error": "No audio provided."}
+
+    in_file_path = uploaded_audio
 
+    try:
+        result = asyncio.run(main(in_file_path))
+        return result
+    except Exception as e:
+        return {"error": str(e)}
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("## Diarization, Transcription & Analysis")
+
+    with gr.Row():
+        audio_input = gr.Audio(type="filepath", label="Upload your audio")
+        output_display = gr.JSON(label="Final Output")
+
+    submit_btn = gr.Button("Process")
+
+    submit_btn.click(
+        fn=process_audio,
+        inputs=audio_input,
+        outputs=output_display
+    )
 
 if __name__ == "__main__":
-
-    # Watcher.start_watcher(directory_to_watch, process)
-    pass
+    demo.launch(server_name="0.0.0.0", server_port=7860)
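
For reference (not part of this commit): a minimal standalone sketch of the pattern the change introduces, namely a synchronous Gradio callback that drives an async pipeline with asyncio.run(). The `fake_pipeline` coroutine is a hypothetical stand-in for app.py's real `main(audio_file_path)`, so this snippet runs without NeMo, OmegaConf, or the project's src package.

import asyncio
import os

import gradio as gr


async def fake_pipeline(audio_file_path: str) -> dict:
    # Hypothetical stand-in for app.py's async main(audio_file_path).
    await asyncio.sleep(0.1)
    return {"file": os.path.basename(audio_file_path), "status": "processed"}


def process_audio(uploaded_audio):
    # With gr.Audio(type="filepath"), Gradio hands the callback a path to a
    # temporary copy of the upload, so no manual file handling is needed here.
    if uploaded_audio is None:
        return {"error": "No audio provided."}
    try:
        # asyncio.run() starts a fresh event loop for each request; this works
        # because the synchronous callback itself runs outside any event loop.
        return asyncio.run(fake_pipeline(uploaded_audio))
    except Exception as e:
        return {"error": str(e)}


with gr.Blocks() as demo:
    gr.Markdown("## Wrapper-pattern demo")
    audio_input = gr.Audio(type="filepath", label="Upload your audio")
    output_display = gr.JSON(label="Final Output")
    gr.Button("Process").click(fn=process_audio, inputs=audio_input, outputs=output_display)

if __name__ == "__main__":
    # 0.0.0.0:7860 matches the host/port a Hugging Face Space expects.
    demo.launch(server_name="0.0.0.0", server_port=7860)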