whisper-tg

Running

App Files Files Community

muhtasham committed on Mar 23

Commit

020bd94

1 Parent(s): 0844bdd

WIP

Browse files

Files changed (2) hide show

app.py +138 -2
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -3,9 +3,15 @@ import subprocess
 import datetime
 import tempfile
 import requests
 from loguru import logger
-API_URL = "https://skdpcqcdd929o4k3.us-east-1.aws.endpoints.huggingface.cloud"
 headers = {
     "Accept": "application/json",
     "Content-Type": "audio/flac"
@@ -109,6 +115,114 @@ def check_ffmpeg():
 # Initialize ffmpeg check
 check_ffmpeg()
 def transcribe(inputs, return_timestamps, generate_subs):
     """Transcribe audio input using Whisper model via Hugging Face Inference API.
@@ -194,6 +308,25 @@ def transcribe(inputs, return_timestamps, generate_subs):
 demo = gr.Blocks(theme=gr.themes.Ocean())
 # Define interfaces first
 mf_transcribe = gr.Interface(
     fn=transcribe,
     inputs=[
@@ -234,7 +367,10 @@ file_transcribe = gr.Interface(
 # Then set up the demo with the interfaces
 with demo:
-    gr.TabbedInterface([file_transcribe, mf_transcribe], ["Audio file", "Microphone"])
 logger.info("Starting Gradio interface")
 demo.queue().launch(ssr_mode=False)

 import datetime
 import tempfile
 import requests
+import os
+import time
 from loguru import logger
+# Load API keys from environment variables
+API_URL = os.getenv("API_URL")
+SIEVE_API_KEY = os.getenv("SIEVE_API_KEY")
+SIEVE_API_URL = "https://mango.sievedata.com/v2"
 headers = {
     "Accept": "application/json",
     "Content-Type": "audio/flac"
 # Initialize ffmpeg check
 check_ffmpeg()
+def download_youtube_audio(url, *, poll_interval=2, max_wait=600, request_timeout=30):
+    """Download audio from YouTube using Sieve API.
+    Pushes a ``sieve/youtube-downloader`` job, polls the job until it reports
+    ``completed`` or ``failed``, then streams the resulting MP3 into a
+    temporary file. The caller owns the returned file and must delete it.
+    Args:
+        url (str): YouTube video URL
+        poll_interval (float): Seconds to sleep between job status polls
+        max_wait (float): Maximum seconds to wait for the job before giving up
+        request_timeout (float): Timeout in seconds passed to every HTTP request
+    Returns:
+        str: Path to downloaded audio file
+    Raises:
+        gr.Error: If the URL is empty, the download fails or times out, or the
+            API key is not set
+    """
+    if not SIEVE_API_KEY:
+        raise gr.Error("SIEVE_API_KEY environment variable is not set")
+    if not url or not str(url).strip():
+        raise gr.Error("YouTube URL is empty")
+    auth_headers = {"X-API-Key": SIEVE_API_KEY}
+    # Set only once the temp file exists, so the error path knows what to remove
+    output_path = None
+    try:
+        # Prepare the request to Sieve API
+        payload = {
+            "function": "sieve/youtube-downloader",
+            "inputs": {
+                "url": url,
+                "download_type": "audio",
+                "audio_format": "mp3",
+                "include_metadata": False,
+                "include_subtitles": False
+            }
+        }
+        # Send request to Sieve API
+        response = requests.post(
+            f"{SIEVE_API_URL}/push",
+            headers={**auth_headers, "Content-Type": "application/json"},
+            json=payload,
+            timeout=request_timeout
+        )
+        response.raise_for_status()
+        job_id = response.json().get("id")
+        if not job_id:
+            raise gr.Error("Failed to get job ID from Sieve API")
+        # Poll for job completion, bounded by a deadline so a stuck job
+        # cannot block the request forever
+        deadline = time.monotonic() + max_wait
+        while True:
+            job_response = requests.get(
+                f"{SIEVE_API_URL}/jobs/{job_id}",
+                headers=auth_headers,
+                timeout=request_timeout
+            )
+            job_response.raise_for_status()
+            job_data = job_response.json()
+            status = job_data.get("status")
+            if status == "completed":
+                audio_url = job_data.get("output_0", {}).get("url")
+                if not audio_url:
+                    raise gr.Error("No audio URL in job response")
+                # Create the temp file only now that there is something to
+                # write; delete=False because the caller reads it by path later
+                temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
+                output_path = temp_file.name
+                # Stream to disk in chunks instead of buffering the whole file
+                with temp_file, requests.get(
+                    audio_url, stream=True, timeout=request_timeout
+                ) as audio_response:
+                    audio_response.raise_for_status()
+                    for chunk in audio_response.iter_content(chunk_size=65536):
+                        temp_file.write(chunk)
+                return output_path
+            if status == "failed":
+                raise gr.Error(f"Job failed: {job_data.get('error', 'Unknown error')}")
+            if time.monotonic() >= deadline:
+                raise gr.Error(
+                    f"Timed out after {max_wait} seconds waiting for Sieve job {job_id}"
+                )
+            # Wait before polling again
+            time.sleep(poll_interval)
+    except Exception as e:
+        # Never leave a partially written temp file behind on failure
+        if output_path is not None:
+            try:
+                os.unlink(output_path)
+            except OSError as cleanup_error:
+                logger.warning(f"Failed to delete temporary file: {str(cleanup_error)}")
+        # Errors already phrased for the user pass through without re-wrapping
+        if isinstance(e, gr.Error):
+            raise
+        logger.exception(f"Error downloading YouTube audio: {str(e)}")
+        raise gr.Error(f"Failed to download YouTube audio: {str(e)}") from e
+def transcribe_youtube(url, return_timestamps, generate_subs):
+    """Transcribe audio from YouTube video.
+    Downloads the audio with download_youtube_audio, passes the file path to
+    transcribe, and always removes the downloaded file afterwards.
+    Args:
+        url (str): YouTube video URL
+        return_timestamps (bool): Whether to include timestamps in output
+        generate_subs (bool): Whether to generate SRT subtitles
+    Returns:
+        The value returned by transcribe for the downloaded audio, unchanged
+    Raises:
+        gr.Error: If the download or the transcription fails
+    """
+    try:
+        # Download audio from YouTube
+        audio_path = download_youtube_audio(url)
+        try:
+            # Transcribe the downloaded audio
+            return transcribe(audio_path, return_timestamps, generate_subs)
+        finally:
+            # Clean up the temporary file even when transcription fails;
+            # best-effort, so a failed delete is only logged
+            try:
+                os.unlink(audio_path)
+            except Exception as e:
+                logger.warning(f"Failed to delete temporary file: {str(e)}")
+    except gr.Error:
+        # Already a user-facing error; do not stack another prefix on it
+        raise
+    except Exception as e:
+        logger.exception(f"Error in YouTube transcription: {str(e)}")
+        raise gr.Error(f"Failed to transcribe YouTube video: {str(e)}") from e
 def transcribe(inputs, return_timestamps, generate_subs):
     """Transcribe audio input using Whisper model via Hugging Face Inference API.
 demo = gr.Blocks(theme=gr.themes.Ocean())
 # Define interfaces first
+# Interface for the YouTube tab: the three inputs map positionally onto
+# transcribe_youtube(url, return_timestamps, generate_subs).
+# NOTE(review): only two output components are declared here, and
+# transcribe_youtube returns whatever transcribe returns - confirm that it
+# yields exactly two values (transcription JSON, SRT file).
+youtube_transcribe = gr.Interface(
+    fn=transcribe_youtube,
+    inputs=[
+        gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=..."),
+        gr.Checkbox(label="Include timestamps", value=True),
+        gr.Checkbox(label="Generate subtitles", value=True),
+    ],
+    outputs=[
+        gr.JSON(label="Transcription", open=True),
+        gr.File(label="Subtitles (SRT)", visible=True),
+    ],
+    title="Tajik Speech Transcription",
+    description=(
+        "Transcribe Tajik language audio from YouTube videos. "
+        "Paste a YouTube URL and get accurate transcription with optional timestamps "
+        "and subtitles."
+    )
+)
 mf_transcribe = gr.Interface(
     fn=transcribe,
     inputs=[
 # Then set up the demo with the interfaces
 with demo:
+    gr.TabbedInterface(
+        [youtube_transcribe, file_transcribe, mf_transcribe],
+        ["YouTube", "Audio file", "Microphone"]
+    )
 logger.info("Starting Gradio interface")
 demo.queue().launch(ssr_mode=False)

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 loguru
-gradio

 loguru
+gradio
+requests