Spaces:

OmarHusseinZaki
/

vid-to-notes-backend

Running

App Files Files Community

OmarHusseinZaki committed 18 days ago

Commit

03986eb

1 Parent(s): 163b772

add transcribing audio

Browse files

Files changed (1) hide show

main.py +31 -2

main.py CHANGED Viewed

@@ -73,7 +73,7 @@ class ProcessRequest(BaseModel):
 def download_audio_bytes(youtube_url: str) -> bytes:
     # Downloads the best audio-only format from a YouTube URL using yt-dlp and returns the raw audio data as bytes.
     print(f"Attempting to download audio for: {youtube_url}")
     ydl_opts = {
         'format': 'bestaudio/best', # Prioritize best audio-only, fallback to best audio in general
@@ -152,4 +152,33 @@ def download_audio_bytes(youtube_url: str) -> bytes:
     except Exception as e:
         print(f"ERROR: Unexpected error during audio download: {e}")
         # Log the full traceback here in a real app: import traceback; traceback.print_exc()
-        raise HTTPException(status_code=500, detail=f"An unexpected error occurred during audio processing: {e}")

 def download_audio_bytes(youtube_url: str) -> bytes:
     # Downloads the best audio-only format from a YouTube URL using yt-dlp and returns the raw audio data as bytes.
     print(f"Attempting to download audio for: {youtube_url}")
     ydl_opts = {
         'format': 'bestaudio/best', # Prioritize best audio-only, fallback to best audio in general
     except Exception as e:
         print(f"ERROR: Unexpected error during audio download: {e}")
         # Log the full traceback here in a real app: import traceback; traceback.print_exc()
+        raise HTTPException(status_code=500, detail=f"An unexpected error occurred during audio processing: {e}")
+def transcribe_audio(audio_bytes: bytes) -> str:
+    """
+    Sends audio bytes to the Hugging Face ASR (Automatic Speech Recognition) API.
+    """
+    if not hf_inference:
+        raise HTTPException(status_code=503, detail="Transcription service client not initialized.")
+    if not audio_bytes:
+        raise ValueError("Cannot transcribe empty audio data.")
+    print(f"Transcribing {len(audio_bytes) / (1024*1024):.2f} MB using {ASR_MODEL}...")
+    try:
+        # Use the InferenceClient for ASR task
+        # It expects the raw audio bytes
+        transcript_result = hf_inference.automatic_speech_recognition(
+            audio=audio_bytes,
+            model=ASR_MODEL
+        )
+        transcript = transcript_result.get('text', '').strip() # Get text, default to '', remove whitespace
+        if not transcript:
+            print("Warning: Transcription result was empty.")
+            # Decide: return empty string or raise error? Let's return empty for now.
+        print("Transcription successful.")
+        return transcript
+    except Exception as e:
+        print(f"ERROR: Hugging Face ASR API call failed: {e}")
+        # Check for specific HF error types if possible
+        raise HTTPException(status_code=503, detail=f"Transcription service failed: {e}") # 503 Service Unavailable