OmarHusseinZaki committed on
Commit
03986eb
·
1 Parent(s): 163b772

add transcribing audio

Browse files
Files changed (1) hide show
  1. main.py +31 -2
main.py CHANGED
@@ -73,7 +73,7 @@ class ProcessRequest(BaseModel):
73
  def download_audio_bytes(youtube_url: str) -> bytes:
74
 
75
  # Downloads the best audio-only format from a YouTube URL using yt-dlp and returns the raw audio data as bytes.
76
-
77
  print(f"Attempting to download audio for: {youtube_url}")
78
  ydl_opts = {
79
  'format': 'bestaudio/best', # Prioritize best audio-only, fallback to best audio in general
@@ -152,4 +152,33 @@ def download_audio_bytes(youtube_url: str) -> bytes:
152
  except Exception as e:
153
  print(f"ERROR: Unexpected error during audio download: {e}")
154
  # Log the full traceback here in a real app: import traceback; traceback.print_exc()
155
- raise HTTPException(status_code=500, detail=f"An unexpected error occurred during audio processing: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  def download_audio_bytes(youtube_url: str) -> bytes:
74
 
75
  # Downloads the best audio-only format from a YouTube URL using yt-dlp and returns the raw audio data as bytes.
76
+
77
  print(f"Attempting to download audio for: {youtube_url}")
78
  ydl_opts = {
79
  'format': 'bestaudio/best', # Prioritize best audio-only, fallback to best audio in general
 
152
  except Exception as e:
153
  print(f"ERROR: Unexpected error during audio download: {e}")
154
  # Log the full traceback here in a real app: import traceback; traceback.print_exc()
155
+ raise HTTPException(status_code=500, detail=f"An unexpected error occurred during audio processing: {e}")
156
+
157
+
158
def transcribe_audio(audio_bytes: bytes) -> str:
    """
    Sends audio bytes to the Hugging Face ASR (Automatic Speech Recognition) API.

    Args:
        audio_bytes: Raw audio data to transcribe.

    Returns:
        The transcript text with surrounding whitespace removed. An empty
        string is returned (with a warning printed) when the service
        produced no text.

    Raises:
        HTTPException: 503 when the inference client is not initialized or
            when the ASR API call itself fails.
        ValueError: When ``audio_bytes`` is empty.
    """
    if not hf_inference:
        raise HTTPException(status_code=503, detail="Transcription service client not initialized.")
    if not audio_bytes:
        raise ValueError("Cannot transcribe empty audio data.")

    print(f"Transcribing {len(audio_bytes) / (1024*1024):.2f} MB using {ASR_MODEL}...")

    # Keep the try body down to the remote call so that only genuine service
    # failures are reported as 503.
    try:
        # Use the InferenceClient for ASR task; it expects the raw audio bytes.
        transcript_result = hf_inference.automatic_speech_recognition(
            audio=audio_bytes,
            model=ASR_MODEL,
        )
    except Exception as e:
        print(f"ERROR: Hugging Face ASR API call failed: {e}")
        # Check for specific HF error types if possible
        raise HTTPException(status_code=503, detail=f"Transcription service failed: {e}") from e  # 503 Service Unavailable

    # NOTE(review): the result shape appears to depend on the installed
    # huggingface_hub version (plain string, object with a ``text`` attribute,
    # or dict-like) - confirm against the pinned version. Calling ``.get``
    # unconditionally breaks on the non-dict shapes, so accept all three.
    if isinstance(transcript_result, str):
        text = transcript_result
    else:
        text = getattr(transcript_result, 'text', None)
        if text is None and isinstance(transcript_result, dict):
            text = transcript_result.get('text')

    # Treat a missing or non-string value as "no transcript" instead of crashing.
    transcript = text.strip() if isinstance(text, str) else ''

    if not transcript:
        print("Warning: Transcription result was empty.")
        # Decide: return empty string or raise error? Return empty for now.
        return transcript

    print("Transcription successful.")
    return transcript