Spaces:

OmarHusseinZaki
/

vid-to-notes-backend

Running

App Files Files Community

OmarHusseinZaki committed 17 days ago

Commit

fb1ef83

1 Parent(s): c1ef5cd

adjust indentation again

Browse files

Files changed (1) hide show

main.py +50 -51

main.py CHANGED Viewed

@@ -71,10 +71,9 @@ class ProcessRequest(BaseModel):
     youtubeUrl: str # Expecting a field named "youtubeUrl" which is a string
 def download_audio_bytes(youtube_url: str) -> bytes:
-    """
-    Downloads the best audio-only format from a YouTube URL using yt-dlp
-    and returns the raw audio data as bytes.
-    """
     print(f"Attempting to download audio for: {youtube_url}")
     ydl_opts = {
         'format': 'bestaudio/best', # Prioritize best audio-only, fallback to best audio in general
@@ -96,53 +95,53 @@ def download_audio_bytes(youtube_url: str) -> bytes:
         # Use yt-dlp's ability to write to a file-like object
         ydl_opts['outtmpl'] = '-' # Special template meaning stdout
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                # Trick: Use a hook to capture stdout to our buffer instead of printing
-                # This is complex; simpler method below is preferred if ffmpeg isn't used
-                # Or, a better way: get the direct audio URL first
-                # --- Simpler & Often Better Approach: Get URL, then download with requests ---
-                info = ydl.extract_info(youtube_url, download=False) # Get info without downloading yet
-                best_audio_format = None
-                for f in info.get('formats', []):
-                    # Look for formats processed by FFmpegExtractAudio or good audio codecs
-                    if f.get('acodec') != 'none' and f.get('vcodec') == 'none': # Audio-only
-                        if f.get('ext') in ['mp3', 'opus', 'm4a', 'webm']: # Prefer known good audio containers/codecs
-                            best_audio_format = f
-                            break # Take the first good one
-                # Fallback if no ideal format found
-                if not best_audio_format:
-                    for f in info.get('formats', []):
-                        if f.get('acodec') != 'none':
-                            best_audio_format = f
-                            break # Take first available audio
-                if not best_audio_format or 'url' not in best_audio_format:
-                    print("Could not find suitable audio stream URL via yt-dlp info. Direct download might fail or require ffmpeg.")
-                    # If you *don't* have ffmpeg in the Dockerfile, the postprocessor might fail here
-                    # Let's try the download anyway, it might work for some native formats
-                    # This path is less reliable without guaranteed ffmpeg.
-                    error_info = ydl.download([youtube_url]) # Try downloading directly (might need ffmpeg)
-                    # This part is complex - capturing output might need more work if direct URL fetch failed.
-                    # Let's raise an error if we couldn't get a direct URL for now.
-                    raise yt_dlp.utils.DownloadError("Could not extract a direct audio URL and ffmpeg may not be available.")
-                audio_url = best_audio_format['url']
-                format_note = best_audio_format.get('format_note', best_audio_format.get('ext', 'N/A'))
-                print(f"Found audio format: {format_note}. Downloading directly from URL...")
-                # Download the audio URL content into the buffer
-                with requests.get(audio_url, stream=True) as r:
-                    r.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
-                    for chunk in r.iter_content(chunk_size=8192):
-                        buffer.write(chunk)
-                audio_bytes = buffer.getvalue()
-                print(f"Audio downloaded successfully: {len(audio_bytes) / (1024*1024):.2f} MB")
-                if not audio_bytes:
-                    raise ValueError("Downloaded audio data is empty.")
-                return audio_bytes
     except yt_dlp.utils.DownloadError as e:
         print(f"ERROR: yt-dlp failed to download or process audio: {e}")

     youtubeUrl: str # Expecting a field named "youtubeUrl" which is a string
 def download_audio_bytes(youtube_url: str) -> bytes:
+    # Downloads the best audio-only format from a YouTube URL using yt-dlp and returns the raw audio data as bytes.
     print(f"Attempting to download audio for: {youtube_url}")
     ydl_opts = {
         'format': 'bestaudio/best', # Prioritize best audio-only, fallback to best audio in general
         # Use yt-dlp's ability to write to a file-like object
         ydl_opts['outtmpl'] = '-' # Special template meaning stdout
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+             # Trick: Use a hook to capture stdout to our buffer instead of printing
+             # This is complex; simpler method below is preferred if ffmpeg isn't used
+             # Or, a better way: get the direct audio URL first
+             # --- Simpler & Often Better Approach: Get URL, then download with requests ---
+             info = ydl.extract_info(youtube_url, download=False) # Get info without downloading yet
+             best_audio_format = None
+             for f in info.get('formats', []):
+                 # Look for formats processed by FFmpegExtractAudio or good audio codecs
+                 if f.get('acodec') != 'none' and f.get('vcodec') == 'none': # Audio-only
+                     if f.get('ext') in ['mp3', 'opus', 'm4a', 'webm']: # Prefer known good audio containers/codecs
+                         best_audio_format = f
+                         break # Take the first good one
+             # Fallback if no ideal format found
+             if not best_audio_format:
+                 for f in info.get('formats', []):
+                     if f.get('acodec') != 'none':
+                         best_audio_format = f
+                         break # Take first available audio
+             if not best_audio_format or 'url' not in best_audio_format:
+                 print("Could not find suitable audio stream URL via yt-dlp info. Direct download might fail or require ffmpeg.")
+                 # If you *don't* have ffmpeg in the Dockerfile, the postprocessor might fail here
+                 # Let's try the download anyway, it might work for some native formats
+                 # This path is less reliable without guaranteed ffmpeg.
+                 error_info = ydl.download([youtube_url]) # Try downloading directly (might need ffmpeg)
+                 # This part is complex - capturing output might need more work if direct URL fetch failed.
+                 # Let's raise an error if we couldn't get a direct URL for now.
+                 raise yt_dlp.utils.DownloadError("Could not extract a direct audio URL and ffmpeg may not be available.")
+             audio_url = best_audio_format['url']
+             format_note = best_audio_format.get('format_note', best_audio_format.get('ext', 'N/A'))
+             print(f"Found audio format: {format_note}. Downloading directly from URL...")
+             # Download the audio URL content into the buffer
+             with requests.get(audio_url, stream=True) as r:
+                 r.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
+                 for chunk in r.iter_content(chunk_size=8192):
+                     buffer.write(chunk)
+             audio_bytes = buffer.getvalue()
+             print(f"Audio downloaded successfully: {len(audio_bytes) / (1024*1024):.2f} MB")
+             if not audio_bytes:
+                 raise ValueError("Downloaded audio data is empty.")
+             return audio_bytes
     except yt_dlp.utils.DownloadError as e:
         print(f"ERROR: yt-dlp failed to download or process audio: {e}")