Spaces:

OmarHusseinZaki
/

vid-to-notes-backend

Running

App Files Community

OmarHusseinZaki committed on 17 days ago

Commit

c1ef5cd

1 Parent(s): 3faf1d8

add indentation

Browse files

Files changed (1) hide show

main.py +82 -82

main.py CHANGED Viewed

@@ -71,86 +71,86 @@ class ProcessRequest(BaseModel):
     youtubeUrl: str # Expecting a field named "youtubeUrl" which is a string
 def download_audio_bytes(youtube_url: str) -> bytes:
-"""
-Downloads the best audio-only format from a YouTube URL using yt-dlp
-and returns the raw audio data as bytes.
-"""
-print(f"Attempting to download audio for: {youtube_url}")
-ydl_opts = {
-    'format': 'bestaudio/best', # Prioritize best audio-only, fallback to best audio in general
-    'noplaylist': True,       # Don't download playlist if URL is part of one
-    'quiet': True,            # Suppress yt-dlp console output
-    'no_warnings': True,
-    'postprocessors': [{       # Use ffmpeg (if installed) to extract audio if needed
-        'key': 'FFmpegExtractAudio',
-        'preferredcodec': 'mp3', # Request MP3 format (widely compatible)
-        'preferredquality': '128', # Lower quality = smaller file = faster processing
-    }],
-    # Limit duration - uncomment and adjust if needed to prevent very long processing
-    # 'download_ranges': yt_dlp.utils.download_range_func(None, [(0, 1200)]), # Example: Max 20 minutes (1200 seconds)
-}
-buffer = io.BytesIO() # Create an in-memory binary buffer
-try:
-    # Use yt-dlp's ability to write to a file-like object
-    ydl_opts['outtmpl'] = '-' # Special template meaning stdout
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            # Trick: Use a hook to capture stdout to our buffer instead of printing
-            # This is complex; simpler method below is preferred if ffmpeg isn't used
-            # Or, a better way: get the direct audio URL first
-            # --- Simpler & Often Better Approach: Get URL, then download with requests ---
-            info = ydl.extract_info(youtube_url, download=False) # Get info without downloading yet
-            best_audio_format = None
-            for f in info.get('formats', []):
-                # Look for formats processed by FFmpegExtractAudio or good audio codecs
-                if f.get('acodec') != 'none' and f.get('vcodec') == 'none': # Audio-only
-                    if f.get('ext') in ['mp3', 'opus', 'm4a', 'webm']: # Prefer known good audio containers/codecs
-                        best_audio_format = f
-                        break # Take the first good one
-            # Fallback if no ideal format found
-            if not best_audio_format:
                 for f in info.get('formats', []):
-                    if f.get('acodec') != 'none':
-                        best_audio_format = f
-                        break # Take first available audio
-            if not best_audio_format or 'url' not in best_audio_format:
-                print("Could not find suitable audio stream URL via yt-dlp info. Direct download might fail or require ffmpeg.")
-                # If you *don't* have ffmpeg in the Dockerfile, the postprocessor might fail here
-                # Let's try the download anyway, it might work for some native formats
-                # This path is less reliable without guaranteed ffmpeg.
-                error_info = ydl.download([youtube_url]) # Try downloading directly (might need ffmpeg)
-                # This part is complex - capturing output might need more work if direct URL fetch failed.
-                # Let's raise an error if we couldn't get a direct URL for now.
-                raise yt_dlp.utils.DownloadError("Could not extract a direct audio URL and ffmpeg may not be available.")
-            audio_url = best_audio_format['url']
-            format_note = best_audio_format.get('format_note', best_audio_format.get('ext', 'N/A'))
-            print(f"Found audio format: {format_note}. Downloading directly from URL...")
-            # Download the audio URL content into the buffer
-            with requests.get(audio_url, stream=True) as r:
-                r.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
-                for chunk in r.iter_content(chunk_size=8192):
-                    buffer.write(chunk)
-            audio_bytes = buffer.getvalue()
-            print(f"Audio downloaded successfully: {len(audio_bytes) / (1024*1024):.2f} MB")
-            if not audio_bytes:
-                raise ValueError("Downloaded audio data is empty.")
-            return audio_bytes
-except yt_dlp.utils.DownloadError as e:
-    print(f"ERROR: yt-dlp failed to download or process audio: {e}")
-    raise HTTPException(status_code=500, detail=f"Failed to download audio from YouTube: {e}")
-except requests.exceptions.RequestException as e:
-    print(f"ERROR: Failed to download audio stream from URL: {e}")
-    raise HTTPException(status_code=500, detail=f"Failed to fetch audio stream: {e}")
-except Exception as e:
-    print(f"ERROR: Unexpected error during audio download: {e}")
-    # Log the full traceback here in a real app: import traceback; traceback.print_exc()
-    raise HTTPException(status_code=500, detail=f"An unexpected error occurred during audio processing: {e}")

     youtubeUrl: str # Expecting a field named "youtubeUrl" which is a string
 def download_audio_bytes(youtube_url: str) -> bytes:
+    """
+    Downloads the best audio-only format from a YouTube URL using yt-dlp
+    and returns the raw audio data as bytes.
+    """
+    print(f"Attempting to download audio for: {youtube_url}")
+    ydl_opts = {
+        'format': 'bestaudio/best', # Prioritize best audio-only, fallback to best audio in general
+        'noplaylist': True,       # Don't download playlist if URL is part of one
+        'quiet': True,            # Suppress yt-dlp console output
+        'no_warnings': True,
+        'postprocessors': [{       # Use ffmpeg (if installed) to extract audio if needed
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'mp3', # Request MP3 format (widely compatible)
+            'preferredquality': '128', # Lower quality = smaller file = faster processing
+        }],
+        # Limit duration - uncomment and adjust if needed to prevent very long processing
+        # 'download_ranges': yt_dlp.utils.download_range_func(None, [(0, 1200)]), # Example: Max 20 minutes (1200 seconds)
+    }
+    buffer = io.BytesIO() # Create an in-memory binary buffer
+    try:
+        # Use yt-dlp's ability to write to a file-like object
+        ydl_opts['outtmpl'] = '-' # Special template meaning stdout
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                # Trick: Use a hook to capture stdout to our buffer instead of printing
+                # This is complex; simpler method below is preferred if ffmpeg isn't used
+                # Or, a better way: get the direct audio URL first
+                # --- Simpler & Often Better Approach: Get URL, then download with requests ---
+                info = ydl.extract_info(youtube_url, download=False) # Get info without downloading yet
+                best_audio_format = None
                 for f in info.get('formats', []):
+                    # Look for formats processed by FFmpegExtractAudio or good audio codecs
+                    if f.get('acodec') != 'none' and f.get('vcodec') == 'none': # Audio-only
+                        if f.get('ext') in ['mp3', 'opus', 'm4a', 'webm']: # Prefer known good audio containers/codecs
+                            best_audio_format = f
+                            break # Take the first good one
+                # Fallback if no ideal format found
+                if not best_audio_format:
+                    for f in info.get('formats', []):
+                        if f.get('acodec') != 'none':
+                            best_audio_format = f
+                            break # Take first available audio
+                if not best_audio_format or 'url' not in best_audio_format:
+                    print("Could not find suitable audio stream URL via yt-dlp info. Direct download might fail or require ffmpeg.")
+                    # If you *don't* have ffmpeg in the Dockerfile, the postprocessor might fail here
+                    # Let's try the download anyway, it might work for some native formats
+                    # This path is less reliable without guaranteed ffmpeg.
+                    error_info = ydl.download([youtube_url]) # Try downloading directly (might need ffmpeg)
+                    # This part is complex - capturing output might need more work if direct URL fetch failed.
+                    # Let's raise an error if we couldn't get a direct URL for now.
+                    raise yt_dlp.utils.DownloadError("Could not extract a direct audio URL and ffmpeg may not be available.")
+                audio_url = best_audio_format['url']
+                format_note = best_audio_format.get('format_note', best_audio_format.get('ext', 'N/A'))
+                print(f"Found audio format: {format_note}. Downloading directly from URL...")
+                # Download the audio URL content into the buffer
+                with requests.get(audio_url, stream=True) as r:
+                    r.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
+                    for chunk in r.iter_content(chunk_size=8192):
+                        buffer.write(chunk)
+                audio_bytes = buffer.getvalue()
+                print(f"Audio downloaded successfully: {len(audio_bytes) / (1024*1024):.2f} MB")
+                if not audio_bytes:
+                    raise ValueError("Downloaded audio data is empty.")
+                return audio_bytes
+    except yt_dlp.utils.DownloadError as e:
+        print(f"ERROR: yt-dlp failed to download or process audio: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to download audio from YouTube: {e}")
+    except requests.exceptions.RequestException as e:
+        print(f"ERROR: Failed to download audio stream from URL: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to fetch audio stream: {e}")
+    except Exception as e:
+        print(f"ERROR: Unexpected error during audio download: {e}")
+        # Log the full traceback here in a real app: import traceback; traceback.print_exc()
+        raise HTTPException(status_code=500, detail=f"An unexpected error occurred during audio processing: {e}")