OmarHusseinZaki committed on
Commit
c1ef5cd
·
1 Parent(s): 3faf1d8

add indentation

Browse files
Files changed (1) hide show
  1. main.py +82 -82
main.py CHANGED
@@ -71,86 +71,86 @@ class ProcessRequest(BaseModel):
71
  youtubeUrl: str # Expecting a field named "youtubeUrl" which is a string
72
 
73
  def download_audio_bytes(youtube_url: str) -> bytes:
74
- """
75
- Downloads the best audio-only format from a YouTube URL using yt-dlp
76
- and returns the raw audio data as bytes.
77
- """
78
- print(f"Attempting to download audio for: {youtube_url}")
79
- ydl_opts = {
80
- 'format': 'bestaudio/best', # Prioritize best audio-only, fallback to best audio in general
81
- 'noplaylist': True, # Don't download playlist if URL is part of one
82
- 'quiet': True, # Suppress yt-dlp console output
83
- 'no_warnings': True,
84
- 'postprocessors': [{ # Use ffmpeg (if installed) to extract audio if needed
85
- 'key': 'FFmpegExtractAudio',
86
- 'preferredcodec': 'mp3', # Request MP3 format (widely compatible)
87
- 'preferredquality': '128', # Lower quality = smaller file = faster processing
88
- }],
89
- # Limit duration - uncomment and adjust if needed to prevent very long processing
90
- # 'download_ranges': yt_dlp.utils.download_range_func(None, [(0, 1200)]), # Example: Max 20 minutes (1200 seconds)
91
- }
92
-
93
- buffer = io.BytesIO() # Create an in-memory binary buffer
94
-
95
- try:
96
- # Use yt-dlp's ability to write to a file-like object
97
- ydl_opts['outtmpl'] = '-' # Special template meaning stdout
98
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
99
- # Trick: Use a hook to capture stdout to our buffer instead of printing
100
- # This is complex; simpler method below is preferred if ffmpeg isn't used
101
- # Or, a better way: get the direct audio URL first
102
-
103
- # --- Simpler & Often Better Approach: Get URL, then download with requests ---
104
- info = ydl.extract_info(youtube_url, download=False) # Get info without downloading yet
105
- best_audio_format = None
106
- for f in info.get('formats', []):
107
- # Look for formats processed by FFmpegExtractAudio or good audio codecs
108
- if f.get('acodec') != 'none' and f.get('vcodec') == 'none': # Audio-only
109
- if f.get('ext') in ['mp3', 'opus', 'm4a', 'webm']: # Prefer known good audio containers/codecs
110
- best_audio_format = f
111
- break # Take the first good one
112
-
113
- # Fallback if no ideal format found
114
- if not best_audio_format:
115
  for f in info.get('formats', []):
116
- if f.get('acodec') != 'none':
117
- best_audio_format = f
118
- break # Take first available audio
119
-
120
- if not best_audio_format or 'url' not in best_audio_format:
121
- print("Could not find suitable audio stream URL via yt-dlp info. Direct download might fail or require ffmpeg.")
122
- # If you *don't* have ffmpeg in the Dockerfile, the postprocessor might fail here
123
- # Let's try the download anyway, it might work for some native formats
124
- # This path is less reliable without guaranteed ffmpeg.
125
- error_info = ydl.download([youtube_url]) # Try downloading directly (might need ffmpeg)
126
- # This part is complex - capturing output might need more work if direct URL fetch failed.
127
- # Let's raise an error if we couldn't get a direct URL for now.
128
- raise yt_dlp.utils.DownloadError("Could not extract a direct audio URL and ffmpeg may not be available.")
129
-
130
-
131
- audio_url = best_audio_format['url']
132
- format_note = best_audio_format.get('format_note', best_audio_format.get('ext', 'N/A'))
133
- print(f"Found audio format: {format_note}. Downloading directly from URL...")
134
-
135
- # Download the audio URL content into the buffer
136
- with requests.get(audio_url, stream=True) as r:
137
- r.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
138
- for chunk in r.iter_content(chunk_size=8192):
139
- buffer.write(chunk)
140
-
141
- audio_bytes = buffer.getvalue()
142
- print(f"Audio downloaded successfully: {len(audio_bytes) / (1024*1024):.2f} MB")
143
- if not audio_bytes:
144
- raise ValueError("Downloaded audio data is empty.")
145
- return audio_bytes
146
-
147
- except yt_dlp.utils.DownloadError as e:
148
- print(f"ERROR: yt-dlp failed to download or process audio: {e}")
149
- raise HTTPException(status_code=500, detail=f"Failed to download audio from YouTube: {e}")
150
- except requests.exceptions.RequestException as e:
151
- print(f"ERROR: Failed to download audio stream from URL: {e}")
152
- raise HTTPException(status_code=500, detail=f"Failed to fetch audio stream: {e}")
153
- except Exception as e:
154
- print(f"ERROR: Unexpected error during audio download: {e}")
155
- # Log the full traceback here in a real app: import traceback; traceback.print_exc()
156
- raise HTTPException(status_code=500, detail=f"An unexpected error occurred during audio processing: {e}")
 
 
 
 
 
 
 
 
 
 
71
  youtubeUrl: str # Expecting a field named "youtubeUrl" which is a string
72
 
73
  def download_audio_bytes(youtube_url: str) -> bytes:
74
+ """
75
+ Downloads the best audio-only format from a YouTube URL using yt-dlp
76
+ and returns the raw audio data as bytes.
77
+ """
78
+ print(f"Attempting to download audio for: {youtube_url}")
79
+ ydl_opts = {
80
+ 'format': 'bestaudio/best', # Prioritize best audio-only, fallback to best audio in general
81
+ 'noplaylist': True, # Don't download playlist if URL is part of one
82
+ 'quiet': True, # Suppress yt-dlp console output
83
+ 'no_warnings': True,
84
+ 'postprocessors': [{ # Use ffmpeg (if installed) to extract audio if needed
85
+ 'key': 'FFmpegExtractAudio',
86
+ 'preferredcodec': 'mp3', # Request MP3 format (widely compatible)
87
+ 'preferredquality': '128', # Lower quality = smaller file = faster processing
88
+ }],
89
+ # Limit duration - uncomment and adjust if needed to prevent very long processing
90
+ # 'download_ranges': yt_dlp.utils.download_range_func(None, [(0, 1200)]), # Example: Max 20 minutes (1200 seconds)
91
+ }
92
+
93
+ buffer = io.BytesIO() # Create an in-memory binary buffer
94
+
95
+ try:
96
+ # Use yt-dlp's ability to write to a file-like object
97
+ ydl_opts['outtmpl'] = '-' # Special template meaning stdout
98
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
99
+ # Trick: Use a hook to capture stdout to our buffer instead of printing
100
+ # This is complex; simpler method below is preferred if ffmpeg isn't used
101
+ # Or, a better way: get the direct audio URL first
102
+
103
+ # --- Simpler & Often Better Approach: Get URL, then download with requests ---
104
+ info = ydl.extract_info(youtube_url, download=False) # Get info without downloading yet
105
+ best_audio_format = None
 
 
 
 
 
 
 
 
 
106
  for f in info.get('formats', []):
107
+ # Look for formats processed by FFmpegExtractAudio or good audio codecs
108
+ if f.get('acodec') != 'none' and f.get('vcodec') == 'none': # Audio-only
109
+ if f.get('ext') in ['mp3', 'opus', 'm4a', 'webm']: # Prefer known good audio containers/codecs
110
+ best_audio_format = f
111
+ break # Take the first good one
112
+
113
+ # Fallback if no ideal format found
114
+ if not best_audio_format:
115
+ for f in info.get('formats', []):
116
+ if f.get('acodec') != 'none':
117
+ best_audio_format = f
118
+ break # Take first available audio
119
+
120
+ if not best_audio_format or 'url' not in best_audio_format:
121
+ print("Could not find suitable audio stream URL via yt-dlp info. Direct download might fail or require ffmpeg.")
122
+ # If you *don't* have ffmpeg in the Dockerfile, the postprocessor might fail here
123
+ # Let's try the download anyway, it might work for some native formats
124
+ # This path is less reliable without guaranteed ffmpeg.
125
+ error_info = ydl.download([youtube_url]) # Try downloading directly (might need ffmpeg)
126
+ # This part is complex - capturing output might need more work if direct URL fetch failed.
127
+ # Let's raise an error if we couldn't get a direct URL for now.
128
+ raise yt_dlp.utils.DownloadError("Could not extract a direct audio URL and ffmpeg may not be available.")
129
+
130
+
131
+ audio_url = best_audio_format['url']
132
+ format_note = best_audio_format.get('format_note', best_audio_format.get('ext', 'N/A'))
133
+ print(f"Found audio format: {format_note}. Downloading directly from URL...")
134
+
135
+ # Download the audio URL content into the buffer
136
+ with requests.get(audio_url, stream=True) as r:
137
+ r.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
138
+ for chunk in r.iter_content(chunk_size=8192):
139
+ buffer.write(chunk)
140
+
141
+ audio_bytes = buffer.getvalue()
142
+ print(f"Audio downloaded successfully: {len(audio_bytes) / (1024*1024):.2f} MB")
143
+ if not audio_bytes:
144
+ raise ValueError("Downloaded audio data is empty.")
145
+ return audio_bytes
146
+
147
+ except yt_dlp.utils.DownloadError as e:
148
+ print(f"ERROR: yt-dlp failed to download or process audio: {e}")
149
+ raise HTTPException(status_code=500, detail=f"Failed to download audio from YouTube: {e}")
150
+ except requests.exceptions.RequestException as e:
151
+ print(f"ERROR: Failed to download audio stream from URL: {e}")
152
+ raise HTTPException(status_code=500, detail=f"Failed to fetch audio stream: {e}")
153
+ except Exception as e:
154
+ print(f"ERROR: Unexpected error during audio download: {e}")
155
+ # Log the full traceback here in a real app: import traceback; traceback.print_exc()
156
+ raise HTTPException(status_code=500, detail=f"An unexpected error occurred during audio processing: {e}")