Commit
·
fb1ef83
1
Parent(s):
c1ef5cd
adjust indentation again
Browse files
main.py
CHANGED
@@ -71,10 +71,9 @@ class ProcessRequest(BaseModel):
|
|
71 |
youtubeUrl: str # Expecting a field named "youtubeUrl" which is a string
|
72 |
|
73 |
def download_audio_bytes(youtube_url: str) -> bytes:
|
74 |
-
|
75 |
-
Downloads the best audio-only format from a YouTube URL using yt-dlp
|
76 |
-
|
77 |
-
"""
|
78 |
print(f"Attempting to download audio for: {youtube_url}")
|
79 |
ydl_opts = {
|
80 |
'format': 'bestaudio/best', # Prioritize best audio-only, fallback to best audio in general
|
@@ -96,53 +95,53 @@ def download_audio_bytes(youtube_url: str) -> bytes:
|
|
96 |
# Use yt-dlp's ability to write to a file-like object
|
97 |
ydl_opts['outtmpl'] = '-' # Special template meaning stdout
|
98 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
|
147 |
except yt_dlp.utils.DownloadError as e:
|
148 |
print(f"ERROR: yt-dlp failed to download or process audio: {e}")
|
|
|
71 |
youtubeUrl: str # Expecting a field named "youtubeUrl" which is a string
|
72 |
|
73 |
def download_audio_bytes(youtube_url: str) -> bytes:
|
74 |
+
|
75 |
+
# Downloads the best audio-only format from a YouTube URL using yt-dlp and returns the raw audio data as bytes.
|
76 |
+
|
|
|
77 |
print(f"Attempting to download audio for: {youtube_url}")
|
78 |
ydl_opts = {
|
79 |
'format': 'bestaudio/best', # Prioritize best audio-only, fallback to best audio in general
|
|
|
95 |
# Use yt-dlp's ability to write to a file-like object
|
96 |
ydl_opts['outtmpl'] = '-' # Special template meaning stdout
|
97 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
98 |
+
# Trick: Use a hook to capture stdout to our buffer instead of printing
|
99 |
+
# This is complex; simpler method below is preferred if ffmpeg isn't used
|
100 |
+
# Or, a better way: get the direct audio URL first
|
101 |
+
|
102 |
+
# --- Simpler & Often Better Approach: Get URL, then download with requests ---
|
103 |
+
info = ydl.extract_info(youtube_url, download=False) # Get info without downloading yet
|
104 |
+
best_audio_format = None
|
105 |
+
for f in info.get('formats', []):
|
106 |
+
# Look for formats processed by FFmpegExtractAudio or good audio codecs
|
107 |
+
if f.get('acodec') != 'none' and f.get('vcodec') == 'none': # Audio-only
|
108 |
+
if f.get('ext') in ['mp3', 'opus', 'm4a', 'webm']: # Prefer known good audio containers/codecs
|
109 |
+
best_audio_format = f
|
110 |
+
break # Take the first good one
|
111 |
+
|
112 |
+
# Fallback if no ideal format found
|
113 |
+
if not best_audio_format:
|
114 |
+
for f in info.get('formats', []):
|
115 |
+
if f.get('acodec') != 'none':
|
116 |
+
best_audio_format = f
|
117 |
+
break # Take first available audio
|
118 |
+
|
119 |
+
if not best_audio_format or 'url' not in best_audio_format:
|
120 |
+
print("Could not find suitable audio stream URL via yt-dlp info. Direct download might fail or require ffmpeg.")
|
121 |
+
# If you *don't* have ffmpeg in the Dockerfile, the postprocessor might fail here
|
122 |
+
# Let's try the download anyway, it might work for some native formats
|
123 |
+
# This path is less reliable without guaranteed ffmpeg.
|
124 |
+
error_info = ydl.download([youtube_url]) # Try downloading directly (might need ffmpeg)
|
125 |
+
# This part is complex - capturing output might need more work if direct URL fetch failed.
|
126 |
+
# Let's raise an error if we couldn't get a direct URL for now.
|
127 |
+
raise yt_dlp.utils.DownloadError("Could not extract a direct audio URL and ffmpeg may not be available.")
|
128 |
+
|
129 |
+
|
130 |
+
audio_url = best_audio_format['url']
|
131 |
+
format_note = best_audio_format.get('format_note', best_audio_format.get('ext', 'N/A'))
|
132 |
+
print(f"Found audio format: {format_note}. Downloading directly from URL...")
|
133 |
+
|
134 |
+
# Download the audio URL content into the buffer
|
135 |
+
with requests.get(audio_url, stream=True) as r:
|
136 |
+
r.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
|
137 |
+
for chunk in r.iter_content(chunk_size=8192):
|
138 |
+
buffer.write(chunk)
|
139 |
+
|
140 |
+
audio_bytes = buffer.getvalue()
|
141 |
+
print(f"Audio downloaded successfully: {len(audio_bytes) / (1024*1024):.2f} MB")
|
142 |
+
if not audio_bytes:
|
143 |
+
raise ValueError("Downloaded audio data is empty.")
|
144 |
+
return audio_bytes
|
145 |
|
146 |
except yt_dlp.utils.DownloadError as e:
|
147 |
print(f"ERROR: yt-dlp failed to download or process audio: {e}")
|