Commit
·
c1ef5cd
1
Parent(s):
3faf1d8
add indentation
Browse files
main.py
CHANGED
@@ -71,86 +71,86 @@ class ProcessRequest(BaseModel):
|
|
71 |
youtubeUrl: str # Expecting a field named "youtubeUrl" which is a string
|
72 |
|
73 |
def download_audio_bytes(youtube_url: str) -> bytes:
|
74 |
-
"""
|
75 |
-
Downloads the best audio-only format from a YouTube URL using yt-dlp
|
76 |
-
and returns the raw audio data as bytes.
|
77 |
-
"""
|
78 |
-
print(f"Attempting to download audio for: {youtube_url}")
|
79 |
-
ydl_opts = {
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
}
|
92 |
-
|
93 |
-
buffer = io.BytesIO() # Create an in-memory binary buffer
|
94 |
-
|
95 |
-
try:
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
for f in info.get('formats', []):
|
107 |
-
# Look for formats processed by FFmpegExtractAudio or good audio codecs
|
108 |
-
if f.get('acodec') != 'none' and f.get('vcodec') == 'none': # Audio-only
|
109 |
-
if f.get('ext') in ['mp3', 'opus', 'm4a', 'webm']: # Prefer known good audio containers/codecs
|
110 |
-
best_audio_format = f
|
111 |
-
break # Take the first good one
|
112 |
-
|
113 |
-
# Fallback if no ideal format found
|
114 |
-
if not best_audio_format:
|
115 |
for f in info.get('formats', []):
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
#
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
youtubeUrl: str # Expecting a field named "youtubeUrl" which is a string
|
72 |
|
73 |
def download_audio_bytes(youtube_url: str) -> bytes:
    """
    Resolve an audio stream for a YouTube URL with yt-dlp and return its raw bytes.

    yt-dlp is used only to extract metadata; nothing is downloaded through it.
    The direct URL of the selected format is then fetched with ``requests``
    and the whole payload is held in memory.

    Args:
        youtube_url: URL of the video whose audio should be fetched.

    Returns:
        The bytes of the selected audio stream exactly as served by the
        source (no ffmpeg transcoding is applied).

    Raises:
        HTTPException: Always with status code 500, when yt-dlp cannot
            extract a usable audio URL, when the HTTP fetch fails or times
            out, or when the downloaded payload is empty.
    """
    print(f"Attempting to download audio for: {youtube_url}")

    # Only metadata extraction happens through yt-dlp, so output templates and
    # ffmpeg postprocessors would never be applied and are not configured.
    ydl_opts = {
        'format': 'bestaudio/best',  # Prioritize best audio-only, fallback to best in general
        'noplaylist': True,          # Don't expand a playlist if the URL is part of one
        'quiet': True,               # Suppress yt-dlp console output
        'no_warnings': True,
    }
    # (connect, read) timeouts in seconds so a stalled server cannot hang the request.
    request_timeout = (10, 60)
    chunk_size = 64 * 1024

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(youtube_url, download=False)

        # Guard against a missing info dict or a missing/None 'formats' entry.
        formats = (info or {}).get('formats') or []
        best_audio_format = _select_audio_format(formats)
        if best_audio_format is None:
            # Previously a full yt-dlp download to stdout was attempted here and
            # its result discarded; fail fast instead.
            raise yt_dlp.utils.DownloadError("Could not extract a direct audio URL and ffmpeg may not be available.")

        audio_url = best_audio_format['url']
        format_note = best_audio_format.get('format_note', best_audio_format.get('ext', 'N/A'))
        print(f"Found audio format: {format_note}. Downloading directly from URL...")

        with requests.get(audio_url, stream=True, timeout=request_timeout) as r:
            r.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
            audio_bytes = b"".join(r.iter_content(chunk_size=chunk_size))

        # Validate before reporting success so the log never claims a 0-byte download worked.
        if not audio_bytes:
            raise ValueError("Downloaded audio data is empty.")
        print(f"Audio downloaded successfully: {len(audio_bytes) / (1024*1024):.2f} MB")
        return audio_bytes

    except yt_dlp.utils.DownloadError as e:
        print(f"ERROR: yt-dlp failed to download or process audio: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to download audio from YouTube: {e}") from e
    except requests.exceptions.RequestException as e:
        print(f"ERROR: Failed to download audio stream from URL: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to fetch audio stream: {e}") from e
    except Exception as e:
        print(f"ERROR: Unexpected error during audio download: {e}")
        # Log the full traceback here in a real app: import traceback; traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred during audio processing: {e}") from e


def _select_audio_format(formats):
    """
    Return the first format dict that carries audio and a direct URL, or None.

    Audio-only formats in a known-good container are preferred; otherwise the
    first format with any audio track is used. Formats without a ``url`` are
    skipped so one unusable entry does not abort the whole request.
    """
    preferred_exts = ('mp3', 'opus', 'm4a', 'webm')
    with_audio = [f for f in formats if f.get('url') and f.get('acodec') != 'none']

    # NOTE(review): yt-dlp documents `formats` as ordered worst to best, so the
    # first match is likely the lowest-quality stream - confirm this is intended.
    for f in with_audio:
        if f.get('vcodec') == 'none' and f.get('ext') in preferred_exts:
            return f
    return with_audio[0] if with_audio else None
|