OmarHusseinZaki committed on
Commit
3faf1d8
·
1 Parent(s): 6716568

add method to download audio from yt

Browse files
Files changed (1) hide show
  1. main.py +92 -1
main.py CHANGED
@@ -62,4 +62,95 @@ app.add_middleware(
62
  allow_credentials=True, # Allow cookies (not strictly needed now, but good practice)
63
  allow_methods=["*"], # Allow all HTTP methods (GET, POST, etc.)
64
  allow_headers=["*"], # Allow all headers
65
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  allow_credentials=True, # Allow cookies (not strictly needed now, but good practice)
63
  allow_methods=["*"], # Allow all HTTP methods (GET, POST, etc.)
64
  allow_headers=["*"], # Allow all headers
65
+ )
66
+
67
+ # --- Data Models (Request Validation) ---
68
+
69
+ # Define the expected structure of the request body using Pydantic
70
class ProcessRequest(BaseModel):
    # Schema of the incoming request body: a single string field
    # named "youtubeUrl" (camelCase is kept because it is the wire name).
    youtubeUrl: str
72
+
73
def _pick_audio_format(formats):
    """Return the first usable audio format dict from *formats*, or None.

    A format is usable only if it carries a direct ``url``. Audio-only
    streams in a well-known container are preferred; otherwise the first
    format that has any audio track is used.
    """
    preferred_exts = {'mp3', 'opus', 'm4a', 'webm'}
    candidates = [f for f in formats if f.get('url') and f.get('acodec') != 'none']

    # NOTE(review): yt-dlp documents `formats` as ordered worst-to-best, so
    # "first match" presumably selects the lowest-quality stream. That keeps
    # downloads small, but confirm it is intended (iterate reversed() if not).
    for fmt in candidates:
        if fmt.get('vcodec') == 'none' and fmt.get('ext') in preferred_exts:
            return fmt

    # Fallback: any format with an audio track (may also contain video).
    return candidates[0] if candidates else None


def download_audio_bytes(youtube_url: str, timeout: float = 30.0) -> bytes:
    """
    Resolve a direct audio stream URL for a YouTube video with yt-dlp and
    download it into memory.

    yt-dlp is used only to extract metadata (nothing is downloaded or
    post-processed by it), so the returned bytes are in whatever container
    the selected stream uses (e.g. webm/m4a), not re-encoded.

    Args:
        youtube_url: URL of the video to fetch audio for.
        timeout: Seconds to wait for the stream connection and for each
            read before giving up; passed to ``requests.get``.

    Returns:
        The raw, non-empty audio stream content.

    Raises:
        HTTPException: status 500 if metadata extraction fails, no direct
            audio URL is available, the HTTP download fails or times out,
            or the downloaded data is empty.
    """
    print(f"Attempting to download audio for: {youtube_url}")
    ydl_opts = {
        'format': 'bestaudio/best',  # Best audio-only; fall back to best combined format
        'noplaylist': True,          # Don't expand a playlist if the URL is part of one
        'quiet': True,               # Suppress yt-dlp console output
        'no_warnings': True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # Metadata only: we fetch the stream ourselves below.
            info = ydl.extract_info(youtube_url, download=False)

        # extract_info may return nothing useful; treat that as "no formats".
        formats = (info or {}).get('formats') or []
        audio_format = _pick_audio_format(formats)
        if audio_format is None:
            print("Could not find suitable audio stream URL via yt-dlp info.")
            raise yt_dlp.utils.DownloadError("Could not extract a direct audio URL and ffmpeg may not be available.")

        audio_url = audio_format['url']
        format_note = audio_format.get('format_note', audio_format.get('ext', 'N/A'))
        print(f"Found audio format: {format_note}. Downloading directly from URL...")

        # Without a timeout a stalled connection would block this worker forever.
        with requests.get(audio_url, stream=True, timeout=timeout) as r:
            r.raise_for_status()  # Raise for 4xx/5xx responses
            audio_bytes = b"".join(r.iter_content(chunk_size=8192))

        if not audio_bytes:
            raise ValueError("Downloaded audio data is empty.")
        print(f"Audio downloaded successfully: {len(audio_bytes) / (1024*1024):.2f} MB")
        return audio_bytes

    except yt_dlp.utils.DownloadError as e:
        print(f"ERROR: yt-dlp failed to download or process audio: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to download audio from YouTube: {e}") from e
    except requests.exceptions.RequestException as e:
        print(f"ERROR: Failed to download audio stream from URL: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to fetch audio stream: {e}") from e
    except Exception as e:
        # Boundary handler: convert anything unexpected into an HTTP 500.
        print(f"ERROR: Unexpected error during audio download: {e}")
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred during audio processing: {e}") from e