Spaces:
Runtime error
Runtime error
import time | |
import gradio as gr | |
from selenium.webdriver.common.by import By | |
import undetected_chromedriver as uc | |
def get_captions_selenium(video_url):
    """Check whether a video page's HTML source mentions caption tracks.

    Opens ``video_url`` in a headless Chrome session (via
    ``undetected_chromedriver``), waits a fixed five seconds for the page to
    load, and looks for the substring ``"captionTracks"`` in the page source.

    Args:
        video_url: URL of the video page to load.

    Returns:
        A human-readable status string: a success message when the marker is
        present, a warning when it is absent, or a message starting with
        ``"❌ Error:"`` when the URL is blank or the browser step raised.
        Exceptions from the browser are reported in the string, not raised.
    """
    # Reject blank input up front so we do not start a browser for nothing.
    if not isinstance(video_url, str) or not video_url.strip():
        return "❌ Error: No video URL provided."

    try:
        # Launch browser
        options = uc.ChromeOptions()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        driver = uc.Chrome(options=options)
        try:
            driver.get(video_url.strip())
            # Fixed pause to give the page time to load before reading it.
            time.sleep(5)
            # Only the presence of the marker is checked; the caption data
            # itself is not extracted or parsed here.
            has_caption_info = "captionTracks" in driver.page_source
        finally:
            # Always shut the browser down, even when loading the page fails,
            # so a failed request does not leave a Chrome process behind.
            driver.quit()
    except Exception as e:
        # Boundary for the UI callback: report the failure as text.
        return f"❌ Error: {e}"

    if has_caption_info:
        return "✅ Found potential captions info in page source (you may need to parse this JSON)."
    return "⚠️ Captions info not found in source. May not be available or blocked."
# Gradio interface: one text box for the video URL, plain-text status output.
demo = gr.Interface(
    fn=get_captions_selenium,
    inputs=[gr.Textbox(label="YouTube Video URL")],
    outputs="text",
    title="YouTube Captions Scraper (Selenium)",
    description="Extract captions using headless browser via Selenium.",
)

# Start the web server only when this file is run as a script, so importing
# the module does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()