Spaces:

Rivalcoder
/

Youtube_Dockor

Runtime error

App Files Files Community

Rivalcoder committed 6 days ago

Commit

cbf58a5

1 Parent(s): 033ac17

Add

Browse files

Files changed (3) hide show

Dockerfile +19 -0
app.py +44 -0
requirements.txt +3 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,19 @@

+FROM python:3.10-slim
+# Install Chrome from Google's apt repository. The signing key is stored in a
+# dedicated keyring referenced through signed-by, because apt-key is deprecated
+# and is not available on newer Debian base images. The apt package lists are
+# removed afterwards to keep the image layer small.
+RUN apt-get update && apt-get install -y wget gnupg unzip curl \
+ && wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
+ && sh -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list' \
+ && apt-get update \
+ && apt-get install -y google-chrome-stable \
+ && rm -rf /var/lib/apt/lists/*
+# Install dependencies
+COPY requirements.txt /app/requirements.txt
+WORKDIR /app
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy app code
+COPY app.py /app/app.py
+# Gradio binds to 127.0.0.1 by default, which is unreachable from outside the
+# container; listen on all interfaces on Gradio's default port (7860) instead.
+ENV GRADIO_SERVER_NAME=0.0.0.0
+EXPOSE 7860
+# Run the app
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import time
+import gradio as gr
+from selenium.webdriver.common.by import By
+import undetected_chromedriver as uc
+def get_captions_selenium(video_url):
+    try:
+        # Launch browser
+        options = uc.ChromeOptions()
+        options.add_argument("--headless")
+        options.add_argument("--no-sandbox")
+        options.add_argument("--disable-dev-shm-usage")
+        driver = uc.Chrome(options=options)
+        driver.get(video_url)
+        time.sleep(5)
+        # Click "..." -> "Open transcript"
+        # YouTube UI changes often; this is just an example. May need tuning.
+        # Try to find subtitles in the page source (for auto-generated)
+        page_source = driver.page_source
+        if "captionTracks" in page_source:
+            start = page_source.find("captionTracks")
+            end = page_source.find("]", start) + 1
+            caption_json = page_source[start:end]
+            driver.quit()
+            return "✅ Found potential captions info in page source (you may need to parse this JSON)."
+        else:
+            driver.quit()
+            return "⚠️ Captions info not found in source. May not be available or blocked."
+    except Exception as e:
+        return f"❌ Error: {str(e)}"
+# Gradio interface
+gr.Interface(
+    fn=get_captions_selenium,
+    inputs=[gr.Textbox(label="YouTube Video URL")],
+    outputs="text",
+    title="YouTube Captions Scraper (Selenium)",
+    description="Extract captions using headless browser via Selenium."
+).launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+# Web UI framework used by app.py (gr.Interface / gr.Textbox).
+gradio
+# Browser automation; app.py imports selenium.webdriver.common.by.By.
+selenium
+# Chrome driver wrapper imported by app.py as `uc` to launch headless Chrome.
+undetected-chromedriver