Spaces:

Rivalcoder
/

Youtube_Dockor

Runtime error

App Files Community

Rivalcoder committed 7 days ago

Commit

5427076

1 Parent(s): 68c6dab

Add

Browse files

Files changed (2) hide show

Dockerfile +32 -27
app.py +22 -8

Dockerfile CHANGED Viewed

@@ -1,36 +1,41 @@
 FROM python:3.10-slim
-# Install Chrome and dependencies
-RUN apt-get update && apt-get install -y wget gnupg unzip curl ca-certificates fonts-liberation libappindicator3-1 libasound2 libatk-bridge2.0-0 libnspr4 libnss3 libx11-xcb1 libxcomposite1 libxdamage1 libxrandr2 xdg-utils libu2f-udev libvulkan1
-# Install Chrome
-RUN wget -q -O google-chrome.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
- && apt install -y ./google-chrome.deb \
- && rm google-chrome.deb
-# Install matching ChromeDriver
-RUN CHROME_VERSION=$(google-chrome --version | grep -oP '\d+\.\d+\.\d+\.\d+') \
- && CHROMEDRIVER_VERSION=$(curl -s "https://chromedriver.storage.googleapis.com/LATEST_RELEASE_$CHROME_VERSION") \
- && wget -O /tmp/chromedriver.zip https://chromedriver.storage.googleapis.com/${CHROMEDRIVER_VERSION}/chromedriver_linux64.zip \
- && unzip /tmp/chromedriver.zip -d /usr/local/bin/ \
- && chmod +x /usr/local/bin/chromedriver \
- && rm /tmp/chromedriver.zip
-# Set environment variables
-ENV PATH="/usr/local/bin:$PATH"
-ENV CHROME_BIN="/usr/bin/google-chrome"
-ENV CHROMEDRIVER="/usr/local/bin/chromedriver"
-# Set working dir
 WORKDIR /app
-# Install Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# Copy app code
 COPY app.py .
-# Run the app
-CMD ["python", "app.py"]

 FROM python:3.10-slim
+# 1) Install Chromium + driver + required libraries
+RUN apt-get update && apt-get install -y \
+    chromium \
+    chromium-driver \
+    wget \
+    gnupg \
+    unzip \
+    curl \
+    ca-certificates \
+    fonts-liberation \
+    libappindicator3-1 \
+    libasound2 \
+    libatk-bridge2.0-0 \
+    libnspr4 \
+    libnss3 \
+    libx11-xcb1 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxrandr2 \
+    xdg-utils \
+    libu2f-udev \
+    libvulkan1 \
+ && rm -rf /var/lib/apt/lists/*
+# 2) Set working directory
 WORKDIR /app
+# 3) Install Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
+# 4) Copy application code
 COPY app.py .
+# 5) Point undetected-chromedriver at system Chromium
+ENV CHROME_BINARY=/usr/bin/chromium
+# 6) Launch the Gradio app
+CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -1,18 +1,22 @@
 import time
 import gradio as gr
 from selenium.webdriver.common.by import By
 import undetected_chromedriver as uc
 def get_captions_selenium(video_url):
     try:
-        print("🚀 Launching Chrome...")
         options = uc.ChromeOptions()
-        options.add_argument("--headless=new")  # Use 'new' headless mode for Chrome 109+
         options.add_argument("--no-sandbox")
         options.add_argument("--disable-dev-shm-usage")
         driver = uc.Chrome(options=options)
-        print("🌍 Navigating to video...")
         driver.get(video_url)
         print("⌛ Waiting for page to load...")
@@ -26,7 +30,11 @@ def get_captions_selenium(video_url):
             end = page_source.find("]", start) + 1
             caption_json = page_source[start:end]
             driver.quit()
-            return "✅ Found potential captions info in page source (you may need to parse this JSON).\n\n" + caption_json
         else:
             driver.quit()
             return "⚠️ Captions info not found in source. May not be available or blocked."
@@ -35,11 +43,17 @@ def get_captions_selenium(video_url):
         print(f"❌ Exception occurred: {e}")
         return f"❌ Error: {str(e)}"
-# Gradio interface
 gr.Interface(
     fn=get_captions_selenium,
-    inputs=[gr.Textbox(label="YouTube Video URL")],
     outputs="text",
     title="YouTube Captions Scraper (Selenium)",
-    description="Uses Selenium with undetected-chromedriver to extract captions from a YouTube video."
-).launch()

+import os
 import time
 import gradio as gr
 from selenium.webdriver.common.by import By
 import undetected_chromedriver as uc
+# Function to extract YouTube captions using a headless browser
 def get_captions_selenium(video_url):
     try:
+        print("🚀 Launching Chromium via undetected-chromedriver...")
         options = uc.ChromeOptions()
+        # Point to the system-installed Chromium binary
+        options.binary_location = os.environ.get("CHROME_BINARY", "/usr/bin/chromium")
+        options.add_argument("--headless=new")
         options.add_argument("--no-sandbox")
         options.add_argument("--disable-dev-shm-usage")
         driver = uc.Chrome(options=options)
+        print("🌍 Navigating to video URL...")
         driver.get(video_url)
         print("⌛ Waiting for page to load...")
             end = page_source.find("]", start) + 1
             caption_json = page_source[start:end]
             driver.quit()
+            return (
+                "✅ Found potential captions info.\n"
+                "(You can parse this JSON string to extract subtitles.)\n\n"
+                + caption_json
+            )
         else:
             driver.quit()
             return "⚠️ Captions info not found in source. May not be available or blocked."
         print(f"❌ Exception occurred: {e}")
         return f"❌ Error: {str(e)}"
+# Gradio interface definition
+default_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
 gr.Interface(
     fn=get_captions_selenium,
+    inputs=[
+        gr.Textbox(default=default_url, label="YouTube Video URL")
+    ],
     outputs="text",
     title="YouTube Captions Scraper (Selenium)",
+    description=(
+        "Extract captions from a YouTube video using a headless browser with "
+        "undetected-chromedriver. Logs will appear in the Space's console."
+    )
+).launch()