Spaces:

codelion
/

videoanalysis

Sleeping

App Files Files Community

codelion committed on Apr 2

Commit

830c9fb

verified ·

1 Parent(s): 3f2c22a

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -25

app.py CHANGED Viewed

@@ -1,7 +1,10 @@
 import os
 import json
 import gradio as gr
 import cv2
 from google import genai
 from google.genai import types
 from google.genai.types import Part
@@ -16,7 +19,7 @@ if not GOOGLE_API_KEY:
 client = genai.Client(api_key=GOOGLE_API_KEY)
 # Use the Gemini 2.0 Flash model.
-MODEL_NAME = "gemini-2.0-flash"
 @retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
 def call_gemini(video_url: str, prompt: str) -> str:
@@ -46,10 +49,40 @@ def hhmmss_to_seconds(time_str: str) -> float:
     else:
         return parts[0]
 def get_key_frames(video_url: str, analysis: str, user_query: str) -> list:
     """
     Prompt Gemini to return key frame timestamps (in HH:MM:SS) with descriptions,
-    then extract those frames from the video using OpenCV.
     Returns a list of tuples: (image_array, caption)
     """
@@ -73,27 +106,33 @@ def get_key_frames(video_url: str, analysis: str, user_query: str) -> list:
         key_frames = []
     extracted_frames = []
-    cap = cv2.VideoCapture(video_url)
-    if not cap.isOpened():
-        print("Error: Could not open video.")
-        return extracted_frames
-    for frame_obj in key_frames:
-        ts = frame_obj.get("timestamp")
-        description = frame_obj.get("description", "")
-        try:
-            seconds = hhmmss_to_seconds(ts)
-        except Exception:
-            continue
-        # Set video position (in milliseconds)
-        cap.set(cv2.CAP_PROP_POS_MSEC, seconds * 1000)
-        ret, frame = cap.read()
-        if ret:
-            # Convert BGR to RGB
-            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            caption = f"{ts}: {description}"
-            extracted_frames.append((frame_rgb, caption))
-    cap.release()
     return extracted_frames
 def analyze_video(video_url: str, user_query: str) -> (str, list):
@@ -157,11 +196,10 @@ def gradio_interface(video_url: str, user_query: str) -> (str, list):
         return "Please provide a valid video URL.", []
     return analyze_video(video_url, user_query)
-# Define the Gradio interface with two inputs and two outputs.
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
-        gr.Textbox(label="Video URL (publicly accessible, e.g., YouTube direct link or video file URL)"),
         gr.Textbox(label="Analysis Query (optional): guide the focus of the analysis", placeholder="e.g., focus on unusual movements near the entrance")
     ],
     outputs=[

 import os
 import json
+import tempfile
+import requests
 import gradio as gr
 import cv2
+from pytube import YouTube
 from google import genai
 from google.genai import types
 from google.genai.types import Part
 client = genai.Client(api_key=GOOGLE_API_KEY)
 # Use the Gemini 2.0 Flash model.
+MODEL_NAME = "gemini-2.0-flash-001"
 @retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
 def call_gemini(video_url: str, prompt: str) -> str:
     else:
         return parts[0]
+def download_video(video_url: str) -> str:
+    """
+    Download the video from a URL (either YouTube or direct link) and return the local file path.
+    """
+    local_file = None
+    if "youtube.com" in video_url or "youtu.be" in video_url:
+        yt = YouTube(video_url)
+        stream = yt.streams.filter(file_extension="mp4", progressive=True).first()
+        if stream is None:
+            raise ValueError("No suitable mp4 stream found on YouTube.")
+        temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
+        stream.stream_to_buffer(temp_file)
+        temp_file.flush()
+        local_file = temp_file.name
+        temp_file.close()
+    else:
+        # Assume it's a direct link to a video file, download using requests.
+        response = requests.get(video_url, stream=True)
+        if response.status_code == 200:
+            temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
+            for chunk in response.iter_content(chunk_size=8192):
+                if chunk:
+                    temp_file.write(chunk)
+            temp_file.flush()
+            local_file = temp_file.name
+            temp_file.close()
+        else:
+            raise ValueError("Failed to download video, status code: " + str(response.status_code))
+    return local_file
 def get_key_frames(video_url: str, analysis: str, user_query: str) -> list:
     """
     Prompt Gemini to return key frame timestamps (in HH:MM:SS) with descriptions,
+    then extract those frames from the downloaded video file using OpenCV.
     Returns a list of tuples: (image_array, caption)
     """
         key_frames = []
     extracted_frames = []
+    local_path = None
+    try:
+        local_path = download_video(video_url)
+        cap = cv2.VideoCapture(local_path)
+        if not cap.isOpened():
+            print("Error: Could not open video from local file.")
+            return extracted_frames
+        for frame_obj in key_frames:
+            ts = frame_obj.get("timestamp")
+            description = frame_obj.get("description", "")
+            try:
+                seconds = hhmmss_to_seconds(ts)
+            except Exception:
+                continue
+            # Set video position (in milliseconds)
+            cap.set(cv2.CAP_PROP_POS_MSEC, seconds * 1000)
+            ret, frame = cap.read()
+            if ret:
+                # Convert BGR to RGB
+                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                caption = f"{ts}: {description}"
+                extracted_frames.append((frame_rgb, caption))
+        cap.release()
+    finally:
+        if local_path and os.path.exists(local_path):
+            os.remove(local_path)
     return extracted_frames
 def analyze_video(video_url: str, user_query: str) -> (str, list):
         return "Please provide a valid video URL.", []
     return analyze_video(video_url, user_query)
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
+        gr.Textbox(label="Video URL (publicly accessible, e.g., YouTube link or direct video file URL)"),
         gr.Textbox(label="Analysis Query (optional): guide the focus of the analysis", placeholder="e.g., focus on unusual movements near the entrance")
     ],
     outputs=[