Spaces:

codelion
/

videoanalysis

Sleeping

App Files Community

codelion committed on Apr 2

Commit

03c6357

verified ·

1 Parent(s): 8f4cdd0

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -10

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import json
 import gradio as gr
 import cv2
 from google import genai
-from google.genai import types
 from google.genai.types import Part
 from tenacity import retry, stop_after_attempt, wait_random_exponential
@@ -22,7 +21,8 @@ MODEL_NAME = "gemini-2.0-flash-001"
 def call_gemini(video_file: str, prompt: str) -> str:
     """
     Call the Gemini model with the provided video file and prompt.
-    The video file is read as bytes and passed with MIME type "video/mp4".
     """
     with open(video_file, "rb") as f:
         file_bytes = f.read()
@@ -30,7 +30,7 @@ def call_gemini(video_file: str, prompt: str) -> str:
         model=MODEL_NAME,
         contents=[
             Part(file_data=file_bytes, mime_type="video/mp4"),
-            prompt,
         ],
     )
     return response.text
@@ -67,11 +67,11 @@ def get_key_frames(video_file: str, analysis: str, user_query: str) -> list:
     try:
         key_frames_response = call_gemini(video_file, prompt)
-        # Attempt to parse the output as JSON.
         key_frames = json.loads(key_frames_response)
         if not isinstance(key_frames, list):
             key_frames = []
     except Exception as e:
         key_frames = []
     extracted_frames = []
@@ -87,11 +87,9 @@ def get_key_frames(video_file: str, analysis: str, user_query: str) -> list:
             seconds = hhmmss_to_seconds(ts)
         except Exception:
             continue
-        # Set video position (in milliseconds)
         cap.set(cv2.CAP_PROP_POS_MSEC, seconds * 1000)
         ret, frame = cap.read()
         if ret:
-            # Convert BGR to RGB for proper display
             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
             caption = f"{ts}: {description}"
             extracted_frames.append((frame_rgb, caption))
@@ -136,10 +134,7 @@ def analyze_video(video_file: str, user_query: str) -> (str, list):
             analysis += f"\n[Error during iteration {i+1}: {e}]"
             break
-    # Create a Markdown report
     markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{analysis}\n"
-    # Get key frames based on the analysis and optional query.
     key_frames_gallery = get_key_frames(video_file, analysis, user_query)
     if not key_frames_gallery:
         markdown_report += "\n*No key frames were extracted.*\n"
@@ -162,7 +157,7 @@ def gradio_interface(video_file, user_query: str) -> (str, list):
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
-        gr.Video(label="Upload Video File"),  # Removed the 'type' parameter.
         gr.Textbox(label="Analysis Query (optional): guide the focus of the analysis", placeholder="e.g., focus on unusual movements near the entrance")
     ],
     outputs=[

 import gradio as gr
 import cv2
 from google import genai
 from google.genai.types import Part
 from tenacity import retry, stop_after_attempt, wait_random_exponential
 def call_gemini(video_file: str, prompt: str) -> str:
     """
     Call the Gemini model with the provided video file and prompt.
+    The video file is read as bytes and passed with MIME type "video/mp4",
+    and the prompt is wrapped as text.
     """
     with open(video_file, "rb") as f:
         file_bytes = f.read()
         model=MODEL_NAME,
         contents=[
             Part(file_data=file_bytes, mime_type="video/mp4"),
+            Part(text=prompt)
         ],
     )
     return response.text
     try:
         key_frames_response = call_gemini(video_file, prompt)
         key_frames = json.loads(key_frames_response)
         if not isinstance(key_frames, list):
             key_frames = []
     except Exception as e:
+        print("Error in key frame extraction:", e)
         key_frames = []
     extracted_frames = []
             seconds = hhmmss_to_seconds(ts)
         except Exception:
             continue
         cap.set(cv2.CAP_PROP_POS_MSEC, seconds * 1000)
         ret, frame = cap.read()
         if ret:
             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
             caption = f"{ts}: {description}"
             extracted_frames.append((frame_rgb, caption))
             analysis += f"\n[Error during iteration {i+1}: {e}]"
             break
     markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{analysis}\n"
     key_frames_gallery = get_key_frames(video_file, analysis, user_query)
     if not key_frames_gallery:
         markdown_report += "\n*No key frames were extracted.*\n"
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
+        gr.Video(label="Upload Video File"),
         gr.Textbox(label="Analysis Query (optional): guide the focus of the analysis", placeholder="e.g., focus on unusual movements near the entrance")
     ],
     outputs=[