Spaces:

codelion
/

videoanalysis

Sleeping

App Files Files Community

codelion committed on Apr 2

Commit

63595a8

verified ·

1 Parent(s): 4938676

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -36

app.py CHANGED Viewed

@@ -48,12 +48,12 @@ def hhmmss_to_seconds(time_str: str) -> float:
     else:
         return parts[0]
-def get_key_frames(video_file: str, analysis: str, user_query: str) -> list:
     """
-    Ask Gemini to list key timestamps and descriptions for the video.
-    The model is instructed to output one line per event in the format:
     HH:MM:SS - description
-    We then parse these lines and extract the corresponding frames using OpenCV.
     Returns a list of tuples: (image_array, caption)
     """
@@ -61,9 +61,9 @@ def get_key_frames(video_file: str, analysis: str, user_query: str) -> list:
         "List the key timestamps in the video and a brief description of the important event at that time. "
         "Output one line per event in the following format: HH:MM:SS - description. Do not include any extra text."
     )
-    prompt += f" Video Summary: {analysis}"
     if user_query:
-        prompt += f" Additional focus: {user_query}"
     try:
         key_frames_response = call_gemini(video_file, prompt)
@@ -103,43 +103,29 @@ def get_key_frames(video_file: str, analysis: str, user_query: str) -> list:
 def analyze_video(video_file: str, user_query: str) -> (str, list):
     """
-    Perform iterative video analysis on the uploaded file.
-    Iteratively refine the summary with simpler prompts, then ask for key timestamps.
     Returns:
-      - A Markdown report (string) summarizing the video.
       - A gallery list of key frames (each as a tuple of (image, caption)).
     """
-    analysis = ""
-    num_iterations = 3
-    for i in range(num_iterations):
-        if i == 0:
-            prompt = "Give a detailed summary of the video."
-            if user_query:
-                prompt += f" Also focus on: {user_query}"
-        elif i == 1:
-            prompt = f"Based on the summary: \"{analysis}\", provide additional details about important events and anomalies in the video."
-            if user_query:
-                prompt += f" Also focus on: {user_query}"
-        else:
-            prompt = f"Refine and consolidate the analysis: \"{analysis}\" into a final summary."
-        try:
-            analysis = call_gemini(video_file, prompt)
-        except Exception as e:
-            analysis += f"\n[Error during iteration {i+1}: {e}]"
-            break
-    markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{analysis}\n"
-    key_frames_gallery = get_key_frames(video_file, analysis, user_query)
     if not key_frames_gallery:
         markdown_report += "\n*No key frames were extracted.*\n"
     else:
         markdown_report += "\n**Key Frames Extracted:**\n"
         for idx, (img, caption) in enumerate(key_frames_gallery, start=1):
             markdown_report += f"- **Frame {idx}:** {caption}\n"
     return markdown_report, key_frames_gallery
 def gradio_interface(video_file, user_query: str) -> (str, list):
@@ -163,9 +149,9 @@ iface = gr.Interface(
     ],
     title="AI Video Analysis and Summariser Agent",
     description=(
-        "This agentic video analysis tool uses Google's Gemini 2.0 Flash model via AI Studio "
-        "to iteratively analyze an uploaded video for insights. Provide a video file and, optionally, "
-        "a query to guide the analysis. The tool returns a Markdown report along with a gallery of key frame images."
     )
 )

     else:
         return parts[0]
+def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
     """
+    Ask Gemini to output key timestamps and descriptions in plain text.
+    The prompt instructs the model to output one line per event in the format:
     HH:MM:SS - description
+    We then parse these lines and extract frames using OpenCV.
     Returns a list of tuples: (image_array, caption)
     """
         "List the key timestamps in the video and a brief description of the important event at that time. "
         "Output one line per event in the following format: HH:MM:SS - description. Do not include any extra text."
     )
+    prompt += f" Video Summary: {summary}"
     if user_query:
+        prompt += f" Focus on: {user_query}"
     try:
         key_frames_response = call_gemini(video_file, prompt)
 def analyze_video(video_file: str, user_query: str) -> (str, list):
     """
+    Perform a single-step video analysis on the uploaded file.
+    First, call Gemini to get a brief summary of the video.
+    Then, ask Gemini for key timestamps and descriptions.
     Returns:
+      - A Markdown report as a string.
       - A gallery list of key frames (each as a tuple of (image, caption)).
     """
+    summary_prompt = "Summarize this video in a few sentences, focusing on any security or surveillance insights."
+    if user_query:
+        summary_prompt += f" Also focus on: {user_query}"
+    try:
+        summary = call_gemini(video_file, summary_prompt)
+    except Exception as e:
+        summary = f"[Error in summary extraction: {e}]"
+    markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{summary}\n"
+    key_frames_gallery = get_key_frames(video_file, summary, user_query)
     if not key_frames_gallery:
         markdown_report += "\n*No key frames were extracted.*\n"
     else:
         markdown_report += "\n**Key Frames Extracted:**\n"
         for idx, (img, caption) in enumerate(key_frames_gallery, start=1):
             markdown_report += f"- **Frame {idx}:** {caption}\n"
     return markdown_report, key_frames_gallery
 def gradio_interface(video_file, user_query: str) -> (str, list):
     ],
     title="AI Video Analysis and Summariser Agent",
     description=(
+        "This tool uses Google's Gemini 2.0 Flash model via AI Studio to analyze an uploaded video. "
+        "It returns a brief summary and extracts key frames based on that summary. "
+        "Provide a video file and, optionally, a query to guide the analysis."
     )
 )