Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -48,12 +48,12 @@ def hhmmss_to_seconds(time_str: str) -> float:
|
|
48 |
else:
|
49 |
return parts[0]
|
50 |
|
51 |
-
def get_key_frames(video_file: str, analysis: str, user_query: str) -> list:
|
52 |
"""
|
53 |
-
Ask Gemini to
|
54 |
-
The
|
55 |
HH:MM:SS - description
|
56 |
-
We then parse these lines and extract
|
57 |
|
58 |
Returns a list of tuples: (image_array, caption)
|
59 |
"""
|
@@ -61,9 +61,9 @@ def get_key_frames(video_file: str, analysis: str, user_query: str) -> list:
|
|
61 |
"List the key timestamps in the video and a brief description of the important event at that time. "
|
62 |
"Output one line per event in the following format: HH:MM:SS - description. Do not include any extra text."
|
63 |
)
|
64 |
-
prompt += f" Video Summary: {analysis}"
|
65 |
if user_query:
|
66 |
-
prompt += f"
|
67 |
|
68 |
try:
|
69 |
key_frames_response = call_gemini(video_file, prompt)
|
@@ -103,43 +103,29 @@ def get_key_frames(video_file: str, analysis: str, user_query: str) -> list:
|
|
103 |
|
104 |
def analyze_video(video_file: str, user_query: str) -> (str, list):
|
105 |
"""
|
106 |
-
Perform
|
107 |
-
|
|
|
108 |
|
109 |
Returns:
|
110 |
-
- A Markdown report
|
111 |
- A gallery list of key frames (each as a tuple of (image, caption)).
|
112 |
"""
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
prompt = f"Based on the summary: \"{analysis}\", provide additional details about important events and anomalies in the video."
|
123 |
-
if user_query:
|
124 |
-
prompt += f" Also focus on: {user_query}"
|
125 |
-
else:
|
126 |
-
prompt = f"Refine and consolidate the analysis: \"{analysis}\" into a final summary."
|
127 |
-
|
128 |
-
try:
|
129 |
-
analysis = call_gemini(video_file, prompt)
|
130 |
-
except Exception as e:
|
131 |
-
analysis += f"\n[Error during iteration {i+1}: {e}]"
|
132 |
-
break
|
133 |
-
|
134 |
-
markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{analysis}\n"
|
135 |
-
key_frames_gallery = get_key_frames(video_file, analysis, user_query)
|
136 |
if not key_frames_gallery:
|
137 |
markdown_report += "\n*No key frames were extracted.*\n"
|
138 |
else:
|
139 |
markdown_report += "\n**Key Frames Extracted:**\n"
|
140 |
for idx, (img, caption) in enumerate(key_frames_gallery, start=1):
|
141 |
markdown_report += f"- **Frame {idx}:** {caption}\n"
|
142 |
-
|
143 |
return markdown_report, key_frames_gallery
|
144 |
|
145 |
def gradio_interface(video_file, user_query: str) -> (str, list):
|
@@ -163,9 +149,9 @@ iface = gr.Interface(
|
|
163 |
],
|
164 |
title="AI Video Analysis and Summariser Agent",
|
165 |
description=(
|
166 |
-
"This
|
167 |
-
"
|
168 |
-
"
|
169 |
)
|
170 |
)
|
171 |
|
|
|
48 |
else:
|
49 |
return parts[0]
|
50 |
|
51 |
+
def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
|
52 |
"""
|
53 |
+
Ask Gemini to output key timestamps and descriptions in plain text.
|
54 |
+
The prompt instructs the model to output one line per event in the format:
|
55 |
HH:MM:SS - description
|
56 |
+
We then parse these lines and extract frames using OpenCV.
|
57 |
|
58 |
Returns a list of tuples: (image_array, caption)
|
59 |
"""
|
|
|
61 |
"List the key timestamps in the video and a brief description of the important event at that time. "
|
62 |
"Output one line per event in the following format: HH:MM:SS - description. Do not include any extra text."
|
63 |
)
|
64 |
+
prompt += f" Video Summary: {summary}"
|
65 |
if user_query:
|
66 |
+
prompt += f" Focus on: {user_query}"
|
67 |
|
68 |
try:
|
69 |
key_frames_response = call_gemini(video_file, prompt)
|
|
|
103 |
|
104 |
def analyze_video(video_file: str, user_query: str) -> (str, list):
|
105 |
"""
|
106 |
+
Perform a single-step video analysis on the uploaded file.
|
107 |
+
First, call Gemini to get a brief summary of the video.
|
108 |
+
Then, ask Gemini for key timestamps and descriptions.
|
109 |
|
110 |
Returns:
|
111 |
+
- A Markdown report as a string.
|
112 |
- A gallery list of key frames (each as a tuple of (image, caption)).
|
113 |
"""
|
114 |
+
summary_prompt = "Summarize this video in a few sentences, focusing on any security or surveillance insights."
|
115 |
+
if user_query:
|
116 |
+
summary_prompt += f" Also focus on: {user_query}"
|
117 |
+
try:
|
118 |
+
summary = call_gemini(video_file, summary_prompt)
|
119 |
+
except Exception as e:
|
120 |
+
summary = f"[Error in summary extraction: {e}]"
|
121 |
+
markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{summary}\n"
|
122 |
+
key_frames_gallery = get_key_frames(video_file, summary, user_query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
if not key_frames_gallery:
|
124 |
markdown_report += "\n*No key frames were extracted.*\n"
|
125 |
else:
|
126 |
markdown_report += "\n**Key Frames Extracted:**\n"
|
127 |
for idx, (img, caption) in enumerate(key_frames_gallery, start=1):
|
128 |
markdown_report += f"- **Frame {idx}:** {caption}\n"
|
|
|
129 |
return markdown_report, key_frames_gallery
|
130 |
|
131 |
def gradio_interface(video_file, user_query: str) -> (str, list):
|
|
|
149 |
],
|
150 |
title="AI Video Analysis and Summariser Agent",
|
151 |
description=(
|
152 |
+
"This tool uses Google's Gemini 2.0 Flash model via AI Studio to analyze an uploaded video. "
|
153 |
+
"It returns a brief summary and extracts key frames based on that summary. "
|
154 |
+
"Provide a video file and, optionally, a query to guide the analysis."
|
155 |
)
|
156 |
)
|
157 |
|