codelion committed on
Commit
63595a8
·
verified ·
1 Parent(s): 4938676

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -36
app.py CHANGED
@@ -48,12 +48,12 @@ def hhmmss_to_seconds(time_str: str) -> float:
48
  else:
49
  return parts[0]
50
 
51
- def get_key_frames(video_file: str, analysis: str, user_query: str) -> list:
52
  """
53
- Ask Gemini to list key timestamps and descriptions for the video.
54
- The model is instructed to output one line per event in the format:
55
  HH:MM:SS - description
56
- We then parse these lines and extract the corresponding frames using OpenCV.
57
 
58
  Returns a list of tuples: (image_array, caption)
59
  """
@@ -61,9 +61,9 @@ def get_key_frames(video_file: str, analysis: str, user_query: str) -> list:
61
  "List the key timestamps in the video and a brief description of the important event at that time. "
62
  "Output one line per event in the following format: HH:MM:SS - description. Do not include any extra text."
63
  )
64
- prompt += f" Video Summary: {analysis}"
65
  if user_query:
66
- prompt += f" Additional focus: {user_query}"
67
 
68
  try:
69
  key_frames_response = call_gemini(video_file, prompt)
@@ -103,43 +103,29 @@ def get_key_frames(video_file: str, analysis: str, user_query: str) -> list:
103
 
104
  def analyze_video(video_file: str, user_query: str) -> (str, list):
105
  """
106
- Perform iterative video analysis on the uploaded file.
107
- Iteratively refine the summary with simpler prompts, then ask for key timestamps.
 
108
 
109
  Returns:
110
- - A Markdown report (string) summarizing the video.
111
  - A gallery list of key frames (each as a tuple of (image, caption)).
112
  """
113
- analysis = ""
114
- num_iterations = 3
115
-
116
- for i in range(num_iterations):
117
- if i == 0:
118
- prompt = "Give a detailed summary of the video."
119
- if user_query:
120
- prompt += f" Also focus on: {user_query}"
121
- elif i == 1:
122
- prompt = f"Based on the summary: \"{analysis}\", provide additional details about important events and anomalies in the video."
123
- if user_query:
124
- prompt += f" Also focus on: {user_query}"
125
- else:
126
- prompt = f"Refine and consolidate the analysis: \"{analysis}\" into a final summary."
127
-
128
- try:
129
- analysis = call_gemini(video_file, prompt)
130
- except Exception as e:
131
- analysis += f"\n[Error during iteration {i+1}: {e}]"
132
- break
133
-
134
- markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{analysis}\n"
135
- key_frames_gallery = get_key_frames(video_file, analysis, user_query)
136
  if not key_frames_gallery:
137
  markdown_report += "\n*No key frames were extracted.*\n"
138
  else:
139
  markdown_report += "\n**Key Frames Extracted:**\n"
140
  for idx, (img, caption) in enumerate(key_frames_gallery, start=1):
141
  markdown_report += f"- **Frame {idx}:** {caption}\n"
142
-
143
  return markdown_report, key_frames_gallery
144
 
145
  def gradio_interface(video_file, user_query: str) -> (str, list):
@@ -163,9 +149,9 @@ iface = gr.Interface(
163
  ],
164
  title="AI Video Analysis and Summariser Agent",
165
  description=(
166
- "This agentic video analysis tool uses Google's Gemini 2.0 Flash model via AI Studio "
167
- "to iteratively analyze an uploaded video for insights. Provide a video file and, optionally, "
168
- "a query to guide the analysis. The tool returns a Markdown report along with a gallery of key frame images."
169
  )
170
  )
171
 
 
48
  else:
49
  return parts[0]
50
 
51
+ def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
52
  """
53
+ Ask Gemini to output key timestamps and descriptions in plain text.
54
+ The prompt instructs the model to output one line per event in the format:
55
  HH:MM:SS - description
56
+ We then parse these lines and extract frames using OpenCV.
57
 
58
  Returns a list of tuples: (image_array, caption)
59
  """
 
61
  "List the key timestamps in the video and a brief description of the important event at that time. "
62
  "Output one line per event in the following format: HH:MM:SS - description. Do not include any extra text."
63
  )
64
+ prompt += f" Video Summary: {summary}"
65
  if user_query:
66
+ prompt += f" Focus on: {user_query}"
67
 
68
  try:
69
  key_frames_response = call_gemini(video_file, prompt)
 
103
 
104
  def analyze_video(video_file: str, user_query: str) -> (str, list):
105
  """
106
+ Perform a single-step video analysis on the uploaded file.
107
+ First, call Gemini to get a brief summary of the video.
108
+ Then, ask Gemini for key timestamps and descriptions.
109
 
110
  Returns:
111
+ - A Markdown report as a string.
112
  - A gallery list of key frames (each as a tuple of (image, caption)).
113
  """
114
+ summary_prompt = "Summarize this video in a few sentences, focusing on any security or surveillance insights."
115
+ if user_query:
116
+ summary_prompt += f" Also focus on: {user_query}"
117
+ try:
118
+ summary = call_gemini(video_file, summary_prompt)
119
+ except Exception as e:
120
+ summary = f"[Error in summary extraction: {e}]"
121
+ markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{summary}\n"
122
+ key_frames_gallery = get_key_frames(video_file, summary, user_query)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  if not key_frames_gallery:
124
  markdown_report += "\n*No key frames were extracted.*\n"
125
  else:
126
  markdown_report += "\n**Key Frames Extracted:**\n"
127
  for idx, (img, caption) in enumerate(key_frames_gallery, start=1):
128
  markdown_report += f"- **Frame {idx}:** {caption}\n"
 
129
  return markdown_report, key_frames_gallery
130
 
131
  def gradio_interface(video_file, user_query: str) -> (str, list):
 
149
  ],
150
  title="AI Video Analysis and Summariser Agent",
151
  description=(
152
+ "This tool uses Google's Gemini 2.0 Flash model via AI Studio to analyze an uploaded video. "
153
+ "It returns a brief summary and extracts key frames based on that summary. "
154
+ "Provide a video file and, optionally, a query to guide the analysis."
155
  )
156
  )
157