codelion committed on
Commit
c137e5c
·
verified ·
1 Parent(s): d638712

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -29
app.py CHANGED
@@ -1,28 +1,32 @@
1
  import os
2
- import json
3
  import gradio as gr
4
  import cv2
5
  from google import genai
6
  from google.genai.types import Part
7
  from tenacity import retry, stop_after_attempt, wait_random_exponential
8
 
9
- # Retrieve API key from environment variables.
10
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
11
  if not GOOGLE_API_KEY:
12
  raise ValueError("Please set the GOOGLE_API_KEY environment variable.")
13
 
14
- # Initialize the Gemini API client via AI Studio.
15
  client = genai.Client(api_key=GOOGLE_API_KEY)
16
 
17
- # Use the Gemini 2.0 Flash model.
18
- MODEL_NAME = "gemini-2.0-flash-001"
19
 
20
  @retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
21
  def call_gemini(video_file: str, prompt: str) -> str:
22
  """
23
- Call the Gemini model with the provided video file and prompt.
24
- The video file is read as bytes and passed with MIME type "video/mp4",
25
- and the prompt is wrapped as a text part.
 
 
 
 
 
26
  """
27
  with open(video_file, "rb") as f:
28
  file_bytes = f.read()
@@ -37,17 +41,31 @@ def call_gemini(video_file: str, prompt: str) -> str:
37
 
38
  def safe_call_gemini(video_file: str, prompt: str) -> str:
39
  """
40
- Wrapper for call_gemini that catches exceptions and returns a fallback string.
 
 
 
 
 
 
 
41
  """
42
  try:
43
  return call_gemini(video_file, prompt)
44
  except Exception as e:
45
- print("Gemini call failed:", e)
46
- return "No summary available."
 
47
 
48
  def hhmmss_to_seconds(time_str: str) -> float:
49
  """
50
  Convert a HH:MM:SS formatted string into seconds.
 
 
 
 
 
 
51
  """
52
  parts = time_str.strip().split(":")
53
  parts = [float(p) for p in parts]
@@ -60,23 +78,28 @@ def hhmmss_to_seconds(time_str: str) -> float:
60
 
61
  def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
62
  """
63
- Ask Gemini to output key timestamps and descriptions as plain text.
64
- The prompt instructs the model to output one line per event in the format:
65
- HH:MM:SS - description
66
- We then parse these lines and extract the corresponding frames using OpenCV.
 
 
67
 
68
- Returns a list of tuples: (image_array, caption)
 
69
  """
70
  prompt = (
71
  "List the key timestamps in the video and a brief description of the event at that time. "
72
- "Output one line per event in the following format: HH:MM:SS - description. Do not include any extra text."
73
  )
74
  prompt += f" Video Summary: {summary}"
75
  if user_query:
76
  prompt += f" Focus on: {user_query}"
77
 
78
- # Use the safe call to get a response or fallback text.
79
  key_frames_response = safe_call_gemini(video_file, prompt)
 
 
 
80
  lines = key_frames_response.strip().split("\n")
81
  key_frames = []
82
  for line in lines:
@@ -110,13 +133,14 @@ def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
110
 
111
  def analyze_video(video_file: str, user_query: str) -> (str, list):
112
  """
113
- Perform video analysis on the uploaded file.
114
- First, call Gemini with a simple prompt to get a brief summary.
115
- Then, call Gemini to list key timestamps and descriptions.
 
 
116
 
117
  Returns:
118
- - A Markdown report summarizing the video.
119
- - A gallery list of key frames (each as a tuple of (image, caption)).
120
  """
121
  summary_prompt = "Summarize this video."
122
  if user_query:
@@ -135,18 +159,28 @@ def analyze_video(video_file: str, user_query: str) -> (str, list):
135
 
136
  def gradio_interface(video_file, user_query: str) -> (str, list):
137
  """
138
- Gradio interface function that accepts an uploaded video file and an optional query,
139
- then returns a Markdown report and a gallery of key frame images with captions.
 
 
 
 
 
 
140
  """
141
- if not video_file:
142
  return "Please upload a valid video file.", []
 
 
143
  return analyze_video(video_file, user_query)
144
 
 
145
  iface = gr.Interface(
146
  fn=gradio_interface,
147
  inputs=[
148
  gr.Video(label="Upload Video File"),
149
- gr.Textbox(label="Analysis Query (optional): guide the focus of the analysis", placeholder="e.g., focus on unusual movements near the entrance")
 
150
  ],
151
  outputs=[
152
  gr.Markdown(label="Security & Surveillance Analysis Report"),
@@ -154,11 +188,11 @@ iface = gr.Interface(
154
  ],
155
  title="AI Video Analysis and Summariser Agent",
156
  description=(
157
- "This tool uses Google's Gemini 2.0 Flash model via AI Studio to analyze an uploaded video. "
158
  "It returns a brief summary and extracts key frames based on that summary. "
159
  "Provide a video file and, optionally, a query to guide the analysis."
160
  )
161
  )
162
 
163
  if __name__ == "__main__":
164
- iface.launch()
 
1
  import os
 
2
  import gradio as gr
3
  import cv2
4
  from google import genai
5
  from google.genai.types import Part
6
  from tenacity import retry, stop_after_attempt, wait_random_exponential
7
 
8
+ # Retrieve API key from environment variables
9
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
10
  if not GOOGLE_API_KEY:
11
  raise ValueError("Please set the GOOGLE_API_KEY environment variable.")
12
 
13
+ # Initialize the Gemini API client
14
  client = genai.Client(api_key=GOOGLE_API_KEY)
15
 
16
+ # Define the model name
17
+ MODEL_NAME = "gemini-2.0-flash"
18
 
19
  @retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
20
  def call_gemini(video_file: str, prompt: str) -> str:
21
  """
22
+ Call the Gemini model with a video file and prompt.
23
+
24
+ Args:
25
+ video_file (str): Path to the video file
26
+ prompt (str): Text prompt to guide the analysis
27
+
28
+ Returns:
29
+ str: Response text from the Gemini API
30
  """
31
  with open(video_file, "rb") as f:
32
  file_bytes = f.read()
 
41
 
42
  def safe_call_gemini(video_file: str, prompt: str) -> str:
43
  """
44
+ Wrapper for call_gemini that catches exceptions and returns error messages.
45
+
46
+ Args:
47
+ video_file (str): Path to the video file
48
+ prompt (str): Text prompt for the API
49
+
50
+ Returns:
51
+ str: API response or error message
52
  """
53
  try:
54
  return call_gemini(video_file, prompt)
55
  except Exception as e:
56
+ error_msg = f"Gemini call failed: {str(e)}"
57
+ print(error_msg)
58
+ return error_msg
59
 
60
  def hhmmss_to_seconds(time_str: str) -> float:
61
  """
62
  Convert a HH:MM:SS formatted string into seconds.
63
+
64
+ Args:
65
+ time_str (str): Time string in HH:MM:SS format
66
+
67
+ Returns:
68
+ float: Time in seconds
69
  """
70
  parts = time_str.strip().split(":")
71
  parts = [float(p) for p in parts]
 
78
 
79
  def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
80
  """
81
+ Extract key frames from the video based on timestamps provided by Gemini.
82
+
83
+ Args:
84
+ video_file (str): Path to the video file
85
+ summary (str): Video summary to provide context
86
+ user_query (str): Optional user query to focus the analysis
87
 
88
+ Returns:
89
+ list: List of tuples (image_array, caption)
90
  """
91
  prompt = (
92
  "List the key timestamps in the video and a brief description of the event at that time. "
93
+ "Output one line per event in the format: HH:MM:SS - description. Do not include any extra text."
94
  )
95
  prompt += f" Video Summary: {summary}"
96
  if user_query:
97
  prompt += f" Focus on: {user_query}"
98
 
 
99
  key_frames_response = safe_call_gemini(video_file, prompt)
100
+ if "Gemini call failed" in key_frames_response:
101
+ return []
102
+
103
  lines = key_frames_response.strip().split("\n")
104
  key_frames = []
105
  for line in lines:
 
133
 
134
  def analyze_video(video_file: str, user_query: str) -> (str, list):
135
  """
136
+ Analyze the video and generate a summary and key frames.
137
+
138
+ Args:
139
+ video_file (str): Path to the video file
140
+ user_query (str): Optional query to guide the analysis
141
 
142
  Returns:
143
+ tuple: (Markdown report, list of key frames)
 
144
  """
145
  summary_prompt = "Summarize this video."
146
  if user_query:
 
159
 
160
  def gradio_interface(video_file, user_query: str) -> (str, list):
161
  """
162
+ Gradio interface function to process video and return results.
163
+
164
+ Args:
165
+ video_file (str): Path to the uploaded video file
166
+ user_query (str): Optional query to guide analysis
167
+
168
+ Returns:
169
+ tuple: (Markdown report, gallery of key frames)
170
  """
171
+ if not video_file or not os.path.exists(video_file):
172
  return "Please upload a valid video file.", []
173
+ if not video_file.lower().endswith('.mp4'):
174
+ return "Please upload an MP4 video file.", []
175
  return analyze_video(video_file, user_query)
176
 
177
+ # Define the Gradio interface
178
  iface = gr.Interface(
179
  fn=gradio_interface,
180
  inputs=[
181
  gr.Video(label="Upload Video File"),
182
+ gr.Textbox(label="Analysis Query (optional): guide the focus of the analysis",
183
+ placeholder="e.g., focus on unusual movements near the entrance")
184
  ],
185
  outputs=[
186
  gr.Markdown(label="Security & Surveillance Analysis Report"),
 
188
  ],
189
  title="AI Video Analysis and Summariser Agent",
190
  description=(
191
+ "This tool uses Google's Gemini 2.0 Flash model to analyze an uploaded video. "
192
  "It returns a brief summary and extracts key frames based on that summary. "
193
  "Provide a video file and, optionally, a query to guide the analysis."
194
  )
195
  )
196
 
197
  if __name__ == "__main__":
198
+ iface.launch()