codelion committed on
Commit
03c6357
·
verified ·
1 Parent(s): 8f4cdd0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -10
app.py CHANGED
@@ -3,7 +3,6 @@ import json
3
  import gradio as gr
4
  import cv2
5
  from google import genai
6
- from google.genai import types
7
  from google.genai.types import Part
8
  from tenacity import retry, stop_after_attempt, wait_random_exponential
9
 
@@ -22,7 +21,8 @@ MODEL_NAME = "gemini-2.0-flash-001"
22
  def call_gemini(video_file: str, prompt: str) -> str:
23
  """
24
  Call the Gemini model with the provided video file and prompt.
25
- The video file is read as bytes and passed with MIME type "video/mp4".
 
26
  """
27
  with open(video_file, "rb") as f:
28
  file_bytes = f.read()
@@ -30,7 +30,7 @@ def call_gemini(video_file: str, prompt: str) -> str:
30
  model=MODEL_NAME,
31
  contents=[
32
  Part(file_data=file_bytes, mime_type="video/mp4"),
33
- prompt,
34
  ],
35
  )
36
  return response.text
@@ -67,11 +67,11 @@ def get_key_frames(video_file: str, analysis: str, user_query: str) -> list:
67
 
68
  try:
69
  key_frames_response = call_gemini(video_file, prompt)
70
- # Attempt to parse the output as JSON.
71
  key_frames = json.loads(key_frames_response)
72
  if not isinstance(key_frames, list):
73
  key_frames = []
74
  except Exception as e:
 
75
  key_frames = []
76
 
77
  extracted_frames = []
@@ -87,11 +87,9 @@ def get_key_frames(video_file: str, analysis: str, user_query: str) -> list:
87
  seconds = hhmmss_to_seconds(ts)
88
  except Exception:
89
  continue
90
- # Set video position (in milliseconds)
91
  cap.set(cv2.CAP_PROP_POS_MSEC, seconds * 1000)
92
  ret, frame = cap.read()
93
  if ret:
94
- # Convert BGR to RGB for proper display
95
  frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
96
  caption = f"{ts}: {description}"
97
  extracted_frames.append((frame_rgb, caption))
@@ -136,10 +134,7 @@ def analyze_video(video_file: str, user_query: str) -> (str, list):
136
  analysis += f"\n[Error during iteration {i+1}: {e}]"
137
  break
138
 
139
- # Create a Markdown report
140
  markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{analysis}\n"
141
-
142
- # Get key frames based on the analysis and optional query.
143
  key_frames_gallery = get_key_frames(video_file, analysis, user_query)
144
  if not key_frames_gallery:
145
  markdown_report += "\n*No key frames were extracted.*\n"
@@ -162,7 +157,7 @@ def gradio_interface(video_file, user_query: str) -> (str, list):
162
  iface = gr.Interface(
163
  fn=gradio_interface,
164
  inputs=[
165
- gr.Video(label="Upload Video File"), # Removed the 'type' parameter.
166
  gr.Textbox(label="Analysis Query (optional): guide the focus of the analysis", placeholder="e.g., focus on unusual movements near the entrance")
167
  ],
168
  outputs=[
 
3
  import gradio as gr
4
  import cv2
5
  from google import genai
 
6
  from google.genai.types import Part
7
  from tenacity import retry, stop_after_attempt, wait_random_exponential
8
 
 
21
  def call_gemini(video_file: str, prompt: str) -> str:
22
  """
23
  Call the Gemini model with the provided video file and prompt.
24
+ The video file is read as bytes and passed with MIME type "video/mp4",
25
+ and the prompt is wrapped as text.
26
  """
27
  with open(video_file, "rb") as f:
28
  file_bytes = f.read()
 
30
  model=MODEL_NAME,
31
  contents=[
32
  Part(file_data=file_bytes, mime_type="video/mp4"),
33
+ Part(text=prompt)
34
  ],
35
  )
36
  return response.text
 
67
 
68
  try:
69
  key_frames_response = call_gemini(video_file, prompt)
 
70
  key_frames = json.loads(key_frames_response)
71
  if not isinstance(key_frames, list):
72
  key_frames = []
73
  except Exception as e:
74
+ print("Error in key frame extraction:", e)
75
  key_frames = []
76
 
77
  extracted_frames = []
 
87
  seconds = hhmmss_to_seconds(ts)
88
  except Exception:
89
  continue
 
90
  cap.set(cv2.CAP_PROP_POS_MSEC, seconds * 1000)
91
  ret, frame = cap.read()
92
  if ret:
 
93
  frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
94
  caption = f"{ts}: {description}"
95
  extracted_frames.append((frame_rgb, caption))
 
134
  analysis += f"\n[Error during iteration {i+1}: {e}]"
135
  break
136
 
 
137
  markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{analysis}\n"
 
 
138
  key_frames_gallery = get_key_frames(video_file, analysis, user_query)
139
  if not key_frames_gallery:
140
  markdown_report += "\n*No key frames were extracted.*\n"
 
157
  iface = gr.Interface(
158
  fn=gradio_interface,
159
  inputs=[
160
+ gr.Video(label="Upload Video File"),
161
  gr.Textbox(label="Analysis Query (optional): guide the focus of the analysis", placeholder="e.g., focus on unusual movements near the entrance")
162
  ],
163
  outputs=[