codelion committed on
Commit
80f741f
·
verified ·
1 Parent(s): d38e256

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -39
app.py CHANGED
@@ -56,49 +56,65 @@ def hhmmss_to_seconds(timestamp: str) -> float:
56
  Returns:
57
  float: Time in seconds
58
  """
59
- h, m, s = map(float, timestamp.split(":"))
60
- return h * 3600 + m * 60 + s
 
 
 
61
 
62
- def extract_key_frames(video_file: str, key_frames_json: str) -> list:
63
  """
64
- Extract key frames from the video based on JSON data.
65
 
66
  Args:
67
  video_file (str): Path to the video file
68
- key_frames_json (str): JSON string with key frames data
69
 
70
  Returns:
71
  list: List of tuples (image, caption)
72
  """
 
 
 
 
 
 
73
  try:
74
- key_frames = json.loads(key_frames_json)
 
75
  if not isinstance(key_frames, list):
76
- raise ValueError("Key frames data must be a list of objects.")
77
-
78
- extracted_frames = []
79
- cap = cv2.VideoCapture(video_file)
80
- if not cap.isOpened():
81
- raise ValueError("Could not open video file.")
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- for frame in key_frames:
84
- timestamp = frame.get("timecode", frame.get("timestamp", ""))
85
- title = frame.get("title", frame.get("caption", "Untitled"))
86
- if not timestamp:
87
- continue
88
-
89
- seconds = hhmmss_to_seconds(timestamp)
90
- cap.set(cv2.CAP_PROP_POS_MSEC, seconds * 1000)
91
- ret, frame_img = cap.read()
92
- if ret:
93
- frame_rgb = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)
94
- caption = f"{timestamp}: {title}"
95
- extracted_frames.append((frame_rgb, caption))
96
 
97
- cap.release()
98
- return extracted_frames
99
- except Exception as e:
100
- print(f"Error extracting frames: {str(e)}")
101
- return []
 
 
 
 
102
 
103
  def analyze_video(video_file: str, user_query: str) -> tuple[str, list]:
104
  """
@@ -132,10 +148,11 @@ def analyze_video(video_file: str, user_query: str) -> tuple[str, list]:
132
  )
133
  summary = summary_response.text
134
 
135
- # Step 2: Extract key frames in an agentic loop
136
  key_frames_prompt = (
137
  "Identify key frames in this video and return them as a JSON array. "
138
- "Each object should have 'timecode' (in HH:MM:SS format) and 'title' describing the scene."
 
139
  )
140
  if user_query:
141
  key_frames_prompt += f" Focus on: {user_query}"
@@ -144,23 +161,19 @@ def analyze_video(video_file: str, user_query: str) -> tuple[str, list]:
144
  model=MODEL_NAME,
145
  contents=[video_file_obj, key_frames_prompt]
146
  )
147
- key_frames_json = key_frames_response.text
148
-
149
- # Parse and extract frames
150
- key_frames = extract_key_frames(video_file, key_frames_json)
151
 
152
- # Generate Markdown report
153
  markdown_report = (
154
  "## Video Analysis Report\n\n"
155
  f"**Summary:**\n{summary}\n"
156
- f"**Video URI:** {video_file_obj.uri}\n"
157
  )
158
  if key_frames:
159
  markdown_report += "\n**Key Frames Identified:**\n"
160
  for i, (_, caption) in enumerate(key_frames, 1):
161
  markdown_report += f"- Frame {i}: {caption}\n"
162
  else:
163
- markdown_report += "\n*No key frames extracted.*\n"
164
 
165
  return markdown_report, key_frames
166
 
 
56
  Returns:
57
  float: Time in seconds
58
  """
59
+ try:
60
+ h, m, s = map(float, timestamp.split(":"))
61
+ return h * 3600 + m * 60 + s
62
+ except ValueError:
63
+ return 0.0 # Default to 0 if parsing fails
64
 
65
def extract_key_frames(video_file: str, key_frames_response: str) -> list:
    """
    Extract key frames from the video based on Gemini API response.

    The response is expected to be a JSON array of objects carrying a
    'timecode' (or 'timestamp') in HH:MM:SS format and a 'title' (or
    'caption'). A Markdown code fence around the JSON is tolerated, and a
    response that is not JSON at all is parsed line by line as
    "HH:MM:SS - Title" text.

    Args:
        video_file (str): Path to the video file
        key_frames_response (str): Raw response from Gemini API

    Returns:
        list: List of tuples (image, caption). Empty when the video cannot
            be opened or the response yields no usable key frames.
    """
    extracted_frames = []
    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            print("Error: Could not open video file.")
            return extracted_frames

        for frame in _parse_key_frames(key_frames_response):
            # The model's output is untrusted: skip anything that is not an object.
            if not isinstance(frame, dict):
                continue

            timestamp = frame.get("timecode", frame.get("timestamp", ""))
            title = frame.get("title", frame.get("caption", "Untitled"))
            if not timestamp or not isinstance(timestamp, str):
                continue

            seconds = hhmmss_to_seconds(timestamp)
            # hhmmss_to_seconds signals a parse failure with 0.0, which is also
            # the value of a genuine "00:00:00"; only skip the former.
            if seconds == 0.0 and not _is_zero_timecode(timestamp):
                continue

            cap.set(cv2.CAP_PROP_POS_MSEC, seconds * 1000)
            ret, frame_img = cap.read()
            if ret:
                frame_rgb = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)
                caption = f"{timestamp}: {title}"
                extracted_frames.append((frame_rgb, caption))
    finally:
        # Release the capture on every exit path, including exceptions.
        cap.release()

    return extracted_frames


def _parse_key_frames(response_text) -> list:
    """Turn the raw model response into a list of key-frame entries."""
    text = _strip_code_fence(response_text or "")
    try:
        key_frames = json.loads(text)
    except json.JSONDecodeError as e:
        print(f"JSON parsing failed: {str(e)}. Raw response: {response_text}")
        # Fallback: Attempt to parse plain text with timecodes (e.g., "00:00:03 - Scene")
        key_frames = []
        for line in text.split("\n"):
            if " - " in line:
                timestamp, title = line.split(" - ", 1)
                key_frames.append({"timecode": timestamp.strip(), "title": title.strip()})
            elif ":" in line and len(line.split(":")) == 3:  # Rough check for HH:MM:SS
                key_frames.append({"timecode": line.strip(), "title": "Untitled"})
        return key_frames

    if not isinstance(key_frames, list):
        # Valid JSON but not an array: report it and yield no frames instead
        # of raising past the caller with the capture still open.
        print("Response is not a list.")
        return []
    return key_frames


def _strip_code_fence(text: str) -> str:
    """Remove a surrounding Markdown code fence (``` or ```json) if present."""
    text = text.strip()
    if not text.startswith("```"):
        return text
    lines = text.split("\n")
    if len(lines) < 2:
        return text
    lines = lines[1:]  # opening fence line, possibly with a language tag
    if lines and lines[-1].strip().startswith("```"):
        lines = lines[:-1]
    return "\n".join(lines).strip()


def _is_zero_timecode(timestamp: str) -> bool:
    """Return True when *timestamp* is a well-formed H:M:S value equal to zero."""
    parts = timestamp.split(":")
    if len(parts) != 3:
        return False
    try:
        return all(float(part) == 0 for part in parts)
    except ValueError:
        return False
118
 
119
  def analyze_video(video_file: str, user_query: str) -> tuple[str, list]:
120
  """
 
148
  )
149
  summary = summary_response.text
150
 
151
+ # Step 2: Extract key frames
152
  key_frames_prompt = (
153
  "Identify key frames in this video and return them as a JSON array. "
154
+ "Each object must have 'timecode' (in HH:MM:SS format) and 'title' describing the scene. "
155
+ "Ensure the response is valid JSON."
156
  )
157
  if user_query:
158
  key_frames_prompt += f" Focus on: {user_query}"
 
161
  model=MODEL_NAME,
162
  contents=[video_file_obj, key_frames_prompt]
163
  )
164
+ key_frames = extract_key_frames(video_file, key_frames_response.text)
 
 
 
165
 
166
+ # Generate Markdown report (without Video URI)
167
  markdown_report = (
168
  "## Video Analysis Report\n\n"
169
  f"**Summary:**\n{summary}\n"
 
170
  )
171
  if key_frames:
172
  markdown_report += "\n**Key Frames Identified:**\n"
173
  for i, (_, caption) in enumerate(key_frames, 1):
174
  markdown_report += f"- Frame {i}: {caption}\n"
175
  else:
176
+ markdown_report += "\n*No key frames extracted. The model may not have returned valid JSON.*\n"
177
 
178
  return markdown_report, key_frames
179