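# videoanalysis / app.py
# Web file-viewer header captured together with the source (not part of the program):
#   author avatar: codelion | commit "Create app.py" (f8aaa9d, verified) | raw | history | blame | 3.1 kB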
import os
import gradio as gr
from google import genai
from google.genai import types
from google.genai.types import Part
from tenacity import retry, stop_after_attempt, wait_random_exponential
# The Gemini API key is read from the environment; abort at import time if it
# is missing or empty so the app never starts half-configured.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if GOOGLE_API_KEY in (None, ""):
    raise ValueError("Please set the GOOGLE_API_KEY environment variable.")

# Single shared Gemini client, authenticated with the API key above.
client = genai.Client(api_key=GOOGLE_API_KEY)

# Model identifier passed to every generate_content request.
MODEL_NAME = "gemini-2.0-flash-001"
@retry(
    wait=wait_random_exponential(multiplier=1, max=60),
    stop=stop_after_attempt(3),
    # Re-raise the last underlying exception instead of tenacity's RetryError,
    # so callers that display the error show the real cause.
    reraise=True,
)
def call_gemini(video_url: str, prompt: str) -> str:
    """Send one video-plus-prompt request to Gemini and return the reply text.

    The video is not uploaded; it is referenced by URI and wrapped in a
    ``Part`` tagged with the ``video/webm`` MIME type, followed by the text
    prompt. The call is attempted up to three times with randomized
    exponential backoff between attempts.

    Args:
        video_url: URI of the video to analyse (expected to be publicly
            accessible).
        prompt: Instruction text sent alongside the video.

    Returns:
        The text of the model's response, or an empty string when the
        response carries no text.

    Raises:
        Exception: Whatever the final failed attempt raised, once all
            retries are exhausted.
    """
    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=[
            Part.from_uri(file_uri=video_url, mime_type="video/webm"),
            prompt,
        ],
    )
    # ``response.text`` may be None when the reply has no text part; honour
    # the declared ``str`` return type so callers can concatenate safely.
    return response.text or ""
def analyze_video(video_url: str, num_iterations: int = 3) -> str:
    """Run an iterative, self-refining security analysis of a video.

    The first pass asks the model for a detailed summary of the video. Each
    later pass feeds the previous pass's text back to the model and asks for
    further elaboration. The result of each successful pass replaces the
    previous one.

    Args:
        video_url: URI of the video, forwarded unchanged to ``call_gemini``.
        num_iterations: Number of model passes to run. Defaults to 3; a value
            of zero or less performs no calls and yields an empty string.

    Returns:
        The text from the last successful pass. If a pass fails, the loop
        stops and a bracketed error note is appended to the most recent
        analysis instead of raising.
    """
    analysis = ""
    for i in range(num_iterations):
        if i == 0:
            prompt = (
                "You are a video analysis agent focusing on security and surveillance. "
                "Provide a detailed summary of the video, highlighting any key events, suspicious activities, or anomalies."
            )
        else:
            prompt = (
                f"Based on the previous analysis: \"{analysis}\". "
                "Provide further elaboration and refined insights, focusing on potential security threats, anomalous events, "
                "and any details that may help a security team understand the situation better."
            )
        # Keep the try body limited to the call that can actually fail.
        try:
            result = call_gemini(video_url, prompt)
        except Exception as e:
            # Best-effort: keep what we have so far and report the failure
            # in-band, since the result is displayed directly to the user.
            analysis += f"\n[Error during iteration {i+1}: {e}]"
            break  # Exit if an error occurs
        # Guard against a None result so that the ``+=`` above and the
        # declared ``str`` return type stay valid on later iterations.
        analysis = result or ""
    return analysis
def gradio_interface(video_url: str) -> str:
    """Gradio callback: validate the submitted URL and return the analysis.

    Args:
        video_url: Raw text from the URL textbox.

    Returns:
        A prompt asking for a valid URL when the input is missing, empty, or
        only whitespace; otherwise the result of ``analyze_video`` for the
        whitespace-trimmed URL.
    """
    # Trim stray whitespace (e.g. from copy/paste) and treat a missing value
    # like an empty one, so blank input never reaches the model.
    cleaned_url = (video_url or "").strip()
    if not cleaned_url:
        return "Please provide a valid video URL."
    return analyze_video(cleaned_url)
# Assemble the web UI: one textbox in (the video URL), one textbox out (the
# analysis text), both wired to ``gradio_interface``.
iface = gr.Interface(
    title="AI Video Analysis and Summariser Agent",
    description=(
        "This agentic video analysis tool uses Google's Gemini 2.0 Flash model via AI Studio "
        "to iteratively analyze a video for security and surveillance insights. It makes repeated "
        "LLM calls to refine its analysis of the video content."
    ),
    fn=gradio_interface,
    inputs=gr.Textbox(label="Video URL (publicly accessible, e.g., YouTube link)"),
    outputs=gr.Textbox(label="Security & Surveillance Analysis"),
)

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()