Spaces:
Sleeping
Sleeping
File size: 3,100 Bytes
f8aaa9d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import os
import gradio as gr
from google import genai
from google.genai import types
from google.genai.types import Part
from tenacity import retry, stop_after_attempt, wait_random_exponential
# --- Configuration -----------------------------------------------------------
# The API key comes from the process environment; abort at import time when it
# is missing or empty, since no Gemini request could succeed without it.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("Please set the GOOGLE_API_KEY environment variable.")

# Single module-level Gemini client, authenticated with the key above and
# shared by every request made from this module.
client = genai.Client(api_key=GOOGLE_API_KEY)

# Identifier of the Gemini 2.0 Flash model that all requests are sent to.
MODEL_NAME = "gemini-2.0-flash-001"
@retry(
    wait=wait_random_exponential(multiplier=1, max=60),
    stop=stop_after_attempt(3),
    reraise=True,
)
def call_gemini(video_url: str, prompt: str) -> str:
    """Send a video reference and a text prompt to Gemini and return the reply.

    The video is not downloaded here: it is handed to the model by URI as a
    ``Part``, followed by the prompt text.

    The request is attempted up to three times with randomized exponential
    back-off (capped at 60 seconds) between attempts. ``reraise=True`` makes
    the final failure surface as the exception raised by the last attempt
    instead of a ``tenacity.RetryError`` wrapper, so callers that format the
    exception show the real error message.

    Args:
        video_url: URI of the video to analyse.
        prompt: Instruction text sent alongside the video.

    Returns:
        The text of the model's response, or an empty string when the
        response carries no text.

    Raises:
        Exception: Whatever the last attempt raised, once all attempts fail.
    """
    # NOTE(review): the MIME type is hard-coded to WebM even though the UI
    # suggests other sources (e.g. YouTube links) -- confirm this is what the
    # API expects for every URL users are allowed to submit.
    video_part = Part.from_uri(file_uri=video_url, mime_type="video/webm")

    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=[video_part, prompt],
    )

    # ``response.text`` may be None when the reply has no text part;
    # normalise to "" so the declared ``-> str`` contract always holds.
    return response.text or ""
def analyze_video(video_url: str, *, num_iterations: int = 3) -> str:
    """Run an iterative, self-refining security analysis of a video.

    The first pass asks the model for a detailed summary of the video. Each
    later pass feeds the previous pass's text back to the model and asks for
    further elaboration focused on security threats and anomalies.

    Refinement stops early when a pass raises or returns no text. In both
    cases the last successful analysis is kept and a bracketed note describing
    what went wrong is appended, so the caller always gets a string back.

    Args:
        video_url: URI of the video to analyse; forwarded to ``call_gemini``.
        num_iterations: Total number of model passes (first summary plus
            refinements). Values of zero or less perform no calls.

    Returns:
        The analysis text from the last successful pass, possibly followed by
        a bracketed error note. Empty string when no pass was run.
    """
    analysis = ""
    for iteration in range(1, num_iterations + 1):
        if iteration == 1:
            prompt = (
                "You are a video analysis agent focusing on security and surveillance. "
                "Provide a detailed summary of the video, highlighting any key events, suspicious activities, or anomalies."
            )
        else:
            prompt = (
                f"Based on the previous analysis: \"{analysis}\". "
                "Provide further elaboration and refined insights, focusing on potential security threats, anomalous events, "
                "and any details that may help a security team understand the situation better."
            )

        try:
            result = call_gemini(video_url, prompt)
        except Exception as e:
            # Report the failure to the user instead of crashing the UI, and
            # keep whatever analysis earlier passes already produced.
            analysis += f"\n[Error during iteration {iteration}: {e}]"
            break  # Exit if an error occurs

        if not result:
            # A None/empty reply would otherwise be embedded in the next
            # prompt (as "None" or "") and overwrite the last good analysis.
            analysis += f"\n[No text returned during iteration {iteration}]"
            break

        analysis = result

    return analysis
def gradio_interface(video_url: str) -> str:
    """Gradio callback: validate the submitted URL and return its analysis.

    Surrounding whitespace (common when a URL is pasted into the textbox) is
    stripped before validation, so blank or whitespace-only input is rejected
    here instead of being sent to the model.

    Args:
        video_url: Raw text from the URL textbox; may be empty or ``None``.

    Returns:
        A prompt asking for a valid URL when nothing usable was entered,
        otherwise the text produced by ``analyze_video`` for the cleaned URL.
    """
    cleaned_url = (video_url or "").strip()
    if not cleaned_url:
        return "Please provide a valid video URL."
    return analyze_video(cleaned_url)
# --- User interface ----------------------------------------------------------
# Blurb shown under the title; kept separate so the Interface call stays short.
_DESCRIPTION = (
    "This agentic video analysis tool uses Google's Gemini 2.0 Flash model via AI Studio "
    "to iteratively analyze a video for security and surveillance insights. It makes repeated "
    "LLM calls to refine its analysis of the video content."
)

# One text box in (the video URL), one text box out (the analysis), wired to
# ``gradio_interface``.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(label="Video URL (publicly accessible, e.g., YouTube link)"),
    outputs=gr.Textbox(label="Security & Surveillance Analysis"),
    title="AI Video Analysis and Summariser Agent",
    description=_DESCRIPTION,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|