Spaces:
Sleeping
Sleeping
import os | |
import gradio as gr | |
from google import genai | |
from google.genai import types | |
from google.genai.types import Part | |
from tenacity import retry, stop_after_attempt, wait_random_exponential | |
# Every request in this module targets the Gemini 2.0 Flash model.
MODEL_NAME = "gemini-2.0-flash-001"

# The API key is read from the environment; a missing or empty value aborts
# start-up immediately rather than failing later on the first request.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("Please set the GOOGLE_API_KEY environment variable.")

# Shared Gemini API client (AI Studio, API-key authentication).
client = genai.Client(api_key=GOOGLE_API_KEY)
def call_gemini(video_url: str, prompt: str, mime_type: str = "video/webm") -> str:
    """Send a video reference plus a text prompt to Gemini and return the reply.

    The video itself is not uploaded here; it is referenced by URI and passed
    to the model as a ``Part`` next to the prompt text.

    Args:
        video_url: URI of the video to analyse (expected to be publicly
            accessible).
        prompt: Instruction text sent alongside the video.
        mime_type: MIME type declared for the video part. Defaults to
            ``"video/webm"``, the value that used to be hard-coded, so
            existing two-argument calls behave as before.

    Returns:
        The text of the model response, or an empty string when the response
        contains no text.

    Raises:
        Any exception raised by ``client.models.generate_content`` is
        propagated unchanged; callers decide how to report it.
    """
    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=[
            Part.from_uri(file_uri=video_url, mime_type=mime_type),
            prompt,
        ],
    )
    # ``response.text`` may be None (e.g. a reply without text parts);
    # normalise to "" so the declared ``-> str`` contract always holds.
    return response.text or ""
def analyze_video(video_url: str, num_iterations: int = 3) -> str:
    """Run an iterative, self-refining analysis of a video.

    The first pass asks the model for a security-focused summary. Each later
    pass feeds the previous answer back to the model and asks for refinement.
    Only the most recent successful answer is kept.

    Args:
        video_url: URI of the video, forwarded unchanged to ``call_gemini``.
        num_iterations: Maximum number of model calls. Defaults to 3, the
            value that used to be hard-coded. Values <= 0 yield ``""``.

    Returns:
        The latest analysis text. If a call fails, the text gathered so far
        with an ``[Error during iteration N: ...]`` note appended. If the
        model returns no text, the last non-empty analysis is returned.
    """
    analysis = ""
    for i in range(num_iterations):
        if i == 0:
            prompt = (
                "You are a video analysis agent focusing on security and surveillance. "
                "Provide a detailed summary of the video, highlighting any key events, suspicious activities, or anomalies."
            )
        else:
            prompt = (
                f"Based on the previous analysis: \"{analysis}\". "
                "Provide further elaboration and refined insights, focusing on potential security threats, anomalous events, "
                "and any details that may help a security team understand the situation better."
            )
        try:
            result = call_gemini(video_url, prompt)
        except Exception as e:
            # Deliberately broad: this is the boundary to the UI, so any
            # failure is reported in the output instead of crashing the app.
            analysis += f"\n[Error during iteration {i+1}: {e}]"
            break  # Exit if an error occurs
        if not result:
            # An empty/None reply must not overwrite a good analysis, nor be
            # quoted into the next prompt as "None" or ""; stop refining.
            break
        analysis = result
    return analysis
def gradio_interface(video_url: str) -> str:
    """Validate the URL typed into the Gradio textbox and run the analysis.

    Args:
        video_url: Raw textbox content. ``None``, an empty string and
            whitespace-only input are all treated as "no URL given".

    Returns:
        A prompt asking for a URL when none was supplied, otherwise the
        result of ``analyze_video`` for the whitespace-stripped URL.
    """
    # Pasted URLs often carry stray spaces or newlines; strip them so blank
    # input is rejected here and the API receives a clean URI.
    cleaned_url = (video_url or "").strip()
    if not cleaned_url:
        return "Please provide a valid video URL."
    return analyze_video(cleaned_url)
# Assemble the Gradio UI: one textbox for the video URL, one for the report.
_url_box = gr.Textbox(label="Video URL (publicly accessible, e.g., YouTube link)")
_report_box = gr.Textbox(label="Security & Surveillance Analysis")

iface = gr.Interface(
    fn=gradio_interface,
    inputs=_url_box,
    outputs=_report_box,
    title="AI Video Analysis and Summariser Agent",
    description=(
        "This agentic video analysis tool uses Google's Gemini 2.0 Flash model via AI Studio "
        "to iteratively analyze a video for security and surveillance insights. It makes repeated "
        "LLM calls to refine its analysis of the video content."
    ),
)

# Start the web app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()