File size: 3,100 Bytes
f8aaa9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os
import gradio as gr
from google import genai
from google.genai import types
from google.genai.types import Part
from tenacity import retry, stop_after_attempt, wait_random_exponential

# Identifier of the model every request is sent to (Gemini 2.0 Flash).
MODEL_NAME = "gemini-2.0-flash-001"

# The API key is read from the environment; refuse to start without one.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if GOOGLE_API_KEY is None or GOOGLE_API_KEY == "":
    raise ValueError("Please set the GOOGLE_API_KEY environment variable.")

# Shared Gemini API client (AI Studio), authenticated with that key.
client = genai.Client(api_key=GOOGLE_API_KEY)

@retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
def call_gemini(video_url: str, prompt: str, mime_type: str = "video/webm") -> str:
    """
    Send one video-plus-prompt request to the Gemini model and return its text reply.

    The video is referenced by its URI (expecting a publicly accessible URL)
    and passed to the model as a ``Part`` next to the text prompt. The call is
    wrapped by tenacity's ``retry``: a failing attempt is repeated with
    randomized exponential backoff (capped at 60 seconds) until three
    attempts have been made.

    Args:
        video_url: URI of the video to analyze.
        prompt: Text instruction sent together with the video.
        mime_type: MIME type declared for the video part. Defaults to
            ``"video/webm"``; pass another value (e.g. ``"video/mp4"``) when
            the URI points at a different kind of video.

    Returns:
        The text of the model's response, or an empty string when the
        response carries no text.

    Raises:
        tenacity.RetryError: When all attempts fail; it wraps the last
            exception raised by the request.
    """
    video_part = Part.from_uri(file_uri=video_url, mime_type=mime_type)
    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=[video_part, prompt],
    )
    # The SDK's `text` accessor is Optional: it is None when the reply has no
    # text part. Normalize so callers always receive a str, as annotated.
    return response.text or ""

def analyze_video(video_url: str, num_iterations: int = 3) -> str:
    """
    Perform iterative, agentic video analysis.

    The first pass asks the model for a security-focused summary of the video.
    Each later pass quotes the current analysis back to the model and asks for
    a refined version, which then replaces it. An empty reply never replaces
    text that was already obtained.

    Args:
        video_url: URI of the video, forwarded unchanged to ``call_gemini``.
        num_iterations: Total number of model calls to attempt (default 3).
            Values below 1 make no calls and yield an empty string.

    Returns:
        The most recent non-empty analysis. If a call fails, the text gathered
        so far is returned with a bracketed error note appended and no further
        iterations run; exceptions from the model call are not propagated.
    """
    analysis = ""

    for i in range(num_iterations):
        if not analysis:
            # Nothing to refine yet (first pass, or earlier replies were
            # empty): ask for the initial summary.
            prompt = (
                "You are a video analysis agent focusing on security and surveillance. "
                "Provide a detailed summary of the video, highlighting any key events, suspicious activities, or anomalies."
            )
        else:
            prompt = (
                f"Based on the previous analysis: \"{analysis}\". "
                "Provide further elaboration and refined insights, focusing on potential security threats, anomalous events, "
                "and any details that may help a security team understand the situation better."
            )
        try:
            result = call_gemini(video_url, prompt)
        except Exception as e:
            # call_gemini is wrapped by tenacity, which reports exhausted
            # retries as a RetryError whose str() omits the real failure
            # message. When the exception exposes `last_attempt`, surface the
            # underlying error instead so the note is actionable.
            last_attempt = getattr(e, "last_attempt", None)
            fetch_cause = getattr(last_attempt, "exception", None)
            cause = fetch_cause() if callable(fetch_cause) else None
            shown = cause if cause is not None else e
            analysis += f"\n[Error during iteration {i+1}: {shown}]"
            break  # Exit if an error occurs
        if result:
            # Ignore None/empty replies so they neither wipe out the previous
            # analysis nor get quoted into the next prompt.
            analysis = result
    return analysis

def gradio_interface(video_url: str) -> str:
    """
    Gradio interface function that takes a video URL and returns the analysis.

    Args:
        video_url: Text entered by the user. Surrounding whitespace is
            removed before the value is validated and used.

    Returns:
        A prompt to supply a URL when the input is missing, empty, or only
        whitespace; otherwise whatever ``analyze_video`` returns for the
        cleaned URL.
    """
    # Treat None, empty, and whitespace-only input alike, and drop stray
    # spaces/newlines from pasted URLs so they are not sent as part of the URI.
    cleaned_url = (video_url or "").strip()
    if not cleaned_url:
        return "Please provide a valid video URL."
    return analyze_video(cleaned_url)

# Build the web UI: one text input for the video URL, one text output for
# the resulting analysis. The server only starts when run as a script.
iface = gr.Interface(
    title="AI Video Analysis and Summariser Agent",
    description=(
        "This agentic video analysis tool uses Google's Gemini 2.0 Flash model "
        "via AI Studio to iteratively analyze a video for security and "
        "surveillance insights. It makes repeated LLM calls to refine its "
        "analysis of the video content."
    ),
    fn=gradio_interface,
    inputs=gr.Textbox(label="Video URL (publicly accessible, e.g., YouTube link)"),
    outputs=gr.Textbox(label="Security & Surveillance Analysis"),
)

if __name__ == "__main__":
    iface.launch()