Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,26 +1,28 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
|
|
|
|
3 |
from google import genai
|
4 |
from google.genai import types
|
5 |
from google.genai.types import Part
|
6 |
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
7 |
|
8 |
-
# Retrieve API key from environment
|
9 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
|
10 |
if not GOOGLE_API_KEY:
|
11 |
raise ValueError("Please set the GOOGLE_API_KEY environment variable.")
|
12 |
|
13 |
-
# Initialize the Gemini API client via AI Studio
|
14 |
client = genai.Client(api_key=GOOGLE_API_KEY)
|
15 |
|
16 |
-
# Use the Gemini 2.0 Flash model
|
17 |
MODEL_NAME = "gemini-2.0-flash-001"
|
18 |
|
19 |
@retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
|
20 |
def call_gemini(video_url: str, prompt: str) -> str:
|
21 |
"""
|
22 |
Call the Gemini model with the provided video URL and prompt.
|
23 |
-
The video is
|
24 |
"""
|
25 |
response = client.models.generate_content(
|
26 |
model=MODEL_NAME,
|
@@ -31,51 +33,88 @@ def call_gemini(video_url: str, prompt: str) -> str:
|
|
31 |
)
|
32 |
return response.text
|
33 |
|
34 |
-
def
|
35 |
"""
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
"""
|
39 |
analysis = ""
|
40 |
num_iterations = 3
|
41 |
|
42 |
for i in range(num_iterations):
|
|
|
|
|
|
|
|
|
43 |
if i == 0:
|
44 |
-
prompt =
|
45 |
-
"You are a video analysis agent focusing on security and surveillance. "
|
46 |
-
"Provide a detailed summary of the video, highlighting any key events, suspicious activities, or anomalies."
|
47 |
-
)
|
48 |
else:
|
49 |
-
prompt = (
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
54 |
try:
|
55 |
analysis = call_gemini(video_url, prompt)
|
56 |
except Exception as e:
|
57 |
analysis += f"\n[Error during iteration {i+1}: {e}]"
|
58 |
-
break
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
-
def gradio_interface(video_url: str) -> str:
|
62 |
"""
|
63 |
-
Gradio interface function that takes a video URL and
|
|
|
64 |
"""
|
65 |
if not video_url:
|
66 |
-
return "Please provide a valid video URL."
|
67 |
-
return analyze_video(video_url)
|
68 |
|
69 |
-
# Define
|
70 |
iface = gr.Interface(
|
71 |
fn=gradio_interface,
|
72 |
-
inputs=
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
title="AI Video Analysis and Summariser Agent",
|
75 |
description=(
|
76 |
"This agentic video analysis tool uses Google's Gemini 2.0 Flash model via AI Studio "
|
77 |
-
"to iteratively analyze a video for security and surveillance insights.
|
78 |
-
"
|
|
|
79 |
)
|
80 |
)
|
81 |
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
from collections import Counter
|
5 |
from google import genai
|
6 |
from google.genai import types
|
7 |
from google.genai.types import Part
|
8 |
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
9 |
|
10 |
+
# The Gemini credential is read from the process environment; the module
# refuses to load without it so misconfiguration surfaces immediately.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if GOOGLE_API_KEY is None or GOOGLE_API_KEY == "":
    raise ValueError("Please set the GOOGLE_API_KEY environment variable.")

# Shared AI Studio client, authenticated with the key above.
client = genai.Client(api_key=GOOGLE_API_KEY)

# Model identifier used for every request (Gemini 2.0 Flash).
MODEL_NAME = "gemini-2.0-flash-001"
|
20 |
|
21 |
@retry(wait=wait_random_exponential(multiplier=1, max=60), stop=stop_after_attempt(3))
|
22 |
def call_gemini(video_url: str, prompt: str) -> str:
|
23 |
"""
|
24 |
Call the Gemini model with the provided video URL and prompt.
|
25 |
+
The video URL is passed as a URI part with MIME type "video/webm".
|
26 |
"""
|
27 |
response = client.models.generate_content(
|
28 |
model=MODEL_NAME,
|
|
|
33 |
)
|
34 |
return response.text
|
35 |
|
36 |
+
def generate_chart(analysis_text: str) -> plt.Figure:
    """
    Build a bar chart of how often selected security keywords occur in the analysis.

    Matching is case-insensitive and performed on alphabetic tokens, so a keyword
    with attached punctuation or Markdown markers ("suspicious,", "**alert**")
    is still counted. Inflected forms such as "anomalies" are different tokens
    and are not counted.

    Args:
        analysis_text: Free-form analysis text. ``None`` or an empty string
            produces a chart in which every bar is zero.

    Returns:
        A matplotlib figure with one bar per keyword, in the fixed keyword order.
    """
    import re  # function-scope import keeps this block self-contained

    # Keywords of interest, in the order they appear on the x-axis.
    keywords = ["suspicious", "anomaly", "incident", "alert", "object", "movement"]

    # Tokenize once on runs of letters; a plain whitespace split would leave
    # punctuation glued to the words and silently miss matches.
    tokens = re.findall(r"[a-z]+", (analysis_text or "").lower())
    token_counts = Counter(tokens)
    counts = [token_counts[kw] for kw in keywords]  # Counter yields 0 when absent

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.bar(keywords, counts, color="skyblue")
    ax.set_title("Keyword Frequency in Analysis")
    ax.set_ylabel("Count")
    ax.set_xlabel("Keyword")
    # Lay out this specific figure rather than pyplot's implicit "current" one.
    fig.tight_layout()
    return fig
|
55 |
+
|
56 |
+
def analyze_video(video_url: str, user_query: str) -> tuple[str, plt.Figure]:
    """
    Perform iterative (agentic) video analysis.

    The first pass asks the model for a security-focused summary of the video.
    Each later pass feeds the previous answer back and asks for refinement.
    The loop stops early if a call fails or the model returns no text; whatever
    analysis was produced up to that point is kept and a bracketed note is
    appended to it.

    Args:
        video_url: URL of the video, forwarded unchanged to ``call_gemini``.
        user_query: Optional focus for the analysis. ``None``, empty, or
            whitespace-only values are treated as "no query".

    Returns:
        A ``(markdown_report, figure)`` tuple: the report wraps the final
        analysis text, and the figure is the keyword-frequency chart built
        from that same text.
    """
    num_iterations = 3
    query = (user_query or "").strip()

    # The base prompt does not depend on the iteration, so build it once.
    base_prompt = "You are a video analysis agent focusing on security and surveillance. Provide a detailed summary of the video, highlighting key events, suspicious activities, or anomalies."
    if query:
        base_prompt += f" Also, focus on the following query: {query}"

    analysis = ""
    for i in range(num_iterations):
        if i == 0:
            prompt = base_prompt
        else:
            prompt = (f"Based on the previous analysis: \"{analysis}\". "
                      "Provide further elaboration and refined insights, focusing on potential security threats, anomalous events, "
                      "and details that would help a security team understand the situation better. ")
            if query:
                prompt += f"Remember to focus on: {query}"

        try:
            result = call_gemini(video_url, prompt)
        except Exception as e:
            # call_gemini is wrapped by tenacity, which reports exhausted retries
            # as a RetryError; show the underlying failure when it is available.
            last_attempt = getattr(e, "last_attempt", None)
            cause = last_attempt.exception() if last_attempt is not None else None
            analysis += f"\n[Error during iteration {i+1}: {cause or e}]"
            break

        if not result:
            # An empty/None response must not overwrite the last good analysis
            # (and would break the string handling further down).
            analysis += f"\n[No analysis text returned during iteration {i+1}]"
            break
        analysis = result

    # Create a Markdown report around the final analysis text.
    markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{analysis}\n"

    # Generate a chart visualization based on the analysis text.
    chart_fig = generate_chart(analysis)
    return markdown_report, chart_fig
|
91 |
|
92 |
+
def gradio_interface(video_url: str, user_query: str = "") -> tuple[str, object]:
    """
    Gradio callback: validate the inputs and run the video analysis.

    Args:
        video_url: URL entered by the user. ``None``, empty, or whitespace-only
            input is rejected without contacting the model.
        user_query: Optional text guiding the focus of the analysis.

    Returns:
        A ``(markdown, figure)`` tuple. When the URL is missing, the first item
        is a short instruction for the user and the second is ``None`` so the
        plot output stays empty; otherwise both come from ``analyze_video``.
    """
    # Strip first so that a field containing only spaces counts as missing.
    url = (video_url or "").strip()
    if not url:
        return "Please provide a valid video URL.", None
    return analyze_video(url, user_query)
|
100 |
|
101 |
+
# Input widgets: the video location and an optional free-text focus hint.
_video_url_input = gr.Textbox(label="Video URL (publicly accessible, e.g., YouTube link)")
_query_input = gr.Textbox(
    label="Analysis Query (optional): guide the focus of the analysis",
    placeholder="e.g., focus on unusual movements near the entrance",
)

# Output widgets: the rendered Markdown report and the keyword chart.
_report_output = gr.Markdown(label="Security & Surveillance Analysis Report")
_chart_output = gr.Plot(label="Visualization: Keyword Frequency")

# Blurb shown under the title on the page.
_description = (
    "This agentic video analysis tool uses Google's Gemini 2.0 Flash model via AI Studio "
    "to iteratively analyze a video for security and surveillance insights. Provide a video URL and, optionally, "
    "a query to guide the analysis. The tool returns a detailed Markdown report along with a bar chart visualization "
    "of keyword frequency."
)

# Wire the callback to its two inputs and two outputs.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[_video_url_input, _query_input],
    outputs=[_report_output, _chart_output],
    title="AI Video Analysis and Summariser Agent",
    description=_description,
)
|
120 |
|