File size: 3,666 Bytes
f8aaa9d
78aee58
f8aaa9d
 
78aee58
f8aaa9d
c137e5c
f8aaa9d
 
78aee58
f8aaa9d
c137e5c
f8aaa9d
78aee58
f8aaa9d
78aee58
d638712
78aee58
c137e5c
 
 
 
 
78aee58
d638712
 
78aee58
 
 
 
 
 
 
 
 
 
 
d638712
78aee58
001b623
78aee58
0f96bc2
78aee58
c137e5c
 
 
 
7c2c622
 
78aee58
f8aaa9d
78aee58
c137e5c
78aee58
c137e5c
78aee58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8aaa9d
c137e5c
f8aaa9d
78aee58
0f96bc2
b3e97a9
78aee58
 
0f96bc2
78aee58
 
f8aaa9d
78aee58
 
 
f8aaa9d
 
 
 
78aee58
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import time
import gradio as gr
from google import genai
from google.genai import types

# Gemini model identifier used for the content-generation request.
MODEL_NAME = "gemini-2.5-pro-exp-03-25"

# The API key is read from the environment. Importing this module fails with
# ValueError when the variable is missing or empty, so the app never starts
# without credentials.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if GOOGLE_API_KEY is None or GOOGLE_API_KEY == "":
    raise ValueError(
        "Please set the GOOGLE_API_KEY environment variable "
        "with your Google Cloud API key."
    )

# Module-wide Gemini API client shared by the functions below.
client = genai.Client(api_key=GOOGLE_API_KEY)

def upload_and_process_video(
    video_file: str,
    *,
    poll_interval: float = 10.0,
    timeout: float = 600.0,
) -> types.File:
    """
    Upload a video file to the Gemini API and wait for processing.

    The file is uploaded through the module-level ``client`` and its state is
    polled until it is no longer "PROCESSING" or until ``timeout`` elapses.

    Args:
        video_file (str): Path to the video file
        poll_interval (float): Seconds to sleep between two state checks
        timeout (float): Maximum number of seconds to wait for processing
            to finish before giving up

    Returns:
        types.File: Processed video file object

    Raises:
        RuntimeError: If the upload fails, the service reports the file as
            "FAILED", or processing does not finish within ``timeout``.
            The underlying error is attached as ``__cause__``.
    """

    def _state_name(file_obj):
        # The state may be an enum member or a plain string depending on the
        # SDK version; normalise it so the comparisons below work for both.
        state = file_obj.state
        return getattr(state, "name", state)

    try:
        video_file_obj = client.files.upload(file=video_file)

        # Bound the wait so a file stuck in PROCESSING cannot hang the caller.
        deadline = time.monotonic() + timeout
        while _state_name(video_file_obj) == "PROCESSING":
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Video processing did not finish within {timeout} seconds"
                )
            print(f"Processing {video_file}...")
            time.sleep(poll_interval)
            video_file_obj = client.files.get(name=video_file_obj.name)

        if _state_name(video_file_obj) == "FAILED":
            # Prefer the error detail reported for the file, when present,
            # over the state itself (which would only ever say "FAILED").
            detail = getattr(video_file_obj, "error", None) or video_file_obj.state
            raise ValueError(f"Video processing failed: {detail}")

        print(f"Video processing complete: {video_file_obj.uri}")
        return video_file_obj
    except Exception as e:
        # Re-raise with a uniform message while keeping the original error
        # reachable through exception chaining.
        raise RuntimeError(f"Error uploading video: {e}") from e

def analyze_video(video_file: str, user_query: str) -> str:
    """
    Analyze the video using the Gemini API and return a summary.

    The function never raises for upload or analysis problems: any failure is
    converted into a Markdown error report so it can be displayed directly.

    Args:
        video_file (str): Path to the video file
        user_query (str): Optional query to guide the analysis; empty or
            whitespace-only values are ignored

    Returns:
        str: Markdown-formatted report, a Markdown error report when the
            analysis fails, or a plain validation message when the path is
            missing, does not exist, or is not an ``.mp4`` file
    """
    # Validate input
    if not video_file or not os.path.exists(video_file):
        return "Please upload a valid video file."
    if not video_file.lower().endswith('.mp4'):
        return "Please upload an MP4 video file."

    video_file_obj = None
    try:
        # Upload and process the video
        video_file_obj = upload_and_process_video(video_file)

        # Prepare prompt; a blank query must not add an empty "Focus on:" clause
        prompt = "Provide a detailed summary of this video."
        focus = (user_query or "").strip()
        if focus:
            prompt += f" Focus on: {focus}"

        # Analyze video with Gemini API
        response = client.models.generate_content(
            model=MODEL_NAME,
            contents=[
                video_file_obj,  # Pass the processed video file object
                prompt
            ]
        )

        # The response may carry no text at all; report that as an error
        # instead of rendering a literal "None" as the summary.
        summary = response.text
        if not summary:
            raise ValueError("The model returned an empty response.")

        # Generate Markdown report
        return (
            "## Video Analysis Report\n\n"
            f"**Summary:**\n{summary}\n"
        )

    except Exception as e:
        return (
            "## Video Analysis Report\n\n"
            f"**Error:** Unable to analyze video.\n"
            f"Details: {str(e)}\n"
        )

    finally:
        # Best-effort cleanup of the uploaded file; a cleanup failure must not
        # replace the report that is being returned.
        if video_file_obj is not None:
            try:
                client.files.delete(name=video_file_obj.name)
            except Exception as cleanup_error:
                print(f"Warning: could not delete uploaded file: {cleanup_error}")

# Gradio UI wiring: a video upload and an optional free-text query are passed
# to analyze_video, and its return value is rendered as Markdown.
iface = gr.Interface(
    title="AI Video Analysis Agent with Gemini",
    description=(
        "Upload an MP4 video to get a summary using Google's Gemini API. "
        + "This tool analyzes the video content directly without audio or frame extraction. "
        + "Optionally, provide a query to guide the analysis."
    ),
    fn=analyze_video,
    inputs=[
        gr.Video(label="Upload Video File (MP4)"),
        gr.Textbox(
            label="Analysis Query (optional)",
            placeholder="e.g., focus on main events or themes",
        ),
    ],
    outputs=gr.Markdown(label="Video Analysis Report"),
)

# Only start the server when this file is executed as a script.
if __name__ == "__main__":
    # share=True is passed so that a public link is created on launch.
    iface.launch(share=True)