import gradio as gr
from ultralytics import YOLO
import cv2
from gtts import gTTS
import tempfile

# Load the YOLOv8 model (Ultralytics downloads yolov8n.pt on first run if it is not present locally)
model = YOLO("yolov8n.pt")


# Run object detection on a single frame and build an optional audio alert
def detect_objects(image):
    # Perform object detection
    results = model(image)
    annotated_frame = results[0].plot()  # Draw bounding boxes and labels on the frame

    # Extract the class names of the detected objects
    detected_objects = [model.names[int(box.cls)] for box in results[0].boxes]

    if detected_objects:
        objects_text = ", ".join(set(detected_objects))
        # Generate a spoken alert listing the detected objects
        tts = gTTS(f"Detected: {objects_text}", lang="en")
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        temp_file.close()  # Close the handle so gTTS can write to the path on all platforms
        tts.save(temp_file.name)
        return annotated_frame, temp_file.name

    return annotated_frame, None


# Gradio callback: convert colour channels, run detection, and return the results
def process_frame(image):
    # Gradio delivers RGB frames; swap to BGR because Ultralytics treats raw
    # NumPy arrays as OpenCV-style BGR images
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    annotated_frame, audio_file = detect_objects(image)
    # results[0].plot() returns a BGR image, so convert back to RGB for display
    annotated_frame = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)
    return annotated_frame, audio_file


# Gradio interface for a live webcam feed
# (Gradio 3.x syntax; in Gradio 4+ use gr.Image(sources=["webcam"], type="numpy"))
webcam = gr.Interface(
    fn=process_frame,
    inputs=gr.Image(source="webcam", tool="editor", type="numpy"),
    outputs=[
        gr.Image(label="Detected Objects"),
        gr.Audio(label="Audio Alert (if any)"),
    ],
    live=True,  # Re-run the callback continuously on new webcam frames
)

# Launch the Gradio app
webcam.launch()