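"""Webcam object detection with spoken proximity alerts.

Runs YOLOv8 on live webcam frames, draws labelled bounding boxes, and, when
enabled, synthesizes gTTS audio alerts for selected object classes, throttled
by a per-class cooldown. The stream is served through a Gradio interface.
"""
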
import cv2
import random
from ultralytics import YOLO
from gtts import gTTS
from datetime import datetime, timedelta
import gradio as gr

# Load YOLOv8 model
yolo = YOLO("yolov8n.pt")

# Audio alert settings
alert_categories = {"person", "cat", "dog", "knife", "fire", "gun"}
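# Note: the stock COCO-trained yolov8n.pt model can only ever detect "person",
# "cat", "dog", and "knife" from this set; "fire" and "gun" are not COCO
# classes and would need a custom-trained model.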
last_alert_time = {}
alert_cooldown = timedelta(seconds=10)
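# Note: gTTS sends text to Google's online TTS service, so audio alerts
# assume a working internet connection.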

# Create audio alert as downloadable file
def generate_audio_alert(label, position):
    phrases = [
        f"Be careful, there's a {label} on your {position}.",
        f"Watch out! {label} detected on your {position}.",
        f"Alert! A {label} is on your {position}.",
    ]
    caution_note = random.choice(phrases)

    # Save audio alert as an MP3 file
    temp_file_path = f"audio_alert_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
    tts = gTTS(caution_note)
    tts.save(temp_file_path)
    return temp_file_path

# Process a single frame
def process_frame(image, enable_audio):
    results = yolo(image)
    result = results[0]

    audio_files = []

    for box in result.boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        label = result.names[int(box.cls[0])]

        if enable_audio and label in alert_categories:
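            # Locate the object relative to the frame's horizontal centre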
            frame_center_x = image.shape[1] // 2
            obj_center_x = (x1 + x2) // 2
            position = "left" if obj_center_x < frame_center_x else "right"

            current_time = datetime.now()
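            # Throttle alerts: speak again only after the per-class cooldown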
            if (
                label not in last_alert_time
                or current_time - last_alert_time[label] > alert_cooldown
            ):
                audio_file = generate_audio_alert(label, position)
                audio_files.append(audio_file)
                last_alert_time[label] = current_time

        # Draw bounding boxes
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return image, audio_files
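
# Example (hypothetical image path): process_frame can also annotate a single
# still image instead of a live stream:
#   img = cv2.cvtColor(cv2.imread("test.jpg"), cv2.COLOR_BGR2RGB)
#   annotated, alerts = process_frame(img, enable_audio=True)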

# Gradio interface function: stream annotated webcam frames
def object_detection_webcam(enable_audio):
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise gr.Error("Unable to access the camera.")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                raise gr.Error("Unable to read from camera.")

            # OpenCV captures in BGR; convert to RGB so colours display correctly
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            processed_frame, audio_files = process_frame(frame, enable_audio)

            # Yield a tuple matching the interface's (image, files) outputs
            yield processed_frame, audio_files
    finally:
        cap.release()

# Gradio UI
def gradio_app():
    return gr.Interface(
        fn=object_detection_webcam,
        inputs=[gr.Checkbox(label="Enable Audio Alerts", value=False)],
        outputs=[
            gr.Image(label="Processed Frame"),
            gr.File(label="Audio Alerts", file_count="multiple"),
        ],
        live=True,
    )

if __name__ == "__main__":
    gradio_app().launch()
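
# Setup (assumed PyPI package names):
#   pip install ultralytics gTTS gradio opencv-python
# yolov8n.pt is downloaded automatically by ultralytics on first use.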