import cv2
import random
from ultralytics import YOLO
from gtts import gTTS
from datetime import datetime, timedelta
import gradio as gr
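# Dependencies (pip): ultralytics, gtts, gradio, opencv-python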
# Load YOLOv8 model
yolo = YOLO("yolov8n.pt")
# Audio alert settings
alert_categories = {"person", "cat", "dog", "knife", "fire", "gun"}
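# Note: the default COCO-trained yolov8n model covers "person", "cat", "dog" and "knife",
# but not "fire" or "gun"; detecting those would require a custom-trained model.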
last_alert_time = {}
alert_cooldown = timedelta(seconds=10)
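# Repeated alerts for the same label are suppressed until the cooldown elapses.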
# Create audio alert as downloadable file
def generate_audio_alert(label, position):
    phrases = [
        f"Be careful, there's a {label} on your {position}.",
        f"Watch out! {label} detected on your {position}.",
        f"Alert! A {label} is on your {position}.",
    ]
    caution_note = random.choice(phrases)
    # Save the audio alert as a timestamped MP3 file
    temp_file_path = f"audio_alert_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
    tts = gTTS(caution_note)
    tts.save(temp_file_path)
    return temp_file_path
# Process a single frame
def process_frame(image, enable_audio):
    results = yolo(image)
    result = results[0]
    detected_objects = {}
    audio_files = []
    for box in result.boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        label = result.names[int(box.cls[0])]
        if enable_audio and label in alert_categories:
            # Report the object as "left" or "right" relative to the frame center
            frame_center_x = image.shape[1] // 2
            obj_center_x = (x1 + x2) // 2
            position = "left" if obj_center_x < frame_center_x else "right"
            detected_objects[label] = position
            current_time = datetime.now()
            if (
                label not in last_alert_time
                or current_time - last_alert_time[label] > alert_cooldown
            ):
                audio_file = generate_audio_alert(label, position)
                audio_files.append(audio_file)
                last_alert_time[label] = current_time
        # Draw bounding boxes and labels for every detection
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    return image, audio_files
# Gradio interface function
def object_detection_webcam(enable_audio):
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        # Surface the problem in the UI instead of returning from the generator
        raise gr.Error("Unable to access the camera.")
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                raise gr.Error("Unable to read from camera.")
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            processed_frame, audio_files = process_frame(frame, enable_audio)
            # Yield one value per output component: the annotated frame and any audio files
            yield processed_frame, (audio_files or None)
    finally:
        cap.release()
# Gradio UI
def gradio_app():
    return gr.Interface(
        fn=object_detection_webcam,
        inputs=[gr.Checkbox(label="Enable Audio Alerts", value=False)],
        outputs=[
            gr.Image(label="Processed Frame"),
            gr.File(label="Audio Alerts", file_count="multiple"),
        ],
        live=True,
    )
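# launch() starts a local Gradio server (by default at http://127.0.0.1:7860).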
gradio_app().launch()