import cv2
import random
from ultralytics import YOLO
from gtts import gTTS
from datetime import datetime, timedelta
import gradio as gr
# Load YOLOv8 model
yolo = YOLO("yolov8n.pt")
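# yolov8n is the smallest YOLOv8 checkpoint; Ultralytics downloads it
# automatically on first run if it is not already cached locally.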
# Audio alert settings
alert_categories = {"person", "cat", "dog", "knife", "fire", "gun"}
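# NOTE: yolov8n.pt is trained on COCO, whose 80 classes include "person",
# "cat", "dog", and "knife" but not "fire" or "gun"; those two labels will
# never trigger unless a custom-trained model is substituted.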
last_alert_time = {}
alert_cooldown = timedelta(seconds=10)
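# last_alert_time maps each label to the timestamp of its most recent alert;
# a label is only re-announced once alert_cooldown has elapsed.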
# Create audio alert as downloadable file
def generate_audio_alert(label, position):
    phrases = [
        f"Be careful, there's a {label} on your {position}.",
        f"Watch out! {label} detected on your {position}.",
        f"Alert! A {label} is on your {position}.",
    ]
    caution_note = random.choice(phrases)
    # Save the alert as an MP3 file; microseconds in the filename keep two
    # alerts generated within the same second from overwriting each other.
    # (gTTS synthesizes via Google's TTS endpoint, so network access is required.)
    temp_file_path = f"audio_alert_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.mp3"
    tts = gTTS(caution_note)
    tts.save(temp_file_path)
    return temp_file_path
# Process a single frame
def process_frame(image, enable_audio):
    results = yolo(image)
    result = results[0]
    detected_objects = {}
    audio_files = []
    for box in result.boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        label = result.names[int(box.cls[0])]
        if enable_audio and label in alert_categories:
            # An object left of the frame's vertical midline is announced
            # as "left", otherwise as "right"
            frame_center_x = image.shape[1] // 2
            obj_center_x = (x1 + x2) // 2
            position = "left" if obj_center_x < frame_center_x else "right"
            detected_objects[label] = position
            current_time = datetime.now()
            # Rate-limit alerts: speak each label at most once per cooldown window
            if (
                label not in last_alert_time
                or current_time - last_alert_time[label] > alert_cooldown
            ):
                audio_file = generate_audio_alert(label, position)
                audio_files.append(audio_file)
                last_alert_time[label] = current_time
        # Draw bounding boxes and labels for every detection
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    return image, audio_files
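# NOTE: alert MP3s accumulate in the working directory; a deployed version
# would likely write them to a temporary directory and clean up periodically.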
# Gradio interface function
def object_detection_webcam(enable_audio):
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise gr.Error("Unable to access the camera.")
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                raise gr.Error("Unable to read from camera.")
            # OpenCV captures in BGR; convert to RGB for YOLO and Gradio
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            processed_frame, audio_files = process_frame(frame, enable_audio)
            # Yield a tuple in the same order as the interface's outputs
            yield processed_frame, audio_files or None
    finally:
        cap.release()
# Gradio UI
def gradio_app():
    return gr.Interface(
        fn=object_detection_webcam,
        inputs=[gr.Checkbox(label="Enable Audio Alerts", value=False)],
        outputs=[
            gr.Image(label="Processed Frame"),
            gr.File(label="Audio Alerts"),
        ],
        live=True,
    )


gradio_app().launch()