import streamlit as st
from ultralytics import YOLO
import cv2
from gtts import gTTS
import os
import tempfile

# Load the YOLOv8 model once and cache it across Streamlit reruns
@st.cache_resource
def load_model():
    return YOLO('yolov8n.pt')  # Downloads the pre-trained weights on first use

model = load_model()

# Streamlit app title and description
st.title("Real-Time Object Detection for Blind Assistance")
st.write("This application detects objects in real-time from a webcam feed and provides audio feedback.")

# Start detection button
start_detection = st.button("Start Real-Time Detection")

if start_detection:
    st.write("Starting webcam...")

    # Clicking this button triggers a Streamlit rerun, which interrupts the
    # detection loop below. (A cv2.waitKey 'q' check would not work here,
    # since no OpenCV window exists to receive key events in a Streamlit app.)
    st.button("Stop Real-Time Detection")

    # Open webcam (0 = default device; change the index if multiple webcams are connected)
    cap = cv2.VideoCapture(0)

    # Check if webcam is opened
    if not cap.isOpened():
        st.error("Error: Could not open webcam.")
    else:
        stframe = st.empty()   # Placeholder that is overwritten with each new frame
        last_spoken = set()    # Track what was last announced to avoid repeating audio every frame

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Perform object detection on the current frame
            results = model(frame)

            # Collect the class names of all detected boxes
            detected_objects = {model.names[int(box.cls)] for box in results[0].boxes}

            # Generate audio feedback only when the set of detected objects changes;
            # otherwise every frame would block on TTS generation and playback
            if detected_objects and detected_objects != last_spoken:
                last_spoken = detected_objects
                summary_text = f"Detected: {', '.join(sorted(detected_objects))}."
                st.write(summary_text)  # Display detected objects in Streamlit

                # Convert text to speech with gTTS (requires internet access)
                tts = gTTS(text=summary_text, lang='en')
                with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as audio_file:
                    audio_path = audio_file.name  # Close the handle before writing (needed on Windows)
                tts.save(audio_path)

                # Play the audio summary on the machine running Streamlit.
                # Requires the mpg123 CLI player; playback blocks until the clip finishes.
                os.system(f"mpg123 {audio_path}")
                os.remove(audio_path)  # Clean up the temporary file

            # Annotate the frame with bounding boxes and show it in the placeholder
            annotated_frame = results[0].plot()
            stframe.image(annotated_frame, channels="BGR", use_column_width=True)

        # Release the webcam when the loop ends
        cap.release()
        st.write("Real-Time Detection Stopped.")
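
# ---------------------------------------------------------------------------
# Usage sketch (assumptions: the script is saved as app.py; package names
# below are the pip distributions for the imports above):
#
#   pip install streamlit ultralytics opencv-python gTTS
#   streamlit run app.py
#
# The mpg123 command-line player must also be installed on the machine
# running Streamlit (e.g. `apt-get install mpg123` on Debian/Ubuntu), since
# the audio summary is played server-side via os.system. gTTS additionally
# needs internet access, as it calls Google's text-to-speech service.
# ---------------------------------------------------------------------------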