# collarvision/app.py
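"""Live Video AI Assistant.

Streams webcam frames into a Gradio UI and answers free-form questions
about the current frame with the Salesforce/blip-vqa-base model.
"""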
import io

import cv2
import gradio as gr
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering

import spaces
# Initialize the webcam.
cap = cv2.VideoCapture(0)

# Load the BLIP VQA model and processor. The blip-vqa-base checkpoint is a
# question-answering model, so BlipForQuestionAnswering is the matching class.
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to(
    "cuda" if torch.cuda.is_available() else "cpu"
)
@spaces.GPU
def query_the_image(query: str, image_data: bytes):
    """Answer a free-form question about a JPEG-encoded frame with BLIP VQA."""
    try:
        image = Image.open(io.BytesIO(image_data)).convert("RGB")
        inputs = processor(image, query, return_tensors="pt").to(model.device)
        output = model.generate(**inputs)
        return processor.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error: {e}"
def get_frame():
    # Grab a single webcam frame and return it as JPEG bytes.
    # Camera capture runs on the CPU, so no @spaces.GPU decorator is needed.
    ret, frame = cap.read()
    if not ret:
        return None
    _, buffer = cv2.imencode(".jpg", frame)
    return buffer.tobytes()
def process_image(prompt):
    # Capture the current frame and ask the model about it; the GPU is
    # requested inside query_the_image.
    frame_data = get_frame()
    if frame_data:
        return query_the_image(prompt, frame_data)
    return "Error capturing image"
def video_feed():
    # Continuously yield RGB frames for the live-preview image component.
    # This is a generator, so it must be consumed by a Gradio event
    # (see gui.load below), not run in a bare thread.
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
gui = gr.Blocks()
with gui:
    gr.Markdown("# Live Video AI Assistant")
    with gr.Row():
        # gr.Image renders the numpy frames yielded by video_feed;
        # gr.Video expects a file path, not a frame stream.
        video_component = gr.Image(label="Live Feed")
    prompt = gr.Textbox(label="Enter your safety policy for the AI to analyse each frame in real time")
    response = gr.Textbox(label="AI Response")
    btn = gr.Button("Ask")
    btn.click(process_image, inputs=prompt, outputs=response)
    # Start streaming frames into the preview once the page loads. The
    # original threading.Thread(target=video_feed) call only created a
    # generator object without consuming it, so no frames were displayed.
    gui.load(video_feed, outputs=video_component)
gui.launch()