File size: 2,729 Bytes
0cef02f
 
2d6fc22
0cef02f
9b1ccc9
80a5ad2
2d6fc22
0cef02f
9b1ccc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d6fc22
9cbc798
 
2d6fc22
 
 
 
 
 
9cbc798
2d6fc22
 
 
9332803
f1e3f4b
 
9332803
f1e3f4b
 
9332803
f1e3f4b
 
9332803
 
 
 
 
 
9b1ccc9
2ec7210
80a5ad2
9332803
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import gradio as gr
import cv2
import torch
import numpy as np
from diffusers import StableDiffusionPipeline,AutoPipelineForImage2Image,AutoencoderTiny
from transformers import AutoProcessor, AutoModel, AutoTokenizer
from PIL import Image

# 
# 
# def predict(prompt, frame):
#     generator = torch.manual_seed(params.seed)
#     steps = params.steps
#     strength = params.strength
#     if int(steps * strength) < 1:
#         steps = math.ceil(1 / max(0.10, strength))
# 
#     prompt = params.prompt
#     prompt_embeds = None
#     
#     results = self.pipe(
#         image=frame,
#         prompt_embeds=prompt_embeds,
#         prompt=prompt,
#         negative_prompt=params.negative_prompt,
#         generator=generator,
#         strength=strength,
#         num_inference_steps=steps,
#         guidance_scale=1.1,
#         width=params.width,
#         height=params.height,
#         output_type="pil",
#         )
# 
#     nsfw_content_detected = (
#         results.nsfw_content_detected[0]
#         if "nsfw_content_detected" in results
#         else False
#     )
#     if nsfw_content_detected:
#         return None
#     result_image = results.images[0]
# 
#     return result_image
# 
# def process_frame(frame, prompt="A futuristic landscape"):
#     """Process a single frame using the real-time latent consistency model."""
#     
#     # Convert frame to PIL image
#     image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).resize((512, 512))
#     
#     # Apply Real-Time Latent Consistency Model
#     result = realtime_pipe(prompt=prompt, image=image, strength=0.5, guidance_scale=7.5).images[0]
#     return np.array(result)

def video_stream(prompt):
    """Capture frames from the default webcam and stream AI-processed frames.

    Args:
        prompt: Text prompt forwarded to ``process_frame`` to guide the
            image-to-image model.

    Yields:
        The processed frame returned by ``process_frame`` for each
        successfully captured webcam frame.

    NOTE(review): ``process_frame`` is only defined in the commented-out
    section above, so calling this generator as-is raises ``NameError`` —
    restore that helper before wiring the UI to this function.
    """
    cap = cv2.VideoCapture(0)  # device 0 = default system webcam
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # Capture failed (camera unplugged / end of stream).
                break
            yield process_frame(frame, prompt)
    finally:
        # Guarantee the camera handle is released even if processing raises
        # mid-stream; the original leaked the device on any exception.
        cap.release()


# Create Gradio App
with gr.Blocks() as demo:
    gr.Markdown("## 🎨 Real-Time AI-Enhanced Webcam using Latent Consistency Model (LCM)")
    
    with gr.Row():
        webcam_feed = gr.Camera(streaming=True, label="Live Webcam")
        processed_image = gr.Image(label="AI-Enhanced Webcam Feed")
    
    with gr.Row():
        canvas_output = gr.Image(interactive=True, label="Canvas - Processed Image Output")
    
    prompt_input = gr.Textbox(label="Real-Time LCM Prompt", value="A futuristic landscape")
    start_button = gr.Button("Start Real-Time AI Enhancement")
    
    #start_button.click(fn=video_stream, inputs=[prompt_input], outputs=[processed_image, canvas_output])

demo.launch(share=True)