# ShoeGenv2 / app.py
# MaxMilan1
# kjvb
# 0bbe8f6
# raw
# history blame
# 1.92 kB
import spaces
import gradio as gr
import torch
from diffusers import DiffusionPipeline, AutoencoderKL
import rembg
from io import BytesIO
import PIL.Image as Image
import cv2
import numpy
# Checkpoint used for text-to-image generation, plus the separately published
# VAE ("madebyollin/sdxl-vae-fp16-fix") that replaces the checkpoint's own.
model_id = "dataautogpt3/OpenDalleV1.1"
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)

# Load the fp16 variant of the checkpoint from safetensors files, with the
# VAE above injected in place of the bundled one.
pipe = DiffusionPipeline.from_pretrained(
    model_id,
    vae=vae,
    variant="fp16",
    use_safetensors=True,
    torch_dtype=torch.float16,
)

# Done once at import time so every request reuses the same loaded pipeline.
pipe.to("cuda")
# Generate a shoe image from text with the diffusion pipeline, then strip
# its background.
@spaces.GPU
def generate_image(prompt, neg_prompt):
    """Generate an image from a text prompt and a background-free copy of it.

    Args:
        prompt: Text description entered by the user. A fixed styling suffix
            ("no background, side view, minimalist shot") is appended to it.
        neg_prompt: Negative prompt forwarded unchanged to the pipeline.

    Returns:
        A ``(image, cutout)`` tuple: the first image produced by the
        pipeline, and the result of running ``rembg.remove`` on that image.
    """
    # Join with ", " so the styling hints do not fuse onto the last word of
    # the user's text (previously "red sneaker" became "red sneakerno ...").
    full_prompt = f"{prompt}, no background, side view, minimalist shot"

    # Calling the pipeline returns an output object, not encoded bytes; the
    # generated PIL images live in its ``images`` list.
    pil_image = pipe(full_prompt, negative_prompt=neg_prompt).images[0]

    # rembg takes a PIL image directly. Skipping the former RGB->BGR
    # round-trip keeps the channel order the UI expects for display.
    cutout = rembg.remove(pil_image)

    return pil_image, cutout
_TITLE = "Shoe Generator"

# Two-column layout: prompt inputs and the trigger button on the left,
# the generated image and its background-free version on the right.
# NOTE: the title must be passed by keyword; the first positional parameter
# of gr.Blocks is the theme, not the page title.
with gr.Blocks(title=_TITLE) as ShoeGen:
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Enter a discription of a shoe")
            neg_prompt = gr.Textbox(label="Enter a negative prompt", value="low quality, watermark, ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, extra limbs, body out of frame, blurry, bad anatomy, blurred, watermark, grainy, signature, cut off, draft, closed eyes, text, logo")
            button_gen = gr.Button("Generate Image")
        with gr.Column():
            image = gr.Image(label="Generated Image")
            image2 = gr.Image(label="Generated Image without background", show_download_button=True)

    # generate_image takes (prompt, neg_prompt), so both textboxes must be
    # wired as inputs; passing only the prompt fails on every click.
    button_gen.click(
        generate_image,
        inputs=[prompt, neg_prompt],
        outputs=[image, image2],
    )

ShoeGen.launch()