# Hugging Face Space: cwhuh/ponix-generator (Running on Zero)
# File size: 6,406 Bytes

import gradio as gr
import numpy as np
import random
import spaces
import torch
from diffusers import  DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL
from transformers import CLIPTextModel, CLIPTokenizer,T5EncoderModel, T5TokenizerFast
from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images

from llm_wrapper import run_gemini
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
import subprocess


# Best-effort startup cleanup: delete everything under the ZeroGPU offload
# directory. The command needs a shell so the `*` glob is expanded, it runs
# with an empty environment, and its exit status is deliberately not checked.
_OFFLOAD_CLEANUP_COMMAND = "rm -rf /data-nvme/zerogpu-offload/*"
subprocess.run(_OFFLOAD_CLEANUP_COMMAND, shell=True, env={})


# All model weights are loaded in bfloat16; use CUDA when it is available.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

_BASE_MODEL_REPO = "black-forest-labs/FLUX.1-dev"
_PONIX_REPO = "cwhuh/ponix-generator-v0.1.0"

# Two decoders are loaded: the tiny autoencoder is installed as the pipeline's
# VAE, while the full FLUX VAE is kept separately and handed to the streaming
# call as `good_vae`.
taef1 = AutoencoderTiny.from_pretrained(
    "madebyollin/taef1",
    torch_dtype=dtype,
).to(device)
good_vae = AutoencoderKL.from_pretrained(
    _BASE_MODEL_REPO,
    subfolder="vae",
    torch_dtype=dtype,
).to(device)
pipe = DiffusionPipeline.from_pretrained(
    _BASE_MODEL_REPO,
    torch_dtype=dtype,
    vae=taef1,
).to(device)

# PONIX mode load: LoRA weights first, then the textual-inversion embedding
# for the three placeholder tokens on the CLIP text encoder.
pipe.load_lora_weights(
    _PONIX_REPO,
    weight_name="pytorch_lora_weights.safetensors",
)
embedding_path = hf_hub_download(
    repo_id=_PONIX_REPO,
    filename="./ponix-generator-v0.1.0_emb.safetensors",
    repo_type="model",
)
state_dict = load_file(embedding_path)
pipe.load_textual_inversion(
    state_dict["clip_l"],
    token=["<s0>", "<s1>", "<s2>"],
    text_encoder=pipe.text_encoder,
    tokenizer=pipe.tokenizer,
)

torch.cuda.empty_cache()

# Upper bounds used by the seed and image-size sliders in the UI.
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 2048

# Bind the streaming helper to the pipeline instance so it can be called as a
# method (`pipe.flux_pipe_call_that_returns_an_iterable_of_images(...)`).
pipe.flux_pipe_call_that_returns_an_iterable_of_images = (
    flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe)
)

@spaces.GPU(duration=50)
def infer(
    prompt,
    seed=42,
    randomize_seed=False,
    width=1024,
    height=1024,
    guidance_scale=3.5,
    num_inference_steps=28,
    progress=gr.Progress(track_tqdm=True),
    use_prompt_refinement=True,
):
    """Generate a PONIX image, yielding each image the pipeline produces.

    Args:
        prompt: Text typed by the user.
        seed: Seed for the torch generator; replaced when ``randomize_seed``
            is true.
        randomize_seed: When true, draw a new seed in ``[0, MAX_SEED]``.
        width: Image width forwarded to the pipeline.
        height: Image height forwarded to the pipeline.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Number of steps forwarded to the pipeline.
        progress: Gradio progress tracker (tqdm tracking enabled). It is not
            used directly in the body.
        use_prompt_refinement: When true (the default, matching the previous
            unconditional behavior) the prompt is rewritten by ``run_gemini``
            with ``prompt.json`` before generation. When false the prompt is
            sent to the pipeline exactly as typed.

    Yields:
        ``(image, seed)`` tuples, one per image returned by the streaming
        pipeline call, so the UI can show the image and the seed that was used.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI slider values may arrive as floats; manual_seed only accepts integers.
    seed = int(seed)
    generator = torch.Generator().manual_seed(seed)

    if use_prompt_refinement:
        refined_prompt = run_gemini(
            target_prompt=prompt,
            prompt_in_path="prompt.json",
        )
        print(f"Refined prompt: {refined_prompt}")
    else:
        # NOTE(review): presumably the refinement step is also what adds the
        # LoRA trigger tokens — confirm the raw prompt is usable on its own.
        refined_prompt = prompt

    for img in pipe.flux_pipe_call_that_returns_an_iterable_of_images(
        prompt=refined_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_inference_steps),
        width=int(width),
        height=int(height),
        generator=generator,
        output_type="pil",
        good_vae=good_vae,
    ):
        yield img, seed


# Example prompts shown under the prompt box.
examples = [
    "기계공학과(로켓) 포닉스",
    "바이올린을 연주하는 포닉스",
    "물리학을 연구하는 포닉스",
]

css = """
#col-container {
    margin: 0 auto;
    max-width: 580px;
}

.footer {
    text-align: center;
    margin-top: 20px;
    font-size: 0.8em;
    color: #666;
}
"""

with gr.Blocks(css=css, theme="soft") as demo:

    with gr.Column(elem_id="col-container"):
        gr.Markdown("""# 🌊 [POSTECH] PONIX Generator
[[based on FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md)] 
        """)

        with gr.Group():
            # NOTE(review): the contact address in this text looks like an
            # e-mail-obfuscation placeholder — confirm the intended address.
            gr.Markdown("""
### 🔍 사용 가이드
- 생성하고 싶은 이미지를 한글로 간단하게 작성해주세요.
- 이미지는 노이즈에서 점차적으로 생성됩니다. (40~50초 소요)
- 문의는 이메일로 부탁드립니다: [email protected]
            """)

        with gr.Group():
            prompt = gr.Text(
                label="프롬프트 입력",
                max_lines=1,
                placeholder="원하는 포닉스 이미지를 한글로 설명해주세요",
                container=True,
            )

            run_button = gr.Button("🚀 생성하기", variant="primary")

        result = gr.Image(label="생성된 이미지")

        with gr.Accordion("🛠️ 고급 설정", open=False):
            with gr.Group():
                use_prompt_refinement = gr.Checkbox(
                    label="프롬프트 자동 개선",
                    value=True,
                    info="AI가 입력한 프롬프트를 자동으로 개선합니다.",
                )

                with gr.Row():
                    seed = gr.Slider(
                        label="시드 값",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                    )

                    randomize_seed = gr.Checkbox(label="랜덤 시드 사용", value=True)

                with gr.Row():
                    width = gr.Slider(
                        label="너비",
                        minimum=256,
                        maximum=MAX_IMAGE_SIZE,
                        step=32,
                        value=1024,
                    )

                    height = gr.Slider(
                        label="높이",
                        minimum=256,
                        maximum=MAX_IMAGE_SIZE,
                        step=32,
                        value=1024,
                    )

                with gr.Row():
                    guidance_scale = gr.Slider(
                        label="가이던스 스케일",
                        minimum=1,
                        maximum=15,
                        step=0.1,
                        value=3.5,
                    )

                    num_inference_steps = gr.Slider(
                        label="추론 단계 수",
                        minimum=1,
                        maximum=50,
                        step=1,
                        value=28,
                    )

        gr.Markdown("### 예시 프롬프트")
        # Examples pass only the prompt; every other argument of `infer`
        # falls back to its default value.
        gr.Examples(
            examples=examples,
            fn=infer,
            inputs=[prompt],
            outputs=[result, seed],
            cache_examples="lazy",
        )

        gr.HTML("""
        <div class="footer">
            PONIX Generator by 허채원 | POSTECH
        </div>
        """)

    # The input order follows `infer`'s parameters. Gradio inserts the Progress
    # tracker at its own position in the signature, so the trailing checkbox
    # value is received as `use_prompt_refinement`.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
            use_prompt_refinement,
        ],
        outputs=[result, seed],
    )

demo.launch()