Spaces:

sakakuto
/

skyrels

Paused

App Files Files Community

sakakuto committed on Feb 18

Commit

ab9e6f4

1 Parent(s): 4800fce

Add application file

Browse files

Files changed (5) hide show

README.md +6 -5
app_i2v.py +104 -0
app_t2v.py +99 -0
requirements.txt +8 -0
video_model.py +12 -0

README.md CHANGED Viewed

@@ -1,12 +1,13 @@
 ---
-title: Skyrels
-emoji: 📉
-colorFrom: green
-colorTo: red
 sdk: gradio
-sdk_version: 5.16.1
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Test Video
+emoji: 🐨
+colorFrom: yellow
+colorTo: yellow
 sdk: gradio
+sdk_version: 5.9.1
 app_file: app.py
 pinned: false
+license: mit
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app_i2v.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import spaces
+import gradio as gr
+import time
+import torch
+import gc
+import tempfile
+from diffusers.utils import export_to_video, load_image
+from video_model import i2v_pipe
+# Device for the torch random generator created per request: CUDA when available, otherwise CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+def create_demo() -> gr.Blocks:
+    @spaces.GPU(duration=60)
+    def image_to_video(
+        image_path: str,
+        prompt: str,
+        negative_prompt: str,
+        width: int = 768,
+        height: int = 512,
+        num_frames: int = 121,
+        frame_rate: int = 25,
+        num_inference_steps: int = 30,
+        seed: int = 8,
+        progress=gr.Progress(),
+    ):
+        generator = torch.Generator(device=device).manual_seed(seed)
+        input_image = load_image(image_path)
+        run_task_time = 0
+        time_cost_str = ''
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
+        try:
+            with torch.no_grad():
+                video = i2v_pipe(
+                    image=input_image,
+                    prompt=prompt,
+                    negative_prompt=negative_prompt,
+                    generator=generator,
+                    width=width,
+                    height=height,
+                    num_frames=num_frames,
+                    num_inference_steps=num_inference_steps,
+                ).frames[0]
+        finally:
+            torch.cuda.empty_cache()
+            gc.collect()
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
+        output_path = tempfile.mktemp(suffix=".mp4")
+        export_to_video(video, output_path, fps=frame_rate)
+        del video
+        torch.cuda.empty_cache()
+        return output_path, time_cost_str
+    def get_time_cost(run_task_time, time_cost_str):
+        now_time = int(time.time()*1000)
+        if run_task_time == 0:
+            time_cost_str = 'start'
+        else:
+            if time_cost_str != '':
+                time_cost_str += f'-->'
+            time_cost_str += f'{now_time - run_task_time}'
+        run_task_time = now_time
+        return run_task_time, time_cost_str
+    with gr.Blocks() as demo:
+        with gr.Row():
+            with gr.Column():
+                i2vid_image_path = gr.File(label="Input Image")
+                i2vid_prompt = gr.Textbox(
+                    label="Enter Your Prompt",
+                    placeholder="Describe the video you want to generate (minimum 50 characters)...",
+                    value="A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage.",
+                    lines=5,
+                )
+                i2vid_negative_prompt = gr.Textbox(
+                    label="Enter Negative Prompt",
+                    placeholder="Describe what you don't want in the video...",
+                    value="low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly",
+                    lines=2,
+                )
+                i2vid_generate = gr.Button(
+                    "Generate Video",
+                    variant="primary",
+                    size="lg",
+                )
+            with gr.Column():
+                i2vid_output = gr.Video(label="Generated Output")
+                i2vid_generated_cost = gr.Textbox(label="Time cost by step (ms):", visible=True, interactive=False)
+        i2vid_generate.click(
+            fn=image_to_video,
+            inputs=[i2vid_image_path, i2vid_prompt, i2vid_negative_prompt],
+            outputs=[i2vid_output, i2vid_generated_cost],
+        )
+    return demo

app_t2v.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import spaces
+import gradio as gr
+import time
+import torch
+import gc
+import tempfile
+from diffusers.utils import export_to_video
+from video_model import t2v_pipe
+# Device for the torch random generator created per request: CUDA when available, otherwise CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+def create_demo() -> gr.Blocks:
+    @spaces.GPU(duration=60)
+    def text_to_video(
+        prompt: str,
+        negative_prompt: str,
+        width: int = 768,
+        height: int = 512,
+        num_frames: int = 121,
+        frame_rate: int = 25,
+        num_inference_steps: int = 30,
+        seed: int = 8,
+        progress: gr.Progress = gr.Progress(),
+    ):
+        generator = torch.Generator(device=device).manual_seed(seed)
+        run_task_time = 0
+        time_cost_str = ''
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
+        try:
+            with torch.no_grad():
+                video = t2v_pipe(
+                    prompt=prompt,
+                    negative_prompt=negative_prompt,
+                    generator=generator,
+                    width=width,
+                    height=height,
+                    num_frames=num_frames,
+                    num_inference_steps=num_inference_steps,
+                ).frames[0]
+        finally:
+            torch.cuda.empty_cache()
+            gc.collect()
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
+        output_path = tempfile.mktemp(suffix=".mp4")
+        export_to_video(video, output_path, fps=frame_rate)
+        del video
+        torch.cuda.empty_cache()
+        return output_path, time_cost_str
+    def get_time_cost(run_task_time, time_cost_str):
+        now_time = int(time.time()*1000)
+        if run_task_time == 0:
+            time_cost_str = 'start'
+        else:
+            if time_cost_str != '':
+                time_cost_str += f'-->'
+            time_cost_str += f'{now_time - run_task_time}'
+        run_task_time = now_time
+        return run_task_time, time_cost_str
+    with gr.Blocks() as demo:
+        with gr.Row():
+            with gr.Column():
+                txt2vid_prompt = gr.Textbox(
+                    label="Enter Your Prompt",
+                    placeholder="Describe the video you want to generate (minimum 50 characters)...",
+                    value="A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage.",
+                    lines=5,
+                )
+                txt2vid_negative_prompt = gr.Textbox(
+                    label="Enter Negative Prompt",
+                    placeholder="Describe what you don't want in the video...",
+                    value="low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly",
+                    lines=2,
+                )
+                txt2vid_generate = gr.Button(
+                    "Generate Video",
+                    variant="primary",
+                    size="lg",
+                )
+            with gr.Column():
+                txt2vid_output = gr.Video(label="Generated Output")
+                txt2vid_generated_cost = gr.Textbox(label="Time cost by step (ms):", visible=True, interactive=False)
+        txt2vid_generate.click(
+            fn=text_to_video,
+            inputs=[txt2vid_prompt, txt2vid_negative_prompt],
+            outputs=[txt2vid_output, txt2vid_generated_cost],
+        )
+    return demo

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+gradio
+torch
+torchvision
+diffusers
+transformers
+accelerate
+mediapipe
+spaces

video_model.py ADDED Viewed

	@@ -0,0 +1,12 @@

+import torch
+from diffusers import LTXPipeline, LTXImageToVideoPipeline
+device = "cuda" if torch.cuda.is_available() else "cpu"
+t2v_pipe = LTXPipeline.from_pretrained("Skywork/SkyReels-V1-Hunyuan-T2V", torch_dtype=torch.bfloat16)
+t2v_pipe.to(device)
+i2v_pipe = LTXImageToVideoPipeline.from_pipe(t2v_pipe)
+i2v_pipe.to(device)