dkatz2391 committed
Commit ef798fd · verified · 1 Parent(s): ab1a5ae

gemini inside cursor state change

Files changed (1)
  1. app.py +151 -41
app.py CHANGED
@@ -39,7 +39,8 @@ def start_session(req: gr.Request):
 
  def end_session(req: gr.Request):
      user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-     shutil.rmtree(user_dir)
+     # Use shutil.rmtree with ignore_errors=True for robustness
+     shutil.rmtree(user_dir, ignore_errors=True)
 
 
  def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
@@ -68,15 +69,16 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
          opacity_bias=state['gaussian']['opacity_bias'],
          scaling_activation=state['gaussian']['scaling_activation'],
      )
-     gs._xyz = torch.tensor(state['gaussian']['_xyz'], device='cuda')
-     gs._features_dc = torch.tensor(state['gaussian']['_features_dc'], device='cuda')
-     gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda')
-     gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda')
-     gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda')
+     # Ensure tensors are created on the correct device ('cuda')
+     gs._xyz = torch.tensor(state['gaussian']['_xyz'], device='cuda', dtype=torch.float32)
+     gs._features_dc = torch.tensor(state['gaussian']['_features_dc'], device='cuda', dtype=torch.float32)
+     gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda', dtype=torch.float32)
+     gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda', dtype=torch.float32)
+     gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda', dtype=torch.float32)
 
      mesh = edict(
-         vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
-         faces=torch.tensor(state['mesh']['faces'], device='cuda'),
+         vertices=torch.tensor(state['mesh']['vertices'], device='cuda', dtype=torch.float32),
+         faces=torch.tensor(state['mesh']['faces'], device='cuda', dtype=torch.int64),  # Faces are usually integers
      )
 
      return gs, mesh
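The keys read back here mirror whatever pack_state writes out; pack_state itself is unchanged by this commit (only its signature appears as context at the end of the previous hunk). A minimal sketch of that packing side, assuming tensors are detached to CPU numpy arrays before JSON encoding — the helper name and exact field handling are illustrative, not the actual implementation:

    # Hypothetical sketch of the packing side; field names are taken from unpack_state above.
    def pack_state_sketch(gs, mesh) -> dict:
        return {
            'gaussian': {
                'opacity_bias': gs.opacity_bias,              # scalar settings stored as-is (assumed attributes)
                'scaling_activation': gs.scaling_activation,
                '_xyz': gs._xyz.detach().cpu().numpy(),       # tensors moved off the GPU
                '_features_dc': gs._features_dc.detach().cpu().numpy(),
                '_scaling': gs._scaling.detach().cpu().numpy(),
                '_rotation': gs._rotation.detach().cpu().numpy(),
                '_opacity': gs._opacity.detach().cpu().numpy(),
            },
            'mesh': {
                'vertices': mesh.vertices.detach().cpu().numpy(),
                'faces': mesh.faces.detach().cpu().numpy(),
            },
        }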
@@ -98,9 +100,9 @@ def text_to_3d(
      slat_guidance_strength: float,
      slat_sampling_steps: int,
      req: gr.Request,
- ) -> Tuple[dict, str]:
+ ) -> dict:  # MODIFIED: Now returns only the state dict
      """
-     Convert an text prompt to a 3D model.
+     Convert a text prompt to a 3D model state object.
      Args:
          prompt (str): The text prompt.
          seed (int): The random seed.
@@ -109,11 +111,14 @@ def text_to_3d(
          slat_guidance_strength (float): The guidance strength for structured latent generation.
          slat_sampling_steps (int): The number of sampling steps for structured latent generation.
      Returns:
-         dict: The information of the generated 3D model.
-         str: The path to the video of the 3D model.
+         dict: The JSON-serializable state object containing the generated 3D model info.
      """
+     # Ensure user directory exists (redundant if start_session is always called, but safe)
      user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-     os.makedirs(user_dir, exist_ok=True)
+     os.makedirs(user_dir, exist_ok=True)
+ 
+     print(f"[{req.session_hash}] Running text_to_3d for prompt: {prompt}")  # Add logging
+ 
      outputs = pipeline.run(
          prompt,
          seed=seed,
@@ -127,19 +132,58 @@ def text_to_3d(
              "cfg_strength": slat_guidance_strength,
          },
      )
-     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
-     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
-     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
-     video_path = os.path.join(user_dir, 'sample.mp4')
-     imageio.mimsave(video_path, video, fps=15)
+ 
+     # REMOVED: Video rendering logic moved to render_preview_video
+     # video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
+     # video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
+     # video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
+     # video_path = os.path.join(user_dir, 'sample.mp4')
+     # imageio.mimsave(video_path, video, fps=15)
 
      # Create the state object and ensure it's JSON serializable for API calls
      state = pack_state(outputs['gaussian'][0], outputs['mesh'][0])
      # Convert to serializable format
      serializable_state = json.loads(json.dumps(state, cls=NumpyEncoder))
 
+     print(f"[{req.session_hash}] text_to_3d completed. Returning state.")  # Add logging
+ 
      torch.cuda.empty_cache()
-     return serializable_state, video_path
+     return serializable_state  # MODIFIED: Return only state
+ 
+ # --- NEW FUNCTION ---
+ @spaces.GPU
+ def render_preview_video(state: dict, req: gr.Request) -> str:
+     """
+     Renders a preview video from the provided state object.
+     Args:
+         state (dict): The state object containing Gaussian and mesh data.
+         req (gr.Request): Gradio request object for session hash.
+     Returns:
+         str: The path to the rendered video file.
+     """
+     if not state:
+         print(f"[{req.session_hash}] render_preview_video called with empty state. Returning None.")
+         # Consider returning a placeholder or raising an error if state is required
+         return None
+ 
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     os.makedirs(user_dir, exist_ok=True)  # Ensure directory exists
+ 
+     print(f"[{req.session_hash}] Unpacking state for video rendering.")  # Add logging
+     gs, mesh = unpack_state(state)
+ 
+     print(f"[{req.session_hash}] Rendering video...")  # Add logging
+     video = render_utils.render_video(gs, num_frames=120)['color']
+     video_geo = render_utils.render_video(mesh, num_frames=120)['normal']
+     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
+ 
+     video_path = os.path.join(user_dir, 'preview_sample.mp4')  # Use a distinct name
+     print(f"[{req.session_hash}] Saving video to {video_path}")  # Add logging
+     imageio.mimsave(video_path, video, fps=15)
+ 
+     torch.cuda.empty_cache()
+     return video_path
+ # --- END NEW FUNCTION ---
 
 
  @spaces.GPU(duration=90)
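The json.dumps(state, cls=NumpyEncoder) round trip above relies on a NumpyEncoder helper defined earlier in app.py, outside this diff. It is presumably the usual json.JSONEncoder subclass that turns numpy values into plain Python types; a minimal sketch under that assumption (the class-name suffix marks it as illustrative):

    import json
    import numpy as np

    class NumpyEncoderSketch(json.JSONEncoder):
        """Illustrative stand-in for the NumpyEncoder used by text_to_3d."""
        def default(self, obj):
            if isinstance(obj, np.ndarray):
                return obj.tolist()   # arrays become nested lists
            if isinstance(obj, np.generic):
                return obj.item()     # numpy scalars become Python ints/floats
            return super().default(obj)

    # json.loads(json.dumps(state, cls=NumpyEncoderSketch)) then yields a dict of plain
    # lists and numbers that survives gr.State and the HTTP API unchanged.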
@@ -150,50 +194,76 @@ def extract_glb(
      req: gr.Request,
  ) -> Tuple[str, str]:
      """
-     Extract a GLB file from the 3D model.
+     Extract a GLB file from the 3D model state.
      Args:
          state (dict): The state of the generated 3D model.
          mesh_simplify (float): The mesh simplification factor.
          texture_size (int): The texture resolution.
      Returns:
-         str: The path to the extracted GLB file.
+         str: The path to the extracted GLB file (for Model3D component).
+         str: The path to the extracted GLB file (for DownloadButton).
      """
+     if not state:
+         print(f"[{req.session_hash}] extract_glb called with empty state. Returning None.")
+         return None, None  # Return Nones if state is missing
+ 
      user_dir = os.path.join(TMP_DIR, str(req.session_hash))
      os.makedirs(user_dir, exist_ok=True)
+ 
+     print(f"[{req.session_hash}] Unpacking state for GLB extraction.")  # Add logging
      gs, mesh = unpack_state(state)
+ 
+     print(f"[{req.session_hash}] Extracting GLB (simplify={mesh_simplify}, texture={texture_size})...")  # Add logging
      glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
      glb_path = os.path.join(user_dir, 'sample.glb')
+     print(f"[{req.session_hash}] Saving GLB to {glb_path}")  # Add logging
      glb.export(glb_path)
+ 
      torch.cuda.empty_cache()
+     # Return the same path for both Model3D and DownloadButton components
      return glb_path, glb_path
 
 
  @spaces.GPU
  def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
      """
-     Extract a Gaussian file from the 3D model.
+     Extract a Gaussian PLY file from the 3D model state.
      Args:
          state (dict): The state of the generated 3D model.
      Returns:
-         str: The path to the extracted Gaussian file.
+         str: The path to the extracted Gaussian file (for Model3D component).
+         str: The path to the extracted Gaussian file (for DownloadButton).
      """
+     if not state:
+         print(f"[{req.session_hash}] extract_gaussian called with empty state. Returning None.")
+         return None, None  # Return Nones if state is missing
+ 
      user_dir = os.path.join(TMP_DIR, str(req.session_hash))
      os.makedirs(user_dir, exist_ok=True)
+ 
+     print(f"[{req.session_hash}] Unpacking state for Gaussian extraction.")  # Add logging
      gs, _ = unpack_state(state)
+ 
      gaussian_path = os.path.join(user_dir, 'sample.ply')
+     print(f"[{req.session_hash}] Saving Gaussian PLY to {gaussian_path}")  # Add logging
      gs.save_ply(gaussian_path)
+ 
      torch.cuda.empty_cache()
+     # Return the same path for both Model3D and DownloadButton components
      return gaussian_path, gaussian_path
 
 
- output_buf = gr.State()
- video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
+ # State object to hold the generated model info between steps
+ output_buf = gr.State()
+ # Video component placeholder (will be populated by render_preview_video)
+ # video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)  # Defined later inside the Blocks
 
  with gr.Blocks(delete_cache=(600, 600)) as demo:
      gr.Markdown("""
      ## Text to 3D Asset with [TRELLIS](https://trellis3d.github.io/)
      * Type a text prompt and click "Generate" to create a 3D asset.
-     * If you find the generated 3D asset satisfactory, click "Extract GLB" to extract the GLB file and download it.
+     * The preview video will appear after generation.
+     * If you find the generated 3D asset satisfactory, click "Extract GLB" or "Extract Gaussian" to extract the file and download it.
      """)
 
      with gr.Row():
@@ -219,6 +289,7 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
                  texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
 
              with gr.Row():
+                 # Buttons start non-interactive, enabled after generation
                  extract_glb_btn = gr.Button("Extract GLB", interactive=False)
                  extract_gs_btn = gr.Button("Extract Gaussian", interactive=False)
              gr.Markdown("""
@@ -226,63 +297,102 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
              """)
 
          with gr.Column():
-             video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
+             # Define UI components here
+             video_output = gr.Video(label="Generated 3D Asset Preview", autoplay=True, loop=True, height=300)
              model_output = gr.Model3D(label="Extracted GLB/Gaussian", height=300)
 
              with gr.Row():
+                 # Buttons start non-interactive, enabled after extraction
                  download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
                  download_gs = gr.DownloadButton(label="Download Gaussian", interactive=False)
 
+     # Define the state buffer here, outside the component definitions but inside the Blocks scope
      output_buf = gr.State()
 
-     # Handlers
+     # --- Handlers ---
      demo.load(start_session)
      demo.unload(end_session)
 
+     # --- MODIFIED UI CHAIN ---
+     # 1. Get Seed
+     # 2. Run text_to_3d -> outputs state to output_buf
+     # 3. Run render_preview_video (using state from output_buf) -> outputs video to video_output
+     # 4. Enable extraction buttons
      generate_btn.click(
          get_seed,
          inputs=[randomize_seed, seed],
          outputs=[seed],
+         queue=True  # Use queue for potentially long-running steps
      ).then(
          text_to_3d,
          inputs=[text_prompt, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
-         outputs=[output_buf, video_output],
+         outputs=[output_buf],  # text_to_3d now ONLY outputs state
+         api_name="text_to_3d"  # Keep API name consistent if needed
      ).then(
-         lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
+         render_preview_video,  # NEW step: Render video from state
+         inputs=[output_buf],
+         outputs=[video_output],
+         api_name="render_preview_video"  # Assign API name if you want to call this separately
+     ).then(
+         lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),  # Enable extraction buttons
          outputs=[extract_glb_btn, extract_gs_btn],
      )
 
-     video_output.clear(
+     # Clear video and disable extraction buttons if prompt is cleared or generation restarted
+     # (Consider adding logic to clear prompt on successful generation if desired)
+     text_prompt.change(  # Example: Clear video if prompt changes
+         lambda: (None, gr.Button(interactive=False), gr.Button(interactive=False)),
+         outputs=[video_output, extract_glb_btn, extract_gs_btn]
+     )
+     video_output.clear(  # This might be redundant if text_prompt.change handles it
          lambda: tuple([gr.Button(interactive=False), gr.Button(interactive=False)]),
          outputs=[extract_glb_btn, extract_gs_btn],
      )
 
+     # --- Extraction Handlers ---
+     # GLB Extraction: Takes state from output_buf, outputs model and download path
      extract_glb_btn.click(
          extract_glb,
          inputs=[output_buf, mesh_simplify, texture_size],
-         outputs=[model_output, download_glb],
+         outputs=[model_output, download_glb],  # Outputs to Model3D and DownloadButton path
+         api_name="extract_glb"
      ).then(
-         lambda: gr.Button(interactive=True),
+         lambda: gr.Button(interactive=True),  # Enable download button
          outputs=[download_glb],
      )
 
+     # Gaussian Extraction: Takes state from output_buf, outputs model and download path
      extract_gs_btn.click(
          extract_gaussian,
          inputs=[output_buf],
-         outputs=[model_output, download_gs],
+         outputs=[model_output, download_gs],  # Outputs to Model3D and DownloadButton path
+         api_name="extract_gaussian"
      ).then(
-         lambda: gr.Button(interactive=True),
+         lambda: gr.Button(interactive=True),  # Enable download button
          outputs=[download_gs],
      )
 
+     # Clear model and disable download buttons if video/state is cleared
      model_output.clear(
-         lambda: gr.Button(interactive=False),
-         outputs=[download_glb],
+         lambda: (gr.Button(interactive=False), gr.Button(interactive=False)),
+         outputs=[download_glb, download_gs],  # Disable both download buttons
      )
 
 
- # Launch the Gradio app
+ # --- Launch the Gradio app ---
  if __name__ == "__main__":
-     pipeline = TrellisTextTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-text-xlarge")
-     pipeline.cuda()
-     demo.launch()
+     print("Loading Trellis pipeline...")
+     # Consider adding error handling for pipeline loading
+     try:
+         pipeline = TrellisTextTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-text-xlarge")
+         pipeline.cuda()
+         print("Pipeline loaded successfully.")
+     except Exception as e:
+         print(f"Error loading pipeline: {e}")
+         # Optionally exit or provide a fallback UI
+         sys.exit(1)
+ 
+     print("Launching Gradio demo...")
+     # Enable queue for handling multiple users/requests
+     # Set share=True if you need a public link (requires login for private spaces)
+     demo.queue().launch()
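Because text_to_3d, render_preview_video, extract_glb, and extract_gaussian each carry an api_name after this change, the whole flow can be driven programmatically, which is presumably what the "gemini inside cursor" workflow in the commit message calls into. A hedged sketch using gradio_client — the Space id, prompt, and numeric values below are placeholders; the real defaults live in the unchanged slider definitions:

    from gradio_client import Client

    client = Client("dkatz2391/Trellis-Text-To-3D")  # placeholder Space id

    # Step 1: generate; the endpoint now returns only the JSON-serializable state.
    state = client.predict(
        "a weathered wooden rowing boat",  # prompt
        0,      # seed
        7.5,    # ss_guidance_strength (illustrative)
        12,     # ss_sampling_steps (illustrative)
        3.0,    # slat_guidance_strength (illustrative)
        12,     # slat_sampling_steps (illustrative)
        api_name="/text_to_3d",
    )

    # Step 2 (optional for API callers): render the turntable preview from that state.
    video_path = client.predict(state, api_name="/render_preview_video")

    # Step 3: extract assets from the same state; each endpoint returns the path twice
    # (once for the Model3D viewer, once for the DownloadButton).
    glb_path, _ = client.predict(state, 0.95, 1024, api_name="/extract_glb")  # mesh_simplify, texture_size (illustrative)
    ply_path, _ = client.predict(state, api_name="/extract_gaussian")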