YiftachEde committed on
Commit
b266eca
·
1 Parent(s): 776d5b3
Files changed (1) hide show
  1. app.py +173 -126
app.py CHANGED
@@ -237,62 +237,78 @@ class ShapERenderer:
237
  print("Shap-E models initialized!")
238
 
239
  def ensure_models_loaded(self):
240
- if self.model is None:
241
- self.xm = load_model('transmitter', device=self.device)
242
- self.model = load_model('text300M', device=self.device)
243
- self.diffusion = diffusion_from_config(load_config('diffusion'))
 
 
 
 
 
244
 
245
  def generate_views(self, prompt, guidance_scale=15.0, num_steps=64):
246
- self.ensure_models_loaded()
247
-
248
- # Generate latents using the text-to-3D model
249
- batch_size = 1
250
- guidance_scale = float(guidance_scale)
251
- latents = sample_latents(
252
- batch_size=batch_size,
253
- model=self.model,
254
- diffusion=self.diffusion,
255
- guidance_scale=guidance_scale,
256
- model_kwargs=dict(texts=[prompt] * batch_size),
257
- progress=True,
258
- clip_denoised=True,
259
- use_fp16=True,
260
- use_karras=True,
261
- karras_steps=num_steps,
262
- sigma_min=1e-3,
263
- sigma_max=160,
264
- s_churn=0,
265
- )
 
 
 
 
266
 
267
- # Render the 6 views we need with specific viewing angles
268
- size = 320 # Size of each rendered image
269
- images = []
270
-
271
- # Define our 6 specific camera positions to match refine.py
272
- azimuths = [30, 90, 150, 210, 270, 330]
273
- elevations = [20, -10, 20, -10, 20, -10]
274
-
275
- for i, (azimuth, elevation) in enumerate(zip(azimuths, elevations)):
276
- cameras = create_custom_cameras(size, self.device, azimuths=[azimuth], elevations=[elevation], fov_degrees=30, distance=3.0)
277
- rendered_image = decode_latent_images(
278
- self.xm,
279
- latents[0],
280
- cameras=cameras,
281
- rendering_mode='stf'
282
- )
283
- images.append(rendered_image[0])
284
-
285
- # Convert images to uint8
286
- images = [np.array(image) for image in images]
287
-
288
- # Create 2x3 grid layout (640x960) instead of 3x2 (960x640)
289
- layout = np.zeros((960, 640, 3), dtype=np.uint8)
290
- for i, img in enumerate(images):
291
- row = i // 2 # Now 3 images per row
292
- col = i % 2 # Now 3 images per row
293
- layout[row*320:(row+1)*320, col*320:(col+1)*320] = img
 
 
294
 
295
- return Image.fromarray(layout), images
 
 
 
 
 
296
 
297
  class RefinerInterface:
298
  def __init__(self):
@@ -304,70 +320,88 @@ class RefinerInterface:
304
 
305
  def ensure_models_loaded(self):
306
  if self.pipeline is None:
307
- self.pipeline, self.model, self.infer_config = load_models()
 
 
 
 
 
308
 
309
  def refine_model(self, input_image, prompt, steps=75, guidance_scale=7.5):
310
  """Main refinement function"""
311
- self.ensure_models_loaded()
312
-
313
- # Process image and get refined output
314
- input_image = Image.fromarray(input_image)
315
-
316
- # Rotate the layout if needed (if we're getting a 640x960 layout but pipeline expects 960x640)
317
- if input_image.width == 960 and input_image.height == 640:
318
- # Transpose the image to get 960x640 layout
319
- input_array = np.array(input_image)
320
- new_layout = np.zeros((960, 640, 3), dtype=np.uint8)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
 
322
- # Rearrange from 2x3 to 3x2
 
 
 
 
323
  for i in range(6):
324
- src_row = i // 3
325
- src_col = i % 3
326
  dst_row = i // 2
327
  dst_col = i % 2
328
 
329
- new_layout[dst_row*320:(dst_row+1)*320, dst_col*320:(dst_col+1)*320] = \
330
- input_array[src_row*320:(src_row+1)*320, src_col*320:(src_col+1)*320]
331
 
332
- input_image = Image.fromarray(new_layout)
333
-
334
- # Process with the pipeline (expects 960x640)
335
- refined_output_960x640 = self.pipeline.refine(
336
- input_image,
337
- prompt=prompt,
338
- num_inference_steps=int(steps),
339
- guidance_scale=guidance_scale
340
- ).images[0]
341
-
342
- # Generate mesh using the 960x640 format
343
- vertices, faces, vertex_colors = create_mesh(
344
- refined_output_960x640,
345
- self.model,
346
- self.infer_config
347
- )
348
-
349
- # Save temporary mesh file
350
- os.makedirs("temp", exist_ok=True)
351
- temp_obj = os.path.join("temp", "refined_mesh.obj")
352
- save_obj(vertices, faces, vertex_colors, temp_obj)
353
-
354
- # Convert the output to 640x960 for display
355
- refined_array = np.array(refined_output_960x640)
356
- display_layout = np.zeros((960, 640, 3), dtype=np.uint8)
357
-
358
- # Rearrange from 3x2 to 2x3
359
- for i in range(6):
360
- src_row = i // 2
361
- src_col = i % 2
362
- dst_row = i // 2
363
- dst_col = i % 2
364
 
365
- display_layout[dst_row*320:(dst_row+1)*320, dst_col*320:(dst_col+1)*320] = \
366
- refined_array[src_row*320:(src_row+1)*320, src_col*320:(src_col+1)*320]
367
-
368
- refined_output_640x960 = Image.fromarray(display_layout)
369
-
370
- return refined_output_640x960, temp_obj
371
 
372
  def create_demo():
373
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -420,19 +454,20 @@ def create_demo():
420
  label="Refinement Guidance Scale"
421
  )
422
  refine_btn = gr.Button("Refine")
 
423
 
424
  # Second row: Image panels side by side
425
  with gr.Row():
426
  # Outputs - Images side by side
427
  shape_output = gr.Image(
428
  label="Generated Views",
429
- width=640, # Swapped dimensions
430
- height=960 # Swapped dimensions
431
  )
432
  refined_output = gr.Image(
433
  label="Refined Output",
434
- width=640, # Swapped dimensions
435
- height=960 # Swapped dimensions
436
  )
437
 
438
  # Third row: 3D mesh panel below
@@ -441,37 +476,49 @@ def create_demo():
441
  mesh_output = gr.Model3D(
442
  label="3D Mesh",
443
  clear_color=[1.0, 1.0, 1.0, 1.0],
444
- # width=1280, # Full width
445
- # height=600 # Taller for better visualization
446
  )
447
 
448
  # Set up event handlers
449
- @spaces.GPU(duration=60)
450
  def generate(prompt, guidance_scale, num_steps):
451
- with torch.no_grad():
452
- layout, _ = shap_e.generate_views(prompt, guidance_scale, num_steps)
453
- return layout
 
 
 
 
 
 
 
454
 
455
- @spaces.GPU(duration=60)
456
  def refine(input_image, prompt, steps, guidance_scale):
457
- refined_img, mesh_path = refiner.refine_model(
458
- input_image,
459
- prompt,
460
- steps,
461
- guidance_scale
462
- )
463
- return refined_img, mesh_path
 
 
 
 
 
 
 
464
 
465
  generate_btn.click(
466
  fn=generate,
467
  inputs=[shape_prompt, shape_guidance, shape_steps],
468
- outputs=[shape_output]
469
  )
470
 
471
  refine_btn.click(
472
  fn=refine,
473
  inputs=[shape_output, refine_prompt, refine_steps, refine_guidance],
474
- outputs=[refined_output, mesh_output]
475
  )
476
 
477
  return demo
 
237
  print("Shap-E models initialized!")
238
 
239
  def ensure_models_loaded(self):
240
+ if self.xm is None:
241
+ try:
242
+ torch.cuda.empty_cache() # Clear GPU memory before loading
243
+ self.xm = load_model('transmitter', device=self.device)
244
+ self.model = load_model('text300M', device=self.device)
245
+ self.diffusion = diffusion_from_config(load_config('diffusion'))
246
+ except Exception as e:
247
+ print(f"Error loading models: {e}")
248
+ raise
249
 
250
  def generate_views(self, prompt, guidance_scale=15.0, num_steps=64):
251
+ try:
252
+ self.ensure_models_loaded()
253
+ torch.cuda.empty_cache() # Clear GPU memory before generation
254
+
255
+ # Generate latents using the text-to-3D model
256
+ batch_size = 1
257
+ guidance_scale = float(guidance_scale)
258
+
259
+ with torch.cuda.amp.autocast(): # Use automatic mixed precision
260
+ latents = sample_latents(
261
+ batch_size=batch_size,
262
+ model=self.model,
263
+ diffusion=self.diffusion,
264
+ guidance_scale=guidance_scale,
265
+ model_kwargs=dict(texts=[prompt] * batch_size),
266
+ progress=True,
267
+ clip_denoised=True,
268
+ use_fp16=True,
269
+ use_karras=True,
270
+ karras_steps=num_steps,
271
+ sigma_min=1e-3,
272
+ sigma_max=160,
273
+ s_churn=0,
274
+ )
275
 
276
+ # Render the 6 views we need with specific viewing angles
277
+ size = 320 # Size of each rendered image
278
+ images = []
279
+
280
+ # Define our 6 specific camera positions to match refine.py
281
+ azimuths = [30, 90, 150, 210, 270, 330]
282
+ elevations = [20, -10, 20, -10, 20, -10]
283
+
284
+ for i, (azimuth, elevation) in enumerate(zip(azimuths, elevations)):
285
+ cameras = create_custom_cameras(size, self.device, azimuths=[azimuth], elevations=[elevation], fov_degrees=30, distance=3.0)
286
+ with torch.cuda.amp.autocast(): # Use automatic mixed precision
287
+ rendered_image = decode_latent_images(
288
+ self.xm,
289
+ latents[0],
290
+ cameras=cameras,
291
+ rendering_mode='stf'
292
+ )
293
+ images.append(rendered_image[0])
294
+ torch.cuda.empty_cache() # Clear GPU memory after each view
295
+
296
+ # Convert images to uint8
297
+ images = [np.array(image) for image in images]
298
+
299
+ # Create 2x3 grid layout (640x960)
300
+ layout = np.zeros((960, 640, 3), dtype=np.uint8)
301
+ for i, img in enumerate(images):
302
+ row = i // 2
303
+ col = i % 2
304
+ layout[row*320:(row+1)*320, col*320:(col+1)*320] = img
305
 
306
+ return Image.fromarray(layout), images
307
+
308
+ except Exception as e:
309
+ print(f"Error in generate_views: {e}")
310
+ torch.cuda.empty_cache() # Clear GPU memory on error
311
+ raise
312
 
313
  class RefinerInterface:
314
  def __init__(self):
 
320
 
321
  def ensure_models_loaded(self):
322
  if self.pipeline is None:
323
+ try:
324
+ torch.cuda.empty_cache() # Clear GPU memory before loading
325
+ self.pipeline, self.model, self.infer_config = load_models()
326
+ except Exception as e:
327
+ print(f"Error loading models: {e}")
328
+ raise
329
 
330
  def refine_model(self, input_image, prompt, steps=75, guidance_scale=7.5):
331
  """Main refinement function"""
332
+ try:
333
+ self.ensure_models_loaded()
334
+ torch.cuda.empty_cache() # Clear GPU memory before processing
335
+
336
+ # Process image and get refined output
337
+ input_image = Image.fromarray(input_image)
338
+
339
+ # Rearrange the layout if needed (if we're getting a 960x640 (3x2) layout, convert it to the 640x960 (2x3) layout)
340
+ if input_image.width == 960 and input_image.height == 640:
341
+ # Rearrange the tiles to get the 640x960 layout
342
+ input_array = np.array(input_image)
343
+ new_layout = np.zeros((960, 640, 3), dtype=np.uint8)
344
+
345
+ # Rearrange from 3x2 to 2x3
346
+ for i in range(6):
347
+ src_row = i // 3
348
+ src_col = i % 3
349
+ dst_row = i // 2
350
+ dst_col = i % 2
351
+
352
+ new_layout[dst_row*320:(dst_row+1)*320, dst_col*320:(dst_col+1)*320] = \
353
+ input_array[src_row*320:(src_row+1)*320, src_col*320:(src_col+1)*320]
354
+
355
+ input_image = Image.fromarray(new_layout)
356
+
357
+ # Process with the pipeline (NOTE(review): the image passed here is the 640x960 layout — confirm the pipeline's expected orientation)
358
+ with torch.cuda.amp.autocast(): # Use automatic mixed precision
359
+ refined_output_960x640 = self.pipeline.refine(
360
+ input_image,
361
+ prompt=prompt,
362
+ num_inference_steps=int(steps),
363
+ guidance_scale=guidance_scale
364
+ ).images[0]
365
+
366
+ torch.cuda.empty_cache() # Clear GPU memory after refinement
367
+
368
+ # Generate mesh using the 960x640 format
369
+ with torch.cuda.amp.autocast(): # Use automatic mixed precision
370
+ vertices, faces, vertex_colors = create_mesh(
371
+ refined_output_960x640,
372
+ self.model,
373
+ self.infer_config
374
+ )
375
+
376
+ torch.cuda.empty_cache() # Clear GPU memory after mesh generation
377
+
378
+ # Save temporary mesh file
379
+ os.makedirs("temp", exist_ok=True)
380
+ temp_obj = os.path.join("temp", "refined_mesh.obj")
381
+ save_obj(vertices, faces, vertex_colors, temp_obj)
382
 
383
+ # Convert the output to 640x960 for display
384
+ refined_array = np.array(refined_output_960x640)
385
+ display_layout = np.zeros((960, 640, 3), dtype=np.uint8)
386
+
387
+ # Copy tiles into the display layout (NOTE(review): src and dst indices below are identical, so this is a straight copy, not a 3x2-to-2x3 rearrangement — confirm intent)
388
  for i in range(6):
389
+ src_row = i // 2
390
+ src_col = i % 2
391
  dst_row = i // 2
392
  dst_col = i % 2
393
 
394
+ display_layout[dst_row*320:(dst_row+1)*320, dst_col*320:(dst_col+1)*320] = \
395
+ refined_array[src_row*320:(src_row+1)*320, src_col*320:(src_col+1)*320]
396
 
397
+ refined_output_640x960 = Image.fromarray(display_layout)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
 
399
+ return refined_output_640x960, temp_obj
400
+
401
+ except Exception as e:
402
+ print(f"Error in refine_model: {e}")
403
+ torch.cuda.empty_cache() # Clear GPU memory on error
404
+ raise
405
 
406
  def create_demo():
407
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
454
  label="Refinement Guidance Scale"
455
  )
456
  refine_btn = gr.Button("Refine")
457
+ error_output = gr.Textbox(label="Status/Error Messages", interactive=False)
458
 
459
  # Second row: Image panels side by side
460
  with gr.Row():
461
  # Outputs - Images side by side
462
  shape_output = gr.Image(
463
  label="Generated Views",
464
+ width=640,
465
+ height=960
466
  )
467
  refined_output = gr.Image(
468
  label="Refined Output",
469
+ width=640,
470
+ height=960
471
  )
472
 
473
  # Third row: 3D mesh panel below
 
476
  mesh_output = gr.Model3D(
477
  label="3D Mesh",
478
  clear_color=[1.0, 1.0, 1.0, 1.0],
 
 
479
  )
480
 
481
  # Set up event handlers
482
+ @spaces.GPU(duration=120) # Increased duration to 120 seconds
483
  def generate(prompt, guidance_scale, num_steps):
484
+ try:
485
+ torch.cuda.empty_cache() # Clear GPU memory before starting
486
+ with torch.no_grad():
487
+ layout, _ = shap_e.generate_views(prompt, guidance_scale, num_steps)
488
+ return layout, None # Return None for error message
489
+ except Exception as e:
490
+ torch.cuda.empty_cache() # Clear GPU memory on error
491
+ error_msg = f"Error during generation: {str(e)}"
492
+ print(error_msg)
493
+ return None, error_msg
494
 
495
+ @spaces.GPU(duration=120) # Increased duration to 120 seconds
496
  def refine(input_image, prompt, steps, guidance_scale):
497
+ try:
498
+ torch.cuda.empty_cache() # Clear GPU memory before starting
499
+ refined_img, mesh_path = refiner.refine_model(
500
+ input_image,
501
+ prompt,
502
+ steps,
503
+ guidance_scale
504
+ )
505
+ return refined_img, mesh_path, None # Return None for error message
506
+ except Exception as e:
507
+ torch.cuda.empty_cache() # Clear GPU memory on error
508
+ error_msg = f"Error during refinement: {str(e)}"
509
+ print(error_msg)
510
+ return None, None, error_msg
511
 
512
  generate_btn.click(
513
  fn=generate,
514
  inputs=[shape_prompt, shape_guidance, shape_steps],
515
+ outputs=[shape_output, error_output]
516
  )
517
 
518
  refine_btn.click(
519
  fn=refine,
520
  inputs=[shape_output, refine_prompt, refine_steps, refine_guidance],
521
+ outputs=[refined_output, mesh_output, error_output]
522
  )
523
 
524
  return demo