# NOTE(review): removed non-code extraction residue that preceded this script
# (an HTML file-viewer scrape: "File size" header, per-line commit hashes,
# and a duplicated 1-86 line-number gutter). No source code was lost.
import gradio as gr
import spaces
from src.utils import *
from src.backend import *

# --- Theme ----------------------------------------------------------------
# Soft theme with the project's cyan/zinc palette; Google font with an
# 'arial' fallback for environments where the web font fails to load.
theme = gr.themes.Soft(
    primary_hue="cyan",
    secondary_hue="zinc",
    font=[gr.themes.GoogleFont("Source Sans 3", weights=(400, 600)), 'arial'],
)

with gr.Blocks(theme=theme) as demo:

    # --- Header: logo, title, authors, repo badge, beta disclaimer --------
    with gr.Column(elem_classes="header"):
        gr.HTML('<img src="https://huggingface.co/spaces/mikonvergence/COP-GEN-Beta/resolve/main/logos/COP-GEN-logo.png" style="max-width: 90%; width:300px;">')
        gr.Markdown("# 🔵 COP-GEN-Beta: Unified Generative Modelling of COPernicus Imagery Thumbnails")
        gr.Markdown("### Miguel Espinosa, Valerio Marsocci, Yuru Jia, Elliot J. Crowley, Mikolaj Czerkawski")
        gr.HTML('<a href="https://github.com/miquel-espinosa/COP-GEN-Beta" class="text-decoration-none site-link"><img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/miquel-espinosa/COP-GEN-Beta"></a>')
        gr.Markdown('> ## ⚠️ NOTE: This is a prototype Beta model of COP-GEN. It is based on image thumbnails of [Major TOM](https://huggingface.co/Major-TOM) and does not yet support raw source data. The hillshade visualisation is used for elevation. The full model COP-GEN is coming soon.')

    # --- Main app body ----------------------------------------------------
    # NOTE(review): elem_classes="Main app" yields TWO CSS classes ("Main"
    # and "app"); kept as-is since any custom CSS may rely on it — confirm.
    with gr.Column(elem_classes="Main app"):

        # Fixed: this accordion was previously bound `as abstract`, the
        # same name as the Abstract accordion below (silent shadowing).
        with gr.Accordion("Instructions", open=False) as instructions:
            gr.Markdown("1. **Generate**: Click the `🏭 Generate` button to synthesize the output **without any conditions**. The outputs will be shown below - and that's it, you've generated your first sample! 🧑‍🎨️"+
                        "<br/>2. **Optionally, define input**:  If you want to condition your generation, you can upload your thumbnails manually or you can `🔄 Load` a random sample from Major TOM by clicking the button."+
                        "<br/>3. **Select conditions**: Each input image can be used as a **conditioning** when it's loaded into the inputs panel. The modalities you wish to generate should have no content in the input panel (you can empty each element by clicking `x` in the top right corner of the image)."+
                        "<br/>4. **Additional Options**: You can control the number of generation steps (higher number might produce better quality, but will take more time), or set a fixed seed (for reproducible results)."+
                       "<br/>5. You can also reuse any of the generated samples as input to the model by clicking `♻️ Reuse`")

        with gr.Column():
            generate_button = gr.Button("🏭 Generate", variant="primary")

            # Output panel: one image per modality (read-only).
            gr.Markdown("## Outputs")
            with gr.Row():
                s2l1c_output = gr.Image(label="S2 L1C (Optical - Top of Atmosphere)", interactive=False)
                s2l2a_output = gr.Image(label="S2 L2A (Optical - Bottom of Atmosphere)", interactive=False)
                s1rtc_output = gr.Image(label="S1 RTC (SAR)", interactive=False)
                dem_output = gr.Image(label="DEM (Elevation)", interactive=False)

            # One reuse button per modality: copies an output back into the
            # matching input slot so it can condition the next generation.
            with gr.Row():
                s2l1c_reuse_button = gr.Button("♻️ Reuse S2 L1C as input ⤵️", variant="primary")
                s2l2a_reuse_button = gr.Button("♻️ Reuse S2 L2A as input ⤵️", variant="primary")
                s1rtc_reuse_button = gr.Button("♻️ Reuse S1 RTC as input ⤵️", variant="primary")
                dem_reuse_button = gr.Button("♻️ Reuse DEM as input ⤵️", variant="primary")

            # Input panel: optional conditioning images (user-editable).
            gr.Markdown("---")
            with gr.Row():
                gr.Markdown("## (Optional) Input Conditions")
                load_button = gr.Button("🔄 Load a random sample from Major TOM 🗺", variant="secondary")
            with gr.Row():
                s2l1c_input = gr.Image(label="S2 L1C (Optical - Top of Atmosphere)", interactive=True)
                s2l2a_input = gr.Image(label="S2 L2A (Optical - Bottom of Atmosphere)", interactive=True)
                s1rtc_input = gr.Image(label="S1 RTC (SAR)", interactive=True)
                dem_input = gr.Image(label="DEM (Elevation)", interactive=True)
            gr.Markdown('### Ready? Go back up and press `🏭 Generate` again!')

        # Sampler controls: step count and (optionally random) seed.
        with gr.Accordion("Advanced Options", open=False) as advanced_options:
            num_inference_steps_slider = gr.Slider(minimum=10, maximum=1000, step=10, value=10, label="Inference Steps")
            with gr.Row():
                seed_number = gr.Number(value=6378, label="Seed")
                seed_checkbox = gr.Checkbox(value=True, label="Random")

        with gr.Accordion("Abstract", open=False) as abstract:
            gr.HTML('<a href="https://arxiv.org/abs/2504.08548/" class="text-decoration-none site-link"><img alt="Arxiv Link" src="https://img.shields.io/badge/Open_Access-arxiv:2504.08548-b31b1b"></a>')
            gr.Markdown("In remote sensing, multi-modal data from various sensors capturing the same scene offers rich opportunities, but learning a unified representation across these modalities remains a significant challenge. Traditional methods have often been limited to single or dual-modality approaches. In this paper, we introduce COP-GEN-Beta, a generative diffusion model trained on optical, radar, and elevation data from the Major TOM dataset. What sets COP-GEN-Beta apart is its ability to map any subset of modalities to any other, enabling zero-shot modality translation after training. This is achieved through a sequence-based diffusion transformer, where each modality is controlled by its own timestep embedding. We extensively evaluate COP-GEN-Beta on thumbnail images from the Major TOM dataset, demonstrating its effectiveness in generating high-quality samples. Qualitative and quantitative evaluations validate the model's performance, highlighting its potential as a powerful pre-trained model for future remote sensing tasks.")

        # --- Event wiring -------------------------------------------------
        # `sample_shuffle` / `generate_output` come from `src.backend` via
        # the wildcard import — presumably sampling Major TOM and running
        # the diffusion model; verify signatures against src/backend.py.
        load_button.click(
            fn=sample_shuffle,
            outputs=[s2l1c_input, s2l2a_input, s1rtc_input, dem_input],
        )

        generate_button.click(
            fn=generate_output,
            inputs=[s2l1c_input,
                    s2l2a_input,
                    s1rtc_input,
                    dem_input,
                    num_inference_steps_slider, seed_number, seed_checkbox],
            outputs=[s2l1c_output, s2l2a_output, s1rtc_output, dem_output],
        )

        def pass_value(value):
            """Identity pass-through used to copy an output image to an input."""
            return value

        s2l1c_reuse_button.click(fn=pass_value, inputs=[s2l1c_output], outputs=[s2l1c_input])
        s2l2a_reuse_button.click(fn=pass_value, inputs=[s2l2a_output], outputs=[s2l2a_input])
        s1rtc_reuse_button.click(fn=pass_value, inputs=[s1rtc_output], outputs=[s1rtc_input])
        dem_reuse_button.click(fn=pass_value, inputs=[dem_output], outputs=[dem_input])

# Queue requests (serializes GPU work) and expose a public share link.
demo.queue().launch(share=True)