Spaces: Running on Zero

mikonvergence committed
Commit · e8d0048
1 Parent(s): e965dcb

improved interface

Files changed:
- .gitignore +2 -0
- app.py +43 -34
- src/backend.py +6 -1
- src/utils.py +2 -10
.gitignore CHANGED

@@ -2,3 +2,5 @@ __pycache__/
 .ipynb_checkpoints/
 *.ipynb
 images/
+models/
+*.jpg
app.py CHANGED

@@ -7,41 +7,22 @@ theme = gr.themes.Soft(primary_hue="cyan", secondary_hue="zinc", font=[gr.themes
 
 with gr.Blocks(theme=theme) as demo:
     with gr.Column(elem_classes="header"):
-        gr.
+        gr.HTML('<img src="file/logos/COP-GEN-logo.png" style="max-width: 300px;">')
+        gr.Markdown("# 🔵 COP-GEN-Beta: Unified Generative Modelling of COPernicus Imagery Thumbnails")
         gr.Markdown("### Miguel Espinosa, Valerio Marsocci, Yuru Jia, Elliot J. Crowley, Mikolaj Czerkawski")
         gr.Markdown('[[Website](https://miquel-espinosa.github.io/cop-gen-beta/)] [[GitHub](https://github.com/miquel-espinosa/COP-GEN-Beta)] [[Model](https://huggingface.co/mespinosami/COP-GEN-Beta)] [[Dataset](https://huggingface.co/Major-TOM)]')
-
-        gr.Markdown('⚠️ NOTE: This is a protoype Beta model of COP-GEN. It is based on image thumbnails of Major TOM and does not yet support raw source data. The hillshade visualisation is used for elevation. The full model COP-GEN is coming soon.')
+        gr.Markdown('> ## ⚠️ NOTE: This is a prototype Beta model of COP-GEN. It is based on image thumbnails of Major TOM and does not yet support raw source data. The hillshade visualisation is used for elevation. The full model COP-GEN is coming soon.')
 
-    with gr.Column(elem_classes="
-
-        with gr.Accordion("Abstract", open=False) as abstract:
-            gr.Markdown("In remote sensing, multi-modal data from various sensors capturing the same scene offers rich opportunities, but learning a unified representation across these modalities remains a significant challenge. Traditional methods have often been limited to single or dual-modality approaches. In this paper, we introduce COP-GEN-Beta, a generative diffusion model trained on optical, radar, and elevation data from the Major TOM dataset. What sets COP-GEN-Beta apart is its ability to map any subset of modalities to any other, enabling zero-shot modality translation after training. This is achieved through a sequence-based diffusion transformer, where each modality is controlled by its own timestep embedding. We extensively evaluate COP-GEN-Beta on thumbnail images from the Major TOM dataset, demonstrating its effectiveness in generating high-quality samples. Qualitative and quantitative evaluations validate the model's performance, highlighting its potential as a powerful pre-trained model for future remote sensing tasks.") # Replace with your abstract text
+    with gr.Column(elem_classes="Main app"):
 
         with gr.Accordion("Instructions", open=False) as abstract:
-            gr.Markdown("1. **
-            gr.Markdown("2. **
-            gr.Markdown("3. **
+            gr.Markdown("1. **Generate**: Click the `🏭 Generate` button to synthesize the output. The outputs will be shown below.")
+            gr.Markdown("2. **Define input**: If you want to condition your generation, you can upload your thumbnails manually or you can `🔄 Load` a random sample from Major TOM by clicking the button.")
+            gr.Markdown("3. **Select conditions**: Each input image can be used as a **conditioning** by selecting the `Active` checkbox. If no checkbox is selected, then you will still execute **unconditional generation**.")
+            gr.Markdown("4. **Additional Options**: You can control the number of generation steps (a higher number might produce better quality, but will take more time), or set a fixed seed (for reproducible results).")
 
     with gr.Column():
-
-        gr.Markdown("## Inputs (Optional)")
-        load_button = gr.Button("Load a random sample from Major TOM 🗺", variant="secondary")
-        with gr.Row():
-            with gr.Column():
-                s2l1c_input = gr.Image(label="S2 L1C (Optical - Top of Atmosphere)", interactive=True)
-                s2l1c_active = gr.Checkbox(value=False, label="Active", interactive=True)
-            with gr.Column():
-                s2l2a_input = gr.Image(label="S2 L2A (Optical - Bottom of Atmosphere)", interactive=True)
-                s2l2a_active = gr.Checkbox(value=False, label="Active", interactive=True)
-            with gr.Column():
-                s1rtc_input = gr.Image(label="S1 RTC (SAR)", interactive=True)
-                s1rtc_active = gr.Checkbox(value=False, label="Active", interactive=True)
-            with gr.Column():
-                dem_input = gr.Image(label="DEM (Elevation)", interactive=True)
-                dem_active = gr.Checkbox(value=False, label="Active", interactive=True)
-
-        generate_button = gr.Button("Generate", variant="primary")
+        generate_button = gr.Button("🏭 Generate", variant="primary")
 
         gr.Markdown("## Outputs")
         with gr.Row():
@@ -50,25 +31,53 @@ with gr.Blocks(theme=theme) as demo:
             s1rtc_output = gr.Image(label="S1 RTC (SAR)", interactive=False)
             dem_output = gr.Image(label="DEM (Elevation)", interactive=False)
 
+        with gr.Row():
+            s2l1c_reuse_button = gr.Button("Reuse S2 L1C as input ⤵️", variant="primary")
+            s2l2a_reuse_button = gr.Button("Reuse S2 L2A as input ⤵️", variant="primary")
+            s1rtc_reuse_button = gr.Button("Reuse S1 RTC as input ⤵️", variant="primary")
+            dem_reuse_button = gr.Button("Reuse DEM as input ⤵️", variant="primary")
+
+        gr.Markdown("---")
+        with gr.Row():
+            gr.Markdown("## Input Conditions (Optional)")
+            load_button = gr.Button("🔄 Load a random sample from Major TOM 🗺", variant="secondary")
+        with gr.Row():
+            s2l1c_input = gr.Image(label="S2 L1C (Optical - Top of Atmosphere)", interactive=True)
+            s2l2a_input = gr.Image(label="S2 L2A (Optical - Bottom of Atmosphere)", interactive=True)
+            s1rtc_input = gr.Image(label="S1 RTC (SAR)", interactive=True)
+            dem_input = gr.Image(label="DEM (Elevation)", interactive=True)
+        gr.Markdown('### Ready? Go back up and press `🏭 Generate` again!')
+
         with gr.Accordion("Advanced Options", open=False) as advanced_options:
             num_inference_steps_slider = gr.Slider(minimum=10, maximum=1000, step=10, value=10, label="Inference Steps")
             with gr.Row():
                 seed_number = gr.Number(value=6378, label="Seed")
                 seed_checkbox = gr.Checkbox(value=True, label="Random")
+
+        with gr.Accordion("Abstract", open=False) as abstract:
+            gr.Markdown("In remote sensing, multi-modal data from various sensors capturing the same scene offers rich opportunities, but learning a unified representation across these modalities remains a significant challenge. Traditional methods have often been limited to single or dual-modality approaches. In this paper, we introduce COP-GEN-Beta, a generative diffusion model trained on optical, radar, and elevation data from the Major TOM dataset. What sets COP-GEN-Beta apart is its ability to map any subset of modalities to any other, enabling zero-shot modality translation after training. This is achieved through a sequence-based diffusion transformer, where each modality is controlled by its own timestep embedding. We extensively evaluate COP-GEN-Beta on thumbnail images from the Major TOM dataset, demonstrating its effectiveness in generating high-quality samples. Qualitative and quantitative evaluations validate the model's performance, highlighting its potential as a powerful pre-trained model for future remote sensing tasks.")
 
     load_button.click(
         fn=sample_shuffle,
-        outputs=[s2l1c_input,
+        outputs=[s2l1c_input,s2l2a_input,s1rtc_input,dem_input]
     )
 
     generate_button.click(
         fn=generate_output,
-        inputs=[s2l1c_input,
-                s2l2a_input,
-                s1rtc_input,
-                dem_input,
+        inputs=[s2l1c_input,
+                s2l2a_input,
+                s1rtc_input,
+                dem_input,
                 num_inference_steps_slider, seed_number, seed_checkbox],
         outputs=[s2l1c_output, s2l2a_output, s1rtc_output, dem_output],
     )
 
-
+    def pass_value(value):
+        return value
+
+    s2l1c_reuse_button.click(fn=pass_value, inputs=[s2l1c_output],outputs=[s2l1c_input])
+    s2l2a_reuse_button.click(fn=pass_value, inputs=[s2l2a_output],outputs=[s2l2a_input])
+    s1rtc_reuse_button.click(fn=pass_value, inputs=[s1rtc_output],outputs=[s1rtc_input])
+    dem_reuse_button.click(fn=pass_value, inputs=[dem_output],outputs=[dem_input])
+
+demo.queue().launch(share=True, allowed_paths=["logos/"])
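Note: all four reuse buttons added in app.py share a single identity callback. Reduced to a toy two-component app, the wiring pattern looks like this (a minimal sketch; only `pass_value` comes from the commit, the component names here are illustrative):

import gradio as gr

# Identity callback: Gradio reads the current value of the component listed
# in `inputs` and writes the returned value into the component in `outputs`.
def pass_value(value):
    return value

with gr.Blocks() as demo:
    output_image = gr.Image(label="Generated output", interactive=False)
    input_image = gr.Image(label="Conditioning input", interactive=True)
    reuse_button = gr.Button("Reuse output as input ⤵️")
    # One click copies the generated image back into the conditioning slot,
    # enabling iterative generate-then-condition loops.
    reuse_button.click(fn=pass_value, inputs=[output_image], outputs=[input_image])

demo.launch()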
src/backend.py CHANGED

@@ -226,10 +226,15 @@ def custom_inference(images, generate_modalities, condition_modalities, num_infe
 
     return results
 
-def generate_output(s2l1c_input,
+def generate_output(s2l1c_input, s2l2a_input, s1rtc_input, dem_input, num_inference_steps_slider, seed_number, ignore_seed):
 
     seed = seed_number if not ignore_seed else None
 
+    s2l2a_active = s2l2a_input is not None
+    s2l1c_active = s2l1c_input is not None
+    s1rtc_active = s1rtc_input is not None
+    dem_active = dem_input is not None
+
     images=[]
     condition_modalities=[]
     if s2l2a_active:
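Note: the new signature drops the per-modality `Active` checkbox arguments; an input now acts as a conditioning signal whenever its image is present. A minimal sketch of that presence-based selection, with hypothetical modality names (the actual keys consumed by `custom_inference` are outside this hunk):

# Hypothetical helper mirroring the `is not None` checks added above.
def infer_conditioning(s2l1c_input, s2l2a_input, s1rtc_input, dem_input):
    candidates = {
        "s2l1c": s2l1c_input,
        "s2l2a": s2l2a_input,
        "s1rtc": s1rtc_input,
        "dem": dem_input,
    }
    # Keep only the modalities the user actually supplied.
    images = [img for img in candidates.values() if img is not None]
    condition_modalities = [name for name, img in candidates.items() if img is not None]
    # Both lists empty means unconditional generation.
    return images, condition_modalities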
src/utils.py CHANGED

@@ -145,7 +145,7 @@ def resize_and_crop(images, image_size=(1068, 1068), crop_size=(256, 256)):
 
     return [img.resize(image_size).crop((left, top, right, bottom)) for img in images]
 
-def sample_shuffle(
+def sample_shuffle():
    """
    Randomly selects a 'grid_cell', retrieves corresponding images, and optionally prepares them for an interface.
 
@@ -160,15 +160,7 @@ def sample_shuffle(interface=True):
    """
    grid_cell = grid_cell_df.sample().iloc[0]
 
-
-
-    if not interface:
-        return images
-    else:
-        out = []
-        for el in images:
-            out += [el, True]
-        return out
+    return resize_and_crop(get_images(grid_cell))
 
 @spaces.GPU
 def generate_output():
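Note: this simplification is consistent with the new app.py wiring. `load_button.click` now lists only the four `gr.Image` components as outputs, so `sample_shuffle` must return exactly one value per image; the removed `interface=True` branch interleaved each image with `True` to also tick the (now deleted) `Active` checkboxes. A toy illustration of the two return shapes, using placeholder strings instead of real Major TOM thumbnails:

# Placeholder thumbnails standing in for the loaded PIL images.
images = ["s2l1c.png", "s2l2a.png", "s1rtc.png", "dem.png"]

# Old behaviour (interface=True): interleave each image with True so the
# paired "Active" checkbox was ticked -- 8 values for 8 output components.
old_return = []
for el in images:
    old_return += [el, True]

# New behaviour: one value per gr.Image output component.
new_return = images

print(len(old_return), len(new_return))  # 8 4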