# -*- coding: utf-8 -*-
"""run_AIFS_v1.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/#fileId=https%3A//huggingface.co/ecmwf/aifs-single-1.0/blob/94409c197d36d39c380467c6f3130a2be6eb722b/run_AIFS_v1.ipynb
This notebook runs ECMWF's aifs-single-v1 data-driven model, using ECMWF's [open data](https://www.ecmwf.int/en/forecasts/datasets/open-data) dataset and the [anemoi-inference](https://anemoi-inference.readthedocs.io/en/latest/apis/level1.html) package.
# 1. Install Required Packages and Imports
"""
# Uncomment the lines below to install the required packages
# !pip install -q anemoi-inference[huggingface]==0.4.9 anemoi-models==0.3.1
# !pip install -q earthkit-regrid==0.4.0 ecmwf-opendata
# !pip install -q flash_attn
import datetime
from collections import defaultdict
import numpy as np
import earthkit.data as ekd
import earthkit.regrid as ekr
from anemoi.inference.runners.simple import SimpleRunner
from anemoi.inference.outputs.printer import print_state
from ecmwf.opendata import Client as OpendataClient
"""# 2. Retrieve Initial Conditions from ECMWF Open Data
### List of parameters to retrieve form ECMWF open data
"""
PARAM_SFC = ["10u", "10v", "2d", "2t", "msl", "skt", "sp", "tcw", "lsm", "z", "slor", "sdor"]
PARAM_SOIL = ["vsw", "sot"]
PARAM_PL = ["gh", "t", "u", "v", "w", "q"]
LEVELS = [1000, 925, 850, 700, 600, 500, 400, 300, 250, 200, 150, 100, 50]
SOIL_LEVELS = [1, 2]
"""### Select a date"""
DATE = OpendataClient().latest()
print("Initial date is", DATE)
"""### Get the data from the ECMWF Open Data API"""
def get_open_data(param, levelist=[]):
    fields = defaultdict(list)
    # Get the data for the current date and the previous date
    for date in [DATE - datetime.timedelta(hours=6), DATE]:
        data = ekd.from_source("ecmwf-open-data", date=date, param=param, levelist=levelist)
        for f in data:
            # Open data longitudes run from -180 to 180; shift them to 0-360
            assert f.to_numpy().shape == (721, 1440)
            values = np.roll(f.to_numpy(), -f.shape[1] // 2, axis=1)
            # Interpolate the data from the 0.25 degree regular grid to the N320 reduced Gaussian grid
            values = ekr.interpolate(values, {"grid": (0.25, 0.25)}, {"grid": "N320"})
            # Add the values to the list
            name = f"{f.metadata('param')}_{f.metadata('levelist')}" if levelist else f.metadata("param")
            fields[name].append(values)
    # Create a single matrix for each parameter
    for param, values in fields.items():
        fields[param] = np.stack(values)
    return fields
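"""As a quick sanity check, each entry returned by `get_open_data` stacks the two input times into a single array on the N320 grid (542080 points). A hypothetical example:

```
sample = get_open_data(param=["2t"])
print(sample["2t"].shape)  # expected: (2, 542080)
```
"""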
"""### Get Input Fields"""
fields = {}
"""#### Add the single levels fields"""
fields.update(get_open_data(param=PARAM_SFC))
soil = get_open_data(param=PARAM_SOIL, levelist=SOIL_LEVELS)
"""Soil parameters have been renamed since this model was trained, so we need to map them back to the original names"""
mapping = {'sot_1': 'stl1', 'sot_2': 'stl2',
           'vsw_1': 'swvl1', 'vsw_2': 'swvl2'}
for k, v in soil.items():
    fields[mapping[k]] = v
"""#### Add the pressure levels fields"""
fields.update(get_open_data(param=PARAM_PL, levelist=LEVELS))
"""#### Convert geopotential height into geopotential"""
# Transform GH to Z
for level in LEVELS:
    gh = fields.pop(f"gh_{level}")
    fields[f"z_{level}"] = gh * 9.80665
"""### Create Initial State"""
input_state = dict(date=DATE, fields=fields)
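"""The runner expects the initial state as a plain dictionary: `date` holds the analysis time and `fields` maps each parameter name (e.g. `2t`, `z_500`) to an array holding the two input times. A quick inspection sketch:

```
print(input_state["date"])
print(sorted(input_state["fields"].keys())[:5])
print(input_state["fields"]["2t"].shape)  # (2, n_grid_points)
```
"""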
"""# 3. Load the Model and Run the Forecast
### Download the Model's Checkpoint from Hugging Face & create a Runner
"""
checkpoint = {"huggingface":"ecmwf/aifs-single-1.0"}
checkpoint = 'aifs-single-mse-1.0.ckpt'
"""To reduce the memory usage of the model once can set certain environment variables, like the number of chunks of the model's mapper.
Please refer to:
- https://anemoi.readthedocs.io/projects/models/en/latest/modules/layers.html#anemoi-inference-num-chunks
- https://pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-cuda-alloc-conf
for more information. To do so, you can use the code below:
```
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF']='expandable_segments:True'
os.environ['ANEMOI_INFERENCE_NUM_CHUNKS']='16'
```
"""
runner = SimpleRunner(checkpoint, device="cuda")
"""** Note - changing the device from GPU to CPU**
- Running the transformer model used on the CPU is tricky, it depends on the FlashAttention library which only supports Nvidia and AMD GPUs, and is optimised for performance and memory usage
- In newer versions of anemoi-models, v0.4.2 and above, there is an option to switch off flash attention and uses Pytorchs Scaled Dot Product Attention (SDPA). The code snippet below shows how to overwrite a model from a checkpoint to use SDPA. Unfortunately it's not optimised for memory usage in the same way, leading to much greater memory usage. Please refer to https://github.com/ecmwf/anemoi-inference/issues/119 for more details
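The device switch itself mirrors the GPU call above; a minimal sketch, assuming an anemoi-models version with SDPA support:

```
runner = SimpleRunner(checkpoint, device="cpu")
```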
#### Run the forecast
"""
for state in runner.run(input_state=input_state, lead_time=12):
    print_state(state)
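"""Each `state` yielded by the runner is a dictionary with `date`, `latitudes`, `longitudes` and a `fields` mapping from parameter name to a flat array on the model grid. For example, to keep the 2m temperature from every output step (a hypothetical post-processing sketch that re-runs the forecast):

```
t2m_series = {}
for state in runner.run(input_state=input_state, lead_time=12):
    t2m_series[state["date"]] = state["fields"]["2t"]
```
"""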
"""**Note**
Due to the non-determinism of GPUs, users will be unable to exactly reproduce an official AIFS forecast when running AIFS Single themselves.
If you want to enforece determinism at GPU level, you can do so enforcing the following settings:
```
#First in your terminal
export CUBLAS_WORKSPACE_CONFIG=:4096:8
#And then before running inference:
import torch
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.use_deterministic_algorithms(True)
```
Using the above will lead to a significant increase in runtime. Additionally, the input conditions here are provided by open data. The reprojection performed on open data differs from the one carried out at the operational level, hence small differences in the forecast are expected.
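If you prefer to stay in Python, the cuBLAS variable can also be set with `os.environ`, as long as this happens before the first CUDA initialisation:

```
import os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
```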
# 4. Inspect the generated forecast
#### Plot a field
"""
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.tri as tri
def fix(lons):
    # Shift the longitudes from 0-360 to -180-180
    return np.where(lons > 180, lons - 360, lons)
latitudes = state["latitudes"]
longitudes = state["longitudes"]
values = state["fields"]["100u"]
fig, ax = plt.subplots(figsize=(11, 6), subplot_kw={"projection": ccrs.PlateCarree()})
ax.coastlines()
ax.add_feature(cfeature.BORDERS, linestyle=":")
triangulation = tri.Triangulation(fix(longitudes), latitudes)
contour = ax.tricontourf(triangulation, values, levels=20, transform=ccrs.PlateCarree(), cmap="RdBu")
cbar = fig.colorbar(contour, ax=ax, orientation="vertical", shrink=0.7, label="100u")
plt.title("100m winds (100u) at {}".format(state["date"]))
plt.show()
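"""To save the figure to disk instead of only displaying it (a standard Matplotlib call, not specific to this notebook):

```
fig.savefig("aifs_100u_forecast.png", dpi=150, bbox_inches="tight")
```
"""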