# -*- coding: utf-8 -*-
"""run_AIFS_v1.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/#fileId=https%3A//huggingface.co/ecmwf/aifs-single-1.0/blob/94409c197d36d39c380467c6f3130a2be6eb722b/run_AIFS_v1.ipynb

This notebook runs ECMWF's aifs-single-v1 data-driven model, using ECMWF's [open data](https://www.ecmwf.int/en/forecasts/datasets/open-data) dataset and the [anemoi-inference](https://anemoi-inference.readthedocs.io/en/latest/apis/level1.html) package.

# 1. Install Required Packages and Imports
"""
# Uncomment the lines below to install the required packages
# !pip install -q anemoi-inference[huggingface]==0.4.9 anemoi-models==0.3.1
# !pip install -q earthkit-regrid==0.4.0 ecmwf-opendata
# !pip install -q flash_attn
import datetime
from collections import defaultdict

import numpy as np

import earthkit.data as ekd
import earthkit.regrid as ekr
from anemoi.inference.outputs.printer import print_state
from anemoi.inference.runners.simple import SimpleRunner
from ecmwf.opendata import Client as OpendataClient

"""# 2. Retrieve Initial Conditions from ECMWF Open Data

### List of parameters to retrieve from ECMWF open data
"""

# Surface fields, soil fields, and upper-air (pressure-level) fields to request.
PARAM_SFC = ["10u", "10v", "2d", "2t", "msl", "skt", "sp", "tcw", "lsm", "z", "slor", "sdor"]
PARAM_SOIL = ["vsw", "sot"]
PARAM_PL = ["gh", "t", "u", "v", "w", "q"]
# Pressure levels (hPa) and soil layers used by the model.
LEVELS = [1000, 925, 850, 700, 600, 500, 400, 300, 250, 200, 150, 100, 50]
SOIL_LEVELS = [1, 2]

"""### Select a date"""

# Most recent cycle available on ECMWF open data (this performs a network call).
DATE = OpendataClient().latest()
print("Initial date is", DATE)

"""### Get the data from the ECMWF Open Data API"""
def get_open_data(param, levelist=None):
    """Fetch *param* fields from ECMWF open data for DATE-6h and DATE.

    Parameters
    ----------
    param : list of str
        Parameter short names to retrieve.
    levelist : list of int, optional
        Levels to retrieve; leave unset (or pass an empty list) for
        single-level fields.

    Returns
    -------
    dict
        Maps field name (``"<param>"`` or ``"<param>_<level>"``) to an
        array of shape ``(2, npoints)`` on the N320 grid — one row per
        input time, oldest first.
    """
    # Avoid the mutable-default-argument trap; None and [] behave the same.
    if levelist is None:
        levelist = []
    fields = defaultdict(list)
    # The model needs two input times: the analysis and the state 6 h earlier.
    for date in [DATE - datetime.timedelta(hours=6), DATE]:
        data = ekd.from_source("ecmwf-open-data", date=date, param=param, levelist=levelist)
        for f in data:
            # Convert once; the original converted twice (once just for the assert).
            values = f.to_numpy()
            # Open data comes on a global 0.25-degree lat/lon grid (721 x 1440).
            # Raise instead of assert so the check survives `python -O`.
            if values.shape != (721, 1440):
                raise ValueError(f"unexpected field shape {values.shape}, expected (721, 1440)")
            # Open data longitudes run -180..180; shift them to 0..360.
            values = np.roll(values, -f.shape[1] // 2, axis=1)
            # Interpolate from the 0.25-degree grid to the model's N320 grid.
            values = ekr.interpolate(values, {"grid": (0.25, 0.25)}, {"grid": "N320"})
            name = f"{f.metadata('param')}_{f.metadata('levelist')}" if levelist else f.metadata("param")
            fields[name].append(values)
    # Stack the two time steps into a single (2, npoints) array per field.
    return {name: np.stack(steps) for name, steps in fields.items()}
"""### Get Input Fields"""

fields = {}

"""#### Add the single levels fields"""

fields.update(get_open_data(param=PARAM_SFC))

soil = get_open_data(param=PARAM_SOIL, levelist=SOIL_LEVELS)

"""Soil parameters have been renamed since training this model, we need to rename to the original names"""

# Open-data names -> names the checkpoint was trained with.
mapping = {
    'sot_1': 'stl1',
    'sot_2': 'stl2',
    'vsw_1': 'swvl1',
    'vsw_2': 'swvl2',
}
fields.update({mapping[name]: values for name, values in soil.items()})

"""#### Add the pressure levels fields"""

fields.update(get_open_data(param=PARAM_PL, levelist=LEVELS))

"""#### Convert geopotential height into geopotential"""

# The model was trained on geopotential z = g * gh on every pressure level.
for level in LEVELS:
    fields[f"z_{level}"] = fields.pop(f"gh_{level}") * 9.80665

"""### Create Initial State"""

input_state = dict(date=DATE, fields=fields)
"""# 3. Load the Model and Run the Forecast

### Download the Model's Checkpoint from Hugging Face & create a Runner
"""

# NOTE(review): the original code assigned the Hugging Face checkpoint spec
# and then immediately overwrote it with a local path, leaving the first
# assignment dead. Keep the effective value (the local checkpoint file) and
# keep the Hugging Face spec as a documented alternative:
# checkpoint = {"huggingface": "ecmwf/aifs-single-1.0"}  # download from Hugging Face
checkpoint = 'aifs-single-mse-1.0.ckpt'  # local checkpoint file
"""To reduce the memory usage of the model one can set certain environment variables, like the number of chunks of the model's mapper.

Please refer to:

- https://anemoi.readthedocs.io/projects/models/en/latest/modules/layers.html#anemoi-inference-num-chunks
- https://pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-cuda-alloc-conf

for more information. To do so, you can use the code below:

```
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF']='expandable_segments:True'
os.environ['ANEMOI_INFERENCE_NUM_CHUNKS']='16'
```
"""

# Build the inference runner from the checkpoint. A CUDA-capable GPU is
# required here because the model relies on FlashAttention (see note below).
runner = SimpleRunner(checkpoint, device="cuda")

"""** Note - changing the device from GPU to CPU**

- Running the transformer model used on the CPU is tricky, it depends on the FlashAttention library which only supports Nvidia and AMD GPUs, and is optimised for performance and memory usage
- In newer versions of anemoi-models, v0.4.2 and above, there is an option to switch off flash attention and use Pytorch's Scaled Dot Product Attention (SDPA). Unfortunately it's not optimised for memory usage in the same way, leading to much greater memory usage. Please refer to https://github.com/ecmwf/anemoi-inference/issues/119 for more details

#### Run the forecast
"""

# Step the model forward 12 hours; each yielded state is one output time.
for state in runner.run(input_state=input_state, lead_time=12):
    print_state(state)
"""**Note**

Due to the non-determinism of GPUs, users will be unable to exactly reproduce an official AIFS forecast when running AIFS Single themselves.

If you want to enforce determinism at GPU level, you can do so by enforcing the following settings:

```
# First, in your terminal:
export CUBLAS_WORKSPACE_CONFIG=:4096:8
# And then, before running inference:
import torch
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.use_deterministic_algorithms(True)
```

Using the above will lead to a significant increase in runtime. Additionally, the input conditions here are provided by open data. The reprojection performed on open data differs from the one carried out at the operational level, hence small differences in the forecast are expected.

# 4. Inspect the generated forecast

#### Plot a field
"""
import matplotlib.pyplot as plt
import matplotlib.tri as tri

import cartopy.crs as ccrs
import cartopy.feature as cfeature
def fix(lons):
    """Wrap longitudes given in the 0-360 convention into -180..180."""
    beyond_dateline = lons > 180
    return np.where(beyond_dateline, lons - 360, lons)
# Pull the coordinates and the 100 m zonal wind from the last forecast state
# produced by the run loop above.
latitudes = state["latitudes"]
longitudes = state["longitudes"]
values = state["fields"]["100u"]

fig, ax = plt.subplots(figsize=(11, 6), subplot_kw={"projection": ccrs.PlateCarree()})
ax.coastlines()
ax.add_feature(cfeature.BORDERS, linestyle=":")

# The output is on an unstructured (reduced Gaussian) grid, so contour via a
# triangulation rather than a regular mesh; longitudes must be -180..180.
triangulation = tri.Triangulation(fix(longitudes), latitudes)
contour = ax.tricontourf(triangulation, values, levels=20, transform=ccrs.PlateCarree(), cmap="RdBu")
# Drop the unused `cbar =` assignment from the original; the call alone adds the bar.
fig.colorbar(contour, ax=ax, orientation="vertical", shrink=0.7, label="100u")
plt.title(f"100m winds (100u) at {state['date']}")
plt.show()