cleaned tutorials

Alfredo Oliviero 2024-11-07 10:47:11 +01:00
parent 86a239bd8f
commit 24b4fff9f2
7 changed files with 2188 additions and 250 deletions

clean_notebooks.py Normal file

@ -0,0 +1,26 @@
import os

import nbformat


def clear_notebook_output(notebook_path):
    # Read the notebook, drop outputs and execution counts from code cells,
    # then write it back in place.
    with open(notebook_path, 'r', encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)
    for cell in nb.cells:
        if cell.cell_type == 'code':
            cell.outputs = []
            cell.execution_count = None
    with open(notebook_path, 'w', encoding='utf-8') as f:
        nbformat.write(nb, f)
    print(f'Cleared output for {notebook_path}')


def walk_and_clear_outputs(base_dir):
    # Recursively clear outputs for every .ipynb file under base_dir.
    for root, dirs, files in os.walk(base_dir):
        for file in files:
            if file.endswith('.ipynb'):
                notebook_path = os.path.join(root, file)
                clear_notebook_output(notebook_path)


if __name__ == "__main__":
    base_dir = '.'  # Change this to the directory you want to start from
    walk_and_clear_outputs(base_dir)
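
Run from the repository root, the script clears outputs for every notebook under the current directory. It can also be imported and pointed at a single folder; a minimal sketch (the `tutorials/` path is only an illustration):

from clean_notebooks import walk_and_clear_outputs

# Clear outputs only for notebooks under a chosen subtree (path is illustrative)
walk_and_clear_outputs("tutorials")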


@ -44,32 +44,9 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: cdsapi in /opt/conda/lib/python3.8/site-packages (0.7.4)\n",
"Requirement already satisfied: cads-api-client>=1.4.7 in /opt/conda/lib/python3.8/site-packages (from cdsapi) (1.5.1)\n",
"Requirement already satisfied: tqdm in /opt/conda/lib/python3.8/site-packages (from cdsapi) (4.60.0)\n",
"Requirement already satisfied: requests>=2.5.0 in /opt/conda/lib/python3.8/site-packages (from cdsapi) (2.24.0)\n",
"Requirement already satisfied: multiurl>=0.3.2 in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (0.3.2)\n",
"Requirement already satisfied: attrs in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (24.2.0)\n",
"Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (4.12.2)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (2020.12.5)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (3.0.4)\n",
"Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (2.10)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (1.25.10)\n",
"Requirement already satisfied: python-dateutil in /opt/conda/lib/python3.8/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi) (2.8.1)\n",
"Requirement already satisfied: pytz in /opt/conda/lib/python3.8/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi) (2020.1)\n",
"Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.8/site-packages (from python-dateutil->multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi) (1.15.0)\n",
"Requirement already satisfied: attrs>=24.0.0 in /opt/conda/lib/python3.8/site-packages (24.2.0)\n",
"Requirement already satisfied: typing_extensions>=4.0.0 in /opt/conda/lib/python3.8/site-packages (4.12.2)\n"
]
}
],
"outputs": [],
"source": [
"!pip install \"cdsapi>=0.7.4\"\n",
"!pip install \"attrs>=24.0.0\"\n",
@ -78,96 +55,27 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (1.20.3)\n",
"Requirement already satisfied: matplotlib in /opt/conda/lib/python3.8/site-packages (3.3.4)\n",
"Requirement already satisfied: cartopy in /opt/conda/lib/python3.8/site-packages (0.18.0)\n",
"Requirement already satisfied: xarray in /opt/conda/lib/python3.8/site-packages (0.17.0)\n",
"Requirement already satisfied: netCDF4 in /opt/conda/lib/python3.8/site-packages (1.5.6)\n",
"Requirement already satisfied: cdsapi in /opt/conda/lib/python3.8/site-packages (0.7.4)\n",
"Requirement already satisfied: python-dateutil>=2.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib) (2.8.1)\n",
"Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.8/site-packages (from matplotlib) (0.10.0)\n",
"Requirement already satisfied: pillow>=6.2.0 in /opt/conda/lib/python3.8/site-packages (from matplotlib) (7.2.0)\n",
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in /opt/conda/lib/python3.8/site-packages (from matplotlib) (2.4.7)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib) (1.2.0)\n",
"Requirement already satisfied: shapely>=1.5.6 in /opt/conda/lib/python3.8/site-packages (from cartopy) (1.7.1)\n",
"Requirement already satisfied: six>=1.3.0 in /opt/conda/lib/python3.8/site-packages (from cartopy) (1.15.0)\n",
"Requirement already satisfied: setuptools>=0.7.2 in /opt/conda/lib/python3.8/site-packages (from cartopy) (49.6.0.post20200814)\n",
"Requirement already satisfied: pyshp>=1.1.4 in /opt/conda/lib/python3.8/site-packages (from cartopy) (2.1.3)\n",
"Requirement already satisfied: pandas>=0.25 in /opt/conda/lib/python3.8/site-packages (from xarray) (1.1.1)\n",
"Requirement already satisfied: cftime in /opt/conda/lib/python3.8/site-packages (from netCDF4) (1.5.0)\n",
"Requirement already satisfied: cads-api-client>=1.4.7 in /opt/conda/lib/python3.8/site-packages (from cdsapi) (1.5.1)\n",
"Requirement already satisfied: tqdm in /opt/conda/lib/python3.8/site-packages (from cdsapi) (4.60.0)\n",
"Requirement already satisfied: requests>=2.5.0 in /opt/conda/lib/python3.8/site-packages (from cdsapi) (2.24.0)\n",
"Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.8/site-packages (from pandas>=0.25->xarray) (2020.1)\n",
"Requirement already satisfied: multiurl>=0.3.2 in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (0.3.2)\n",
"Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (4.12.2)\n",
"Requirement already satisfied: attrs in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (24.2.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (2020.12.5)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (3.0.4)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (1.25.10)\n",
"Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (2.10)\n"
]
}
],
"outputs": [],
"source": [
"!pip install numpy matplotlib cartopy xarray netCDF4 cdsapi"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pandas in /opt/conda/lib/python3.8/site-packages (1.1.1)\n",
"Requirement already satisfied: numpy>=1.15.4 in /opt/conda/lib/python3.8/site-packages (from pandas) (1.20.3)\n",
"Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.8/site-packages (from pandas) (2020.1)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.8/site-packages (from pandas) (2.8.1)\n",
"Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"outputs": [],
"source": [
"pip install pandas"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: xarray in /opt/conda/lib/python3.8/site-packages (0.17.0)\n",
"Requirement already satisfied: zarr in /opt/conda/lib/python3.8/site-packages (2.5.0)\n",
"Requirement already satisfied: dask in /opt/conda/lib/python3.8/site-packages (2.20.0)\n",
"Requirement already satisfied: fsspec in /opt/conda/lib/python3.8/site-packages (0.8.0)\n",
"Requirement already satisfied: numpy>=1.15 in /opt/conda/lib/python3.8/site-packages (from xarray) (1.20.3)\n",
"Requirement already satisfied: setuptools>=40.4 in /opt/conda/lib/python3.8/site-packages (from xarray) (49.6.0.post20200814)\n",
"Requirement already satisfied: pandas>=0.25 in /opt/conda/lib/python3.8/site-packages (from xarray) (1.1.1)\n",
"Requirement already satisfied: fasteners in /opt/conda/lib/python3.8/site-packages (from zarr) (0.16)\n",
"Requirement already satisfied: asciitree in /opt/conda/lib/python3.8/site-packages (from zarr) (0.3.3)\n",
"Requirement already satisfied: numcodecs>=0.6.4 in /opt/conda/lib/python3.8/site-packages (from zarr) (0.7.3)\n",
"Requirement already satisfied: pyyaml in /opt/conda/lib/python3.8/site-packages (from dask) (5.3.1)\n",
"Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.8/site-packages (from pandas>=0.25->xarray) (2020.1)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.8/site-packages (from pandas>=0.25->xarray) (2.8.1)\n",
"Requirement already satisfied: six in /opt/conda/lib/python3.8/site-packages (from fasteners->zarr) (1.15.0)\n"
]
}
],
"outputs": [],
"source": [
"!pip install zarr dask fsspec"
]
@ -206,43 +114,9 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting git+https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git\n",
" Cloning https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git to /tmp/pip-req-build-luzr1m0l\n",
"Requirement already satisfied, skipping upgrade: cdsapi>=0.7.2 in /opt/conda/lib/python3.8/site-packages (from d4science-copernicus-cds==1.0.0) (0.7.4)\n",
"Requirement already satisfied, skipping upgrade: attrs in /opt/conda/lib/python3.8/site-packages (from d4science-copernicus-cds==1.0.0) (20.1.0)\n",
"Requirement already satisfied, skipping upgrade: typing_extensions in /opt/conda/lib/python3.8/site-packages (from d4science-copernicus-cds==1.0.0) (3.7.4.2)\n",
"Requirement already satisfied, skipping upgrade: requests>=2.5.0 in /opt/conda/lib/python3.8/site-packages (from cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2.24.0)\n",
"Requirement already satisfied, skipping upgrade: tqdm in /opt/conda/lib/python3.8/site-packages (from cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (4.60.0)\n",
"Requirement already satisfied, skipping upgrade: cads-api-client>=1.4.7 in /opt/conda/lib/python3.8/site-packages (from cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (1.5.1)\n",
"Requirement already satisfied, skipping upgrade: chardet<4,>=3.0.2 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (3.0.4)\n",
"Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2020.12.5)\n",
"Requirement already satisfied, skipping upgrade: idna<3,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2.10)\n",
"Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (1.25.10)\n",
"Requirement already satisfied, skipping upgrade: multiurl>=0.3.2 in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (0.3.2)\n",
"Requirement already satisfied, skipping upgrade: pytz in /opt/conda/lib/python3.8/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2020.1)\n",
"Requirement already satisfied, skipping upgrade: python-dateutil in /opt/conda/lib/python3.8/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2.8.1)\n",
"Requirement already satisfied, skipping upgrade: six>=1.5 in /opt/conda/lib/python3.8/site-packages (from python-dateutil->multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (1.15.0)\n",
"Building wheels for collected packages: d4science-copernicus-cds\n",
" Building wheel for d4science-copernicus-cds (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for d4science-copernicus-cds: filename=d4science_copernicus_cds-1.0.0-py3-none-any.whl size=12134 sha256=1f12bffb2cf09d7083b136fb4de34b441adb37ef0d74f468b41c08e309bf204d\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-8yuqw8dx/wheels/52/8f/79/78b8dae3ae67225c9ad8417f73f2b630b4ad077f0a27911303\n",
"Successfully built d4science-copernicus-cds\n",
"Installing collected packages: d4science-copernicus-cds\n",
" Attempting uninstall: d4science-copernicus-cds\n",
" Found existing installation: d4science-copernicus-cds 1.0.0\n",
" Uninstalling d4science-copernicus-cds-1.0.0:\n",
" Successfully uninstalled d4science-copernicus-cds-1.0.0\n",
"Successfully installed d4science-copernicus-cds-1.0.0\n"
]
}
],
"outputs": [],
"source": [
"!pip install -U git+https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git"
]
@ -258,7 +132,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -296,21 +170,9 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ENV - None None\n",
"env is not configured\n",
"Configuration from file /home/jovyan/.cdsapirc: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
"saving config to env\n",
"Set environment variables CDSAPI_URL, CDSAPI_KEY\n"
]
}
],
"outputs": [],
"source": [
"client = cds_authenticate()"
]
@ -333,34 +195,9 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ENV - https://cds.climate.copernicus.eu/api db1f2085-6b8b-42e6-b832-625dfaf831a4\n",
"Configuration from environment {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
"Configuration from file /home/jovyan/.cdsapirc: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
"Configuration from environment: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
"Configuration from file: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n"
]
},
{
"data": {
"text/plain": [
"({'url': 'https://cds.climate.copernicus.eu/api',\n",
" 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'},\n",
" {'url': 'https://cds.climate.copernicus.eu/api',\n",
" 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'})"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"cds_show_conf()"
]
@ -381,18 +218,9 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"URL https://cds.climate.copernicus.eu/api\n",
"KEY db1f2085-6b8b-42e6-b832-625dfaf831a4\n"
]
}
],
"outputs": [],
"source": [
"URL, KEY = cds_get_credentials()\n",
"print(\"URL\", URL)\n",
@ -416,19 +244,9 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ENV - https://cds.climate.copernicus.eu/api db1f2085-6b8b-42e6-b832-625dfaf831a4\n",
"Configuration from environment {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
"Saved Configuration file /home/jovyan/.cdsapirc\n"
]
}
],
"outputs": [],
"source": [
"cds_save_conf()"
]
@ -448,7 +266,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -470,7 +288,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -490,34 +308,9 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ENV - https://cds.climate.copernicus.eu/api db1f2085-6b8b-42e6-b832-625dfaf831a4\n",
"Configuration from environment {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
"Configuration from file /home/jovyan/.cdsapirc: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
"Configuration from environment: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
"Configuration from file: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n"
]
},
{
"data": {
"text/plain": [
"({'url': 'https://cds.climate.copernicus.eu/api',\n",
" 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'},\n",
" {'url': 'https://cds.climate.copernicus.eu/api',\n",
" 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'})"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"cds_show_conf()"
]
@ -547,17 +340,9 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"datadir: %s /home/jovyan/cds_dataDir/out_2024_11_06_16_34_36_example/\n"
]
}
],
"outputs": [],
"source": [
"datadir = cds_datadir(\"example\")"
]
@ -589,17 +374,9 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"datadir: %s ./out/out_2024_11_06_16_34_36_current_example/\n"
]
}
],
"outputs": [],
"source": [
"datadir_current = cds_datadir(\"current_example\", basepath=\"./out\")\n"
]
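
The outputs removed in this diff had captured the account's CDS API key (see the deleted `cds_show_conf()` and `cds_get_credentials()` output lines above), which is exactly why stripping outputs before committing matters. A small guard in the same spirit as clean_notebooks.py could refuse notebooks that still carry outputs; a minimal sketch, not part of the commit:

import sys
import nbformat

def has_outputs(path):
    # True if any code cell still carries outputs or an execution count
    nb = nbformat.read(path, as_version=4)
    return any(
        cell.get("outputs") or cell.get("execution_count") is not None
        for cell in nb.cells
        if cell.cell_type == "code"
    )

if __name__ == "__main__":
    dirty = [p for p in sys.argv[1:] if has_outputs(p)]
    if dirty:
        print("Notebooks still containing outputs:", *dirty, sep="\n  ")
        sys.exit(1)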

requirements.txt Normal file

@ -0,0 +1 @@
nbformat
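
The new requirements.txt lists only `nbformat`, the single dependency of the cleanup script. One way to install it programmatically (equivalent to running `pip install -r requirements.txt` on the command line):

import subprocess
import sys

# Install the cleanup script's only dependency into the current interpreter
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])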


@ -2,6 +2,7 @@ cdsapi>=0.7.4
attrs>=24.0.0
typing_extensions>=4.0.0
numpy>=1.16.5,<1.23.0
llvmlite 0.31. --ignore-installed
matplotlib
cartopy
xarray


@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "8130d058",
"metadata": {},
"source": [
"# Tutorial on July 2023 record-breaking global surface temperatures using climate data from C3S"
@ -9,6 +10,7 @@
},
{
"cell_type": "markdown",
"id": "65b72b64",
"metadata": {},
"source": [
"### About\n",
@ -29,6 +31,7 @@
},
{
"cell_type": "markdown",
"id": "771a66cc",
"metadata": {},
"source": [
"### d4science_copernicus_cds Library\n",
@ -50,6 +53,7 @@
},
{
"cell_type": "markdown",
"id": "fe7a0949",
"metadata": {},
"source": [
"This tutorial is based on the official turorial **[CDS API guide](https://ecmwf-projects.github.io/copernicus-training-c3s/reanalysis-temp-record.html)**, extended and adapted for use in the **BlueCloud JupyterLab** environment."
@ -58,6 +62,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "f07ec047",
"metadata": {},
"outputs": [],
"source": [
@ -67,6 +72,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "e10262fc",
"metadata": {},
"outputs": [],
"source": [
@ -76,6 +82,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "4bd3fb71",
"metadata": {},
"outputs": [],
"source": [
@ -90,6 +97,7 @@
},
{
"cell_type": "markdown",
"id": "692a7b7c",
"metadata": {},
"source": [
"cds_datadir will create a folder in our workspace, under cds_dataDir, with current timestamp and custom label"
@ -98,6 +106,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "4e3a9659",
"metadata": {},
"outputs": [],
"source": [
@ -107,6 +116,7 @@
},
{
"cell_type": "markdown",
"id": "07fcf992",
"metadata": {},
"source": [
"## 1. Search, download and view data"
@ -114,6 +124,7 @@
},
{
"cell_type": "markdown",
"id": "54ad2fba",
"metadata": {},
"source": [
"Before we begin we must prepare our environment. This includes installing the Application Programming Interface (API) of the CDS as well as other required libs, and importing the various python libraries that we will need."
@ -121,6 +132,7 @@
},
{
"cell_type": "markdown",
"id": "fc93afc0",
"metadata": {},
"source": [
"#### Infrastructure introduction (installing API of the CDS)"
@ -128,6 +140,7 @@
},
{
"cell_type": "markdown",
"id": "d8db144f",
"metadata": {},
"source": [
"In this exercise we will mainly use `cdsapi`, `xarray`, `matplotlib` and `cartopy` python libraries."
@ -135,6 +148,7 @@
},
{
"cell_type": "markdown",
"id": "5638a7c8",
"metadata": {},
"source": [
"There are several options to run the code in this tutorial:\n",
@ -144,6 +158,7 @@
},
{
"cell_type": "markdown",
"id": "2a0767bd",
"metadata": {},
"source": [
"#### Installation on your computer"
@ -151,6 +166,7 @@
},
{
"cell_type": "markdown",
"id": "91ee485f",
"metadata": {},
"source": [
"First of all, in order to run this notebook on your computer you need to install Python and the required libs."
@ -158,6 +174,7 @@
},
{
"cell_type": "markdown",
"id": "2875c115",
"metadata": {},
"source": [
"The easiest way to install Python without interfering with other potential Python installations on your system is by using [Miniconda, Miniforge or Mambaforge](https://github.com/conda-forge/miniforge/blob/main/README.md). This will install a modern Python for your user and the **Conda**/**Mamba** package manager. **Mamba** is a performant drop-in replacement for **Conda**."
@ -165,6 +182,7 @@
},
{
"cell_type": "markdown",
"id": "aeb3656c",
"metadata": {},
"source": [
"Once Python + **Conda**/**Mamba** are installed run the following from the command line to install the API of the CDS, `cdsapi`, and the rest of the requirements:\n",
@ -180,6 +198,7 @@
},
{
"cell_type": "markdown",
"id": "4a2a46d7",
"metadata": {},
"source": [
"If everything is installed correctly run the following from the command line:\n",
@ -193,6 +212,7 @@
},
{
"cell_type": "markdown",
"id": "eb3c1071",
"metadata": {},
"source": [
"#### Running on Colab or Kaggle"
@ -200,6 +220,7 @@
},
{
"cell_type": "markdown",
"id": "c6fe3496",
"metadata": {},
"source": [
"If you are on Colab or Kaggle just run the following line of code to install the API of the CDS and the rest of the dependencies before running the rest of the code:"
@ -208,6 +229,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "73fdf414",
"metadata": {},
"outputs": [],
"source": [
@ -216,6 +238,7 @@
},
{
"cell_type": "markdown",
"id": "779f51f5",
"metadata": {},
"source": [
"#### Import libraries"
@ -223,6 +246,7 @@
},
{
"cell_type": "markdown",
"id": "be67dade",
"metadata": {},
"source": [
"We will start importing the required libraries. These libs should be already installed. If you have not installed the requirements, please go to the specific section above."
@ -231,6 +255,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "c144aa3f",
"metadata": {},
"outputs": [],
"source": [
@ -248,6 +273,7 @@
},
{
"cell_type": "markdown",
"id": "afd220dd",
"metadata": {},
"source": [
"#### Search for data\n",
@ -258,6 +284,7 @@
},
{
"cell_type": "markdown",
"id": "939b2301",
"metadata": {},
"source": [
"Having selected the correct dataset, we now need to specify what product type, variables, temporal and geographic coverage we are interested in. These can all be selected in the **\"Download data\"** tab. In this tab a form appears in which we will select the following parameters to download:\n",
@ -275,6 +302,7 @@
},
{
"cell_type": "markdown",
"id": "6e755a8b",
"metadata": {},
"source": [
"<center><img src=\"img/reanalysis-temp-record-001.png\" /></center>"
@ -282,6 +310,7 @@
},
{
"cell_type": "markdown",
"id": "f9755801",
"metadata": {},
"source": [
"#### Download data\n",
@ -292,6 +321,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "9d8818a8",
"metadata": {},
"outputs": [],
"source": [
@ -342,6 +372,7 @@
},
{
"cell_type": "markdown",
"id": "7ed10fdc",
"metadata": {},
"source": [
"#### Inspect data\n",
@ -352,6 +383,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "8938c920",
"metadata": {},
"outputs": [],
"source": [
@ -360,6 +392,7 @@
},
{
"cell_type": "markdown",
"id": "b2f8d3c9",
"metadata": {},
"source": [
"Now we can query our newly created Xarray dataset... Let's have a look at the `ds`."
@ -368,6 +401,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "e32a3886",
"metadata": {},
"outputs": [],
"source": [
@ -376,6 +410,7 @@
},
{
"cell_type": "markdown",
"id": "7c42c6e1",
"metadata": {},
"source": [
"We see that the dataset has one variable called `t2m`, which stands for \"2 metre temperature\", and three coordinates of `longitude`, `latitude` and `time`. \n",
@ -393,6 +428,7 @@
},
{
"cell_type": "markdown",
"id": "a99342c1",
"metadata": {},
"source": [
"There is also an `expver` coordinate. More on this later."
@ -400,6 +436,7 @@
},
{
"cell_type": "markdown",
"id": "b1978d15",
"metadata": {},
"source": [
"Select the icons to the right of the table above to expand the attributes of the coordinates and data variables. What are the units of the temperature data?"
@ -407,6 +444,7 @@
},
{
"cell_type": "markdown",
"id": "3dc62489",
"metadata": {},
"source": [
"While an Xarray dataset may contain multiple variables, an Xarray data array holds a single multi-dimensional variable and its coordinates. To make the processing of the `t2m` data easier, we convert it into an Xarray data array. We will call it `da_tmp` (a temporary data array) because we will transform the data in some ways."
@ -415,6 +453,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "da8db3c5",
"metadata": {},
"outputs": [],
"source": [
@ -423,6 +462,7 @@
},
{
"cell_type": "markdown",
"id": "d2a03104",
"metadata": {},
"source": [
"Let's view this data:"
@ -431,6 +471,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "e9e1cc50",
"metadata": {},
"outputs": [],
"source": [
@ -439,6 +480,7 @@
},
{
"cell_type": "markdown",
"id": "17c621f8",
"metadata": {},
"source": [
"From the result of the cell above you can see that now we have a `xarray.DataArray`."
@ -446,6 +488,7 @@
},
{
"cell_type": "markdown",
"id": "9f34f5d4",
"metadata": {},
"source": [
"#### Merge the two ERA5 experiments (1 and 5, `expver = [1,5]`)\n",
@ -458,6 +501,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "cd5eb8a2",
"metadata": {},
"outputs": [],
"source": [
@ -467,6 +511,7 @@
},
{
"cell_type": "markdown",
"id": "3b95cc14",
"metadata": {},
"source": [
"Let's check again the `da_tmp` data array. If there was an `expver` coordinate we [reduce this dimension](https://docs.xarray.dev/en/stable/generated/xarray.DataArray.reduce.html) by performing a [`nansum`](https://numpy.org/doc/stable/reference/generated/numpy.nansum.html) operation, i.e. a sum of the array elements over this axis, treating Not a Numbers (NaNs) as zero. The result is a new `xarray.DataArray` merging the data along the `expver` dimension:"
@ -475,6 +520,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "9109ec75",
"metadata": {},
"outputs": [],
"source": [
@ -483,6 +529,7 @@
},
{
"cell_type": "markdown",
"id": "1ee284b5",
"metadata": {},
"source": [
"Now the data array contains the three expected dimensions: `time`, `latitude` and `longitude`."
@ -490,6 +537,7 @@
},
{
"cell_type": "markdown",
"id": "53ad44a8",
"metadata": {},
"source": [
"#### Change temperature units from Kelvin to Celsius\n",
@ -500,6 +548,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "8d8e1cbb",
"metadata": {},
"outputs": [],
"source": [
@ -512,6 +561,7 @@
},
{
"cell_type": "markdown",
"id": "3fedd3fb",
"metadata": {},
"source": [
"#### Data to be used"
@ -519,6 +569,7 @@
},
{
"cell_type": "markdown",
"id": "36645c48",
"metadata": {},
"source": [
"The `da_celsius` data array will be used in the rest of the surface temperature exercise. Let's check what we have:"
@ -527,6 +578,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "c519a979",
"metadata": {},
"outputs": [],
"source": [
@ -535,6 +587,7 @@
},
{
"cell_type": "markdown",
"id": "12436699",
"metadata": {},
"source": [
"Now we can see the updated values in *Celsius* and the `units` attribute updated accordingly."
@ -542,6 +595,7 @@
},
{
"cell_type": "markdown",
"id": "ba44a1f5",
"metadata": {},
"source": [
"#### Plotting one timestep"
@ -549,6 +603,7 @@
},
{
"cell_type": "markdown",
"id": "b582dd41",
"metadata": {},
"source": [
"Just to check what we have so far, let's plot a map of 2m temperature for the first (July 1940) and the last (July 2023) timesteps. We will plot these maps using the convenience method `plot` available for `xarray.DataArray`. This allows the creation of simple plots using one line of code. Also, with the xarray method `sel()`, you can select a data array based on coordinate labels."
@ -557,6 +612,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "87864f37",
"metadata": {},
"outputs": [],
"source": [
@ -566,6 +622,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "c5946b4b",
"metadata": {},
"outputs": [],
"source": [
@ -574,6 +631,7 @@
},
{
"cell_type": "markdown",
"id": "f31fc1ef",
"metadata": {},
"source": [
"## 2. Calculate a surface temperature climatology: reference period 1991-2020"
@ -581,6 +639,7 @@
},
{
"cell_type": "markdown",
"id": "a3cbaa85",
"metadata": {},
"source": [
"#### Standard reference periods and climatologies\n",
@ -595,6 +654,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "15e2eb25",
"metadata": {},
"outputs": [],
"source": [
@ -603,6 +663,7 @@
},
{
"cell_type": "markdown",
"id": "220d67bd",
"metadata": {},
"source": [
"If we have a look at this data object we will see now we have only two coordinates, `latitude` and `longitude`."
@ -611,6 +672,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "7e313e58",
"metadata": {},
"outputs": [],
"source": [
@ -619,6 +681,7 @@
},
{
"cell_type": "markdown",
"id": "aec6b340",
"metadata": {},
"source": [
"We can also make a quick plot to have an exploratory view of this new `xarray.DataArray`:"
@ -627,6 +690,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "900462ab",
"metadata": {},
"outputs": [],
"source": [
@ -635,6 +699,7 @@
},
{
"cell_type": "markdown",
"id": "9d500623",
"metadata": {},
"source": [
"## 3. Visualise surface temperature anomalies\n",
@ -647,6 +712,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "769fb056",
"metadata": {},
"outputs": [],
"source": [
@ -656,6 +722,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "bc174576",
"metadata": {},
"outputs": [],
"source": [
@ -665,6 +732,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "6d34a81f",
"metadata": {},
"outputs": [],
"source": [
@ -674,6 +742,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "0f226683",
"metadata": {},
"outputs": [],
"source": [
@ -682,6 +751,7 @@
},
{
"cell_type": "markdown",
"id": "cd992193",
"metadata": {},
"source": [
"The anomaly will be the difference between `t2m_july2023` and `t2m_ref_per`. A positive value means July 2023 is above the expected mean:"
@ -690,6 +760,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "86868d79",
"metadata": {},
"outputs": [],
"source": [
@ -698,6 +769,7 @@
},
{
"cell_type": "markdown",
"id": "0e57b035",
"metadata": {},
"source": [
"The previous operation results in the anomaly on each longitude and latitude location stored in the `anom` data array. We can plot this in a map to check where the anomaly was positive (July 2023 warmer than the climatology) or negative (July 2023 colder than the climatology). This time we will create the plot using the `matplotlib` and `cartopy` libraries.\n",
@ -708,6 +780,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "61863839",
"metadata": {},
"outputs": [],
"source": [
@ -746,6 +819,7 @@
},
{
"cell_type": "markdown",
"id": "3b3213c7",
"metadata": {},
"source": [
"## 4. View time series and analyse surface temperature trends"
@ -753,6 +827,7 @@
},
{
"cell_type": "markdown",
"id": "b64c9f82",
"metadata": {},
"source": [
"Now let us view the time series from 1940 to 2023 averaged over the entire region. To do this we need to average `da_celsius` over the latitude and longitude dimensions. A very important consideration however is that the gridded data cells do not all correspond to the same areas. The size covered by each data point on the model grid varies as a function of latitude. We need to take this into account when calculating spatial averages. \n",
@ -769,6 +844,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "56a2b4d1",
"metadata": {},
"outputs": [],
"source": [
@ -779,6 +855,7 @@
},
{
"cell_type": "markdown",
"id": "a8cdb891",
"metadata": {},
"source": [
"Then we calculate the weighted mean so we will have a time series with the spatially averaged July `t2m` from 1940 to 2023."
@ -787,6 +864,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "ed7197ee",
"metadata": {},
"outputs": [],
"source": [
@ -795,6 +873,7 @@
},
{
"cell_type": "markdown",
"id": "448b1d59",
"metadata": {},
"source": [
"Let's look at the new data array:"
@ -803,6 +882,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "f2cd6755",
"metadata": {},
"outputs": [],
"source": [
@ -811,6 +891,7 @@
},
{
"cell_type": "markdown",
"id": "ae4315b8",
"metadata": {},
"source": [
"We will calculate the climatology for this global spatially averaged July `t2m`. This value will be used later to check which years have global average 2m temperature above or below the climatology."
@ -819,6 +900,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "12551a1d",
"metadata": {},
"outputs": [],
"source": [
@ -828,6 +910,7 @@
},
{
"cell_type": "markdown",
"id": "d2ce5442",
"metadata": {},
"source": [
"We will create a constant array with the climatology value that has the same length as the time series:"
@ -836,6 +919,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "10a9173c",
"metadata": {},
"outputs": [],
"source": [
@ -844,6 +928,7 @@
},
{
"cell_type": "markdown",
"id": "7b263f44",
"metadata": {},
"source": [
"Let's plot the mean value since 1940. The values below the climatology will be highlighted in light blue while the values above the climatology will be highlighted in red. Code is commented in the code cell below."
@ -852,6 +937,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "ef498af4",
"metadata": {},
"outputs": [],
"source": [
@ -864,6 +950,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "b879e5a4",
"metadata": {},
"outputs": [],
"source": [
@ -910,6 +997,7 @@
},
{
"cell_type": "markdown",
"id": "f584f57c",
"metadata": {},
"source": [
"Could you try a similar figure but using the anomalies (*\"monthly value\" - \"1991-2020 climatological value\"*) instead of the spatially aggregated average monthly values?"
@ -918,6 +1006,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "af9f8782",
"metadata": {},
"outputs": [],
"source": [
@ -926,6 +1015,7 @@
},
{
"cell_type": "markdown",
"id": "5b77860c",
"metadata": {},
"source": [
"Now let's order the months from colder to warmer."
@ -934,6 +1024,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "819e5613",
"metadata": {},
"outputs": [],
"source": [
@ -942,6 +1033,7 @@
},
{
"cell_type": "markdown",
"id": "dc997bf8",
"metadata": {},
"source": [
"Let's have a look to the result and check if it is sorted."
@ -950,6 +1042,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "97493e3c",
"metadata": {},
"outputs": [],
"source": [
@ -958,6 +1051,7 @@
},
{
"cell_type": "markdown",
"id": "ce41cbed",
"metadata": {},
"source": [
"If we plot the ranking from colder to warmer including also the climate normal we'll see the following. As before, code is commented in the code cell below:"
@ -966,6 +1060,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "a1fe678e",
"metadata": {},
"outputs": [],
"source": [
@ -1013,6 +1108,7 @@
},
{
"cell_type": "markdown",
"id": "ee592192",
"metadata": {},
"source": [
"## 5. View time series and analyse North Atlantic sea surface temperature trends"
@ -1020,6 +1116,7 @@
},
{
"cell_type": "markdown",
"id": "0329dc4f",
"metadata": {},
"source": [
"#### This is a new exercise. In this part of the tutorial we will be working with monthly sea surface temperature (SST) data.\n",
@ -1040,6 +1137,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "fed64d52",
"metadata": {},
"outputs": [],
"source": [
@ -1080,6 +1178,7 @@
},
{
"cell_type": "markdown",
"id": "6ab7e416",
"metadata": {},
"source": [
"Let's do some work with this new dataset. First of all, let's read it."
@ -1088,6 +1187,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "992d6184",
"metadata": {},
"outputs": [],
"source": [
@ -1096,6 +1196,7 @@
},
{
"cell_type": "markdown",
"id": "a08d8b8d",
"metadata": {},
"source": [
"Now we can have a look at the dataset:"
@ -1104,6 +1205,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "014eeeb6",
"metadata": {},
"outputs": [],
"source": [
@ -1112,6 +1214,7 @@
},
{
"cell_type": "markdown",
"id": "b0ab7a75",
"metadata": {},
"source": [
"As before, we see there are four dimensions and units are in *Kelvin*. We will work with data in *degrees Celsius* and we will reduce the `expver` dimension as before:"
@ -1120,6 +1223,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "bdaa0b17",
"metadata": {},
"outputs": [],
"source": [
@ -1132,6 +1236,7 @@
},
{
"cell_type": "markdown",
"id": "3b91a7ab",
"metadata": {},
"source": [
"We can have a quick look at the data using the convenient `plot` method:"
@ -1140,6 +1245,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "1b0082ff",
"metadata": {},
"outputs": [],
"source": [
@ -1148,6 +1254,7 @@
},
{
"cell_type": "markdown",
"id": "f1cee5e7",
"metadata": {},
"source": [
"In the plot above we can see many values are below 0, those located on land. Actually, in the original `sst_ds` `xarray.Dataset` the land positions had a value of `numpy.nan`. Now, for `sst_expver` this is not true. This is a result of the previous operation using `numpy.nansum` and subtracting `273.15`. After this operation the land locations have a value of `-273.15` which is not valid. Let's amend this using a mask:"
@ -1156,6 +1263,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "c781e076",
"metadata": {},
"outputs": [],
"source": [
@ -1166,6 +1274,7 @@
},
{
"cell_type": "markdown",
"id": "0447a66f",
"metadata": {},
"source": [
"Again, as before, we weight the dataset by the area:"
@ -1174,6 +1283,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "7c6d857c",
"metadata": {},
"outputs": [],
"source": [
@ -1185,6 +1295,7 @@
},
{
"cell_type": "markdown",
"id": "cb0a62dc",
"metadata": {},
"source": [
"And, also, we calculate the spatially averaged value for each month to get a monthly time series of the average temperature of the sst over the main area of the North Atlantic from January 1991 to July 2023:"
@ -1193,6 +1304,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "88de69f6",
"metadata": {},
"outputs": [],
"source": [
@ -1203,6 +1315,7 @@
},
{
"cell_type": "markdown",
"id": "7b54ae7e",
"metadata": {},
"source": [
"In the plot above we can see the monthly evolution since 1991.\n",
@ -1213,6 +1326,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "ab4bffcb",
"metadata": {},
"outputs": [],
"source": [
@ -1234,6 +1348,7 @@
},
{
"cell_type": "markdown",
"id": "8e29cd28",
"metadata": {},
"source": [
"And once we have this we can compare how recent SST values compare with those of previous years and to the climatology.\n",
@ -1249,6 +1364,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "5e6cb3b4",
"metadata": {},
"outputs": [],
"source": [
@ -1282,6 +1398,7 @@
},
{
"cell_type": "markdown",
"id": "5d7efd10",
"metadata": {},
"source": [
"Notice the dramatic increase in SST over the North Atlantic in 2023 compared to previous years!"

File diff suppressed because it is too large.


@ -0,0 +1,847 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "754e8220",
"metadata": {},
"source": [
"# Sorting pre-requisits for ibicus: downloading and preprocessing data"
]
},
{
"cell_type": "markdown",
"id": "46c1c6de",
"metadata": {},
"source": [
"This notebook shows how to download and preprocess climate model data for bias correction and further use. To apply a bias adjustment method, three datasets are needed: 1) observation or reanalysis data; 2) historical climate model data over the same reference period that observations are available for; and 3) climate model data for a future, or more generally, application, period that is to be bias corrected. \n",
"\n",
"Here we will download and preprocess CMIP6 data from the Climate Data Store (CDS) as climate model input and two reanalysis datasets: 1) ERA5 from the CDS and 2) NCEP/DOE Reanalysis II from the PSL datastore (NOAA).\n",
"\n",
"There are many ways to access climate data on different temporal or spatial resolutions. This notebook is meant to illustrate one possible way to download data at daily resolution which is currently the primary temporal resolution supported in ibicus, although some can be applied at monthly resolution. "
]
},
{
"cell_type": "markdown",
"id": "d9803f09",
"metadata": {},
"source": [
"<hr>"
]
},
{
"cell_type": "markdown",
"id": "a4a70574",
"metadata": {},
"source": [
"### d4science_copernicus_cds Library\n",
"\n",
"To request data from the Climate Data Store (CDS) programmatically using the CDS API, we will manage our authentication with the `d4science_copernicus_cds` library.\n",
"\n",
"The library prompts us to enter our credentials, which are then securely saved in our workspace. **This request is only made the first time**; afterward, the `get_credentials` function will automatically retrieve the credentials from the environment or workspace, eliminating the need to re-enter them in the Jupyter notebook.\n",
"\n",
"To obtain your API credentials:\n",
"1. Register or log in to the CDS at [https://cds.climate.copernicus.eu](https://cds-beta.climate.copernicus.eu).\n",
"2. Visit [https://cds.climate.copernicus.eu/how-to-api](https://cds-beta.climate.copernicus.eu/how-to-api) and copy the API key provided.\n",
"\n",
"The library will prompt you to enter:\n",
"- **URL**: The URL field is prefilled; simply press Enter to accept the default.\n",
"- **KEY**: Insert the obtained API key when prompted, then confirm saving your credentials by pressing \"y.\"\n",
"\n",
"Once saved, your credentials will be loaded automatically in future sessions, ensuring a seamless experience with the CDS API."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80d2e5fe",
"metadata": {},
"outputs": [],
"source": [
"!pip install git+https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bbec7c68",
"metadata": {},
"outputs": [],
"source": [
"from d4science_copernicus_cds import cds_get_credentials, cds_datadir"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4aab64a",
"metadata": {},
"outputs": [],
"source": [
"URL, KEY = cds_get_credentials()\n",
"print(\"URL\", URL)\n",
"print (\"KEY\", KEY)"
]
},
{
"cell_type": "markdown",
"id": "68bb08de",
"metadata": {},
"source": [
"---"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac69b211",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import urllib\n",
"\n",
"# Scientific computing\n",
"import iris\n",
"import xarray\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"id": "88aa13ae",
"metadata": {
"tags": []
},
"source": [
"## 1. Download data"
]
},
{
"cell_type": "markdown",
"id": "15f7f634",
"metadata": {},
"source": [
"Let's create a data-directory where our inputs will go, if it does not yet exist:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0c707ab7",
"metadata": {},
"outputs": [],
"source": [
"DATADIR = \"data_download_and_preprocessing\"\n",
"\n",
"if not os.path.exists(DATADIR):\n",
" os.mkdir(DATADIR)"
]
},
{
"cell_type": "markdown",
"id": "2ba8ad1b",
"metadata": {},
"source": [
"### 1.1. Download climate model data"
]
},
{
"cell_type": "markdown",
"id": "928fcceb",
"metadata": {},
"source": [
"To request climate data from the Climate Data Store (CDS) we will use the CDS API. Run the following cell if you have not yet istalled it:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fd98913b",
"metadata": {},
"outputs": [],
"source": [
"#!pip install cdsapi\n",
"import cdsapi\n",
"\n",
"# We disable urllib3 (used by cdsapi) warning\n",
"import urllib3\n",
"urllib3.disable_warnings()\n"
]
},
{
"cell_type": "markdown",
"id": "c47258de",
"metadata": {},
"source": [
"We make use of the option to manually set the CDS API credentials. First, you have to set two variables: URL and KEY which build together your CDS API key. The string of characters that make up your KEY include your personal User ID and CDS API key. To obtain these, first register or login to the CDS (http://cds.climate.copernicus.eu), then visit https://cds.climate.copernicus.eu/api-how-to and copy the string of characters listed after \"key:\". Replace the ######### below with your key."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "83bd63a0",
"metadata": {},
"outputs": [],
"source": [
"#URL = 'https://cds.climate.copernicus.eu/api/v2'\n",
"#KEY = '########################################' # enter your key instead"
]
},
{
"cell_type": "markdown",
"id": "8cac24af",
"metadata": {},
"source": [
"Let's choose a model and variable of interest, and fix some meta-paramters. If we are interested in multiple variable we can just iterate the code below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e697ae16",
"metadata": {},
"outputs": [],
"source": [
"# choose model\n",
"model = 'mpi_esm1_2_lr'\n",
"\n",
"# choose variables to extract (not all variables available at daily resolution for all cmip6 models at the moment)\n",
"variable = 'precipitation'\n",
"\n",
"# choose area to extract\n",
"area = [80, 3, 20, 30]\n",
"\n",
"# choose a historical period to extract\n",
"period_hist = '1979-01-01/2005-12-31' \n",
"\n",
"# choose a future period to extract:\n",
"period_fut = '2050-01-01/2070-12-31'\n",
"\n",
"# choose a filename for the historical cm data\n",
"fname_cm_hist = f\"cmip6_daily_1979-2015_ipsl_historical_{variable}.zip\"\n",
"\n",
"# choose a filename for the future cm data\n",
"fname_cm_future = f\"cmip6_daily_2050-2070_ipsl_ssp5_8_5_{variable}.zip\"\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "173ff1a6",
"metadata": {},
"source": [
"Both datasets will be in `DATADIR` under `fname_cm_hist` and `fname_cm_future`."
]
},
{
"cell_type": "markdown",
"id": "1f56a8ca",
"metadata": {},
"source": [
"#### 1.1.1. Download historical climate model data"
]
},
{
"cell_type": "markdown",
"id": "c1cb6ecd",
"metadata": {},
"source": [
"Executing the following cell will retrieve historical climate model data:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5ca4dc59",
"metadata": {},
"outputs": [],
"source": [
"# download historical climate model data\n",
"\n",
"c = cdsapi.Client(url=URL, key=KEY)\n",
"\n",
"c.retrieve(\n",
" 'projections-cmip6',\n",
" {\n",
" 'temporal_resolution': 'daily',\n",
" 'experiment': 'historical',\n",
" 'level': 'single_levels',\n",
" 'variable': variable,\n",
" 'model': model,\n",
" 'date': period_hist,\n",
" 'area': area,\n",
" 'format': 'zip',\n",
" },\n",
" f'{DATADIR}/{fname_cm_hist}')"
]
},
{
"cell_type": "markdown",
"id": "1d05a296",
"metadata": {},
"source": [
"After unzipping the folder..."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fee48ec4",
"metadata": {},
"outputs": [],
"source": [
"import zipfile\n",
"\n",
"with zipfile.ZipFile(f'{DATADIR}/{fname_cm_hist}', 'r') as zip_ref:\n",
" zip_ref.extractall(DATADIR)"
]
},
{
"cell_type": "markdown",
"id": "4eb56057",
"metadata": {},
"source": [
"...the file is now in `DATADIR/pr_day_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_19790101-20141231_*.nc`"
]
},
{
"cell_type": "markdown",
"id": "36252bae",
"metadata": {},
"source": [
"#### 1.1.2. Download future climate model data"
]
},
{
"cell_type": "markdown",
"id": "cfdde7d4",
"metadata": {},
"source": [
"Now we go through the same steps to download climate data in the future or application period:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99e63f7a",
"metadata": {},
"outputs": [],
"source": [
"# download future climate model data\n",
"\n",
"c = cdsapi.Client(url=URL, key=KEY)\n",
"\n",
"c.retrieve(\n",
" 'projections-cmip6',\n",
" {\n",
" 'temporal_resolution': 'daily',\n",
" 'experiment': 'ssp5_8_5',\n",
" 'level': 'single_levels',\n",
" 'variable': variable,\n",
" 'model': model,\n",
" 'date': period_fut,\n",
" 'area': area,\n",
" 'format': 'zip',\n",
" },\n",
" f'{DATADIR}/{fname_cm_future}')"
]
},
{
"cell_type": "markdown",
"id": "f1f64630",
"metadata": {},
"source": [
"Again, we need to unzip the folder:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "796570da",
"metadata": {},
"outputs": [],
"source": [
"import zipfile\n",
"\n",
"with zipfile.ZipFile(f'{DATADIR}/{fname_cm_future}', 'r') as zip_ref:\n",
" zip_ref.extractall(DATADIR)"
]
},
{
"cell_type": "markdown",
"id": "1c510e91",
"metadata": {},
"source": [
"The file is now in `DATADIR/pr_day_MPI-ESM1-2-LR_ssp585_r1i1p1f1_gn_20500101-20701231_*.nc`"
]
},
{
"cell_type": "markdown",
"id": "a0b85cf6",
"metadata": {},
"source": [
"### 1.2. Download observations"
]
},
{
"cell_type": "markdown",
"id": "1046a89d",
"metadata": {},
"source": [
"Now we will download observations. We will first download ERA5 data from the CDS and afterwards the NCEP/DOE II Reanalysis from the PSL."
]
},
{
"cell_type": "markdown",
"id": "138a4937",
"metadata": {},
"source": [
"#### 1.2.1. Download ERA5"
]
},
{
"cell_type": "markdown",
"id": "4be4070f",
"metadata": {},
"source": [
"We will download ERA5 on daily temporal resolution (if the climate model were on other temporal resolutions we would also need a different one for ERA5). The script is inspired by [this discussion](https://confluence.ecmwf.int/pages/viewpage.action?pageId=228867588) and uses the [\n",
"Daily statistics calculated from ERA5 data\n",
"](https://cds.climate.copernicus.eu/cdsapp#!/software/app-c3s-daily-era5-statistics?tab=overview) application. The output of this application is a separate netCDF file for chosen daily statistic for each month for each year. We concatenate those files then manually. First we need to make some selections (make sure the data chosen here is consistent with the cm data pulled above):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc749c3b",
"metadata": {},
"outputs": [],
"source": [
"# choose years to request (this should overlap with the `period_hist` chosen for the cm data)\n",
"# this is chosen shorter for demonstration purposes\n",
"years = list(map(str, range(1979, 1981)))\n",
"\n",
"# choose months to request\n",
"months = list(map(str, range(10, 12)))\n",
"\n",
"# choose a variable (must be a valid ERA5 CDS API name)\n",
"variable = \"total_precipitation\"\n",
"\n",
"# choose a required statistic (valid names given in the application description above)\n",
"statistic = \"daily_mean\"\n",
"\n",
"# choose an area (should be the same as above)\n",
"area = {\"lat\": [30, 80], \"lon\": [3, 20]}\n",
"\n",
"# choose a filename\n",
"fname_era5 = f\"era5_{variable}_{statistic}_1979_1981.nc\""
]
},
{
"cell_type": "markdown",
"id": "16856537",
"metadata": {},
"source": [
"And now we can request the data:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15ac64b4",
"metadata": {},
"outputs": [],
"source": [
"c = cdsapi.Client(url=URL, key=KEY, timeout=300)\n",
"\n",
"# Loop over years and months\n",
"filenames_for_cleanup= []\n",
"for yr in years:\n",
" print(f\"----- Requesting year: {yr} -----\")\n",
" for mn in months:\n",
" result = c.service(\n",
" \"tool.toolbox.orchestrator.workflow\",\n",
" params={\n",
" \"realm\": \"user-apps\",\n",
" \"project\": \"app-c3s-daily-era5-statistics\",\n",
" \"version\": \"master\",\n",
" \"kwargs\": {\n",
" \"dataset\": \"reanalysis-era5-single-levels\",\n",
" \"product_type\": \"reanalysis\",\n",
" \"variable\": variable,\n",
" \"statistic\": statistic,\n",
" \"year\": yr,\n",
" \"month\": mn,\n",
" \"time_zone\": \"UTC+00:0\",\n",
" \"frequency\": \"1-hourly\",\n",
" \"grid\": \"1.0/1.0\",\n",
" \"area\": area,\n",
" },\n",
" \"workflow_name\": \"application\"\n",
" }) \n",
"\n",
" \n",
" filename = f\"{DATADIR}/era5_{variable}_{statistic}_{yr}_{mn}.nc\"\n",
" url = result[0]['location']\n",
"\n",
" # Download nc file\n",
" urllib.request.urlretrieve(url, filename)\n",
" # Append filename to list of filenames to cleanup\n",
" filenames_for_cleanup.append(filename)\n",
"\n",
"# Combine monthly data\n",
"combined_data = xarray.open_mfdataset(f\"{DATADIR}/era5_{variable}_{statistic}_*.nc\", combine = 'nested', concat_dim=\"time\")\n",
"combined_data.to_netcdf(f\"{DATADIR}/{fname_era5}\")\n",
"\n",
"# Cleanup\n",
"for filename in filenames_for_cleanup:\n",
" os.remove(filename)"
]
},
{
"cell_type": "markdown",
"id": "89f05dad",
"metadata": {},
"source": [
"#### 1.2.2. Download NCEP/DOE II"
]
},
{
"cell_type": "markdown",
"id": "111c8740",
"metadata": {},
"source": [
"We now download the [NCEP/DOE II data](https://psl.noaa.gov/data/gridded/data.ncep.reanalysis2.html). [Here is an overview](https://psl.noaa.gov/data/gridded/data.ncep.reanalysis2.html) of the possible variables and we take the data from [the datastore here](https://downloads.psl.noaa.gov/Datasets/ncep.reanalysis2/Dailies/gaussian_grid/). "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dec2749e",
"metadata": {},
"outputs": [],
"source": [
"# Variable name. Needs to be one of the NCEP-names in https://downloads.psl.noaa.gov/Datasets/ncep.reanalysis2/Dailies/gaussian_grid/. \n",
"variable = \"prate.sfc.gauss\"\n",
"\n",
"# choose years to request (this should overlap with the `period_hist` chosen for the cm data)*\n",
"years = map(str, range(1979, 2005))\n",
"\n",
"# choose an area (should be the same as above)\n",
"area = {\"lat\": [30, 80], \"lon\": [3, 20]}\n",
"\n",
"# choose a filename\n",
"fname_ncep_doe = f\"ncep_doe_{variable}_1979_2005.nc\""
]
},
{
"cell_type": "markdown",
"id": "140db911",
"metadata": {},
"source": [
"Now we can download the data:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8fd4968a",
"metadata": {},
"outputs": [],
"source": [
"# Download data year by year\n",
"filenames_for_cleanup = []\n",
"for year in years:\n",
" url = f\"https://downloads.psl.noaa.gov/Datasets/ncep.reanalysis2/Dailies/gaussian_grid/{variable}.{year}.nc\"\n",
" filename = f\"{DATADIR}/{variable}_{year}.nc\"\n",
" # Download nc file\n",
" urllib.request.urlretrieve(url, filename)\n",
" # Append filename to list of filenames to cleanup\n",
" filenames_for_cleanup.append(filename)\n",
"\n",
"# Combine data for variable\n",
"combined_data = xarray.open_mfdataset(f\"{DATADIR}/{variable}_*.nc\", combine = 'nested', concat_dim=\"time\")\n",
"# Select area\n",
"combined_data = combined_data.sel(lon=slice(min(area[\"lon\"]), max(area[\"lon\"])),lat=slice(max(area[\"lat\"]), min(area[\"lat\"])))\n",
"# Write to file \n",
"combined_data.to_netcdf(f\"{DATADIR}/{fname_ncep_doe}\")\n",
"# Cleanup\n",
"for filename in filenames_for_cleanup:\n",
" os.remove(filename)\n"
]
},
{
"cell_type": "markdown",
"id": "8fa8329b",
"metadata": {},
"source": [
"It is also possible (and probably easier) to download the data via ftp through the same links, or via the visual interface accessible via the graph-icon next to a variable in the [NCEP/DOE 2 overview page](https://psl.noaa.gov/data/gridded/data.ncep.reanalysis2.html). The latter also provides an option to select a range of dates and access merged data for that range that can directly be used for the further preprocessing steps."
]
},
{
"cell_type": "markdown",
"id": "9156db89",
"metadata": {},
"source": [
"## 2. Convert and regrid data"
]
},
{
"cell_type": "markdown",
"id": "a131dd39",
"metadata": {},
"source": [
"Now that we have downloaded the data we need to make sure that observations and climate model data are:\n",
"\n",
"- on the same temporal resolution: this is covered because we downloaded the data on daily resolution.\n",
"- on the same spatial resolution: we need to regrid the data.\n",
"- in the same units: we may need to convert units.\n",
"\n",
"Furthermore we might want to extract additional information and need to get the numpy arrays corresponding to the data. In the numpy arrays we need to make sure that they have the form `[t,x,y]`.\n"
]
},
{
"cell_type": "markdown",
"id": "c4bdf004",
"metadata": {},
"source": [
"### 2.1. Regrid data "
]
},
{
"cell_type": "markdown",
"id": "1ebf5afa",
"metadata": {},
"source": [
"Now that we have data on the same temporal resolution for both the climate model and observations we need to make sure they are also on the same spatial one and regrid the datasets. The climate model data is on a coarser grid, therefore we will regrid the observational data onto this resolution. However there are also other solutions, where the [climate model data is regridded onto the resolution of the observations](https://esd.copernicus.org/articles/9/313/2018/).\n",
"\n",
"We will use iris for the regridding, however there are also xarray solutions. Different variables might require different regridding schemes: [a list of ones available in iris is given here](https://scitools-iris.readthedocs.io/en/latest/userguide/interpolation_and_regridding.html?highlight=interpolate#regridding). For precipitation we choose a regridder based on Nearest values. Other regridders like linear ones *might* introduce negative values."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3bfc9e58",
"metadata": {},
"outputs": [],
"source": [
"cm_hist = iris.load_cube(f\"{DATADIR}/pr_day_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_19790101-20051231_v20190710.nc\", \"precipitation_flux\")\n",
"cm_future = iris.load_cube(f\"{DATADIR}/pr_day_MPI-ESM1-2-LR_ssp585_r1i1p1f1_gn_20500101-20701231_v20190710.nc\", \"precipitation_flux\")"
]
},
{
"cell_type": "markdown",
"id": "397a0cb7",
"metadata": {},
"source": [
"First let's take care of the ERA5 reanalysis:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5fc7d11d",
"metadata": {},
"outputs": [],
"source": [
"obs_era5 = iris.load_cube(f\"{DATADIR}/{fname_era5}\")\n",
"obs_era5 = obs_era5.regrid(cm_hist, iris.analysis.Nearest())"
]
},
{
"cell_type": "markdown",
"id": "2d56577c",
"metadata": {},
"source": [
"And now of the NCEP/DOE II data:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ca33391",
"metadata": {},
"outputs": [],
"source": [
"obs_ncep_doe = iris.load_cube(f\"{DATADIR}/{fname_ncep_doe}\")\n",
"obs_ncep_doe = obs_ncep_doe.regrid(cm_hist, iris.analysis.Nearest())"
]
},
{
"cell_type": "markdown",
"id": "e800787d",
"metadata": {},
"source": [
"### 2.1. Extract additional information"
]
},
{
"cell_type": "markdown",
"id": "465d30f2",
"metadata": {},
"source": [
"The data objects are now all at the same temporal and spatial resolution. Because some debiasers need the dates as input, it is useful to extract them here:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f9444564",
"metadata": {},
"outputs": [],
"source": [
"def get_dates(x):\n",
" time_dimension = x.coords()[0]\n",
" dates = time_dimension.units.num2date(time_dimension.points)\n",
" return dates\n",
"\n",
"get_dates = np.vectorize(get_dates)\n",
"\n",
"dates_cm_hist = get_dates(cm_hist)\n",
"dates_cm_future = get_dates(cm_future)\n",
"\n",
"dates_obs_era5 = get_dates(obs_era5)\n",
"dates_obs_ncep_doe = get_dates(obs_ncep_doe)"
]
},
{
"cell_type": "markdown",
"id": "44fa2e3d",
"metadata": {},
"source": [
"### 2.3. Get numpy arrays"
]
},
{
"cell_type": "markdown",
"id": "dab004f5",
"metadata": {},
"source": [
"In order to start working with ibicus, we need to get the numpy arrays associated with the data from the iris cubes:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "165cf33a",
"metadata": {},
"outputs": [],
"source": [
"cm_hist = cm_hist.data\n",
"cm_future = cm_future.data\n",
"\n",
"obs_era5 = obs_era5.data\n",
"obs_ncep_doe = obs_ncep_doe.data"
]
},
{
"cell_type": "markdown",
"id": "5f997223",
"metadata": {},
"source": [
"We look at the shapes to make sure they are all in the form `[t, x, y]`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "57b96bc0",
"metadata": {},
"outputs": [],
"source": [
"print(f\"Shape cm_hist: {cm_hist.shape}\")\n",
"print(f\"Shape cm_future: {cm_future.shape}\")\n",
"\n",
"print(f\"Shape obs_era5: {obs_era5.shape}\")\n",
"print(f\"Shape obs_ncep_doe: {obs_ncep_doe.shape}\")"
]
},
{
"cell_type": "markdown",
"id": "de4780c7",
"metadata": {},
"source": [
"### 2.4. Convert units"
]
},
{
"cell_type": "markdown",
"id": "c760fe32",
"metadata": {},
"source": [
"From the [ERA5 documentation](https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation) we can see that the precipitation is measured in m, whilst in `cm_hist` and `cm_future` it is measured as flux (m / s^-1). To convert we need to divide the ERA5-values by 86 400 (the number of seconds per day):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a37eb5e7",
"metadata": {},
"outputs": [],
"source": [
"obs_era5 = obs_era5/86400"
]
},
{
"cell_type": "markdown",
"id": "4d99dfe0",
"metadata": {},
"source": [
"The values in the NCEP/DOE II reanalysis are in the same units."
]
},
{
"cell_type": "markdown",
"id": "1812a128",
"metadata": {},
"source": [
"## 3. Apply debiaser"
]
},
{
"cell_type": "markdown",
"id": "24d67c72",
"metadata": {},
"source": [
"After these preparations we can finally apply a bias adjustment method. For a detailed introduction into the actual application of bias correction using ibicus, we refer you to the other notebooks.\n",
"\n",
"For illustrative purposes we give one example here using a simple quantile mapping methodology that we apply to both ERA5 and NCEP/DOE II data."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5990e8ce",
"metadata": {},
"outputs": [],
"source": [
"from ibicus.debias import QuantileMapping\n",
"\n",
"debiaser = QuantileMapping.from_variable(\"pr\")\n",
"\n",
"debiased_cm_future_era5 = debiaser.apply(obs_era5, cm_hist, cm_future)\n",
"debiased_cm_future_ncep_doe = debiaser.apply(obs_ncep_doe, cm_hist, cm_future)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
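
To see the ibicus call from section 3 end to end without the downloads, here is a self-contained toy run on random precipitation-like arrays; the values are synthetic, and only the array shape convention (`[t, x, y]`, as checked in section 2.3) and the API usage mirror the notebook:

import numpy as np
from ibicus.debias import QuantileMapping

rng = np.random.default_rng(0)
obs = rng.gamma(2.0, 1.0e-5, size=(1000, 4, 4))        # pseudo-observations [t, x, y]
cm_hist = rng.gamma(2.0, 1.2e-5, size=(1000, 4, 4))    # pseudo historical climate model run
cm_future = rng.gamma(2.0, 1.5e-5, size=(1000, 4, 4))  # pseudo future climate model run

debiaser = QuantileMapping.from_variable("pr")
debiased = debiaser.apply(obs, cm_hist, cm_future)
print(debiased.shape)  # same shape as cm_future: (1000, 4, 4)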