diff --git a/clean_notebooks.py b/clean_notebooks.py
new file mode 100644
index 0000000..18f8a1f
--- /dev/null
+++ b/clean_notebooks.py
@@ -0,0 +1,26 @@
+import os
+import nbformat
+
+def clear_notebook_output(notebook_path):
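+ # Read the notebook, blank out code-cell outputs and execution counts, then write it back in place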
+ with open(notebook_path, 'r', encoding='utf-8') as f:
+ nb = nbformat.read(f, as_version=4)
+
+ for cell in nb.cells:
+ if cell.cell_type == 'code':
+ cell.outputs = []
+ cell.execution_count = None
+
+ with open(notebook_path, 'w', encoding='utf-8') as f:
+ nbformat.write(nb, f)
+ print(f'Cleared output for {notebook_path}')
+
+def walk_and_clear_outputs(base_dir):
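+ # Recursively find every .ipynb under base_dir and clear its outputs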
+ for root, dirs, files in os.walk(base_dir):
+ for file in files:
+ if file.endswith('.ipynb'):
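+ # Note: notebooks inside .ipynb_checkpoints directories are also processed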
+ notebook_path = os.path.join(root, file)
+ clear_notebook_output(notebook_path)
+
+if __name__ == "__main__":
+ base_dir = '.' # Change this to the directory you want to start from
+ walk_and_clear_outputs(base_dir)
diff --git a/config_auth_cds.ipynb b/config_auth_cds.ipynb
index b1e3955..033b05c 100644
--- a/config_auth_cds.ipynb
+++ b/config_auth_cds.ipynb
@@ -44,32 +44,9 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: cdsapi in /opt/conda/lib/python3.8/site-packages (0.7.4)\n",
- "Requirement already satisfied: cads-api-client>=1.4.7 in /opt/conda/lib/python3.8/site-packages (from cdsapi) (1.5.1)\n",
- "Requirement already satisfied: tqdm in /opt/conda/lib/python3.8/site-packages (from cdsapi) (4.60.0)\n",
- "Requirement already satisfied: requests>=2.5.0 in /opt/conda/lib/python3.8/site-packages (from cdsapi) (2.24.0)\n",
- "Requirement already satisfied: multiurl>=0.3.2 in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (0.3.2)\n",
- "Requirement already satisfied: attrs in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (24.2.0)\n",
- "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (4.12.2)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (2020.12.5)\n",
- "Requirement already satisfied: chardet<4,>=3.0.2 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (3.0.4)\n",
- "Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (2.10)\n",
- "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (1.25.10)\n",
- "Requirement already satisfied: python-dateutil in /opt/conda/lib/python3.8/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi) (2.8.1)\n",
- "Requirement already satisfied: pytz in /opt/conda/lib/python3.8/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi) (2020.1)\n",
- "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.8/site-packages (from python-dateutil->multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi) (1.15.0)\n",
- "Requirement already satisfied: attrs>=24.0.0 in /opt/conda/lib/python3.8/site-packages (24.2.0)\n",
- "Requirement already satisfied: typing_extensions>=4.0.0 in /opt/conda/lib/python3.8/site-packages (4.12.2)\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"!pip install \"cdsapi>=0.7.4\"\n",
"!pip install \"attrs>=24.0.0\"\n",
@@ -78,96 +55,27 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (1.20.3)\n",
- "Requirement already satisfied: matplotlib in /opt/conda/lib/python3.8/site-packages (3.3.4)\n",
- "Requirement already satisfied: cartopy in /opt/conda/lib/python3.8/site-packages (0.18.0)\n",
- "Requirement already satisfied: xarray in /opt/conda/lib/python3.8/site-packages (0.17.0)\n",
- "Requirement already satisfied: netCDF4 in /opt/conda/lib/python3.8/site-packages (1.5.6)\n",
- "Requirement already satisfied: cdsapi in /opt/conda/lib/python3.8/site-packages (0.7.4)\n",
- "Requirement already satisfied: python-dateutil>=2.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib) (2.8.1)\n",
- "Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.8/site-packages (from matplotlib) (0.10.0)\n",
- "Requirement already satisfied: pillow>=6.2.0 in /opt/conda/lib/python3.8/site-packages (from matplotlib) (7.2.0)\n",
- "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in /opt/conda/lib/python3.8/site-packages (from matplotlib) (2.4.7)\n",
- "Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib) (1.2.0)\n",
- "Requirement already satisfied: shapely>=1.5.6 in /opt/conda/lib/python3.8/site-packages (from cartopy) (1.7.1)\n",
- "Requirement already satisfied: six>=1.3.0 in /opt/conda/lib/python3.8/site-packages (from cartopy) (1.15.0)\n",
- "Requirement already satisfied: setuptools>=0.7.2 in /opt/conda/lib/python3.8/site-packages (from cartopy) (49.6.0.post20200814)\n",
- "Requirement already satisfied: pyshp>=1.1.4 in /opt/conda/lib/python3.8/site-packages (from cartopy) (2.1.3)\n",
- "Requirement already satisfied: pandas>=0.25 in /opt/conda/lib/python3.8/site-packages (from xarray) (1.1.1)\n",
- "Requirement already satisfied: cftime in /opt/conda/lib/python3.8/site-packages (from netCDF4) (1.5.0)\n",
- "Requirement already satisfied: cads-api-client>=1.4.7 in /opt/conda/lib/python3.8/site-packages (from cdsapi) (1.5.1)\n",
- "Requirement already satisfied: tqdm in /opt/conda/lib/python3.8/site-packages (from cdsapi) (4.60.0)\n",
- "Requirement already satisfied: requests>=2.5.0 in /opt/conda/lib/python3.8/site-packages (from cdsapi) (2.24.0)\n",
- "Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.8/site-packages (from pandas>=0.25->xarray) (2020.1)\n",
- "Requirement already satisfied: multiurl>=0.3.2 in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (0.3.2)\n",
- "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (4.12.2)\n",
- "Requirement already satisfied: attrs in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (24.2.0)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (2020.12.5)\n",
- "Requirement already satisfied: chardet<4,>=3.0.2 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (3.0.4)\n",
- "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (1.25.10)\n",
- "Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (2.10)\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"!pip install numpy matplotlib cartopy xarray netCDF4 cdsapi"
]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: pandas in /opt/conda/lib/python3.8/site-packages (1.1.1)\n",
- "Requirement already satisfied: numpy>=1.15.4 in /opt/conda/lib/python3.8/site-packages (from pandas) (1.20.3)\n",
- "Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.8/site-packages (from pandas) (2020.1)\n",
- "Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.8/site-packages (from pandas) (2.8.1)\n",
- "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
- "Note: you may need to restart the kernel to use updated packages.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"pip install pandas"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: xarray in /opt/conda/lib/python3.8/site-packages (0.17.0)\n",
- "Requirement already satisfied: zarr in /opt/conda/lib/python3.8/site-packages (2.5.0)\n",
- "Requirement already satisfied: dask in /opt/conda/lib/python3.8/site-packages (2.20.0)\n",
- "Requirement already satisfied: fsspec in /opt/conda/lib/python3.8/site-packages (0.8.0)\n",
- "Requirement already satisfied: numpy>=1.15 in /opt/conda/lib/python3.8/site-packages (from xarray) (1.20.3)\n",
- "Requirement already satisfied: setuptools>=40.4 in /opt/conda/lib/python3.8/site-packages (from xarray) (49.6.0.post20200814)\n",
- "Requirement already satisfied: pandas>=0.25 in /opt/conda/lib/python3.8/site-packages (from xarray) (1.1.1)\n",
- "Requirement already satisfied: fasteners in /opt/conda/lib/python3.8/site-packages (from zarr) (0.16)\n",
- "Requirement already satisfied: asciitree in /opt/conda/lib/python3.8/site-packages (from zarr) (0.3.3)\n",
- "Requirement already satisfied: numcodecs>=0.6.4 in /opt/conda/lib/python3.8/site-packages (from zarr) (0.7.3)\n",
- "Requirement already satisfied: pyyaml in /opt/conda/lib/python3.8/site-packages (from dask) (5.3.1)\n",
- "Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.8/site-packages (from pandas>=0.25->xarray) (2020.1)\n",
- "Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.8/site-packages (from pandas>=0.25->xarray) (2.8.1)\n",
- "Requirement already satisfied: six in /opt/conda/lib/python3.8/site-packages (from fasteners->zarr) (1.15.0)\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"!pip install zarr dask fsspec"
]
@@ -206,43 +114,9 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Collecting git+https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git\n",
- " Cloning https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git to /tmp/pip-req-build-luzr1m0l\n",
- "Requirement already satisfied, skipping upgrade: cdsapi>=0.7.2 in /opt/conda/lib/python3.8/site-packages (from d4science-copernicus-cds==1.0.0) (0.7.4)\n",
- "Requirement already satisfied, skipping upgrade: attrs in /opt/conda/lib/python3.8/site-packages (from d4science-copernicus-cds==1.0.0) (20.1.0)\n",
- "Requirement already satisfied, skipping upgrade: typing_extensions in /opt/conda/lib/python3.8/site-packages (from d4science-copernicus-cds==1.0.0) (3.7.4.2)\n",
- "Requirement already satisfied, skipping upgrade: requests>=2.5.0 in /opt/conda/lib/python3.8/site-packages (from cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2.24.0)\n",
- "Requirement already satisfied, skipping upgrade: tqdm in /opt/conda/lib/python3.8/site-packages (from cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (4.60.0)\n",
- "Requirement already satisfied, skipping upgrade: cads-api-client>=1.4.7 in /opt/conda/lib/python3.8/site-packages (from cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (1.5.1)\n",
- "Requirement already satisfied, skipping upgrade: chardet<4,>=3.0.2 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (3.0.4)\n",
- "Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2020.12.5)\n",
- "Requirement already satisfied, skipping upgrade: idna<3,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2.10)\n",
- "Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (1.25.10)\n",
- "Requirement already satisfied, skipping upgrade: multiurl>=0.3.2 in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (0.3.2)\n",
- "Requirement already satisfied, skipping upgrade: pytz in /opt/conda/lib/python3.8/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2020.1)\n",
- "Requirement already satisfied, skipping upgrade: python-dateutil in /opt/conda/lib/python3.8/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2.8.1)\n",
- "Requirement already satisfied, skipping upgrade: six>=1.5 in /opt/conda/lib/python3.8/site-packages (from python-dateutil->multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (1.15.0)\n",
- "Building wheels for collected packages: d4science-copernicus-cds\n",
- " Building wheel for d4science-copernicus-cds (setup.py) ... \u001b[?25ldone\n",
- "\u001b[?25h Created wheel for d4science-copernicus-cds: filename=d4science_copernicus_cds-1.0.0-py3-none-any.whl size=12134 sha256=1f12bffb2cf09d7083b136fb4de34b441adb37ef0d74f468b41c08e309bf204d\n",
- " Stored in directory: /tmp/pip-ephem-wheel-cache-8yuqw8dx/wheels/52/8f/79/78b8dae3ae67225c9ad8417f73f2b630b4ad077f0a27911303\n",
- "Successfully built d4science-copernicus-cds\n",
- "Installing collected packages: d4science-copernicus-cds\n",
- " Attempting uninstall: d4science-copernicus-cds\n",
- " Found existing installation: d4science-copernicus-cds 1.0.0\n",
- " Uninstalling d4science-copernicus-cds-1.0.0:\n",
- " Successfully uninstalled d4science-copernicus-cds-1.0.0\n",
- "Successfully installed d4science-copernicus-cds-1.0.0\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"!pip install -U git+https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git"
]
@@ -258,7 +132,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -296,21 +170,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "ENV - None None\n",
- "env is not configured\n",
- "Configuration from file /home/jovyan/.cdsapirc: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
- "saving config to env\n",
- "Set environment variables CDSAPI_URL, CDSAPI_KEY\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"client = cds_authenticate()"
]
@@ -333,34 +195,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "ENV - https://cds.climate.copernicus.eu/api db1f2085-6b8b-42e6-b832-625dfaf831a4\n",
- "Configuration from environment {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
- "Configuration from file /home/jovyan/.cdsapirc: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
- "Configuration from environment: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
- "Configuration from file: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "({'url': 'https://cds.climate.copernicus.eu/api',\n",
- " 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'},\n",
- " {'url': 'https://cds.climate.copernicus.eu/api',\n",
- " 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'})"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"cds_show_conf()"
]
@@ -381,18 +218,9 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "URL https://cds.climate.copernicus.eu/api\n",
- "KEY db1f2085-6b8b-42e6-b832-625dfaf831a4\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"URL, KEY = cds_get_credentials()\n",
"print(\"URL\", URL)\n",
@@ -416,19 +244,9 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "ENV - https://cds.climate.copernicus.eu/api db1f2085-6b8b-42e6-b832-625dfaf831a4\n",
- "Configuration from environment {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
- "Saved Configuration file /home/jovyan/.cdsapirc\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"cds_save_conf()"
]
@@ -448,7 +266,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -470,7 +288,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -490,34 +308,9 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "ENV - https://cds.climate.copernicus.eu/api db1f2085-6b8b-42e6-b832-625dfaf831a4\n",
- "Configuration from environment {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
- "Configuration from file /home/jovyan/.cdsapirc: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
- "Configuration from environment: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n",
- "Configuration from file: {'url': 'https://cds.climate.copernicus.eu/api', 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'}\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "({'url': 'https://cds.climate.copernicus.eu/api',\n",
- " 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'},\n",
- " {'url': 'https://cds.climate.copernicus.eu/api',\n",
- " 'key': 'db1f2085-6b8b-42e6-b832-625dfaf831a4'})"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"cds_show_conf()"
]
@@ -547,17 +340,9 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "datadir: %s /home/jovyan/cds_dataDir/out_2024_11_06_16_34_36_example/\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"datadir = cds_datadir(\"example\")"
]
@@ -589,17 +374,9 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "datadir: %s ./out/out_2024_11_06_16_34_36_current_example/\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"datadir_current = cds_datadir(\"current_example\", basepath=\"./out\")\n"
]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1e33940
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+nbformat
diff --git a/requirements_tutorials.txt b/requirements_tutorials.txt
index 61574a3..1a49efa 100644
--- a/requirements_tutorials.txt
+++ b/requirements_tutorials.txt
@@ -2,6 +2,7 @@ cdsapi>=0.7.4
attrs>=24.0.0
typing_extensions>=4.0.0
numpy>=1.16.5,<1.23.0
+llvmlite==0.31.*  # note: --ignore-installed is a pip command-line flag and cannot be set in a requirements file
matplotlib
cartopy
xarray
diff --git a/tutorials/01_reanalysis/01x02_reanalysis-temp-record.ipynb b/tutorials/01_reanalysis/01x02_reanalysis-temp-record.ipynb
index 6806566..3e9673a 100644
--- a/tutorials/01_reanalysis/01x02_reanalysis-temp-record.ipynb
+++ b/tutorials/01_reanalysis/01x02_reanalysis-temp-record.ipynb
@@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "markdown",
+ "id": "8130d058",
"metadata": {},
"source": [
"# Tutorial on July 2023 record-breaking global surface temperatures using climate data from C3S"
@@ -9,6 +10,7 @@
},
{
"cell_type": "markdown",
+ "id": "65b72b64",
"metadata": {},
"source": [
"### About\n",
@@ -29,6 +31,7 @@
},
{
"cell_type": "markdown",
+ "id": "771a66cc",
"metadata": {},
"source": [
"### d4science_copernicus_cds Library\n",
@@ -50,6 +53,7 @@
},
{
"cell_type": "markdown",
+ "id": "fe7a0949",
"metadata": {},
"source": [
"This tutorial is based on the official turorial **[CDS API guide](https://ecmwf-projects.github.io/copernicus-training-c3s/reanalysis-temp-record.html)**, extended and adapted for use in the **BlueCloud JupyterLab** environment."
@@ -58,6 +62,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "f07ec047",
"metadata": {},
"outputs": [],
"source": [
@@ -67,6 +72,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "e10262fc",
"metadata": {},
"outputs": [],
"source": [
@@ -76,6 +82,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "4bd3fb71",
"metadata": {},
"outputs": [],
"source": [
@@ -90,6 +97,7 @@
},
{
"cell_type": "markdown",
+ "id": "692a7b7c",
"metadata": {},
"source": [
"cds_datadir will create a folder in our workspace, under cds_dataDir, with current timestamp and custom label"
@@ -98,6 +106,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "4e3a9659",
"metadata": {},
"outputs": [],
"source": [
@@ -107,6 +116,7 @@
},
{
"cell_type": "markdown",
+ "id": "07fcf992",
"metadata": {},
"source": [
"## 1. Search, download and view data"
@@ -114,6 +124,7 @@
},
{
"cell_type": "markdown",
+ "id": "54ad2fba",
"metadata": {},
"source": [
"Before we begin we must prepare our environment. This includes installing the Application Programming Interface (API) of the CDS as well as other required libs, and importing the various python libraries that we will need."
@@ -121,6 +132,7 @@
},
{
"cell_type": "markdown",
+ "id": "fc93afc0",
"metadata": {},
"source": [
"#### Infrastructure introduction (installing API of the CDS)"
@@ -128,6 +140,7 @@
},
{
"cell_type": "markdown",
+ "id": "d8db144f",
"metadata": {},
"source": [
"In this exercise we will mainly use `cdsapi`, `xarray`, `matplotlib` and `cartopy` python libraries."
@@ -135,6 +148,7 @@
},
{
"cell_type": "markdown",
+ "id": "5638a7c8",
"metadata": {},
"source": [
"There are several options to run the code in this tutorial:\n",
@@ -144,6 +158,7 @@
},
{
"cell_type": "markdown",
+ "id": "2a0767bd",
"metadata": {},
"source": [
"#### Installation on your computer"
@@ -151,6 +166,7 @@
},
{
"cell_type": "markdown",
+ "id": "91ee485f",
"metadata": {},
"source": [
"First of all, in order to run this notebook on your computer you need to install Python and the required libs."
@@ -158,6 +174,7 @@
},
{
"cell_type": "markdown",
+ "id": "2875c115",
"metadata": {},
"source": [
"The easiest way to install Python without interfering with other potential Python installations on your system is by using [Miniconda, Miniforge or Mambaforge](https://github.com/conda-forge/miniforge/blob/main/README.md). This will install a modern Python for your user and the **Conda**/**Mamba** package manager. **Mamba** is a performant drop-in replacement for **Conda**."
@@ -165,6 +182,7 @@
},
{
"cell_type": "markdown",
+ "id": "aeb3656c",
"metadata": {},
"source": [
"Once Python + **Conda**/**Mamba** are installed run the following from the command line to install the API of the CDS, `cdsapi`, and the rest of the requirements:\n",
@@ -180,6 +198,7 @@
},
{
"cell_type": "markdown",
+ "id": "4a2a46d7",
"metadata": {},
"source": [
"If everything is installed correctly run the following from the command line:\n",
@@ -193,6 +212,7 @@
},
{
"cell_type": "markdown",
+ "id": "eb3c1071",
"metadata": {},
"source": [
"#### Running on Colab or Kaggle"
@@ -200,6 +220,7 @@
},
{
"cell_type": "markdown",
+ "id": "c6fe3496",
"metadata": {},
"source": [
"If you are on Colab or Kaggle just run the following line of code to install the API of the CDS and the rest of the dependencies before running the rest of the code:"
@@ -208,6 +229,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "73fdf414",
"metadata": {},
"outputs": [],
"source": [
@@ -216,6 +238,7 @@
},
{
"cell_type": "markdown",
+ "id": "779f51f5",
"metadata": {},
"source": [
"#### Import libraries"
@@ -223,6 +246,7 @@
},
{
"cell_type": "markdown",
+ "id": "be67dade",
"metadata": {},
"source": [
"We will start importing the required libraries. These libs should be already installed. If you have not installed the requirements, please go to the specific section above."
@@ -231,6 +255,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "c144aa3f",
"metadata": {},
"outputs": [],
"source": [
@@ -248,6 +273,7 @@
},
{
"cell_type": "markdown",
+ "id": "afd220dd",
"metadata": {},
"source": [
"#### Search for data\n",
@@ -258,6 +284,7 @@
},
{
"cell_type": "markdown",
+ "id": "939b2301",
"metadata": {},
"source": [
"Having selected the correct dataset, we now need to specify what product type, variables, temporal and geographic coverage we are interested in. These can all be selected in the **\"Download data\"** tab. In this tab a form appears in which we will select the following parameters to download:\n",
@@ -275,6 +302,7 @@
},
{
"cell_type": "markdown",
+ "id": "6e755a8b",
"metadata": {},
"source": [
"
"
@@ -282,6 +310,7 @@
},
{
"cell_type": "markdown",
+ "id": "f9755801",
"metadata": {},
"source": [
"#### Download data\n",
@@ -292,6 +321,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "9d8818a8",
"metadata": {},
"outputs": [],
"source": [
@@ -342,6 +372,7 @@
},
{
"cell_type": "markdown",
+ "id": "7ed10fdc",
"metadata": {},
"source": [
"#### Inspect data\n",
@@ -352,6 +383,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "8938c920",
"metadata": {},
"outputs": [],
"source": [
@@ -360,6 +392,7 @@
},
{
"cell_type": "markdown",
+ "id": "b2f8d3c9",
"metadata": {},
"source": [
"Now we can query our newly created Xarray dataset... Let's have a look at the `ds`."
@@ -368,6 +401,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "e32a3886",
"metadata": {},
"outputs": [],
"source": [
@@ -376,6 +410,7 @@
},
{
"cell_type": "markdown",
+ "id": "7c42c6e1",
"metadata": {},
"source": [
"We see that the dataset has one variable called `t2m`, which stands for \"2 metre temperature\", and three coordinates of `longitude`, `latitude` and `time`. \n",
@@ -393,6 +428,7 @@
},
{
"cell_type": "markdown",
+ "id": "a99342c1",
"metadata": {},
"source": [
"There is also an `expver` coordinate. More on this later."
@@ -400,6 +436,7 @@
},
{
"cell_type": "markdown",
+ "id": "b1978d15",
"metadata": {},
"source": [
"Select the icons to the right of the table above to expand the attributes of the coordinates and data variables. What are the units of the temperature data?"
@@ -407,6 +444,7 @@
},
{
"cell_type": "markdown",
+ "id": "3dc62489",
"metadata": {},
"source": [
"While an Xarray dataset may contain multiple variables, an Xarray data array holds a single multi-dimensional variable and its coordinates. To make the processing of the `t2m` data easier, we convert it into an Xarray data array. We will call it `da_tmp` (a temporary data array) because we will transform the data in some ways."
@@ -415,6 +453,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "da8db3c5",
"metadata": {},
"outputs": [],
"source": [
@@ -423,6 +462,7 @@
},
{
"cell_type": "markdown",
+ "id": "d2a03104",
"metadata": {},
"source": [
"Let's view this data:"
@@ -431,6 +471,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "e9e1cc50",
"metadata": {},
"outputs": [],
"source": [
@@ -439,6 +480,7 @@
},
{
"cell_type": "markdown",
+ "id": "17c621f8",
"metadata": {},
"source": [
"From the result of the cell above you can see that now we have a `xarray.DataArray`."
@@ -446,6 +488,7 @@
},
{
"cell_type": "markdown",
+ "id": "9f34f5d4",
"metadata": {},
"source": [
"#### Merge the two ERA5 experiments (1 and 5, `expver = [1,5]`)\n",
@@ -458,6 +501,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "cd5eb8a2",
"metadata": {},
"outputs": [],
"source": [
@@ -467,6 +511,7 @@
},
{
"cell_type": "markdown",
+ "id": "3b95cc14",
"metadata": {},
"source": [
"Let's check again the `da_tmp` data array. If there was an `expver` coordinate we [reduce this dimension](https://docs.xarray.dev/en/stable/generated/xarray.DataArray.reduce.html) by performing a [`nansum`](https://numpy.org/doc/stable/reference/generated/numpy.nansum.html) operation, i.e. a sum of the array elements over this axis, treating Not a Numbers (NaNs) as zero. The result is a new `xarray.DataArray` merging the data along the `expver` dimension:"
@@ -475,6 +520,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "9109ec75",
"metadata": {},
"outputs": [],
"source": [
@@ -483,6 +529,7 @@
},
{
"cell_type": "markdown",
+ "id": "1ee284b5",
"metadata": {},
"source": [
"Now the data array contains the three expected dimensions: `time`, `latitude` and `longitude`."
@@ -490,6 +537,7 @@
},
{
"cell_type": "markdown",
+ "id": "53ad44a8",
"metadata": {},
"source": [
"#### Change temperature units from Kelvin to Celsius\n",
@@ -500,6 +548,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "8d8e1cbb",
"metadata": {},
"outputs": [],
"source": [
@@ -512,6 +561,7 @@
},
{
"cell_type": "markdown",
+ "id": "3fedd3fb",
"metadata": {},
"source": [
"#### Data to be used"
@@ -519,6 +569,7 @@
},
{
"cell_type": "markdown",
+ "id": "36645c48",
"metadata": {},
"source": [
"The `da_celsius` data array will be used in the rest of the surface temperature exercise. Let's check what we have:"
@@ -527,6 +578,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "c519a979",
"metadata": {},
"outputs": [],
"source": [
@@ -535,6 +587,7 @@
},
{
"cell_type": "markdown",
+ "id": "12436699",
"metadata": {},
"source": [
"Now we can see the updated values in *Celsius* and the `units` attribute updated accordingly."
@@ -542,6 +595,7 @@
},
{
"cell_type": "markdown",
+ "id": "ba44a1f5",
"metadata": {},
"source": [
"#### Plotting one timestep"
@@ -549,6 +603,7 @@
},
{
"cell_type": "markdown",
+ "id": "b582dd41",
"metadata": {},
"source": [
"Just to check what we have so far, let's plot a map of 2m temperature for the first (July 1940) and the last (July 2023) timesteps. We will plot these maps using the convenience method `plot` available for `xarray.DataArray`. This allows the creation of simple plots using one line of code. Also, with the xarray method `sel()`, you can select a data array based on coordinate labels."
@@ -557,6 +612,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "87864f37",
"metadata": {},
"outputs": [],
"source": [
@@ -566,6 +622,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "c5946b4b",
"metadata": {},
"outputs": [],
"source": [
@@ -574,6 +631,7 @@
},
{
"cell_type": "markdown",
+ "id": "f31fc1ef",
"metadata": {},
"source": [
"## 2. Calculate a surface temperature climatology: reference period 1991-2020"
@@ -581,6 +639,7 @@
},
{
"cell_type": "markdown",
+ "id": "a3cbaa85",
"metadata": {},
"source": [
"#### Standard reference periods and climatologies\n",
@@ -595,6 +654,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "15e2eb25",
"metadata": {},
"outputs": [],
"source": [
@@ -603,6 +663,7 @@
},
{
"cell_type": "markdown",
+ "id": "220d67bd",
"metadata": {},
"source": [
"If we have a look at this data object we will see now we have only two coordinates, `latitude` and `longitude`."
@@ -611,6 +672,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "7e313e58",
"metadata": {},
"outputs": [],
"source": [
@@ -619,6 +681,7 @@
},
{
"cell_type": "markdown",
+ "id": "aec6b340",
"metadata": {},
"source": [
"We can also make a quick plot to have an exploratory view of this new `xarray.DataArray`:"
@@ -627,6 +690,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "900462ab",
"metadata": {},
"outputs": [],
"source": [
@@ -635,6 +699,7 @@
},
{
"cell_type": "markdown",
+ "id": "9d500623",
"metadata": {},
"source": [
"## 3. Visualise surface temperature anomalies\n",
@@ -647,6 +712,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "769fb056",
"metadata": {},
"outputs": [],
"source": [
@@ -656,6 +722,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "bc174576",
"metadata": {},
"outputs": [],
"source": [
@@ -665,6 +732,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "6d34a81f",
"metadata": {},
"outputs": [],
"source": [
@@ -674,6 +742,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "0f226683",
"metadata": {},
"outputs": [],
"source": [
@@ -682,6 +751,7 @@
},
{
"cell_type": "markdown",
+ "id": "cd992193",
"metadata": {},
"source": [
"The anomaly will be the difference between `t2m_july2023` and `t2m_ref_per`. A positive value means July 2023 is above the expected mean:"
@@ -690,6 +760,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "86868d79",
"metadata": {},
"outputs": [],
"source": [
@@ -698,6 +769,7 @@
},
{
"cell_type": "markdown",
+ "id": "0e57b035",
"metadata": {},
"source": [
"The previous operation results in the anomaly on each longitude and latitude location stored in the `anom` data array. We can plot this in a map to check where the anomaly was positive (July 2023 warmer than the climatology) or negative (July 2023 colder than the climatology). This time we will create the plot using the `matplotlib` and `cartopy` libraries.\n",
@@ -708,6 +780,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "61863839",
"metadata": {},
"outputs": [],
"source": [
@@ -746,6 +819,7 @@
},
{
"cell_type": "markdown",
+ "id": "3b3213c7",
"metadata": {},
"source": [
"## 4. View time series and analyse surface temperature trends"
@@ -753,6 +827,7 @@
},
{
"cell_type": "markdown",
+ "id": "b64c9f82",
"metadata": {},
"source": [
"Now let us view the time series from 1940 to 2023 averaged over the entire region. To do this we need to average `da_celsius` over the latitude and longitude dimensions. A very important consideration however is that the gridded data cells do not all correspond to the same areas. The size covered by each data point on the model grid varies as a function of latitude. We need to take this into account when calculating spatial averages. \n",
@@ -769,6 +844,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "56a2b4d1",
"metadata": {},
"outputs": [],
"source": [
@@ -779,6 +855,7 @@
},
{
"cell_type": "markdown",
+ "id": "a8cdb891",
"metadata": {},
"source": [
"Then we calculate the weighted mean so we will have a time series with the spatially averaged July `t2m` from 1940 to 2023."
@@ -787,6 +864,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "ed7197ee",
"metadata": {},
"outputs": [],
"source": [
@@ -795,6 +873,7 @@
},
{
"cell_type": "markdown",
+ "id": "448b1d59",
"metadata": {},
"source": [
"Let's look at the new data array:"
@@ -803,6 +882,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "f2cd6755",
"metadata": {},
"outputs": [],
"source": [
@@ -811,6 +891,7 @@
},
{
"cell_type": "markdown",
+ "id": "ae4315b8",
"metadata": {},
"source": [
"We will calculate the climatology for this global spatially averaged July `t2m`. This value will be used later to check which years have global average 2m temperature above or below the climatology."
@@ -819,6 +900,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "12551a1d",
"metadata": {},
"outputs": [],
"source": [
@@ -828,6 +910,7 @@
},
{
"cell_type": "markdown",
+ "id": "d2ce5442",
"metadata": {},
"source": [
"We will create a constant array with the climatology value that has the same length as the time series:"
@@ -836,6 +919,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "10a9173c",
"metadata": {},
"outputs": [],
"source": [
@@ -844,6 +928,7 @@
},
{
"cell_type": "markdown",
+ "id": "7b263f44",
"metadata": {},
"source": [
"Let's plot the mean value since 1940. The values below the climatology will be highlighted in light blue while the values above the climatology will be highlighted in red. Code is commented in the code cell below."
@@ -852,6 +937,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "ef498af4",
"metadata": {},
"outputs": [],
"source": [
@@ -864,6 +950,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "b879e5a4",
"metadata": {},
"outputs": [],
"source": [
@@ -910,6 +997,7 @@
},
{
"cell_type": "markdown",
+ "id": "f584f57c",
"metadata": {},
"source": [
"Could you try a similar figure but using the anomalies (*\"monthly value\" - \"1991-2020 climatological value\"*) instead of the spatially aggregated average monthly values?"
@@ -918,6 +1006,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "af9f8782",
"metadata": {},
"outputs": [],
"source": [
@@ -926,6 +1015,7 @@
},
{
"cell_type": "markdown",
+ "id": "5b77860c",
"metadata": {},
"source": [
"Now let's order the months from colder to warmer."
@@ -934,6 +1024,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "819e5613",
"metadata": {},
"outputs": [],
"source": [
@@ -942,6 +1033,7 @@
},
{
"cell_type": "markdown",
+ "id": "dc997bf8",
"metadata": {},
"source": [
"Let's have a look to the result and check if it is sorted."
@@ -950,6 +1042,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "97493e3c",
"metadata": {},
"outputs": [],
"source": [
@@ -958,6 +1051,7 @@
},
{
"cell_type": "markdown",
+ "id": "ce41cbed",
"metadata": {},
"source": [
"If we plot the ranking from colder to warmer including also the climate normal we'll see the following. As before, code is commented in the code cell below:"
@@ -966,6 +1060,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "a1fe678e",
"metadata": {},
"outputs": [],
"source": [
@@ -1013,6 +1108,7 @@
},
{
"cell_type": "markdown",
+ "id": "ee592192",
"metadata": {},
"source": [
"## 5. View time series and analyse North Atlantic sea surface temperature trends"
@@ -1020,6 +1116,7 @@
},
{
"cell_type": "markdown",
+ "id": "0329dc4f",
"metadata": {},
"source": [
"#### This is a new exercise. In this part of the tutorial we will be working with monthly sea surface temperature (SST) data.\n",
@@ -1040,6 +1137,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "fed64d52",
"metadata": {},
"outputs": [],
"source": [
@@ -1080,6 +1178,7 @@
},
{
"cell_type": "markdown",
+ "id": "6ab7e416",
"metadata": {},
"source": [
"Let's do some work with this new dataset. First of all, let's read it."
@@ -1088,6 +1187,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "992d6184",
"metadata": {},
"outputs": [],
"source": [
@@ -1096,6 +1196,7 @@
},
{
"cell_type": "markdown",
+ "id": "a08d8b8d",
"metadata": {},
"source": [
"Now we can have a look at the dataset:"
@@ -1104,6 +1205,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "014eeeb6",
"metadata": {},
"outputs": [],
"source": [
@@ -1112,6 +1214,7 @@
},
{
"cell_type": "markdown",
+ "id": "b0ab7a75",
"metadata": {},
"source": [
"As before, we see there are four dimensions and units are in *Kelvin*. We will work with data in *degrees Celsius* and we will reduce the `expver` dimension as before:"
@@ -1120,6 +1223,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "bdaa0b17",
"metadata": {},
"outputs": [],
"source": [
@@ -1132,6 +1236,7 @@
},
{
"cell_type": "markdown",
+ "id": "3b91a7ab",
"metadata": {},
"source": [
"We can have a quick look at the data using the convenient `plot` method:"
@@ -1140,6 +1245,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "1b0082ff",
"metadata": {},
"outputs": [],
"source": [
@@ -1148,6 +1254,7 @@
},
{
"cell_type": "markdown",
+ "id": "f1cee5e7",
"metadata": {},
"source": [
"In the plot above we can see many values are below 0, those located on land. Actually, in the original `sst_ds` `xarray.Dataset` the land positions had a value of `numpy.nan`. Now, for `sst_expver` this is not true. This is a result of the previous operation using `numpy.nansum` and subtracting `273.15`. After this operation the land locations have a value of `-273.15` which is not valid. Let's amend this using a mask:"
@@ -1156,6 +1263,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "c781e076",
"metadata": {},
"outputs": [],
"source": [
@@ -1166,6 +1274,7 @@
},
{
"cell_type": "markdown",
+ "id": "0447a66f",
"metadata": {},
"source": [
"Again, as before, we weight the dataset by the area:"
@@ -1174,6 +1283,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "7c6d857c",
"metadata": {},
"outputs": [],
"source": [
@@ -1185,6 +1295,7 @@
},
{
"cell_type": "markdown",
+ "id": "cb0a62dc",
"metadata": {},
"source": [
"And, also, we calculate the spatially averaged value for each month to get a monthly time series of the average temperature of the sst over the main area of the North Atlantic from January 1991 to July 2023:"
@@ -1193,6 +1304,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "88de69f6",
"metadata": {},
"outputs": [],
"source": [
@@ -1203,6 +1315,7 @@
},
{
"cell_type": "markdown",
+ "id": "7b54ae7e",
"metadata": {},
"source": [
"In the plot above we can see the monthly evolution since 1991.\n",
@@ -1213,6 +1326,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "ab4bffcb",
"metadata": {},
"outputs": [],
"source": [
@@ -1234,6 +1348,7 @@
},
{
"cell_type": "markdown",
+ "id": "8e29cd28",
"metadata": {},
"source": [
"And once we have this we can compare how recent SST values compare with those of previous years and to the climatology.\n",
@@ -1249,6 +1364,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "5e6cb3b4",
"metadata": {},
"outputs": [],
"source": [
@@ -1282,6 +1398,7 @@
},
{
"cell_type": "markdown",
+ "id": "5d7efd10",
"metadata": {},
"source": [
"Notice the dramatic increase in SST over the North Atlantic in 2023 compared to previous years!"
diff --git a/tutorials/04_seasonal_forecast/04x02_sf-verification_NOT_WORKING.ipynb b/tutorials/04_seasonal_forecast/04x02_sf-verification_NOT_WORKING.ipynb
new file mode 100644
index 0000000..450a792
--- /dev/null
+++ b/tutorials/04_seasonal_forecast/04x02_sf-verification_NOT_WORKING.ipynb
@@ -0,0 +1,1169 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "74c186a0",
+ "metadata": {},
+ "source": [
+ "# Seasonal Forecast Verification"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a9e2ed65",
+ "metadata": {},
+ "source": [
+ "### About"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1dbc6097",
+ "metadata": {},
+ "source": [
+ "This notebook provides a practical introduction on how to produce some verification metrics and scores for seasonal forecasts with data from the Copernicus Climate Change Service (C3S). C3S seasonal forecast products are based on data from several state-of-the-art seasonal prediction systems. In this notebook, as an example, we will focus on data produced by [CMCC SPSv3.5 system](https://confluence.ecmwf.int/display/CKB/Description+of+CMCC-CM2-v20191201+C3S+contribution), which is one of the forecasting systems available through C3S.\n",
+ "\n",
+ "The tutorial will demonstrate how to access retrospective forecast (hindcast) data of 2-metre temperature initialized in the period 1993-2016, with a forecast start date in the 1st of March. All these forecasts are 6 months long (from March to August). More details about the role of the hindcasts can be found in [this Copernicus Knowledge Base article](https://confluence.ecmwf.int/display/CKB/Seasonal+forecasts+and+the+Copernicus+Climate+Change+Service). Observation data (ERA5 reanalysis) for the same reference period, 1993 to 2016, and the same months will also be obtained from the CDS. The tutorial will then show how to compute some deterministic products (anomalies) and some probabilistic products (probabilities for tercile categories). In addition to the 1-month average data retrieved from the CDS, 3-months aggregations will be also produced. Finally, verification metrics (correlation, area under the ROC curve, and RPS) will be calculated and visualised in a set of plots."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "79e7a545",
+ "metadata": {},
+ "source": [
+ "The notebook has the following outline:\n",
+ "* 1 - Request data from the CDS using CDS API\n",
+ " * 1a - Retrieve hindcast data\n",
+ " * 1b - Retrieve observations data (ERA5) \n",
+ "* 2 - Compute deterministic and probabilistic products from the hindcast data\n",
+ " * 2a - Anomalies\n",
+ " * 2b - Probabilities for tercile categories\n",
+ "* 3 - Compute deterministic and probabilistic scores\n",
+ " * 3a - Read observations data into a xr.Dataset\n",
+ " * 3b - Compute deterministic scores\n",
+ " * 3c - Compute probabilistic scores for tercile categories\n",
+ "* 4 - Visualize verification plots\n",
+ "\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "587b347d",
+ "metadata": {},
+ "source": [
+ "Please see here the full documentation of the [C3S Seasonal Forecast Datasets](https://confluence.ecmwf.int/display/CKB/C3S+Seasonal+Forecasts%3A+datasets+documentation). This notebook will use data from the CDS dataset [seasonal forecast monthly statistics on single levels](https://cds-beta.climate.copernicus.eu/datasets/seasonal-monthly-single-levels?tab=overview) (as opposed to multiple levels in the atmosphere)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1fe7f66c",
+ "metadata": {},
+ "source": [
+ "\n",
+ "
NOTE:
\n",
+ " An example on the use of the code of this notebook to create verification plots for the operational seasonal forecast systems available in the CDS can be found in a
dedicated page of the documentation hosted in the Copernicus Knowledge Base. \n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2d21f858",
+ "metadata": {},
+ "source": [
+ "### How to run this tutorial\n",
+ "To run this Jupyter notebook tutorial, you will need to install a number of Python packages. A good way to start is by installing [Anaconda](https://www.anaconda.com/), which is a popular Python distribution for scientific programming. The cell below can be run to install the various packages needed to run this tutorial, assuming you have already installed Python and Jupyter (which come with Anaconda)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "15b15a9e",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "!pip install numpy pandas xarray xskillscore\n",
+ "!pip install cdsapi\n",
+ "!pip install matplotlib cartopy\n",
+ "!pip install cfgrib"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5c56a698",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pip install -U xskillscore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8ef8edea",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "42e128c5",
+ "metadata": {},
+ "source": [
+ "\n",
+ "WARNING:
\n",
+ " Please take into account that to run this tutorial, around 8GB of RAM and around 5GB of disk space are needed.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "24b5206e",
+ "metadata": {},
+ "source": [
+ "### Load packages"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c4d26b91",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# CDS API\n",
+ "import cdsapi\n",
+ "\n",
+ "# Libraries for working with multi-dimensional arrays\n",
+ "import xarray as xr\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "# Forecast verification metrics with xarray\n",
+ "import xskillscore as xs\n",
+ "\n",
+ "# Date and calendar libraries\n",
+ "from dateutil.relativedelta import relativedelta\n",
+ "import calendar\n",
+ "\n",
+ "# Libraries for plotting and geospatial data visualisation\n",
+ "from matplotlib import pyplot as plt\n",
+ "import cartopy.crs as ccrs\n",
+ "import cartopy.feature as cfeature\n",
+ "\n",
+ "# Disable warnings for data download via API and matplotlib (do I need both???)\n",
+ "import warnings\n",
+ "warnings.filterwarnings('ignore')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "662df356",
+ "metadata": {},
+ "source": [
+ "## 1. Request data from the CDS using CDS API"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9e007558",
+ "metadata": {},
+ "source": [
+ "First we will set up some common variables to be used in this notebook to define the folder containing the data, the variables to be retrieved, the hindcast period and the [nominal start month](https://confluence.ecmwf.int/display/CKB/Summary+of+available+data#Summaryofavailabledata-nominal_start_date)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "578e6385",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "DATADIR = './data'\n",
+ "\n",
+ "config = dict(\n",
+ " list_vars = ['2m_temperature', ],\n",
+ " hcstarty = 1993,\n",
+ " hcendy = 2016,\n",
+ " start_month = 3,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "de42e329",
+ "metadata": {},
+ "source": [
+ "After setting up this initial configuration variables, the existence of all the data folders will be checked and directories will be created if needed."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2ac9689c",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "SCOREDIR = DATADIR + '/scores'\n",
+ "PLOTSDIR = DATADIR + f'/plots/stmonth{config[\"start_month\"]:02d}'\n",
+ "\n",
+ "for directory in [DATADIR, SCOREDIR, PLOTSDIR]:\n",
+ " # Check if the directory exists\n",
+ " if not os.path.exists(directory):\n",
+ " # If it doesn't exist, create it\n",
+ " os.makedirs(directory)\n",
+ " print(f'Creating folder {directory}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eb7293de",
+ "metadata": {},
+ "source": [
+ "The first step is to request data from the Climate Data Store programmatically with the help of the CDS API. Let us make use of the option to manually set the CDS API credentials. First, you have to define two variables: `CDSAPI_URL` and `CDSAPI_KEY` which build together your CDS API key. Below, you have to replace the `#########` with your personal CDS key. Please find [here](https://cds-beta.climate.copernicus.eu/how-to-api) your personal CDS key."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "24093960",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "CDSAPI_URL = 'https://cds-beta.climate.copernicus.eu/api'\n",
+ "CDSAPI_KEY = '########################################'\n",
+ "\n",
+ "c = cdsapi.Client(url=CDSAPI_URL, key=CDSAPI_KEY)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "90f23c32",
+ "metadata": {},
+ "source": [
+ "### 1a. Retrieve hindcast data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "03c60e1b",
+ "metadata": {
+ "papermill": {
+ "duration": 0.42258,
+ "end_time": "2022-03-14T17:52:13.01783",
+ "exception": false,
+ "start_time": "2022-03-14T17:52:12.59525",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "The next step is then to request the seasonal forecast monthly statistics data on single levels with the help of the CDS API. Below, we download a GRIB file containing the retrospective forecasts (hindcasts, or reforecasts) for the variables, start month and years defined in the variable `config` \n",
+ "\n",
+ "Running the code block below will download the data from the CDS as specified by the following API keywords:\n",
+ "\n",
+ "> **Format**: `Grib`
\n",
+ "> **Originating centre**: `CMCC`
\n",
+ "> **System**: `35` *this refers to CMCC SPSv3.5*
\n",
+ "> **Variable**: `2-metre temperature`
\n",
+ "> **Product type**: `Monthly mean` *all ensemble members will be retrieved*
\n",
+ "> **Year**: `1993 to 2016`
\n",
+ "> **Month**: `03` *March*
\n",
+ "> **Leadtime month**: `1 to 6` *All available lead times (March to August)*\n",
+ "\n",
+ "Note we will also be defining some variable, `origin`, with the forecast system details to be included in the `config` variable. Additionally it defines a base name variable `hcst_bname` to be used throughout the notebook."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0f4cc59e",
+ "metadata": {},
+ "source": [
+ "If you have not already done so, you will need to accept the **terms & conditions** of the data before you can download it. These can be viewed and accepted in the [CDS download page](https://cds-beta.climate.copernicus.eu/datasets/seasonal-monthly-single-levels?tab=download) by scrolling to the end of the download form."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "45ca97eb",
+ "metadata": {},
+ "source": [
+ "\n",
+ "
NOTE:
\n",
+ " An API request can be generated automatically from the
CDS download page. At the end of the download form there is a
Show API request
icon, which allows to copy-paste a snippet of code equivalent to the one used below.
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "96ce56e8",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "origin = 'cmcc.s35'\n",
+ "origin_labels = {'institution': 'CMCC', 'name': 'SPSv3.5'}\n",
+ "\n",
+ "config['origin'], config['system'] = origin.split('.s')\n",
+ "\n",
+ "\n",
+ "hcst_bname = '{origin}_s{system}_stmonth{start_month:02d}_hindcast{hcstarty}-{hcendy}_monthly'.format(**config)\n",
+ "hcst_fname = f'{DATADIR}/{hcst_bname}.grib'\n",
+ "print(hcst_fname)\n",
+ "\n",
+ "\n",
+ "c.retrieve(\n",
+ " 'seasonal-monthly-single-levels',\n",
+ " {\n",
+ " 'data_format': 'grib',\n",
+ " 'originating_centre': config['origin'],\n",
+ " 'system': config['system'],\n",
+ " 'variable': config['list_vars'],\n",
+ " 'product_type': 'monthly_mean',\n",
+ " 'year': ['{}'.format(yy) for yy in range(config['hcstarty'],config['hcendy']+1)],\n",
+ " 'month': '{:02d}'.format(config['start_month']),\n",
+ " 'leadtime_month': ['1', '2', '3','4', '5', '6'],\n",
+ " },\n",
+ " hcst_fname)\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "47223479",
+ "metadata": {},
+ "source": [
+ "### 1b. Retrieve observations data (ERA5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ed320073",
+ "metadata": {
+ "papermill": {
+ "duration": 0.42258,
+ "end_time": "2022-03-14T17:52:13.01783",
+ "exception": false,
+ "start_time": "2022-03-14T17:52:12.59525",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "Now we will request from the CDS the observation data that will be used as the ground truth against which the hindcast data will be compared. In this notebook we will be using as observational reference the CDS dataset `reanalysis-era5-single-levels-monthly-means` which contains [ERA5](https://cds-beta.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means?tab=overview) monthly averaged data from the CDS. In order to compare it with the hindcast data we have just downloaded above, we will ask the CDS to regrid it to the same grid used by the C3S seasonal forecasts.\n",
+ "\n",
+ "Running the code block below will download the ERA5 data from the CDS as specified by the following API keywords:\n",
+ "\n",
+ "> **Format**: `Grib`
\n",
+ "> **Variable**: `2-metre temperature`
\n",
+ "> **Product type**: `Monthly averaged reanalysis`
\n",
+ "> **Year**: `1993 to 2016`
\n",
+ "> **Month**: `March to August` *same months contained in the hindcast data*
\n",
+ "> **Time**: `00`
\n",
+ "> **Grid**: `1x1-degree lat-lon regular grid`
\n",
+ "> **Area**: `-89.5 to 89.5 in latitude; 0.5 to 359.5 in longitude`
\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "86d99493",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "obs_fname = '{fpath}/era5_monthly_stmonth{start_month:02d}_{hcstarty}-{hcendy}.grib'.format(fpath=DATADIR,**config)\n",
+ "print(obs_fname)\n",
+ "\n",
+ "\n",
+ "c.retrieve(\n",
+ " 'reanalysis-era5-single-levels-monthly-means',\n",
+ " {\n",
+ " 'product_type': 'monthly_averaged_reanalysis',\n",
+ " 'variable': config['list_vars'],\n",
+ " # NOTE from observations we need to go one year beyond so we have available all the right valid dates\n",
+ " # e.g. Nov.2016 start date forecast goes up to April 2017 \n",
+ " 'year': ['{}'.format(yy) for yy in range(config['hcstarty'],config['hcendy']+2)],\n",
+ " 'month': ['{:02d}'.format((config['start_month']+leadm)%12) if config['start_month']+leadm!=12 else '12' for leadm in range(6)],\n",
+ " 'time': '00:00',\n",
+ " # We can ask CDS to interpolate ERA5 to the same grid used by C3S seasonal forecasts\n",
+ " 'grid': '1/1',\n",
+ " 'area': '89.5/0.5/-89.5/359.5',\n",
+ " 'data_format': 'grib',\n",
+ " },\n",
+ " obs_fname)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b1e19f17",
+ "metadata": {},
+ "source": [
+ "## 2. Compute deterministic and probabilistic products from the hindcast data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6b94f3cc",
+ "metadata": {},
+ "source": [
+ "In this section we will be calculating the different derived products we will be scoring later in the notebook. We will use the reference period defined in `config[\"hcstarty\"]`,`config[\"hcendy\"]` to calculate:\n",
+ "* anomalies: defined as the difference of a given hindcast to the model climate, computed as the average over all the available members (that is, among all start years and ensemble members)\n",
+ "* probabilities for tercile categories: defined as the proportion of members in a given hindcast lying within each one of the categories bounded by the terciles computed from all available members in the reference period\n",
+ "\n",
+ "In addition to the 1-month anomalies and probabilities, a rolling average of 3 months will be also computed to produce anomalies and probabilities for this 3-months aggregation."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cfc4b79c",
+ "metadata": {},
+ "source": [
+ "\n",
+ "NOTE:
\n",
+ " Some of the seasonal forecast monthly data on the CDS comes from systems using members initialized on different start dates (lagged start date ensembles). In the GRIB encoding used for those systems we will therefore have two different xarray/cfgrib
keywords for the real start date of each member (time
) and for the nominal start date (indexing_time
) which is the one we would need to use for those systems initializing their members with a lagged start date approach.\n",
+ " The following line of code will take care of that as long as we include the value config['isLagged']=True
in the config dictionary as defined in section 1.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5ce38886",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# For the re-shaping of time coordinates in xarray.Dataset we need to select the right one \n",
+ "# -> burst mode ensembles (e.g. ECMWF SEAS5) use \"time\". This is the default option in this notebook\n",
+ "# -> lagged start ensembles (e.g. MetOffice GloSea6) use \"indexing_time\" (see CDS documentation about nominal start date)\n",
+ "st_dim_name = 'time' if not config.get('isLagged',False) else 'indexing_time'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3a77c97e",
+ "metadata": {},
+ "source": [
+ "### 2a. Anomalies"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a87790fc",
+ "metadata": {},
+ "source": [
+ "We will start opening the hindcast data GRIB file we downloaded in the previous section to load it into an `xarray.Dataset` object.\n",
+ "\n",
+ "Some minor modifications of the `xr.Dataset` object also happen here:\n",
+ "* We define chunks on the forecastMonth (leadtime) coordinate so `dask.array` can be used behind the scenes.\n",
+ "* We rename some coordinates to align them with the names they will have in the observations `xr.Dataset` and those expected by default in the `xskillscore` package we will be using for the calculation of scores.\n",
+ "* We create a new array named `valid_time` to be a new coordinate (depending on both `start_date` and `forecastMonth`).\n",
+ "\n",
+ "After that, both anomalies for 1-month and 3-months aggregations are computed and stored into a netCDF file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b85a5221",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "print('Reading HCST data from file')\n",
+ "hcst = xr.open_dataset(hcst_fname,engine='cfgrib', backend_kwargs=dict(time_dims=('forecastMonth', st_dim_name)))\n",
+ "hcst = hcst.chunk({'forecastMonth':1, 'latitude':'auto', 'longitude':'auto'}) #force dask.array using chunks on leadtime, latitude and longitude coordinate\n",
+ "hcst = hcst.rename({'latitude':'lat','longitude':'lon', st_dim_name:'start_date'})\n",
+ "\n",
+ "print ('Re-arranging time metadata in xr.Dataset object')\n",
+ "# Add start_month to the xr.Dataset\n",
+ "start_month = pd.to_datetime(hcst.start_date.values[0]).month\n",
+ "hcst = hcst.assign_coords({'start_month':start_month})\n",
+ "# Add valid_time to the xr.Dataset\n",
+ "vt = xr.DataArray(dims=('start_date','forecastMonth'), coords={'forecastMonth':hcst.forecastMonth,'start_date':hcst.start_date})\n",
+ "vt.data = [[pd.to_datetime(std)+relativedelta(months=fcmonth-1) for fcmonth in vt.forecastMonth.values] for std in vt.start_date.values]\n",
+ "hcst = hcst.assign_coords(valid_time=vt)\n",
+ "\n",
+ "# CALCULATE 3-month AGGREGATIONS\n",
+ "# NOTE rolling() assigns the label to the end of the N month period, so the first N-1 elements have NaN and can be dropped\n",
+ "print('Computing 3-month aggregation')\n",
+ "hcst_3m = hcst.rolling(forecastMonth=3).mean()\n",
+ "hcst_3m = hcst_3m.where(hcst_3m.forecastMonth>=3,drop=True)\n",
+ "\n",
+ "\n",
+ "# CALCULATE ANOMALIES (and save to file)\n",
+ "print('Computing anomalies 1m')\n",
+ "hcmean = hcst.mean(['number','start_date'])\n",
+ "anom = hcst - hcmean\n",
+ "anom = anom.assign_attrs(reference_period='{hcstarty}-{hcendy}'.format(**config))\n",
+ "\n",
+ "print('Computing anomalies 3m')\n",
+ "hcmean_3m = hcst_3m.mean(['number','start_date'])\n",
+ "anom_3m = hcst_3m - hcmean_3m\n",
+ "anom_3m = anom_3m.assign_attrs(reference_period='{hcstarty}-{hcendy}'.format(**config))\n",
+ "\n",
+ "print('Saving anomalies 1m/3m to netCDF files')\n",
+ "anom.to_netcdf(f'{DATADIR}/{hcst_bname}.1m.anom.nc')\n",
+ "anom_3m.to_netcdf(f'{DATADIR}/{hcst_bname}.3m.anom.nc')\n"
+ ]
+ },
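+ {
+ "cell_type": "markdown",
+ "id": "9f2c1a7e",
+ "metadata": {},
+ "source": [
+ "As an optional sanity check of the anomalies we have just computed, their average over the ensemble members and start dates of the reference period should be numerically close to zero, since the model climate was removed along exactly those dimensions. The sketch below is an addition to the original workflow; it assumes `anom` and `anom_3m` from the previous cell are still in memory and simply prints the largest absolute value of that residual."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4b8d2e6a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional check: anomalies should average to ~0 over the dimensions used to compute the model climate\n",
+ "for label, a in [('1m', anom), ('3m', anom_3m)]:\n",
+ "    resid = np.abs(a.mean(['number','start_date'])).to_array().max()\n",
+ "    print(f'{label}: max abs residual of anomaly mean = {float(resid):.2e}')"
+ ]
+ },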
+ {
+ "cell_type": "markdown",
+ "id": "62af30c5",
+ "metadata": {},
+ "source": [
+ "### 2b. Probabilities for tercile categories"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "75fb94c0",
+ "metadata": {},
+ "source": [
+ "Before we start computing the probabilities it will be useful to create a function devoted to computing the boundaries of a given category (`icat`) coming from a given set of quantiles (`quantiles`) of an `xr.Dataset` object."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "721ed6ba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# We define a function to calculate the boundaries of forecast categories defined by quantiles\n",
+ "def get_thresh(icat,quantiles,xrds,dims=['number','start_date']):\n",
+ "\n",
+ " if not all(elem in xrds.dims for elem in dims): \n",
+ " raise Exception('Some of the dimensions in {} is not present in the xr.Dataset {}'.format(dims,xrds)) \n",
+ " else:\n",
+ " if icat == 0:\n",
+ " xrds_lo = -np.inf\n",
+ " xrds_hi = xrds.quantile(quantiles[icat],dim=dims) \n",
+ " \n",
+ " elif icat == len(quantiles):\n",
+ " xrds_lo = xrds.quantile(quantiles[icat-1],dim=dims)\n",
+ " xrds_hi = np.inf\n",
+ " \n",
+ " else:\n",
+ " xrds_lo = xrds.quantile(quantiles[icat-1],dim=dims)\n",
+ " xrds_hi = xrds.quantile(quantiles[icat],dim=dims)\n",
+ " \n",
+ " return xrds_lo,xrds_hi"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c64b2ceb",
+ "metadata": {},
+ "source": [
+ "We will start by creating a list with the quantiles which will be the boundaries for our categorical forecast. In this example our product will be based on 3 categories ('below lower tercile', 'normal' and 'above upper tercile') so we will be using as category boundaries the values `quantiles = [1/3., 2/3.]`\n",
+ "\n",
+ "The `xr.Dataset` objects `hcst` and `hcst_3m` read in the previous step from the corresponding netCDF files will be used here, and therefore probabilities for both 1-month and 3-months aggregations are computed and stored into a netCDF file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3a4d0fba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# CALCULATE PROBABILITIES for tercile categories by counting members within each category\n",
+ "\n",
+ "print('Computing probabilities (tercile categories)')\n",
+ "quantiles = [1/3., 2/3.]\n",
+ "numcategories = len(quantiles)+1\n",
+ "\n",
+ "for aggr,h in [(\"1m\",hcst), (\"3m\",hcst_3m)]:\n",
+ " print(f'Computing tercile probabilities {aggr}')\n",
+ "\n",
+ " l_probs_hcst=list()\n",
+ " for icat in range(numcategories):\n",
+ " print(f'category={icat}')\n",
+ " h_lo,h_hi = get_thresh(icat, quantiles, h)\n",
+ " probh = np.logical_and(h>h_lo, h<=h_hi).sum('number')/float(h.dims['number'])\n",
+ " # Instead of using the coordinate 'quantile' coming from the hindcast xr.Dataset\n",
+ " # we will create a new coordinate called 'category'\n",
+ " if 'quantile' in probh:\n",
+ " probh = probh.drop('quantile')\n",
+ " l_probs_hcst.append(probh.assign_coords({'category':icat}))\n",
+ "\n",
+ " print(f'Concatenating {aggr} tercile probs categories')\n",
+ " probs = xr.concat(l_probs_hcst,dim='category') \n",
+ " print(f'Saving {aggr} tercile probs netCDF files')\n",
+ " probs.to_netcdf(f'{DATADIR}/{hcst_bname}.{aggr}.tercile_probs.nc')"
+ ]
+ },
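+ {
+ "cell_type": "markdown",
+ "id": "7d3e9b10",
+ "metadata": {},
+ "source": [
+ "Since every ensemble member falls in exactly one of the three categories, the probabilities should add up to (very close to) one when summed over the `category` coordinate. The optional check below is not part of the original workflow and assumes the `probs` object from the last iteration of the loop above (the 3-months aggregation) is still available."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2e6f0c44",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional check: tercile probabilities should sum to ~1 across the 'category' coordinate\n",
+ "psum = probs.sum('category').to_array()\n",
+ "print('min/max of probability sum:', float(psum.min()), float(psum.max()))"
+ ]
+ },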
+ {
+ "cell_type": "markdown",
+ "id": "45bb14c2",
+ "metadata": {},
+ "source": [
+ "## 3. Compute deterministic and probabilistic scores"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6e5ee40f",
+ "metadata": {},
+ "source": [
+ "In this section we will produce a set of metrics and scores consistent with the [guidance on forecast verification](https://library.wmo.int/doc_num.php?explnum_id=4886) provided by WMO. Those values can be used to describe statistics of forecast performance over a past period (the reference period defined in the variable `config`), as an indication of expectation of 'confidence' in the real-time outputs.\n",
+ "\n",
+ "In this notebook metrics and scores for both deterministic (anomalies) and probabilistic (probabilities of tercile categories) products, as those provided in the C3S seasonal forecast graphical products section, will be computed. Specifically:\n",
+ "* Deterministic:\n",
+ " * linear temporal correlation (Spearman rank correlation)\n",
+ "* Probabilistic:\n",
+ " * area under the ROC curve (relative, or 'receiver', operating characteristic)\n",
+ " * RPS (ranked probability score)\n",
+ "\n",
+ "Values for both 1-month and 3-months aggregations will be produced and stored into netCDF files."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bf7a97d4",
+ "metadata": {},
+ "source": [
+ "### 3a. Read observations data into a xr.Dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c5093f24",
+ "metadata": {},
+ "source": [
+ "Before any score can be calculated we will need to read into an `xr.Dataset` object the observational data from the GRIB file we downloaded in the first section of this notebook (whose name was stored in variable `obs_fname`).\n",
+ "\n",
+ "Similarly to what we did with the hindcast data, some minor modifications of the `xr.Dataset` object also happen here:\n",
+ "\n",
+ "* We rename some coordinates to align them with the names in the hindcast `xr.Dataset` and those expected by default in the `xskillscore` package we will be using for the calculation of scores.\n",
+ "* We index the reanalysis data with `forecastMonth` to allow an easy like-for-like comparison of valid times.\n",
+ "\n",
+ "In addition to the monthly averages we retrieved from the CDS, a rolling average of 3 months is also computed to use as observational reference for the 3-months aggregation on the hindcasts."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "71bc4e03",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "era5_1deg = xr.open_dataset(obs_fname, engine='cfgrib')\n",
+ "\n",
+ "# Renaming to match hindcast names \n",
+ "era5_1deg = era5_1deg.rename({'latitude':'lat','longitude':'lon','time':'start_date'}).swap_dims({'start_date':'valid_time'})\n",
+ "\n",
+ "# Assign 'forecastMonth' coordinate values\n",
+ "fcmonths = [mm+1 if mm>=0 else mm+13 for mm in [t.month - config['start_month'] for t in pd.to_datetime(era5_1deg.valid_time.values)] ]\n",
+ "era5_1deg = era5_1deg.assign_coords(forecastMonth=('valid_time',fcmonths))\n",
+ "# Drop obs values not needed (earlier than first start date) - this is useful to create well shaped 3-month aggregations from obs.\n",
+ "era5_1deg = era5_1deg.where(era5_1deg.valid_time>=np.datetime64('{hcstarty}-{start_month:02d}-01'.format(**config)),drop=True)\n",
+ "\n",
+ "# CALCULATE 3-month AGGREGATIONS\n",
+ "# NOTE rolling() assigns the label to the end of the N month period\n",
+ "print('Calculate observation 3-monthly aggregations')\n",
+ "# NOTE care should be taken with the data available in the \"obs\" xr.Dataset so the rolling mean (over valid_time) is meaningful\n",
+ "era5_1deg_3m = era5_1deg.rolling(valid_time=3).mean()\n",
+ "era5_1deg_3m = era5_1deg_3m.where(era5_1deg_3m.forecastMonth>=3)\n",
+ "\n",
+ "# As we don't need it anymore at this stage, we can safely remove 'forecastMonth'\n",
+ "era5_1deg = era5_1deg.drop('forecastMonth')\n",
+ "era5_1deg_3m = era5_1deg_3m.drop('forecastMonth')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0292bce6",
+ "metadata": {},
+ "source": [
+ "### 3b. Compute deterministic scores"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6292be5a",
+ "metadata": {},
+ "source": [
+ "The score used here is temporal correlation (Spearman rank correlation) calculated at each grid point and for each leadtime.\n",
+ "\n",
+ "We will start reading the netCDF files containing the anomalies we computed in section 2 of this notebook into a `xr.Dataset` object.\n",
+ "\n",
+ "For an easier use of the `xs.spearman_r` function a loop over the hindcast leadtimes (`forecastMonth`) has been introduced, requiring as a final step a concatenation of the values computed for each leadtime.\n",
+ "\n",
+ "In addition to the correlation, the p-values will be computed to allow us plotting not only the values of correlation, but also where those values are statistically significant.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3fd7c41d",
+ "metadata": {},
+ "source": [
+ "\n",
+ "NOTE:
\n",
+ " The computations shown here are prepared to cater for anomalies files containing all ensemble members or files containing only an ensemble mean anomaly. The presence (or absence) of a dimension called number
in the hindcast xr.Dataset
object will indicate we have a full ensemble (or an ensemble mean).
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "423bda99",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Loop over aggregations\n",
+ "for aggr in ['1m','3m']:\n",
+ "\n",
+ " if aggr=='1m':\n",
+ " o = era5_1deg\n",
+ " elif aggr=='3m':\n",
+ " o = era5_1deg_3m\n",
+ " else:\n",
+ " raise BaseException(f'Unknown aggregation {aggr}')\n",
+ "\n",
+ " print(f'Computing deterministic scores for {aggr}-aggregation')\n",
+ "\n",
+ " # Read anomalies file\n",
+ " h = xr.open_dataset(f'{DATADIR}/{hcst_bname}.{aggr}.anom.nc' )\n",
+ " is_fullensemble = 'number' in h.dims\n",
+ "\n",
+ " l_corr=list()\n",
+ " l_corr_pval=list()\n",
+ "\n",
+ " for this_fcmonth in h.forecastMonth.values:\n",
+ " print(f'forecastMonth={this_fcmonth}' )\n",
+ " thishcst = h.sel(forecastMonth=this_fcmonth).swap_dims({'start_date':'valid_time'})\n",
+ " thisobs = o.where(o.valid_time==thishcst.valid_time,drop=True)\n",
+ " thishcst_em = thishcst if not is_fullensemble else thishcst.mean('number')\n",
+ " l_corr.append( xs.spearman_r(thishcst_em, thisobs, dim='valid_time') )\n",
+ " l_corr_pval.append ( xs.spearman_r_p_value(thishcst_em, thisobs, dim='valid_time') )\n",
+ "\n",
+ " print(f'Concatenating (by fcmonth) correlation for {aggr}-aggregation')\n",
+ " corr=xr.concat(l_corr,dim='forecastMonth')\n",
+ " corr_pval=xr.concat(l_corr_pval,dim='forecastMonth')\n",
+ "\n",
+ " print(f'Saving to netCDF file correlation for {aggr}-aggregation') \n",
+ " corr.to_netcdf(f'{DATADIR}/scores/{hcst_bname}.{aggr}.corr.nc')\n",
+ " corr_pval.to_netcdf(f'{DATADIR}/scores/{hcst_bname}.{aggr}.corr_pval.nc')\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6a0d422b",
+ "metadata": {},
+ "source": [
+ "### 3c. Compute probabilistic scores for tercile categories"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2b958bfe",
+ "metadata": {},
+ "source": [
+ "The scores used here for probabilistic forecasts are the area under the Relative Operating Characteristic (ROC) curve, the Ranked Probability Score (RPS) and the Brier Score (BS). Note that both ROC and BS are appropriate for binary events (applied individually to each category), while RPS scores all categories together. \n",
+ "\n",
+ "We will now read the netCDF files containing the probabilities we computed in section 2 of this notebook into a `xr.Dataset` object. Note the variables `quantiles` and `numcategories` defined there and describing the categories for which the probabilities have been computed will be also needed here.\n",
+ "\n",
+ "The step to compute the probabilities for each category in the observation data for each aggregation (1-month or 3-months) is also performed here starting from the `xr.Dataset` objects read in section 3a. (`era5_1deg` and `era5_1deg_3m`).\n",
+ "\n",
+ "A loop over the hindcast leadtimes (`forecastMonth`) has been introduced for an easier use of the `xskillscore` functions to compute ROC curve area and RPS. This requires as a final step a concatenation of the values computed for each leadtime."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0c66e277",
+ "metadata": {},
+ "source": [
+ "\n",
+ "NOTE:
\n",
+ " A couple of additional examples, both the calculation of Brier Score (BS) and the calculation of ROC Skill Score using climatology as a reference (which has a ROC value of 0.5), have been also included in the code below, but no plots will be produced for them in section 4 of the notebook.
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6c3dcb7d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Loop over aggregations\n",
+ "for aggr in ['1m','3m']:\n",
+ "\n",
+ " if aggr=='1m':\n",
+ " o = era5_1deg\n",
+ " elif aggr=='3m':\n",
+ " o = era5_1deg_3m\n",
+ " else:\n",
+ " raise BaseException(f'Unknown aggregation {aggr}')\n",
+ "\n",
+ " print(f'Computing deterministic scores for {aggr}-aggregation')\n",
+ " \n",
+ " # READ hindcast probabilities file\n",
+ " probs_hcst = xr.open_dataset(f'{DATADIR}/{hcst_bname}.{aggr}.tercile_probs.nc')\n",
+ "\n",
+ " l_roc=list()\n",
+ " l_rps=list()\n",
+ " l_rocss=list()\n",
+ " l_bs=list()\n",
+ " for this_fcmonth in probs_hcst.forecastMonth.values:\n",
+ " print(f'forecastMonth={this_fcmonth}')\n",
+ " thishcst = probs_hcst.sel(forecastMonth=this_fcmonth).swap_dims({'start_date':'valid_time'})\n",
+ "\n",
+ " # CALCULATE probabilities from observations\n",
+ " print('We need to calculate probabilities (tercile categories) from observations')\n",
+ " l_probs_obs=list()\n",
+ " thiso = o.where(o.valid_time==thishcst.valid_time,drop=True)\n",
+ " for icat in range(numcategories):\n",
+ " #print(f'category={icat}')\n",
+ " o_lo,o_hi = get_thresh(icat, quantiles, thiso, dims=['valid_time'])\n",
+ " probo = 1. * np.logical_and(thiso>o_lo, thiso<=o_hi)\n",
+ " if 'quantile' in probo:\n",
+ " probo=probo.drop('quantile')\n",
+ " l_probs_obs.append(probo.assign_coords({'category':icat}))\n",
+ "\n",
+ " thisobs = xr.concat(l_probs_obs, dim='category')\n",
+ "\n",
+ " print('Now we can calculate the probabilistic (tercile categories) scores')\n",
+ " thisroc = xr.Dataset()\n",
+ " thisrps = xr.Dataset()\n",
+ " thisrocss = xr.Dataset()\n",
+ " thisbs = xr.Dataset()\n",
+ " for var in thishcst.data_vars:\n",
+ " thisroc[var] = xs.roc(thisobs[var],thishcst[var], dim='valid_time', bin_edges=np.linspace(0,1,101))\n",
+ "\n",
+ " thisrps[var] = xs.rps(thisobs[var],thishcst[var], dim='valid_time', category_edges=None, input_distributions='p')\n",
+ "\n",
+ " thisrocss[var] = (thisroc[var] - 0.5) / (1. - 0.5)\n",
+ " bscat = list()\n",
+ " for cat in thisobs[var].category:\n",
+ " #print(f'compute BS ({var}) for cat={cat.values}' )\n",
+ " thisobscat = thisobs[var].sel(category=cat)\n",
+ " thishcstcat = thishcst[var].sel(category=cat)\n",
+ " bscat.append(xs.brier_score(thisobscat, thishcstcat, dim='valid_time'))\n",
+ " thisbs[var] = xr.concat(bscat,dim='category')\n",
+ "\n",
+ " l_roc.append(thisroc)\n",
+ " l_rps.append(thisrps)\n",
+ " l_rocss.append(thisrocss)\n",
+ " l_bs.append(thisbs)\n",
+ "\n",
+ "\n",
+ " print('concat roc')\n",
+ " roc=xr.concat(l_roc,dim='forecastMonth')\n",
+ " print('concat rps')\n",
+ " rps=xr.concat(l_rps,dim='forecastMonth')\n",
+ " print('concat rocss')\n",
+ " rocss=xr.concat(l_rocss,dim='forecastMonth')\n",
+ " print('concat bs')\n",
+ " bs=xr.concat(l_bs,dim='forecastMonth')\n",
+ "\n",
+ " print('writing to netcdf rps')\n",
+ " rps.to_netcdf(f'{DATADIR}/scores/{hcst_bname}.{aggr}.rps.nc')\n",
+ " print('writing to netcdf bs')\n",
+ " bs.to_netcdf(f'{DATADIR}/scores/{hcst_bname}.{aggr}.bs.nc')\n",
+ " print('writing to netcdf roc')\n",
+ " roc.to_netcdf(f'{DATADIR}/scores/{hcst_bname}.{aggr}.roc.nc')\n",
+ " print('writing to netcdf rocss')\n",
+ " rocss.to_netcdf(f'{DATADIR}/scores/{hcst_bname}.{aggr}.rocss.nc')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1411b8e9",
+ "metadata": {},
+ "source": [
+ "## 4. Visualize verification plots"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b935869f",
+ "metadata": {},
+ "source": [
+ "After we have computed some metrics and scores for our seasonal forecast data, in this section some examples of visualization will be introduced."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c0289771",
+ "metadata": {},
+ "source": [
+ "\n",
+ "
NOTE:
\n",
+ " In the \"Introduction\" section of the
C3S seasonal forecasts verification plots page on the Copernicus Knowledge Base you can find some information on how these metrics and scores, consistent with the
guidance on forecast verification provided by WMO, describe statistics of forecast performance over a past period, as an indication of expectation of 'confidence' in the real-time outputs. \n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "01117acd",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "#### Common variables\n",
+ "\n",
+ "We start by setting up some variables to subset the plots we will be producing in this notebook:\n",
+ "\n",
+ "> **`aggr`**: to specify the aggregation period (possible values: \"1m\" or \"3m\")
\n",
+ "> **`fcmonth`**: to specify the leadtime (possible values: integer from 1 to 6)
\n",
+ "\n",
+ "Additionally variables used for labelling the plots are also defined here. It has been also included a change in the default values of the font size of the titles to improve the visual aspect of the plots."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7fedbcf6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Subset of plots to be produced\n",
+ "aggr = '1m'\n",
+ "fcmonth = 1\n",
+ "\n",
+ "# Common labels to be used in plot titles\n",
+ "VARNAMES = {\n",
+ " 't2m' : '2-metre temperature',\n",
+ "# 'sst' : 'sea-surface temperature',\n",
+ "# 'msl' : 'mean-sea-level pressure',\n",
+ "# 'tp' : 'total precipitation'\n",
+ "}\n",
+ "\n",
+ "\n",
+ "CATNAMES=['lower tercile', 'middle tercile', 'upper tercile']\n",
+ "\n",
+ "# Change titles font size\n",
+ "plt.rc('axes', titlesize=20)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7c9b63fa",
+ "metadata": {},
+ "source": [
+ "#### Create plot titles base information\n",
+ "\n",
+ "In the following variables we include some relevant information to be later used in the plot titles. Specifically:\n",
+ "* Line 1: Name of the institution and forecast system\n",
+ "* Line 2: Start date and valid date (with appropriate formatting depending on the aggregation, 1-month or 3-months)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4444e737",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# PREPARE STRINGS for TITLES\n",
+ "tit_line1 = '{institution} {name}'.format(**origin_labels)\n",
+ "# tit_line2_base = rf'Start month: $\\bf{calendar.month_abbr[config[\"start_month\"]].upper()}$'\n",
+ "tit_line2_base = f'Start month: {calendar.month_abbr[config[\"start_month\"]].upper()}'\n",
+ "\n",
+ "if aggr=='1m':\n",
+ " validmonth = config['start_month'] + (fcmonth-1)\n",
+ " validmonth = validmonth if validmonth<=12 else validmonth-12\n",
+ "# tit_line2 = tit_line2_base + rf' - Valid month: $\\bf{calendar.month_abbr[validmonth].upper()}$'\n",
+ " tit_line2 = tit_line2_base + f' - Valid month: {calendar.month_abbr[validmonth].upper()}'\n",
+ "elif aggr=='3m':\n",
+ " validmonths = [vm if vm<=12 else vm-12 for vm in [config['start_month'] + (fcmonth-1) - shift for shift in range(3)]]\n",
+ " validmonths = [calendar.month_abbr[vm][0] for vm in reversed(validmonths)]\n",
+ "# tit_line2 = tit_line2_base + rf' - Valid months: $\\bf{\"\".join(validmonths)}$'\n",
+ " tit_line2 = tit_line2_base + f' - Valid months: {\"\".join(validmonths)}'\n",
+ "else:\n",
+ " raise BaseException(f'Unexpected aggregation {aggr}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5a2a59a0",
+ "metadata": {},
+ "source": [
+ "### 4a. Correlation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "523d201a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read the data files\n",
+ "corr = xr.open_dataset(f'{DATADIR}/scores/{hcst_bname}.{aggr}.corr.nc')\n",
+ "corr_pval = xr.open_dataset(f'{DATADIR}/scores/{hcst_bname}.{aggr}.corr_pval.nc')\n",
+ "# RE-ARRANGE the DATASETS longitude values for plotting purposes\n",
+ "corr = corr.assign_coords(lon=(((corr.lon + 180) % 360) - 180)).sortby('lon')\n",
+ "corr_pval = corr_pval.assign_coords(lon=(((corr_pval.lon + 180) % 360) - 180)).sortby('lon')\n",
+ "\n",
+ "\n",
+ "thiscorr = corr.sel(forecastMonth=fcmonth)\n",
+ "thiscorrpval = corr_pval.sel(forecastMonth=fcmonth)\n",
+ "\n",
+ "for var in thiscorr.data_vars:\n",
+ " fig = plt.figure(figsize=(18,10))\n",
+ " ax = plt.axes(projection=ccrs.PlateCarree())\n",
+ " ax.add_feature(cfeature.BORDERS, edgecolor='black', linewidth=0.5)\n",
+ " ax.add_feature(cfeature.COASTLINE, edgecolor='black', linewidth=2.)\n",
+ " corrvalues = thiscorr[var].values \n",
+ " corrpvalvalues = thiscorrpval[var].values\n",
+ "\n",
+ " if corrvalues.T.shape == (thiscorr[var].lat.size, thiscorr[var].lon.size):\n",
+ " print('Data values matrices need to be transposed')\n",
+ " corrvalues = corrvalues.T\n",
+ " corrpvalvalues = corrpvalvalues.T\n",
+ " elif corrvalues.shape == (thiscorr[var].lat.size, thiscorr[var].lon.size):\n",
+ " pass \n",
+ " # print('Data values matrices shapes are ok')\n",
+ " else:\n",
+ " raise BaseException(f'Unexpected data value matrix shape: {corrvalues.shape}' )\n",
+ "\n",
+ "\n",
+ " plt.contourf(thiscorr[var].lon,thiscorr[var].lat,corrvalues,levels=np.linspace(-1.,1.,11),cmap='RdYlBu_r')\n",
+ " cb = plt.colorbar(shrink=0.5)\n",
+ " cb.ax.set_ylabel('Correlation',fontsize=12)\n",
+ " origylim = ax.get_ylim()\n",
+ " plt.contourf(thiscorrpval[var].lon,thiscorrpval[var].lat,corrpvalvalues,levels=[0.05,np.inf],hatches=['...',None],colors='none')\n",
+ " # We need to ensure after running plt.contourf() the ylim hasn't changed\n",
+ " if ax.get_ylim()!=origylim:\n",
+ " ax.set_ylim(origylim)\n",
+ "\n",
+ " plt.title(tit_line1 + f' {VARNAMES[var]}' +' (stippling where significance below 95%)\\n' + tit_line2,loc='left')\n",
+ "\n",
+ " \n",
+ " plt.tight_layout()\n",
+ " figname = f'{DATADIR}/plots/stmonth{config[\"start_month\"]:02d}/{hcst_bname}.{aggr}.fcmonth{fcmonth}.{var}.corr.png'\n",
+ " plt.savefig(figname)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cb4a706b",
+ "metadata": {},
+ "source": [
+ "### 4b. Ranked Probability Score (RPS)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3887913f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# READ scores xr.Datasets\n",
+ "rps = xr.open_dataset(f'{DATADIR}/scores/{hcst_bname}.{aggr}.rps.nc')\n",
+ "# RE-ARRANGE the DATASETS longitude values for plotting purposes\n",
+ "rps = rps.assign_coords(lon=(((rps.lon + 180) % 360) - 180)).sortby('lon')\n",
+ "\n",
+ "thisrps = rps.sel(forecastMonth=fcmonth)\n",
+ "\n",
+ "for var in thisrps.data_vars:\n",
+ " fig = plt.figure(figsize=(18,10))\n",
+ " ax = plt.axes(projection=ccrs.PlateCarree())\n",
+ " ax.add_feature(cfeature.BORDERS, edgecolor='black', linewidth=0.5)\n",
+ " ax.add_feature(cfeature.COASTLINE, edgecolor='black', linewidth=2.)\n",
+ " avalues = thisrps[var].values\n",
+ " cs = plt.contourf(thisrps[var].lon,thisrps[var].lat,avalues,levels=np.linspace(0.,0.5,11),cmap='YlGn_r', extend='max')\n",
+ " cs.cmap.set_under('purple')\n",
+ " cb = plt.colorbar(shrink=0.5)\n",
+ " cb.ax.set_ylabel('RPS',fontsize=12)\n",
+ " plt.title(tit_line1 + f' {VARNAMES[var]}' + ' (tercile categories)\\n' + tit_line2,loc='left')\n",
+ " plt.tight_layout() \n",
+ " figname = f'{DATADIR}/plots/stmonth{config[\"start_month\"]:02d}/{hcst_bname}.{aggr}.fcmonth{fcmonth}.{var}.rps.png'\n",
+ " plt.savefig(figname) \n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5407dcb9",
+ "metadata": {},
+ "source": [
+ "### 4c. Area under Relative Operating Characteristic (ROC) curve"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d2db367e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# READ scores xr.Datasets\n",
+ "roc = xr.open_dataset(f'{DATADIR}/scores/{hcst_bname}.{aggr}.roc.nc')\n",
+ "# RE-ARRANGE the DATASETS longitude values for plotting purposes\n",
+ "roc = roc.assign_coords(lon=(((roc.lon + 180) % 360) - 180)).sortby('lon')\n",
+ "\n",
+ "thisroc = roc.sel(forecastMonth=fcmonth)\n",
+ "\n",
+ "for var in thisroc.data_vars:\n",
+ " for icat in thisroc.category.values:\n",
+ " fig = plt.figure(figsize=(18,10))\n",
+ " ax = plt.axes(projection=ccrs.PlateCarree())\n",
+ " ax.add_feature(cfeature.BORDERS, edgecolor='black', linewidth=0.5)\n",
+ " ax.add_feature(cfeature.COASTLINE, edgecolor='black', linewidth=2.)\n",
+ " avalues = thisroc.sel(category=icat)[var].values\n",
+ " cs = plt.contourf(thisroc[var].lon,thisroc[var].lat,avalues,levels=np.linspace(0.5,1.,6),cmap='YlGn', extend='min')\n",
+ " cs.cmap.set_under('lightgray')\n",
+ " cb = plt.colorbar(shrink=0.5)\n",
+ " cb.ax.set_ylabel('Area under ROC curve',fontsize=12)\n",
+ " plt.title(tit_line1 + f' {VARNAMES[var]}' + f' ({CATNAMES[icat]})\\n' + tit_line2, loc='left')\n",
+ " plt.tight_layout() \n",
+ " figname = f'{DATADIR}/plots/stmonth{config[\"start_month\"]:02d}/{hcst_bname}.{aggr}.fcmonth{fcmonth}.{var}.category{icat}.roc.png'\n",
+ " plt.savefig(figname) "
+ ]
+ },
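+ {
+ "cell_type": "markdown",
+ "id": "8aa31f05",
+ "metadata": {},
+ "source": [
+ "As mentioned in the note in section 3c, the Brier Score (BS) was also computed and saved to a netCDF file, but no plot is produced for it by default. If you would like to visualize it as well, the optional sketch below mirrors the RPS and ROC plots above; it assumes the `{hcst_bname}.{aggr}.bs.nc` file written in section 3c and the common variables defined at the beginning of this section, and the contour levels are only an illustrative choice."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5c1d7e92",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# OPTIONAL: plot the Brier Score (BS) for each tercile category, following the same pattern as the ROC plot\n",
+ "bs = xr.open_dataset(f'{DATADIR}/scores/{hcst_bname}.{aggr}.bs.nc')\n",
+ "# RE-ARRANGE the DATASETS longitude values for plotting purposes\n",
+ "bs = bs.assign_coords(lon=(((bs.lon + 180) % 360) - 180)).sortby('lon')\n",
+ "\n",
+ "thisbs = bs.sel(forecastMonth=fcmonth)\n",
+ "\n",
+ "for var in thisbs.data_vars:\n",
+ "    for icat in thisbs.category.values:\n",
+ "        fig = plt.figure(figsize=(18,10))\n",
+ "        ax = plt.axes(projection=ccrs.PlateCarree())\n",
+ "        ax.add_feature(cfeature.BORDERS, edgecolor='black', linewidth=0.5)\n",
+ "        ax.add_feature(cfeature.COASTLINE, edgecolor='black', linewidth=2.)\n",
+ "        avalues = thisbs.sel(category=icat)[var].values\n",
+ "        plt.contourf(thisbs[var].lon,thisbs[var].lat,avalues,levels=np.linspace(0.,0.5,11),cmap='YlGn_r', extend='max')\n",
+ "        cb = plt.colorbar(shrink=0.5)\n",
+ "        cb.ax.set_ylabel('Brier Score',fontsize=12)\n",
+ "        plt.title(tit_line1 + f' {VARNAMES[var]}' + f' ({CATNAMES[icat]})\\n' + tit_line2, loc='left')\n",
+ "        plt.tight_layout()\n",
+ "        figname = f'{DATADIR}/plots/stmonth{config[\"start_month\"]:02d}/{hcst_bname}.{aggr}.fcmonth{fcmonth}.{var}.category{icat}.bs.png'\n",
+ "        plt.savefig(figname)"
+ ]
+ },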
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b6031fd0",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tutorials/06_bias_correction/6x00 Download and Preprocess.ipynb b/tutorials/06_bias_correction/6x00 Download and Preprocess.ipynb
new file mode 100644
index 0000000..7950adc
--- /dev/null
+++ b/tutorials/06_bias_correction/6x00 Download and Preprocess.ipynb
@@ -0,0 +1,847 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "754e8220",
+ "metadata": {},
+ "source": [
+ "# Sorting pre-requisits for ibicus: downloading and preprocessing data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "46c1c6de",
+ "metadata": {},
+ "source": [
+ "This notebook shows how to download and preprocess climate model data for bias correction and further use. To apply a bias adjustment method, three datasets are needed: 1) observation or reanalysis data; 2) historical climate model data over the same reference period that observations are available for; and 3) climate model data for a future, or more generally, application, period that is to be bias corrected. \n",
+ "\n",
+ "Here we will download and preprocess CMIP6 data from the Climate Data Store (CDS) as climate model input and two reanalysis datasets: 1) ERA5 from the CDS and 2) NCEP/DOE Reanalysis II from the PSL datastore (NOAA).\n",
+ "\n",
+ "There are many ways to access climate data on different temporal or spatial resolutions. This notebook is meant to illustrate one possible way to download data at daily resolution which is currently the primary temporal resolution supported in ibicus, although some can be applied at monthly resolution. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d9803f09",
+ "metadata": {},
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a4a70574",
+ "metadata": {},
+ "source": [
+ "### d4science_copernicus_cds Library\n",
+ "\n",
+ "To request data from the Climate Data Store (CDS) programmatically using the CDS API, we will manage our authentication with the `d4science_copernicus_cds` library.\n",
+ "\n",
+ "The library prompts us to enter our credentials, which are then securely saved in our workspace. **This request is only made the first time**; afterward, the `get_credentials` function will automatically retrieve the credentials from the environment or workspace, eliminating the need to re-enter them in the Jupyter notebook.\n",
+ "\n",
+ "To obtain your API credentials:\n",
+ "1. Register or log in to the CDS at [https://cds.climate.copernicus.eu](https://cds-beta.climate.copernicus.eu).\n",
+ "2. Visit [https://cds.climate.copernicus.eu/how-to-api](https://cds-beta.climate.copernicus.eu/how-to-api) and copy the API key provided.\n",
+ "\n",
+ "The library will prompt you to enter:\n",
+ "- **URL**: The URL field is prefilled; simply press Enter to accept the default.\n",
+ "- **KEY**: Insert the obtained API key when prompted, then confirm saving your credentials by pressing \"y.\"\n",
+ "\n",
+ "Once saved, your credentials will be loaded automatically in future sessions, ensuring a seamless experience with the CDS API."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "80d2e5fe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install git+https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bbec7c68",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from d4science_copernicus_cds import cds_get_credentials, cds_datadir"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c4aab64a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "URL, KEY = cds_get_credentials()\n",
+ "print(\"URL\", URL)\n",
+ "print (\"KEY\", KEY)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "68bb08de",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ac69b211",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import urllib\n",
+ "\n",
+ "# Scientific computing\n",
+ "import iris\n",
+ "import xarray\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "88aa13ae",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## 1. Download data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "15f7f634",
+ "metadata": {},
+ "source": [
+ "Let's create a data-directory where our inputs will go, if it does not yet exist:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0c707ab7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "DATADIR = \"data_download_and_preprocessing\"\n",
+ "\n",
+ "if not os.path.exists(DATADIR):\n",
+ " os.mkdir(DATADIR)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2ba8ad1b",
+ "metadata": {},
+ "source": [
+ "### 1.1. Download climate model data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "928fcceb",
+ "metadata": {},
+ "source": [
+ "To request climate data from the Climate Data Store (CDS) we will use the CDS API. Run the following cell if you have not yet istalled it:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fd98913b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#!pip install cdsapi\n",
+ "import cdsapi\n",
+ "\n",
+ "# We disable urllib3 (used by cdsapi) warning\n",
+ "import urllib3\n",
+ "urllib3.disable_warnings()\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c47258de",
+ "metadata": {},
+ "source": [
+ "We make use of the option to manually set the CDS API credentials. First, you have to set two variables: URL and KEY which build together your CDS API key. The string of characters that make up your KEY include your personal User ID and CDS API key. To obtain these, first register or login to the CDS (http://cds.climate.copernicus.eu), then visit https://cds.climate.copernicus.eu/api-how-to and copy the string of characters listed after \"key:\". Replace the ######### below with your key."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "83bd63a0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#URL = 'https://cds.climate.copernicus.eu/api/v2'\n",
+ "#KEY = '########################################' # enter your key instead"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8cac24af",
+ "metadata": {},
+ "source": [
+ "Let's choose a model and variable of interest, and fix some meta-paramters. If we are interested in multiple variable we can just iterate the code below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e697ae16",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# choose model\n",
+ "model = 'mpi_esm1_2_lr'\n",
+ "\n",
+ "# choose variables to extract (not all variables available at daily resolution for all cmip6 models at the moment)\n",
+ "variable = 'precipitation'\n",
+ "\n",
+ "# choose area to extract\n",
+ "area = [80, 3, 20, 30]\n",
+ "\n",
+ "# choose a historical period to extract\n",
+ "period_hist = '1979-01-01/2005-12-31' \n",
+ "\n",
+ "# choose a future period to extract:\n",
+ "period_fut = '2050-01-01/2070-12-31'\n",
+ "\n",
+ "# choose a filename for the historical cm data\n",
+ "fname_cm_hist = f\"cmip6_daily_1979-2015_ipsl_historical_{variable}.zip\"\n",
+ "\n",
+ "# choose a filename for the future cm data\n",
+ "fname_cm_future = f\"cmip6_daily_2050-2070_ipsl_ssp5_8_5_{variable}.zip\"\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "173ff1a6",
+ "metadata": {},
+ "source": [
+ "Both datasets will be in `DATADIR` under `fname_cm_hist` and `fname_cm_future`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1f56a8ca",
+ "metadata": {},
+ "source": [
+ "#### 1.1.1. Download historical climate model data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c1cb6ecd",
+ "metadata": {},
+ "source": [
+ "Executing the following cell will retrieve historical climate model data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5ca4dc59",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# download historical climate model data\n",
+ "\n",
+ "c = cdsapi.Client(url=URL, key=KEY)\n",
+ "\n",
+ "c.retrieve(\n",
+ " 'projections-cmip6',\n",
+ " {\n",
+ " 'temporal_resolution': 'daily',\n",
+ " 'experiment': 'historical',\n",
+ " 'level': 'single_levels',\n",
+ " 'variable': variable,\n",
+ " 'model': model,\n",
+ " 'date': period_hist,\n",
+ " 'area': area,\n",
+ " 'format': 'zip',\n",
+ " },\n",
+ " f'{DATADIR}/{fname_cm_hist}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1d05a296",
+ "metadata": {},
+ "source": [
+ "After unzipping the folder..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fee48ec4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import zipfile\n",
+ "\n",
+ "with zipfile.ZipFile(f'{DATADIR}/{fname_cm_hist}', 'r') as zip_ref:\n",
+ " zip_ref.extractall(DATADIR)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4eb56057",
+ "metadata": {},
+ "source": [
+ "...the file is now in `DATADIR/pr_day_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_19790101-20141231_*.nc`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "36252bae",
+ "metadata": {},
+ "source": [
+ "#### 1.1.2. Download future climate model data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cfdde7d4",
+ "metadata": {},
+ "source": [
+ "Now we go through the same steps to download climate data in the future or application period:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "99e63f7a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# download future climate model data\n",
+ "\n",
+ "c = cdsapi.Client(url=URL, key=KEY)\n",
+ "\n",
+ "c.retrieve(\n",
+ " 'projections-cmip6',\n",
+ " {\n",
+ " 'temporal_resolution': 'daily',\n",
+ " 'experiment': 'ssp5_8_5',\n",
+ " 'level': 'single_levels',\n",
+ " 'variable': variable,\n",
+ " 'model': model,\n",
+ " 'date': period_fut,\n",
+ " 'area': area,\n",
+ " 'format': 'zip',\n",
+ " },\n",
+ " f'{DATADIR}/{fname_cm_future}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f1f64630",
+ "metadata": {},
+ "source": [
+ "Again, we need to unzip the folder:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "796570da",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import zipfile\n",
+ "\n",
+ "with zipfile.ZipFile(f'{DATADIR}/{fname_cm_future}', 'r') as zip_ref:\n",
+ " zip_ref.extractall(DATADIR)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1c510e91",
+ "metadata": {},
+ "source": [
+ "The file is now in `DATADIR/pr_day_MPI-ESM1-2-LR_ssp585_r1i1p1f1_gn_20500101-20701231_*.nc`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a0b85cf6",
+ "metadata": {},
+ "source": [
+ "### 1.2. Download observations"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1046a89d",
+ "metadata": {},
+ "source": [
+ "Now we will download observations. We will first download ERA5 data from the CDS and afterwards the NCEP/DOE II Reanalysis from the PSL."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "138a4937",
+ "metadata": {},
+ "source": [
+ "#### 1.2.1. Download ERA5"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4be4070f",
+ "metadata": {},
+ "source": [
+ "We will download ERA5 on daily temporal resolution (if the climate model were on other temporal resolutions we would also need a different one for ERA5). The script is inspired by [this discussion](https://confluence.ecmwf.int/pages/viewpage.action?pageId=228867588) and uses the [\n",
+ "Daily statistics calculated from ERA5 data\n",
+ "](https://cds.climate.copernicus.eu/cdsapp#!/software/app-c3s-daily-era5-statistics?tab=overview) application. The output of this application is a separate netCDF file for chosen daily statistic for each month for each year. We concatenate those files then manually. First we need to make some selections (make sure the data chosen here is consistent with the cm data pulled above):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dc749c3b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# choose years to request (this should overlap with the `period_hist` chosen for the cm data)\n",
+ "# this is chosen shorter for demonstration purposes\n",
+ "years = list(map(str, range(1979, 1981)))\n",
+ "\n",
+ "# choose months to request\n",
+ "months = list(map(str, range(10, 12)))\n",
+ "\n",
+ "# choose a variable (must be a valid ERA5 CDS API name)\n",
+ "variable = \"total_precipitation\"\n",
+ "\n",
+ "# choose a required statistic (valid names given in the application description above)\n",
+ "statistic = \"daily_mean\"\n",
+ "\n",
+ "# choose an area (should be the same as above)\n",
+ "area = {\"lat\": [30, 80], \"lon\": [3, 20]}\n",
+ "\n",
+ "# choose a filename\n",
+ "fname_era5 = f\"era5_{variable}_{statistic}_1979_1981.nc\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "16856537",
+ "metadata": {},
+ "source": [
+ "And now we can request the data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "15ac64b4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "c = cdsapi.Client(url=URL, key=KEY, timeout=300)\n",
+ "\n",
+ "# Loop over years and months\n",
+ "filenames_for_cleanup= []\n",
+ "for yr in years:\n",
+ " print(f\"----- Requesting year: {yr} -----\")\n",
+ " for mn in months:\n",
+ " result = c.service(\n",
+ " \"tool.toolbox.orchestrator.workflow\",\n",
+ " params={\n",
+ " \"realm\": \"user-apps\",\n",
+ " \"project\": \"app-c3s-daily-era5-statistics\",\n",
+ " \"version\": \"master\",\n",
+ " \"kwargs\": {\n",
+ " \"dataset\": \"reanalysis-era5-single-levels\",\n",
+ " \"product_type\": \"reanalysis\",\n",
+ " \"variable\": variable,\n",
+ " \"statistic\": statistic,\n",
+ " \"year\": yr,\n",
+ " \"month\": mn,\n",
+ " \"time_zone\": \"UTC+00:0\",\n",
+ " \"frequency\": \"1-hourly\",\n",
+ " \"grid\": \"1.0/1.0\",\n",
+ " \"area\": area,\n",
+ " },\n",
+ " \"workflow_name\": \"application\"\n",
+ " }) \n",
+ "\n",
+ " \n",
+ " filename = f\"{DATADIR}/era5_{variable}_{statistic}_{yr}_{mn}.nc\"\n",
+ " url = result[0]['location']\n",
+ "\n",
+ " # Download nc file\n",
+ " urllib.request.urlretrieve(url, filename)\n",
+ " # Append filename to list of filenames to cleanup\n",
+ " filenames_for_cleanup.append(filename)\n",
+ "\n",
+ "# Combine monthly data\n",
+ "combined_data = xarray.open_mfdataset(f\"{DATADIR}/era5_{variable}_{statistic}_*.nc\", combine = 'nested', concat_dim=\"time\")\n",
+ "combined_data.to_netcdf(f\"{DATADIR}/{fname_era5}\")\n",
+ "\n",
+ "# Cleanup\n",
+ "for filename in filenames_for_cleanup:\n",
+ " os.remove(filename)"
+ ]
+ },
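+ {
+ "cell_type": "markdown",
+ "id": "d5a1c2f3",
+ "metadata": {},
+ "source": [
+ "Before moving on, it can be useful to take a quick look at the combined file we just wrote, e.g. to confirm the time range and the exact variable name and units it contains (this optional check is not part of the original workflow):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e7b3a9c1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional: quick inspection of the combined ERA5 file (variable names, coordinates, time range)\n",
+ "era5_check = xarray.open_dataset(f'{DATADIR}/{fname_era5}')\n",
+ "print(era5_check)\n",
+ "era5_check.close()"
+ ]
+ },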
+ {
+ "cell_type": "markdown",
+ "id": "89f05dad",
+ "metadata": {},
+ "source": [
+ "#### 1.2.2. Download NCEP/DOE II"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "111c8740",
+ "metadata": {},
+ "source": [
+ "We now download the [NCEP/DOE II data](https://psl.noaa.gov/data/gridded/data.ncep.reanalysis2.html). [Here is an overview](https://psl.noaa.gov/data/gridded/data.ncep.reanalysis2.html) of the possible variables and we take the data from [the datastore here](https://downloads.psl.noaa.gov/Datasets/ncep.reanalysis2/Dailies/gaussian_grid/). "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dec2749e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Variable name. Needs to be one of the NCEP-names in https://downloads.psl.noaa.gov/Datasets/ncep.reanalysis2/Dailies/gaussian_grid/. \n",
+ "variable = \"prate.sfc.gauss\"\n",
+ "\n",
+ "# choose years to request (this should overlap with the `period_hist` chosen for the cm data)*\n",
+ "years = map(str, range(1979, 2005))\n",
+ "\n",
+ "# choose an area (should be the same as above)\n",
+ "area = {\"lat\": [30, 80], \"lon\": [3, 20]}\n",
+ "\n",
+ "# choose a filename\n",
+ "fname_ncep_doe = f\"ncep_doe_{variable}_1979_2005.nc\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "140db911",
+ "metadata": {},
+ "source": [
+ "Now we can download the data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8fd4968a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Download data year by year\n",
+ "filenames_for_cleanup = []\n",
+ "for year in years:\n",
+ " url = f\"https://downloads.psl.noaa.gov/Datasets/ncep.reanalysis2/Dailies/gaussian_grid/{variable}.{year}.nc\"\n",
+ " filename = f\"{DATADIR}/{variable}_{year}.nc\"\n",
+ " # Download nc file\n",
+ " urllib.request.urlretrieve(url, filename)\n",
+ " # Append filename to list of filenames to cleanup\n",
+ " filenames_for_cleanup.append(filename)\n",
+ "\n",
+ "# Combine data for variable\n",
+ "combined_data = xarray.open_mfdataset(f\"{DATADIR}/{variable}_*.nc\", combine = 'nested', concat_dim=\"time\")\n",
+ "# Select area\n",
+ "combined_data = combined_data.sel(lon=slice(min(area[\"lon\"]), max(area[\"lon\"])),lat=slice(max(area[\"lat\"]), min(area[\"lat\"])))\n",
+ "# Write to file \n",
+ "combined_data.to_netcdf(f\"{DATADIR}/{fname_ncep_doe}\")\n",
+ "# Cleanup\n",
+ "for filename in filenames_for_cleanup:\n",
+ " os.remove(filename)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8fa8329b",
+ "metadata": {},
+ "source": [
+ "It is also possible (and probably easier) to download the data via ftp through the same links, or via the visual interface accessible via the graph-icon next to a variable in the [NCEP/DOE 2 overview page](https://psl.noaa.gov/data/gridded/data.ncep.reanalysis2.html). The latter also provides an option to select a range of dates and access merged data for that range that can directly be used for the further preprocessing steps."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9156db89",
+ "metadata": {},
+ "source": [
+ "## 2. Convert and regrid data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a131dd39",
+ "metadata": {},
+ "source": [
+ "Now that we have downloaded the data we need to make sure that observations and climate model data are:\n",
+ "\n",
+ "- on the same temporal resolution: this is covered because we downloaded the data on daily resolution.\n",
+ "- on the same spatial resolution: we need to regrid the data.\n",
+ "- in the same units: we may need to convert units.\n",
+ "\n",
+ "Furthermore we might want to extract additional information and need to get the numpy arrays corresponding to the data. In the numpy arrays we need to make sure that they have the form `[t,x,y]`.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c4bdf004",
+ "metadata": {},
+ "source": [
+ "### 2.1. Regrid data "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1ebf5afa",
+ "metadata": {},
+ "source": [
+ "Now that we have data on the same temporal resolution for both the climate model and observations we need to make sure they are also on the same spatial one and regrid the datasets. The climate model data is on a coarser grid, therefore we will regrid the observational data onto this resolution. However there are also other solutions, where the [climate model data is regridded onto the resolution of the observations](https://esd.copernicus.org/articles/9/313/2018/).\n",
+ "\n",
+ "We will use iris for the regridding, however there are also xarray solutions. Different variables might require different regridding schemes: [a list of ones available in iris is given here](https://scitools-iris.readthedocs.io/en/latest/userguide/interpolation_and_regridding.html?highlight=interpolate#regridding). For precipitation we choose a regridder based on Nearest values. Other regridders like linear ones *might* introduce negative values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3bfc9e58",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cm_hist = iris.load_cube(f\"{DATADIR}/pr_day_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_19790101-20051231_v20190710.nc\", \"precipitation_flux\")\n",
+ "cm_future = iris.load_cube(f\"{DATADIR}/pr_day_MPI-ESM1-2-LR_ssp585_r1i1p1f1_gn_20500101-20701231_v20190710.nc\", \"precipitation_flux\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "397a0cb7",
+ "metadata": {},
+ "source": [
+ "First let's take care of the ERA5 reanalysis:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5fc7d11d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "obs_era5 = iris.load_cube(f\"{DATADIR}/{fname_era5}\")\n",
+ "obs_era5 = obs_era5.regrid(cm_hist, iris.analysis.Nearest())"
+ ]
+ },
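+ {
+ "cell_type": "markdown",
+ "id": "f0e4b6d8",
+ "metadata": {},
+ "source": [
+ "As mentioned above, the regridding can also be done with xarray instead of iris. The optional sketch below is one possible variant using nearest-neighbour interpolation onto the climate model grid with `Dataset.interp`; it assumes both datasets expose their latitude/longitude under the coordinate names `lat`/`lon` (rename them first if they do not), and for more demanding applications a dedicated regridding package may be preferable."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a9c8e0b2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# OPTIONAL, illustrative only: nearest-neighbour regridding with xarray instead of iris.\n",
+ "# Assumes both files use 'lat'/'lon' as coordinate names; e.g. if the ERA5 file uses\n",
+ "# 'latitude'/'longitude', rename first: .rename({'latitude': 'lat', 'longitude': 'lon'})\n",
+ "cm_hist_xr = xarray.open_dataset(f'{DATADIR}/pr_day_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_19790101-20051231_v20190710.nc')\n",
+ "obs_era5_xr = xarray.open_dataset(f'{DATADIR}/{fname_era5}')\n",
+ "obs_era5_on_cm_grid = obs_era5_xr.interp(lat=cm_hist_xr.lat, lon=cm_hist_xr.lon, method='nearest')\n",
+ "print(obs_era5_on_cm_grid)"
+ ]
+ },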
+ {
+ "cell_type": "markdown",
+ "id": "2d56577c",
+ "metadata": {},
+ "source": [
+ "And now of the NCEP/DOE II data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3ca33391",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "obs_ncep_doe = iris.load_cube(f\"{DATADIR}/{fname_ncep_doe}\")\n",
+ "obs_ncep_doe = obs_ncep_doe.regrid(cm_hist, iris.analysis.Nearest())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e800787d",
+ "metadata": {},
+ "source": [
+ "### 2.1. Extract additional information"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "465d30f2",
+ "metadata": {},
+ "source": [
+ "The data objects are now all at the same temporal and spatial resolution. Because some debiasers need the dates as input, it is useful to extract them here:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f9444564",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_dates(x):\n",
+ " time_dimension = x.coords()[0]\n",
+ " dates = time_dimension.units.num2date(time_dimension.points)\n",
+ " return dates\n",
+ "\n",
+ "get_dates = np.vectorize(get_dates)\n",
+ "\n",
+ "dates_cm_hist = get_dates(cm_hist)\n",
+ "dates_cm_future = get_dates(cm_future)\n",
+ "\n",
+ "dates_obs_era5 = get_dates(obs_era5)\n",
+ "dates_obs_ncep_doe = get_dates(obs_ncep_doe)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "44fa2e3d",
+ "metadata": {},
+ "source": [
+ "### 2.3. Get numpy arrays"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dab004f5",
+ "metadata": {},
+ "source": [
+ "In order to start working with ibicus, we need to get the numpy arrays associated with the data from the iris cubes:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "165cf33a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cm_hist = cm_hist.data\n",
+ "cm_future = cm_future.data\n",
+ "\n",
+ "obs_era5 = obs_era5.data\n",
+ "obs_ncep_doe = obs_ncep_doe.data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5f997223",
+ "metadata": {},
+ "source": [
+ "We look at the shapes to make sure they are all in the form `[t, x, y]`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "57b96bc0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(f\"Shape cm_hist: {cm_hist.shape}\")\n",
+ "print(f\"Shape cm_future: {cm_future.shape}\")\n",
+ "\n",
+ "print(f\"Shape obs_era5: {obs_era5.shape}\")\n",
+ "print(f\"Shape obs_ncep_doe: {obs_ncep_doe.shape}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "de4780c7",
+ "metadata": {},
+ "source": [
+ "### 2.4. Convert units"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c760fe32",
+ "metadata": {},
+ "source": [
+ "From the [ERA5 documentation](https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation) we can see that the precipitation is measured in m, whilst in `cm_hist` and `cm_future` it is measured as flux (m / s^-1). To convert we need to divide the ERA5-values by 86 400 (the number of seconds per day):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a37eb5e7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "obs_era5 = obs_era5/86400"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4d99dfe0",
+ "metadata": {},
+ "source": [
+ "The values in the NCEP/DOE II reanalysis are in the same units."
+ ]
+ },
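+ {
+ "cell_type": "markdown",
+ "id": "b2d4f6a8",
+ "metadata": {},
+ "source": [
+ "A quick way to spot a remaining units mismatch is to compare the overall magnitude of the three numpy arrays: after the conversion above they should all be of a similar order. This optional rough check is no substitute for reading the product documentation:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c6e8a0d2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional rough check that observations and climate model data are now in comparable units\n",
+ "print('mean cm_hist      :', float(np.mean(cm_hist)))\n",
+ "print('mean obs_era5     :', float(np.mean(obs_era5)))\n",
+ "print('mean obs_ncep_doe :', float(np.mean(obs_ncep_doe)))"
+ ]
+ },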
+ {
+ "cell_type": "markdown",
+ "id": "1812a128",
+ "metadata": {},
+ "source": [
+ "## 3. Apply debiaser"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "24d67c72",
+ "metadata": {},
+ "source": [
+ "After these preparations we can finally apply a bias adjustment method. For a detailed introduction into the actual application of bias correction using ibicus, we refer you to the other notebooks.\n",
+ "\n",
+ "For illustrative purposes we give one example here using a simple quantile mapping methodology that we apply to both ERA5 and NCEP/DOE II data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5990e8ce",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from ibicus.debias import QuantileMapping\n",
+ "\n",
+ "debiaser = QuantileMapping.from_variable(\"pr\")\n",
+ "\n",
+ "debiased_cm_future_era5 = debiaser.apply(obs_era5, cm_hist, cm_future)\n",
+ "debiased_cm_future_ncep_doe = debiaser.apply(obs_ncep_doe, cm_hist, cm_future)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}