first commit

Alfredo Oliviero 2024-11-06 14:20:09 +01:00
commit 8ed56e41c6
21 changed files with 25565 additions and 0 deletions

.gitignore (vendored, new file, 289 lines)
@@ -0,0 +1,289 @@
# Created by https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks,macos,visualstudiocode,emacs
# Edit at https://www.toptal.com/developers/gitignore?templates=python,jupyternotebooks,macos,visualstudiocode,emacs
### Emacs ###
# -*- mode: gitignore; -*-
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc
auto-save-list
tramp
.\#*
# Org-mode
.org-id-locations
*_archive
# flymake-mode
*_flymake.*
# eshell files
/eshell/history
/eshell/lastdir
# elpa packages
/elpa/
# reftex files
*.rel
# AUCTeX auto folder
/auto/
# cask packages
.cask/
dist/
# Flycheck
flycheck_*.el
# server auth directory
/server/
# projectiles files
.projectile
# directory configuration
.dir-locals.el
# network security
/network-security.data
### JupyterNotebooks ###
# gitignore template for Jupyter Notebooks
# website: http://jupyter.org/
.ipynb_checkpoints
*/.ipynb_checkpoints/*
# IPython
profile_default/
ipython_config.py
# Remove previous ipynb_checkpoints
# git rm -r .ipynb_checkpoints/
### macOS ###
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### macOS Patch ###
# iCloud generated files
*.icloud
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
# IPython
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml
# ruff
.ruff_cache/
# LSP config files
pyrightconfig.json
### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
# Local History for Visual Studio Code
.history/
# Built Visual Studio Code Extensions
*.vsix
### VisualStudioCode Patch ###
# Ignore all local history of files
.history
.ionide
# End of https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks,macos,visualstudiocode,emacs

CHANGELOG.md (new file, 5 lines)
@@ -0,0 +1,5 @@
# Changelog for "d4science_copernicus_notebooks"
## [v1.0.0]
- first release

FUNDING.md (new file, 28 lines)
@@ -0,0 +1,28 @@
# Acknowledgments
The projects leading to this software have received funding from a series of European Union programmes including:
- the Sixth Framework Programme for Research and Technological Development
- [DILIGENT](https://cordis.europa.eu/project/id/004260) (grant no. 004260)
- the Seventh Framework Programme for research, technological development and demonstration
- [D4Science](https://cordis.europa.eu/project/id/212488) (grant no. 212488)
- [D4Science-II](https://cordis.europa.eu/project/id/239019) (grant no. 239019)
- [ENVRI](https://cordis.europa.eu/project/id/283465) (grant no. 283465)
- [iMarine](https://cordis.europa.eu/project/id/283644) (grant no. 283644)
- [EUBrazilOpenBio](https://cordis.europa.eu/project/id/288754) (grant no. 288754)
- the H2020 research and innovation programme
- [SoBigData](https://cordis.europa.eu/project/id/654024) (grant no. 654024)
- [PARTHENOS](https://cordis.europa.eu/project/id/654119) (grant no. 654119)
- [EGI-Engage](https://cordis.europa.eu/project/id/654142) (grant no. 654142)
- [ENVRI PLUS](https://cordis.europa.eu/project/id/654182) (grant no. 654182)
- [BlueBRIDGE](https://cordis.europa.eu/project/id/675680) (grant no. 675680)
- [PerformFISH](https://cordis.europa.eu/project/id/727610) (grant no. 727610)
- [AGINFRA PLUS](https://cordis.europa.eu/project/id/731001) (grant no. 731001)
- [DESIRA](https://cordis.europa.eu/project/id/818194) (grant no. 818194)
- [ARIADNEplus](https://cordis.europa.eu/project/id/823914) (grant no. 823914)
- [RISIS 2](https://cordis.europa.eu/project/id/824091) (grant no. 824091)
- [EOSC-Pillar](https://cordis.europa.eu/project/id/857650) (grant no. 857650)
- [Blue Cloud](https://cordis.europa.eu/project/id/862409) (grant no. 862409)
- [SoBigData-PlusPlus](https://cordis.europa.eu/project/id/871042) (grant no. 871042)

LICENSE.md (new file, 270 lines)
@@ -0,0 +1,270 @@
# European Union Public Licence V. 1.1
EUPL © the European Community 2007
This European Union Public Licence (the “EUPL”) applies to the Work or Software
(as defined below) which is provided under the terms of this Licence. Any use of
the Work, other than as authorised under this Licence is prohibited (to the
extent such use is covered by a right of the copyright holder of the Work).
The Original Work is provided under the terms of this Licence when the Licensor
(as defined below) has placed the following notice immediately following the
copyright notice for the Original Work:
Licensed under the EUPL V.1.1
or has expressed by any other mean his willingness to license under the EUPL.
## 1. Definitions
In this Licence, the following terms have the following meaning:
- The Licence: this Licence.
- The Original Work or the Software: the software distributed and/or
communicated by the Licensor under this Licence, available as Source Code and
also as Executable Code as the case may be.
- Derivative Works: the works or software that could be created by the Licensee,
based upon the Original Work or modifications thereof. This Licence does not
define the extent of modification or dependence on the Original Work required
in order to classify a work as a Derivative Work; this extent is determined by
copyright law applicable in the country mentioned in Article 15.
- The Work: the Original Work and/or its Derivative Works.
- The Source Code: the human-readable form of the Work which is the most
convenient for people to study and modify.
- The Executable Code: any code which has generally been compiled and which is
meant to be interpreted by a computer as a program.
- The Licensor: the natural or legal person that distributes and/or communicates
the Work under the Licence.
- Contributor(s): any natural or legal person who modifies the Work under the
Licence, or otherwise contributes to the creation of a Derivative Work.
- The Licensee or “You”: any natural or legal person who makes any usage of the
Software under the terms of the Licence.
- Distribution and/or Communication: any act of selling, giving, lending,
renting, distributing, communicating, transmitting, or otherwise making
available, on-line or off-line, copies of the Work or providing access to its
essential functionalities at the disposal of any other natural or legal
person.
## 2. Scope of the rights granted by the Licence
The Licensor hereby grants You a world-wide, royalty-free, non-exclusive,
sub-licensable licence to do the following, for the duration of copyright vested
in the Original Work:
- use the Work in any circumstance and for all usage,
- reproduce the Work,
- modify the Original Work, and make Derivative Works based upon the Work,
- communicate to the public, including the right to make available or display the Work or copies thereof to the public and perform publicly, as the case may be, the Work,
- distribute the Work or copies thereof,
- lend and rent the Work or copies thereof,
- sub-license rights in the Work or copies thereof.
Those rights can be exercised on any media, supports and formats, whether now
known or later invented, as far as the applicable law permits so.
In the countries where moral rights apply, the Licensor waives his right to
exercise his moral right to the extent allowed by law in order to make effective
the licence of the economic rights here above listed.
The Licensor grants to the Licensee royalty-free, non exclusive usage rights to
any patents held by the Licensor, to the extent necessary to make use of the
rights granted on the Work under this Licence.
## 3. Communication of the Source Code
The Licensor may provide the Work either in its Source Code form, or as
Executable Code. If the Work is provided as Executable Code, the Licensor
provides in addition a machine-readable copy of the Source Code of the Work
along with each copy of the Work that the Licensor distributes or indicates, in
a notice following the copyright notice attached to the Work, a repository where
the Source Code is easily and freely accessible for as long as the Licensor
continues to distribute and/or communicate the Work.
## 4. Limitations on copyright
Nothing in this Licence is intended to deprive the Licensee of the benefits from
any exception or limitation to the exclusive rights of the rights owners in the
Original Work or Software, of the exhaustion of those rights or of other
applicable limitations thereto.
## 5. Obligations of the Licensee
The grant of the rights mentioned above is subject to some restrictions and
obligations imposed on the Licensee. Those obligations are the following:
Attribution right: the Licensee shall keep intact all copyright, patent or
trademarks notices and all notices that refer to the Licence and to the
disclaimer of warranties. The Licensee must include a copy of such notices and a
copy of the Licence with every copy of the Work he/she distributes and/or
communicates. The Licensee must cause any Derivative Work to carry prominent
notices stating that the Work has been modified and the date of modification.
Copyleft clause: If the Licensee distributes and/or communicates copies of the
Original Works or Derivative Works based upon the Original Work, this
Distribution and/or Communication will be done under the terms of this Licence
or of a later version of this Licence unless the Original Work is expressly
distributed only under this version of the Licence. The Licensee (becoming
Licensor) cannot offer or impose any additional terms or conditions on the Work
or Derivative Work that alter or restrict the terms of the Licence.
Compatibility clause: If the Licensee Distributes and/or Communicates Derivative
Works or copies thereof based upon both the Original Work and another work
licensed under a Compatible Licence, this Distribution and/or Communication can
be done under the terms of this Compatible Licence. For the sake of this clause,
“Compatible Licence” refers to the licences listed in the appendix attached to
this Licence. Should the Licensee's obligations under the Compatible Licence
conflict with his/her obligations under this Licence, the obligations of the
Compatible Licence shall prevail.
Provision of Source Code: When distributing and/or communicating copies of the
Work, the Licensee will provide a machine-readable copy of the Source Code or
indicate a repository where this Source will be easily and freely available for
as long as the Licensee continues to distribute and/or communicate the Work.
Legal Protection: This Licence does not grant permission to use the trade names,
trademarks, service marks, or names of the Licensor, except as required for
reasonable and customary use in describing the origin of the Work and
reproducing the content of the copyright notice.
## 6. Chain of Authorship
The original Licensor warrants that the copyright in the Original Work granted
hereunder is owned by him/her or licensed to him/her and that he/she has the
power and authority to grant the Licence.
Each Contributor warrants that the copyright in the modifications he/she brings
to the Work are owned by him/her or licensed to him/her and that he/she has the
power and authority to grant the Licence.
Each time You accept the Licence, the original Licensor and subsequent
Contributors grant You a licence to their contributions to the Work, under the
terms of this Licence.
## 7. Disclaimer of Warranty
The Work is a work in progress, which is continuously improved by numerous
contributors. It is not a finished work and may therefore contain defects or
“bugs” inherent to this type of software development.
For the above reason, the Work is provided under the Licence on an “as is” basis
and without warranties of any kind concerning the Work, including without
limitation merchantability, fitness for a particular purpose, absence of defects
or errors, accuracy, non-infringement of intellectual property rights other than
copyright as stated in Article 6 of this Licence.
This disclaimer of warranty is an essential part of the Licence and a condition
for the grant of any rights to the Work.
## 8. Disclaimer of Liability
Except in the cases of wilful misconduct or damages directly caused to natural
persons, the Licensor will in no event be liable for any direct or indirect,
material or moral, damages of any kind, arising out of the Licence or of the use
of the Work, including without limitation, damages for loss of goodwill, work
stoppage, computer failure or malfunction, loss of data or any commercial
damage, even if the Licensor has been advised of the possibility of such
damage. However, the Licensor will be liable under statutory product liability
laws as far such laws apply to the Work.
## 9. Additional agreements
While distributing the Original Work or Derivative Works, You may choose to
conclude an additional agreement to offer, and charge a fee for, acceptance of
support, warranty, indemnity, or other liability obligations and/or services
consistent with this Licence. However, in accepting such obligations, You may
act only on your own behalf and on your sole responsibility, not on behalf of
the original Licensor or any other Contributor, and only if You agree to
indemnify, defend, and hold each Contributor harmless for any liability incurred
by, or claims asserted against such Contributor by the fact You have accepted
any such warranty or additional liability.
## 10. Acceptance of the Licence
The provisions of this Licence can be accepted by clicking on an icon “I agree”
placed under the bottom of a window displaying the text of this Licence or by
affirming consent in any other similar way, in accordance with the rules of
applicable law. Clicking on that icon indicates your clear and irrevocable
acceptance of this Licence and all of its terms and conditions.
Similarly, you irrevocably accept this Licence and all of its terms and
conditions by exercising any rights granted to You by Article 2 of this Licence,
such as the use of the Work, the creation by You of a Derivative Work or the
Distribution and/or Communication by You of the Work or copies thereof.
## 11. Information to the public
In case of any Distribution and/or Communication of the Work by means of
electronic communication by You (for example, by offering to download the Work
from a remote location) the distribution channel or media (for example, a
website) must at least provide to the public the information requested by the
applicable law regarding the Licensor, the Licence and the way it may be
accessible, concluded, stored and reproduced by the Licensee.
## 12. Termination of the Licence
The Licence and the rights granted hereunder will terminate automatically upon
any breach by the Licensee of the terms of the Licence.
Such a termination will not terminate the licences of any person who has
received the Work from the Licensee under the Licence, provided such persons
remain in full compliance with the Licence.
## 13. Miscellaneous
Without prejudice of Article 9 above, the Licence represents the complete
agreement between the Parties as to the Work licensed hereunder.
If any provision of the Licence is invalid or unenforceable under applicable
law, this will not affect the validity or enforceability of the Licence as a
whole. Such provision will be construed and/or reformed so as necessary to make
it valid and enforceable.
The European Commission may publish other linguistic versions and/or new
versions of this Licence, so far this is required and reasonable, without
reducing the scope of the rights granted by the Licence. New versions of the
Licence will be published with a unique version number.
All linguistic versions of this Licence, approved by the European Commission,
have identical value. Parties can take advantage of the linguistic version of
their choice.
## 14. Jurisdiction
Any litigation resulting from the interpretation of this License, arising
between the European Commission, as a Licensor, and any Licensee, will be
subject to the jurisdiction of the Court of Justice of the European Communities,
as laid down in article 238 of the Treaty establishing the European Community.
Any litigation arising between Parties, other than the European Commission, and
resulting from the interpretation of this License, will be subject to the
exclusive jurisdiction of the competent court where the Licensor resides or
conducts its primary business.
## 15. Applicable Law
This Licence shall be governed by the law of the European Union country where
the Licensor resides or has his registered office.
This licence shall be governed by the Belgian law if:
- a litigation arises between the European Commission, as a Licensor, and any Licensee;
- the Licensor, other than the European Commission, has no residence or registered office inside a European Union country.
## Appendix
“Compatible Licences” according to article 5 EUPL are:
- GNU General Public License (GNU GPL) v. 2
- Open Software License (OSL) v. 2.1, v. 3.0
- Common Public License v. 1.0
- Eclipse Public License v. 1.0
- Cecill v. 2.0

README.md (new file, 99 lines)
@@ -0,0 +1,99 @@
# d4science_copernicus_notebooks
**d4science_copernicus_notebooks** is a repository developed by CNR-ISTI to manage Climate Data Store (CDS) API authentication within D4Science JupyterLab environments. It simplifies credential handling, allowing for secure, reusable access to the CDS API across multiple notebooks.
## Version
v.1.0.0
## Installation
Clone the repository locally and copy the notebooks into your JupyterLab instance.
### Test locally on Visual Studio Code
See [Testing Locally on Visual Studio Code](#testing-locally-on-visual-studio-code) below.
## Built With
* [Copernicus CDSAPI](https://cds.climate.copernicus.eu/how-to-api/) - the Copernicus Climate Data Store (CDS) Application Program Interface (API) client
* [python](https://python.org/)
* [d4science](https://www.d4science.org/)
* [d4science_copernicus_cds](https://code-repo.d4science.org/D4Science/d4science_copernicus_cds)
## Documentation
The tutorial notebooks are available on [Copernicus Training C3S](https://ecmwf-projects.github.io/copernicus-training-c3s/intro.html).
These notebooks have been fixed (the official ones do not work with the new Copernicus data format), integrated with the d4science_copernicus_cds library, and adapted to the D4Science infrastructure.
## Testing Locally on Visual Studio Code
To test the notebooks locally on Visual Studio Code, follow these steps:
1. **Install Visual Studio Code**
- Download and install Visual Studio Code from [here](https://code.visualstudio.com/).
2. **Install Necessary Extensions for Notebooks**
- Open Visual Studio Code.
- Go to the Extensions view by clicking on the Extensions icon in the Activity Bar on the side of the window or by pressing `Ctrl+Shift+X`.
- Search for and install the following extensions:
- Python
- Jupyter
3. **Create and Activate a Virtual Environment**
- Open a terminal in Visual Studio Code by selecting `Terminal` > `New Terminal` from the top menu.
- Create a virtual environment by running:
```sh
python -m venv venv
```
- Activate the virtual environment:
- On Windows:
```sh
.\venv\Scripts\activate
```
- On macOS and Linux:
```sh
source venv/bin/activate
```
4. **Install Requirements**
- Install the [required packages for tutorials](./requirements_tutorial.txt) by running:
```sh
pip install -r requirements_tutorial.txt
```
5. **Register on Copernicus Climate Data Store**
- Go to [Copernicus Climate Data Store](https://cds.climate.copernicus.eu/).
- Register for an account and create an API key.
6. **Open and Run `config_auth_cds.ipynb`**
- In Visual Studio Code, open and execute the [config_auth_cds.ipynb](./config_auth_cds.ipynb) notebook.
- Follow the instructions in the notebook to configure your CDS API key (a condensed sketch of this flow is shown after this list).
7. **Run the Tutorial Notebooks**
- Open the [tutorial notebooks](./tutorials/) in Visual Studio Code.
- Run the cells in each notebook to execute the tutorials.
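
For reference, the configuration flow that `config_auth_cds.ipynb` walks you through can be condensed into a few lines of Python. The sketch below is illustrative only and relies on the functions the notebooks themselves use from `d4science_copernicus_cds` (`cds_authenticate`, `cds_get_credentials`, `cds_datadir`); the `"quickstart"` label and `./out` base path are arbitrary examples.

```python
# Illustrative sketch of what config_auth_cds.ipynb does; run the notebook for the guided setup.
import cdsapi
from d4science_copernicus_cds import cds_authenticate, cds_get_credentials, cds_datadir

# First run: prompts for the CDS API URL and KEY and offers to save them in the workspace.
# Later runs: loads the saved credentials automatically and returns an authenticated client.
client = cds_authenticate()

# Retrieve the stored URL/KEY pair (prompts only if no credentials are found).
URL, KEY = cds_get_credentials()

# Create a unique, timestamped output directory for downloads, e.g. ./out/out_<timestamp>_quickstart/
datadir = cds_datadir("quickstart", basepath="./out")

# The credentials can also be passed explicitly to the plain CDS API client, as in the tutorials.
c = cdsapi.Client(url=URL, key=KEY)
```

The tutorial notebooks follow the same pattern: they import `cds_get_credentials` and `cds_datadir`, then pass the retrieved key to `cdsapi.Client` before issuing `retrieve` requests.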
## Change log
See [CHANGELOG.md](./CHANGELOG.md)
## Authors
* **Alfredo Oliviero** ([ORCID](https://orcid.org/0009-0007-3191-1025)) - [ISTI-CNR Infrascience Group](https://www.isti.cnr.it/People/A.Oliviero)
## Maintainers
* **Alfredo Oliviero** ([ORCID](https://orcid.org/0009-0007-3191-1025)) - [ISTI-CNR Infrascience Group](https://www.isti.cnr.it/People/A.Oliviero)
## License
This project is licensed under the EUPL V.1.1 License - see the [LICENSE.md](./LICENSE.md) file for details.
## About the gCube Framework
This software is part of the [gCubeFramework](https://www.gcube-system.org/ "gCubeFramework"): an
open-source software toolkit used for building and operating Hybrid Data
Infrastructures, enabling the dynamic deployment of Virtual Research Environments
by favouring the realisation of reuse-oriented policies.
The projects leading to this software have received funding from a series of European Union programmes; see [FUNDING.md](./FUNDING.md).

config_auth_cds.ipynb (new file, 413 lines)
@@ -0,0 +1,413 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### d4science_copernicus_cds Library Setup and Example\n",
"\n",
"This Jupyter notebook will guide you through setting up all dependencies and configuring the environment to use the `d4science_copernicus_cds` library. It also provides a comprehensive example of the library's features and capabilities, helping you to manage Climate Data Store (CDS) API authentication and make programmatic requests from the CDS.\n",
"\n",
"The `d4science_copernicus_cds` library simplifies the authentication process. It prompts for credentials the first time, securely saves them in the workspace, and automatically retrieves them in future sessions—allowing seamless access to CDS data.\n",
"\n",
"### Obtain Your API Credentials\n",
"\n",
"To begin, youll need your CDS API credentials. Follow these steps to obtain them:\n",
"\n",
"1. Register or log in to the CDS at [https://cds-beta.climate.copernicus.eu](https://cds-beta.climate.copernicus.eu).\n",
"2. Visit [https://cds-beta.climate.copernicus.eu/how-to-api](https://cds-beta.climate.copernicus.eu/how-to-api) and copy the API key provided.\n",
"\n",
"The library will prompt you to enter:\n",
"- **URL**: The URL field is prefilled; simply press Enter to accept the default.\n",
"- **KEY**: Insert the obtained API key when prompted, then confirm saving your credentials by pressing \"y.\"\n",
"\n",
"Once saved, the credentials will be automatically loaded in subsequent sessions, so there is no need to re-enter them.\n",
"\n",
"---\n",
"\n",
"With this setup, youll be ready to explore the full functionality of `d4science_copernicus_cds` in a BlueCloud JupyterLab environment, where you can seamlessly authenticate and interact with the CDS API across multiple notebooks.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Install Required Dependencies\n",
"\n",
"Before we begin using the `d4science_copernicus_cds` library to access the Climate Data Store (CDS), we need to install a few dependencies to ensure compatibility and functionality.\n",
"\n",
"Run the following commands to install the necessary packages:\n",
"\n",
"- **`cdsapi`**: The official API client for the Climate Data Store, allowing programmatic data access.\n",
"- **`attrs` and `typing_extensions`**: Required packages to support the latest functionality of `cdsapi`."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: cdsapi in ./venv/lib/python3.13/site-packages (0.7.4)\n",
"Requirement already satisfied: cads-api-client>=1.4.7 in ./venv/lib/python3.13/site-packages (from cdsapi) (1.5.0)\n",
"Requirement already satisfied: requests>=2.5.0 in ./venv/lib/python3.13/site-packages (from cdsapi) (2.32.3)\n",
"Requirement already satisfied: tqdm in ./venv/lib/python3.13/site-packages (from cdsapi) (4.66.6)\n",
"Requirement already satisfied: attrs in ./venv/lib/python3.13/site-packages (from cads-api-client>=1.4.7->cdsapi) (24.2.0)\n",
"Requirement already satisfied: multiurl>=0.3.2 in ./venv/lib/python3.13/site-packages (from cads-api-client>=1.4.7->cdsapi) (0.3.2)\n",
"Requirement already satisfied: typing-extensions in ./venv/lib/python3.13/site-packages (from cads-api-client>=1.4.7->cdsapi) (4.12.2)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in ./venv/lib/python3.13/site-packages (from requests>=2.5.0->cdsapi) (3.4.0)\n",
"Requirement already satisfied: idna<4,>=2.5 in ./venv/lib/python3.13/site-packages (from requests>=2.5.0->cdsapi) (3.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in ./venv/lib/python3.13/site-packages (from requests>=2.5.0->cdsapi) (2.2.3)\n",
"Requirement already satisfied: certifi>=2017.4.17 in ./venv/lib/python3.13/site-packages (from requests>=2.5.0->cdsapi) (2024.8.30)\n",
"Requirement already satisfied: pytz in ./venv/lib/python3.13/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi) (2024.2)\n",
"Requirement already satisfied: python-dateutil in ./venv/lib/python3.13/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi) (2.9.0.post0)\n",
"Requirement already satisfied: six>=1.5 in ./venv/lib/python3.13/site-packages (from python-dateutil->multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi) (1.16.0)\n",
"Requirement already satisfied: attrs in ./venv/lib/python3.13/site-packages (24.2.0)\n",
"Requirement already satisfied: typing_extensions in ./venv/lib/python3.13/site-packages (4.12.2)\n"
]
}
],
"source": [
"!pip install -U cdsapi\n",
"!pip install -U attrs\n",
"!pip install -U typing_extensions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Install the d4science_copernicus_cds Library\n",
"\n",
"Next, install the `d4science_copernicus_cds` library from the D4Science Git repository. This library will handle authentication for the Climate Data Store (CDS) API in the JupyterLab environment, allowing you to request data seamlessly across multiple notebooks.\n",
"\n",
"Once installed, the library will be ready for use, and you can proceed with authenticating and configuring your environment."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install -U git+https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import d4science_copernicus_cds Functions\n",
"\n",
"With the `d4science_copernicus_cds` library installed, we can now import the main functions for managing CDS API authentication and configuration. These functions provide a range of capabilities for handling credentials and data directories.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from d4science_copernicus_cds import (\n",
" cds_authenticate,\n",
" cds_get_credentials,\n",
" cds_show_conf,\n",
" cds_save_conf,\n",
" cds_remove_conf,\n",
" cds_remove_env,\n",
" cds_datadir\n",
")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Authenticate with the CDS API\n",
"\n",
"To begin accessing data from the Climate Data Store (CDS), start by running the `cds_authenticate()` function to initialize authentication:\n",
"\n",
"#### First-Time Setup\n",
"The first time you run this function, it will prompt you to enter your CDS API credentials:\n",
"\n",
"1. **URL**: The URL field is prefilled with the default CDS API endpoint. Simply press Enter to accept the default.\n",
"2. **KEY**: You will need to enter your personal API key. To obtain it:\n",
" - Register or log in to the CDS at [https://cds-beta.climate.copernicus.eu](https://cds-beta.climate.copernicus.eu).\n",
" - Visit [https://cds-beta.climate.copernicus.eu/how-to-api](https://cds-beta.climate.copernicus.eu/how-to-api) and copy the API key provided.\n",
"3. **Saving the Credentials**: After entering the key, the function will ask if you want to save the credentials in a hidden configuration file in your workspace. Press \"y\" to confirm saving, which will allow future sessions to load the credentials automatically.\n",
"\n",
"#### Subsequent Sessions\n",
"Once saved, `cds_authenticate()` will detect and load the credentials from the environment or configuration file automatically, without requiring further interaction. This setup enables seamless, secure access to the CDS API across sessions.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client = cds_authenticate()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### View Current Configuration\n",
"\n",
"The `cds_show_conf()` function displays the current configuration settings, including the credentials and any other parameters related to your Climate Data Store (CDS) API setup.\n",
"\n",
"This function will output:\n",
"\n",
"- **Environment-Based Credentials**: If credentials are stored in environment variables, they will be displayed here.\n",
"- **Saved Configuration File**: If a configuration file exists in your workspace, the function will show the credentials and settings retrieved from it.\n",
"\n",
"This display helps verify that your credentials are correctly set up and allows you to check whether they are being loaded from the environment or from a saved configuration file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cds_show_conf()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieve CDS API Credentials\n",
"\n",
"The `cds_get_credentials()` function retrieves your CDS API credentials, returning both the **URL** and **KEY** used for authentication\n",
"\n",
"If the credentials are already set in the environment or saved in a configuration file, `cds_get_credentials()` will load them directly. \n",
"\n",
"#### Automatic Authentication Check\n",
"If no credentials are found, `cds_get_credentials()` will automatically invoke `cds_authenticate()` to prompt you for your credentials. This ensures that you don't need to call `cds_authenticate()` separately beforehand, as `cds_get_credentials()` will handle it if necessary.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"URL, KEY = cds_get_credentials()\n",
"print(\"URL\", URL)\n",
"print (\"KEY\", KEY)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save CDS API Credentials\n",
"\n",
"The `cds_save_conf()` function saves your CDS API credentials to a hidden configuration file in your workspace. This setup allows future sessions to load the credentials automatically, so you wont need to re-enter them.\n",
"When executed, this function:\n",
"\n",
"- Retrieves your current credentials (if already set in the environment).\n",
"- Prompts you to confirm saving them in a hidden file in your workspace.\n",
"\n",
"Once saved, the credentials will be securely stored and automatically loaded in future sessions, ensuring seamless authentication with the CDS API without requiring additional input.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cds_save_conf()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Remove Saved Configuration from Workspace\n",
"\n",
"The `cds_remove_conf()` function removes the saved configuration file from your workspace. This is useful if you want to clear your stored credentials, so future sessions will require re-authentication.\n",
"\n",
"To avoid unintentional execution, this line is **commented** out by default. **Remove the comment symbol (`#`) to execute**\n",
"\n",
"When executed, this function permanently deletes the saved configuration file from your workspace, so the credentials will no longer be automatically loaded. You will be prompted to re-enter them next time you authenticate.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# cds_remove_conf()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Remove Credentials from Environment Variables\n",
"\n",
"The `cds_remove_env()` function removes the CDS API credentials from the environment variables. This is helpful if you want to clear the credentials from the current session without affecting any saved configuration files.\n",
"\n",
"To prevent accidental execution, this line is **commented** out by default. **Remove the comment symbol (`#`) to execute**\n",
"\n",
"When executed, this function clears the credentials stored in environment variables. This action will require you to re-authenticate in the current session or any future sessions if there is no saved configuration file.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# cds_remove_env()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Verify Removal of Credentials with `cds_show_conf()`\n",
"\n",
"After using `cds_remove_env()` and `cds_remove_conf()` to clear your credentials from both the environment and workspace, you can run `cds_show_conf()` to confirm that all credentials have been removed.\n",
"\n",
"This function will display any remaining credentials in the environment or configuration file. If both `cds_remove_env()` and `cds_remove_conf()` have been successfully executed, `cds_show_conf()` should indicate that no credentials are currently set, confirming that your workspace and environment have been cleared.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cds_show_conf()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set or Get the Data Directory with `cds_datadir()`\n",
"\n",
"The `cds_datadir()` function sets or retrieves a data directory for saving CDS downloads. Instead of directly using the specified folder name, it appends the provided label to a timestamp-based directory structure, making each directory unique.\n",
"\n",
"For example, to create a timestamped data directory with the label `\"example\"`, use:\n",
"\n",
"!!!python\n",
"datadir = cds_datadir(\"example\")\n",
"!!!\n",
"\n",
"This will create a directory with a timestamped format, such as:\n",
"`/home/jovyan/cds_dataDir/out_2024_11_04_13_58_38_example/`\n",
"\n",
"- **Timestamped Directory**: The function appends a timestamp to the base directory, followed by the label `\"example\"`. This ensures that each call to `cds_datadir()` creates a unique directory, ideal for organizing data downloads by session or task.\n",
"- **Custom Labels**: Use labels like `\"example\"` to organize or categorize downloads. Each call to `cds_datadir()` with a different label or at a different time will create a new directory, keeping data isolated and organized.\n",
"\n",
"This approach simplifies managing multiple data download sessions and ensures your data files are organized with minimal manual intervention.\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"datadir: %s /home/jovyan/cds_dataDir/out_2024_11_04_13_58_38_example/\n"
]
}
],
"source": [
"datadir = cds_datadir(\"example\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"datadir_current = cds_datadir(\"current_example\", basepath=\"./out\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set or Get the Data Directory with a Custom Base Path\n",
"\n",
"The `cds_datadir()` function also allows specifying a custom base path for saving CDS data downloads. This function appends a timestamp and a label to the provided base path, creating a unique, organized directory structure.\n",
"\n",
"For example, to set a custom base path `\"./out\"` with the label `\"current_example\"`, use:\n",
"\n",
"```python\n",
"datadir_current = cds_datadir(\"current_example\", basepath=\"./out\")\n",
"```\n",
"\n",
"This will create a directory structure with a timestamped format, such as\n",
"\n",
"`./out/out_2024_11_04_13_58_38_current_example/`\n",
"\n",
"\n",
"- **Timestamped Directory**: The function automatically adds a timestamp and the provided label to create a unique directory. This is useful for organizing data by session or task.\n",
"- **Custom Base Path**: By specifying `basepath=\"./out\"`, the data directory will be created within the specified location rather than the default path.\n",
"\n",
"This setup provides flexibility, allowing you to easily organize data downloads across multiple directories and ensuring a clear, timestamped folder structure for each session.\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"datadir: %s ./out/out_2024_11_04_14_02_53_current_example/\n"
]
}
],
"source": [
"datadir_current = cds_datadir(\"current_example\", basepath=\"./out\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@@ -0,0 +1,14 @@
numpy
xarray
matplotlib
cartopy
cdsapi>=0.7.2
setuptools
netcdf4
scipy
attrs
typing_extensions
xarray
zarr
dask
fsspec

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@@ -0,0 +1,815 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Analysis of September 2020 European Heatwave using ERA5 Climate Reanalysis Data from C3S"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### d4science_copernicus_cds Library\n",
"\n",
"To request data from the Climate Data Store (CDS) programmatically using the CDS API, we will manage our authentication with the `d4science_copernicus_cds` library.\n",
"\n",
"The library prompts us to enter our credentials, which are then securely saved in our workspace. **This request is only made the first time**; afterward, the `get_credentials` function will automatically retrieve the credentials from the environment or workspace, eliminating the need to re-enter them in the Jupyter notebook.\n",
"\n",
"To obtain your API credentials:\n",
"1. Register or log in to the CDS at [https://cds-beta.climate.copernicus.eu](https://cds-beta.climate.copernicus.eu).\n",
"2. Visit [https://cds-beta.climate.copernicus.eu/how-to-api](https://cds-beta.climate.copernicus.eu/how-to-api) and copy the API key provided.\n",
"\n",
"The library will prompt you to enter:\n",
"- **URL**: The URL field is prefilled; simply press Enter to accept the default.\n",
"- **KEY**: Insert the obtained API key when prompted, then confirm saving your credentials by pressing \"y.\"\n",
"\n",
"Once saved, your credentials will be loaded automatically in future sessions, ensuring a seamless experience with the CDS API."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This tutorial is based on the official turorial **[CDS API guide](https://ecmwf-projects.github.io/copernicus-training-c3s/reanalysis-heatwave.html)**, extended and adapted for use in the **BlueCloud JupyterLab** environment."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting git+https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git\n",
" Cloning https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git to /tmp/pip-req-build-e1btmzy2\n",
"Requirement already satisfied (use --upgrade to upgrade): d4science-copernicus-cds==1.0.0 from git+https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git in /opt/conda/lib/python3.8/site-packages\n",
"Requirement already satisfied: cdsapi>=0.7.2 in /opt/conda/lib/python3.8/site-packages (from d4science-copernicus-cds==1.0.0) (0.7.4)\n",
"Requirement already satisfied: attrs in /opt/conda/lib/python3.8/site-packages (from d4science-copernicus-cds==1.0.0) (24.2.0)\n",
"Requirement already satisfied: typing_extensions in /opt/conda/lib/python3.8/site-packages (from d4science-copernicus-cds==1.0.0) (4.12.2)\n",
"Requirement already satisfied: requests>=2.5.0 in /opt/conda/lib/python3.8/site-packages (from cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2.24.0)\n",
"Requirement already satisfied: cads-api-client>=1.4.7 in /opt/conda/lib/python3.8/site-packages (from cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (1.5.0)\n",
"Requirement already satisfied: tqdm in /opt/conda/lib/python3.8/site-packages (from cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (4.60.0)\n",
"Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2.10)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (3.0.4)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (1.25.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2020.12.5)\n",
"Requirement already satisfied: multiurl>=0.3.2 in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (0.3.2)\n",
"Requirement already satisfied: python-dateutil in /opt/conda/lib/python3.8/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2.8.1)\n",
"Requirement already satisfied: pytz in /opt/conda/lib/python3.8/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (2020.1)\n",
"Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.8/site-packages (from python-dateutil->multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi>=0.7.2->d4science-copernicus-cds==1.0.0) (1.15.0)\n",
"Building wheels for collected packages: d4science-copernicus-cds\n",
" Building wheel for d4science-copernicus-cds (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for d4science-copernicus-cds: filename=d4science_copernicus_cds-1.0.0-py3-none-any.whl size=12139 sha256=416e2a9e23046251c020f50b84296412d7b19399947d8eb390277d32b6d96a96\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-ypibuoib/wheels/52/8f/79/78b8dae3ae67225c9ad8417f73f2b630b4ad077f0a27911303\n",
"Successfully built d4science-copernicus-cds\n"
]
}
],
"source": [
"!pip install git+https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from d4science_copernicus_cds import cds_get_credentials, cds_datadir"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"URL https://cds-beta.climate.copernicus.eu/api\n",
"KEY db1f2085-6b8b-42e6-b832-625dfaf831a4\n"
]
}
],
"source": [
"URL, KEY = cds_get_credentials()\n",
"print(\"URL\", URL)\n",
"print (\"KEY\", KEY)\n",
"\n",
"APIKEY = KEY # this tutorial uses a different variable name..."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"cds_datadir will create a folder in our workspace, under cds_dataDir, with current timestamp and custom label"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"datadir: %s /home/jovyan/cds_dataDir/out_2024_11_04_16_18_49_reanalysis_heatwave/\n",
"/home/jovyan/cds_dataDir/out_2024_11_04_16_18_49_reanalysis_heatwave/\n"
]
}
],
"source": [
"DATADIR = cds_datadir(\"reanalysis_heatwave\")\n",
"print(DATADIR)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Analysis \n",
"\n",
"In September 2020, a record-breaking heatwave occured in large parts of western Europe, ([see a description here](https://climate.copernicus.eu/september-brings-record-breaking-warm-temperatures-and-low-sea-ice)). The city of Lille in northern France for example experienced its hottest day in September 2020 since records began in 1945. In this tutorial we will analyse this event with data from the Climate Data Store (CDS) of the Copernicus Climate Change Service (C3S).\n",
"\n",
"The tutorial comprises the following steps:\n",
"\n",
"1. Search, download and view data\n",
"2. View daily maximum 2m temperature for September 2020\n",
"3. Compare maximum temperatures with climatology"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Search, download and view data\n",
"\n",
"Before we begin we must prepare our environment. This includes installing the Application Programming Interface (API) of the CDS, and importing the various python libraries that we will need.\n",
"\n",
"#### Install CDS API\n",
"\n",
"To install the CDS API, run the following command. We use an exclamation mark to pass the command to the shell (not to the Python interpreter)."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: cdsapi in /opt/conda/lib/python3.8/site-packages (0.7.4)\n",
"Requirement already satisfied: requests>=2.5.0 in /opt/conda/lib/python3.8/site-packages (from cdsapi) (2.24.0)\n",
"Requirement already satisfied: tqdm in /opt/conda/lib/python3.8/site-packages (from cdsapi) (4.60.0)\n",
"Requirement already satisfied: cads-api-client>=1.4.7 in /opt/conda/lib/python3.8/site-packages (from cdsapi) (1.5.0)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (1.25.10)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (3.0.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (2020.12.5)\n",
"Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests>=2.5.0->cdsapi) (2.10)\n",
"Requirement already satisfied: attrs in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (24.2.0)\n",
"Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (4.12.2)\n",
"Requirement already satisfied: multiurl>=0.3.2 in /opt/conda/lib/python3.8/site-packages (from cads-api-client>=1.4.7->cdsapi) (0.3.2)\n",
"Requirement already satisfied: pytz in /opt/conda/lib/python3.8/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi) (2020.1)\n",
"Requirement already satisfied: python-dateutil in /opt/conda/lib/python3.8/site-packages (from multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi) (2.8.1)\n",
"Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.8/site-packages (from python-dateutil->multiurl>=0.3.2->cads-api-client>=1.4.7->cdsapi) (1.15.0)\n"
]
}
],
"source": [
"!pip install cdsapi"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Import libraries\n",
"\n",
"We will be working with data in NetCDF format. To best handle this data we will use libraries for working with multidimensional arrays, in particular Xarray. We will also need libraries for plotting and viewing data, in this case we will use Matplotlib and Cartopy."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# CDS API\n",
"import cdsapi\n",
"\n",
"# Libraries for working with multidimensional arrays\n",
"import numpy as np\n",
"import xarray as xr\n",
"\n",
"# Libraries for plotting and visualising data\n",
"import matplotlib.path as mpath\n",
"import matplotlib.pyplot as plt\n",
"import cartopy.crs as ccrs\n",
"from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER\n",
"import cartopy.feature as cfeature\n",
"\n",
"# Disable warnings for data download via API\n",
"import urllib3 \n",
"urllib3.disable_warnings()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### ~Enter your CDS API key~\n",
"\n",
"~We will request data from the Climate Data Store (CDS) programmatically with the help of the CDS API. Let us make use of the option to manually set the CDS API credentials.~\n",
"\n",
"~First, you have to define two variables: `URL` and `KEY` which build together your CDS API key.~\n",
"\n",
"~The string of characters that make up your KEY include your personal User ID and CDS API key. To obtain these, first register or login to the CDS (https://cds-beta.climate.copernicus.eu), then visit https://cds-beta.climate.copernicus.eu/how-to-api and copy the string of characters listed after \"key:\". Replace the `#########` below with this string.~\n",
"~URL = 'https://cds-beta.climate.copernicus.eu/api'~\n",
"\n",
"~KEY = 'xxx'~\n",
"\n",
"~Here we specify a data directory in which we will download our data and all output files that we will generate:~\n",
"\n",
"~DATADIR = './'~"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Search for data\n",
"\n",
"To search for data, visit the CDS website: https://cds-beta.climate.copernicus.eu. To facilitate your search you can use keywords, or apply various filters. The data we are going to use in this exercise is the `ERA5 reanalysis data on single levels from 1979 to present`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Having selected the correct dataset, we now need to specify what product type, variables, temporal and geographic coverage we are interested in. These can all be selected in the **\"Download data\"** tab. In this tab a form appears in which we will select the following parameters to download. We will choose a subset area of 1x1 degrees, corresponding to a region of around 111km North/South and 72km East/West in Belgium and Northern France, around the city of Lille:\n",
"\n",
"- Product type: `Reanalysis`\n",
"- Variable: `2m temperature`\n",
"- Year: `all`\n",
"- Month: `September`\n",
"- Day: `all`\n",
"- Time: `all`\n",
"- Geographical area: `North: 51`, `East: 4`, `South: 50`, `West: 3`\n",
"- Format: `NetCDF`\n",
"\n",
"![logo](./img/Notebook3_data.png)\n",
"\n",
"At the end of the download form, select **\"Show API request\"**. This will reveal a block of code, which you can simply copy and paste into a cell of your Jupyter Notebook (see cell below) ...\n",
"\n",
"#### Download data\n",
"\n",
"... having copied the API request into the cell below, running this will retrieve and download the data you requested into your local directory. However, before you run the cell below, the **terms and conditions** of this particular dataset need to have been accepted in the CDS. The option to view and accept these conditions is given at the end of the download form, just above the **\"Show API request\"** option."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-11-04 16:18:53,264 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your \n",
"[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.\n",
"2024-11-04 16:18:53,265 WARNING [2024-09-26T00:00:00] Should you have not yet migrated from the old CDS system to the new CDS, please check our [informative page](https://confluence.ecmwf.int/x/uINmFw) for guidance.\n",
"2024-11-04 16:18:53,266 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
"2024-11-04 16:18:53,266 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**\n",
"2024-11-04 16:18:53,267 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
"2024-11-04 16:18:53,524 WARNING [2024-10-10T00:00:00] The final validated ERA5 differs from ERA5T in July 2024 - please refer to our\n",
"[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)\n",
"for details and watch it for further updates on this.\n",
"2024-11-04 16:18:53,524 INFO Request ID is dd7189c4-f6ff-4652-aad5-74d2afb2f424\n",
"2024-11-04 16:18:53,570 INFO status has been updated to accepted\n"
]
}
],
"source": [
"c = cdsapi.Client(url=URL, key=KEY)\n",
"c.retrieve(\n",
" 'reanalysis-era5-single-levels',\n",
" {\n",
" 'product_type': 'reanalysis',\n",
" 'data_format': 'netcdf_legacy',\n",
" 'variable': '2m_temperature',\n",
" 'year': [\n",
" '1979', '1980', '1981',\n",
" '1982', '1983', '1984',\n",
" '1985', '1986', '1987',\n",
" '1988', '1989', '1990',\n",
" '1991', '1992', '1993',\n",
" '1994', '1995', '1996',\n",
" '1997', '1998', '1999',\n",
" '2000', '2001', '2002',\n",
" '2003', '2004', '2005',\n",
" '2006', '2007', '2008',\n",
" '2009', '2010', '2011',\n",
" '2012', '2013', '2014',\n",
" '2015', '2016', '2017',\n",
" '2018', '2019', '2020',\n",
" ],\n",
" 'month': '09',\n",
" 'day': [\n",
" '01', '02', '03',\n",
" '04', '05', '06',\n",
" '07', '08', '09',\n",
" '10', '11', '12',\n",
" '13', '14', '15',\n",
" '16', '17', '18',\n",
" '19', '20', '21',\n",
" '22', '23', '24',\n",
" '25', '26', '27',\n",
" '28', '29', '30',\n",
" ],\n",
" 'time': [\n",
" '00:00', '01:00', '02:00',\n",
" '03:00', '04:00', '05:00',\n",
" '06:00', '07:00', '08:00',\n",
" '09:00', '10:00', '11:00',\n",
" '12:00', '13:00', '14:00',\n",
" '15:00', '16:00', '17:00',\n",
" '18:00', '19:00', '20:00',\n",
" '21:00', '22:00', '23:00',\n",
" ],\n",
" 'area': [\n",
" 51, 3, 50,\n",
" 4,\n",
" ],\n",
" },\n",
" f'{DATADIR}NFrance_hourly_Sep.nc')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Inspect Data\n",
"\n",
"We have requested the data in NetCDF format. This is a commonly used format for array-oriented scientific data. To read and process this data we will make use of the Xarray library. Xarray is an open source project and Python package that makes working with labelled multi-dimensional arrays simple and efficient. We will read the data from our NetCDF file into an [xarray.Dataset](https://xarray.pydata.org/en/stable/generated/xarray.Dataset.html)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"filename = f'{DATADIR}NFrance_hourly_Sep.nc'\n",
"# Create Xarray Dataset\n",
"ds = xr.open_dataset(filename)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we can query our newly created Xarray dataset ..."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We see that the dataset has one variable called **\"t2m\"**, which stands for \"2 metre temperature\", and three coordinates of **longitude**, **latitude** and **time**.\n",
"\n",
"Select the icons to the right of the table above to expand the attributes of the coordinates and data variables. What are the units of the temperature data?"
]
},
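  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you prefer to check this programmatically rather than via the interactive view above, the variable attributes can be read directly. This is a minimal sketch; the attribute names (such as `units` and `long_name`) are those typically present in ERA5 NetCDF files:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect the attributes of the t2m variable, e.g. its units and long name\n",
    "ds['t2m'].attrs"
   ]
  },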
{
"cell_type": "markdown",
"metadata": {},
"source": [
"While an Xarray **dataset** may contain multiple variables, an Xarray **data array** holds a single variable (which may still be multi-dimensional) and its coordinates. To make the processing of the **t2m** data easier, we convert in into an Xarray data array:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"da = ds['t2m']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's convert the units of the 2m temperature data from Kelvin to degrees Celsius. The formula for this is simple: degrees Celsius = Kelvin - 273.15"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"t2m_C = da - 273.15"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View daily maximum 2m temperature for September 2020\n",
"\n",
"As a next step, let us visualize the daily maximum 2m air temperature for September 2020. From the graph, we should be able to identify which day in September was hottest in the area around Lille."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First we average over the subset area:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"> **Note:** The size covered by each data point varies as a function of latitude. We need to take this into account when averaging. One way to do this is to use the cosine of the latitude as a proxy for the varying sizes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"weights = np.cos(np.deg2rad(t2m_C.latitude))\n",
"weights.name = \"weights\"\n",
"t2m_C_weighted = t2m_C.weighted(weights)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Lille_t2m = t2m_C_weighted.mean([\"longitude\", \"latitude\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we select only the data for 2020:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Lille_2020 = Lille_t2m.sel(time='2020')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can now calculate the max daily 2m temperature for each day in September 2020:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Lille_2020_max = Lille_2020.groupby('time.day').max('time')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's plot the results in a chart:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, ax = plt.subplots(1, 1, figsize = (12, 6))\n",
"\n",
"ax.plot(Lille_2020_max.day, Lille_2020_max)\n",
"ax.set_title('Max daily t2m for Sep 2020 in Lille region')\n",
"ax.set_ylabel('° C')\n",
"ax.set_xlabel('day')\n",
"ax.grid(linestyle='--')\n",
"for i,j in zip(Lille_2020_max.day, np.around(Lille_2020_max.values, 0).astype(int)):\n",
" ax.annotate(str(j),xy=(i,j))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('The maximum temperature in September 2020 in this area was', \n",
" np.around(Lille_2020_max.max().values, 1), 'degrees Celsius.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Which day in September had the highest maximum temperature?\n",
"\n",
"Is this typical for Northern France? How does this compare with the long term average? We will seek to answer these questions in the next section."
]
},
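  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you would like to confirm the answer programmatically rather than reading it off the chart, one option is xarray's `idxmax`. This is a minimal sketch and not part of the original workflow:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Day of September 2020 with the highest daily maximum 2m temperature\n",
    "hottest_day = int(Lille_2020_max.idxmax('day'))\n",
    "print('Hottest day of September 2020:', hottest_day)"
   ]
  },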
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compare maximum temperatures with climatology\n",
"We will now seek to discover just how high the temperature for Lille in mid September 2020 was when compared with typical values exptected in this region at this time of year. To do that we will calculate the climatology of maximum daily 2m temperature for each day in September for the period of 1979 to 2019, and compare these with our values for 2020."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First we select all data prior to 2020:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Lille_past = Lille_t2m.loc['1979':'2019']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we calculate the climatology for this data, i.e. the average values for each of the days in September for a period of several decades (from 1979 to 2019)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To do this, we first have to extract the maximum daily value for each day in the time series:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Lille_max = Lille_past.resample(time='D').max().dropna('time')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We will then calculate various quantiles of the maximum daily 2m temperatures for the 40 year time series for each day in September:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Lille_max_max = Lille_max.groupby('time.day').max()\n",
"Lille_max_min = Lille_max.groupby('time.day').min()\n",
"Lille_max_mid = Lille_max.groupby('time.day').quantile(0.5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's plot this data. We will plot the, maximum, minimum and 50th quantile of the maximum daily temperature to have an idea of the expected range in this part of France in September, and compare this range with the values for 2020:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure(figsize=(16,8))\n",
"ax = plt.subplot()\n",
"\n",
"ax.plot(Lille_2020_max.day, Lille_max_mid, color='green', label='Daily max t2m 50th quantile')\n",
"ax.plot(Lille_2020_max.day, Lille_2020_max, 'bo-', color='darkred', label='Daily max t2m Sep 2020')\n",
"ax.fill_between(Lille_2020_max.day, Lille_max_max, Lille_max_min, alpha=0.1, \n",
" label='Max and min values of max t2m from 1979 to 2019')\n",
"\n",
"ax.set_xlim(1,30)\n",
"ax.set_ylim(10,33)\n",
"ax.set_title('Daily max t2m for Sep 2020 compared with climatology for Sep from 1979 to 2019')\n",
"ax.set_ylabel('t2m (Celsius)')\n",
"ax.set_xlabel('day')\n",
"handles, labels = ax.get_legend_handles_labels()\n",
"ax.legend(handles, labels)\n",
"ax.grid(linestyle='--')\n",
"\n",
"fig.savefig(f'{DATADIR}Max_t2m_clim_Sep_Lille.png')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Interestingly, we see from this plot that while the temperatures from 14 to 16 Sep 2020 were the highest in the ERA5 dataset, on 25 September 2020, the lowest of the maximum temperatures was recorded for this dataset."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We will now look more closely at the probability distribution of maximum temperatures for 15 September in this time period. To do this, we will first select only the max daily temperature for 15 September, for each year in the time series:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Lille_max = Lille_max.dropna('time', how='all')\n",
"Lille_15 = Lille_max[14::30]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We will then plot the histogram of this:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Lille_15.plot.hist()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Look at the range of maximum temperatures for 15 September in the period from 1979 to 2019. Has the temperature in this period ever exceeded that of 15 September 2020?\n",
"\n",
"The histogram shows the distribution of maximum temperature of one day in each year of the time series, which corresponds to 41 samples. In order to increase the number of samples, let's plot the histogram of maximum temperatures on 15 September, plus or minus three days. This would increase our number of samples by a factor of seven.\n",
"\n",
"To do this, we first need to produce an index that takes the maximum 2m air temperature values from 12 to 18 September (15 September +/- three days) from every year in the time series. The first step is to initiate three numpy arrays:\n",
"* `years`: with the number of years [0:40]\n",
"* `days_in_sep`: index values of day range [11:17]\n",
"* `index`: empty numpy array with 287 (41 years * 7) entries\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"years = np.arange(41)\n",
"days_in_sep = np.arange(11,18)\n",
"index = np.zeros(287)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In a next step, we then loop through each entry of the `years` array and fill the empty `index` array year by year with the correct indices of the day ranges for each year. The resulting array contains the index values of interest."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i in years:\n",
" index[i*7:(i*7)+7] = days_in_sep + (i*30)\n",
"index = index.astype(int)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We then apply this index to filter the array of max daily temperature from 1979 to 2019. The resulting object is an array of values representing the maximum 2m air temperature in Lille between 12 and 18 September for each year from 1979 to 2019:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Lille_7days = Lille_max.values[index]"
]
},
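  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As an aside, the same 12-18 September selection can also be made with xarray's datetime accessor instead of hand-built integer indices. This is an equivalent sketch, not the approach used above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the days 12-18 September for every year in the series\n",
    "Lille_7days_alt = Lille_max.where(Lille_max['time'].dt.day.isin(range(12, 19)), drop=True)"
   ]
  },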
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we can plot the histogram of maximum daily temperatures in the days 12-18 September from 1979-2019:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, ax = plt.subplots(1, 1, figsize = (12, 6))\n",
"\n",
"ax.hist(Lille_7days, bins = np.arange(10,32,1), color='lightgrey', ec='grey')\n",
"ax.set_title('Histogram of maximum 2m temperature in the days from 12-18 Sep in the period 1979-2019')\n",
"ax.set_xticks(np.arange(10,32,1))\n",
"ax.set_ylabel('Accumulated days')\n",
"ax.set_xlabel('Maximum 2m temperature (° C)')\n",
"\n",
"fig.savefig(f'{DATADIR}Hist_max_t2m_mid-Sep_1979-2019.png')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In the histogram above, you see that even if we take an increased sample covering a wider temporal range, the maximum daily temperature still never reached that of 15 September 2020. To increase the sample even further, you could include data from a longer time period. The C3S reanalysis dataset now extends back to 1940 and is accessible here [ERA5 hourly data on single levels from 1940 to present](https://cds-beta.climate.copernicus.eu/datasets?q=era5+hourly+single+levels)."
]
},
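  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you do want to extend the analysis, the only change needed in the CDS API request above is the list of years. Below is a minimal sketch of how that list could be generated; note that requesting hourly data back to 1940 is a considerably larger download:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the year list programmatically instead of writing it out by hand,\n",
    "# then pass it as the 'year' entry of the retrieve request shown earlier.\n",
    "years_1940_2020 = [str(y) for y in range(1940, 2021)]\n",
    "print(years_1940_2020[:3], '...', years_1940_2020[-1])"
   ]
  },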
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<hr>"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -0,0 +1,905 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Calculation of global distribution and timeseries of Outgoing Longwave Radiation (OLR) using NOAA/NCEI HIRS data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook-tutorial provides a practical introduction to the HIRS dataset available on \n",
"[C3S Earth's radiation budget from 1979 to present derived from satellite observations](https://cds-beta.climate.copernicus.eu/datasets/satellite-earth-radiation-budget?tab=overview). \n",
"<br>\n",
"We give a short introduction to the ECV Earth Radiation Budget, Outgoing Longwave Radiation (OLR) and provide three use cases of the dataset: plot the time-averaged global distribution of OLR (Use Case 1), calculate global timeseries of OLR (Use Case 2) and plot the Arctic weighted mean timeseries between 1979 and 2019 (Use Case 3).\n",
"We provide step-by-step instructions on data preparation. Use cases come with extensive documentation and each line of code is explained. \n",
"Two figures below are results of Use Case 1 and 2, and the result of a successful run of the code."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This tutorial is based on the official tutorial **[CDS API guide](https://ecmwf-projects.github.io/copernicus-training-c3s/reanalysis-climatology.html)**, extended and adapted for use in the **BlueCloud JupyterLab** environment.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### TUTORIA CODE FIX\n",
"\n",
"the code of the official tutorial is not compatible with the new format\n",
"this notebook fixes the download code and the logic to process it"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### d4science_copernicus_cds Library\n",
"\n",
"To request data from the Climate Data Store (CDS) programmatically using the CDS API, we will manage our authentication with the `d4science_copernicus_cds` library.\n",
"\n",
"The library prompts us to enter our credentials, which are then securely saved in our workspace. **This request is only made the first time**; afterward, the `get_credentials` function will automatically retrieve the credentials from the environment or workspace, eliminating the need to re-enter them in the Jupyter notebook.\n",
"\n",
"To obtain your API credentials:\n",
"1. Register or log in to the CDS at [https://cds-beta.climate.copernicus.eu](https://cds-beta.climate.copernicus.eu).\n",
"2. Visit [https://cds-beta.climate.copernicus.eu/how-to-api](https://cds-beta.climate.copernicus.eu/how-to-api) and copy the API key provided.\n",
"\n",
"The library will prompt you to enter:\n",
"- **URL**: The URL field is prefilled; simply press Enter to accept the default.\n",
"- **KEY**: Insert the obtained API key when prompted, then confirm saving your credentials by pressing \"y.\"\n",
"\n",
"Once saved, your credentials will be loaded automatically in future sessions, ensuring a seamless experience with the CDS API."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install git+https://code-repo.d4science.org/D4Science/d4science_copernicus_cds.git"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from d4science_copernicus_cds import cds_get_credentials, cds_datadir"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"URL, KEY = cds_get_credentials()\n",
"print(\"URL\", URL)\n",
"print (\"KEY\", KEY)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"cds_datadir will create a folder in our workspace, under cds_dataDir, with current timestamp and custom label"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"DATADIR = cds_datadir(\"erb-outgoing-longwave-radiation\")\n",
"print(DATADIR)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"The notebook has three main sections with the following outline:\n",
"\n",
"## Table of Contents\n",
"\n",
"* [1. Introduction](#intro)\n",
"* [2. Prerequisites and data preparations](#preparation)\n",
"* [3. Use cases](#usecases)\n",
" * [Use case 1: Climatology of the Outgoing Longwave Radiation (OLR)](#global-climatology)\n",
" * [Use case 2: Global time series of the OLR](#timeseries-global)\n",
" * [Use case 3: OLR evolution in the Arctic](#timeseries-arctic)\n",
"* [References](#references)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"\n",
"\n",
"## 1. Introduction\n",
"\n",
"<br>\n",
"<div style=\"text-align: justify\"> The Top-of-atmosphere (TOA) Outgoing Longwave Radiation (OLR) is one of the Global Climate Observing System (GCOS) Essential Climate Variables (ECV), and it is a key component of the Earths Radiation Budget (ERB). OLR combines the thermal radiation from the atmosphere and the thermal radiation from the Earths surface that leaves the atmosphere through the “window” regions of the electromagnetic spectrum. OLR is the reason why the Earth is not heating up indefinitely by the Sun. OLR has a latitudinal dependence, but it is not uniform mostly because of the difference in the cloud cover. The biggest volcanic eruptions emit small particles in the stratosphere, where they stay for decades and can reduce the global OLR for years. During the last decades, the average temperatures in the Arctic region have risen significantly. The Arctic region is mostly cloudless, so the longwave emission from the Earth contributes to the TOA OLR. </div>\n",
"\n",
"<br>\n",
"<div style=\"text-align: justify\"> In the Copernicus Climate Data Store (CDS), a dataset of OLR has been brokered from the High Resolution Infrared Radiation Sounder (HIRS) Outgoing Longwave Radiation (OLR) Climate Data Record (CDR) version v02r07. The Copernicus Climate Change Service (C3S) CDS provides both data produced specifically for C3S and so-called brokered products. The later are existing products produced under an independent programme or project which are made available through the CDS. This widely used CDR has been produced by the NOAA/NCEI from the HIRS instruments on board the US NOAA and European MetOp satellites. It provides monthly means of the Thermal (Longwave) Radiation Fluxes at the Top of the Atmosphere (TOA) from January 1979 to present, with global coverage and at a 2.5° x 2.5° spatial resolution. </div> \n",
"<br>\n",
"\n",
"Please find further information about the dataset as well as the data in the Climate Data Store catalogue entry Earth's Radiation Budget, sections \"Overview\", \"Download data\" and \"Documentation\": \n",
"- [Earth's Radiation Budget from 1979 to present derived from satellite observations](https://cds-beta.climate.copernicus.eu/datasets/satellite-earth-radiation-budget?tab=overview)\n",
"\n",
"The tutorial video describes the \"Earth Radiation Budget\" Essential Climate Variable and the methods and satellite instruments used to produce the data provided in the CDS catalogue entry: \n",
"- [Tutorial video on the Earth Radiation Budget Essential Climate Variable](https://datastore.copernicus-climate.eu/documents/satellite-earth-radiation-budget/C3S_D312b_Lot1.4.2.5_201902_Tutorial_ECVEarthRadiationBudget_v1.4.mp4)"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"\n",
"\n",
"## 2. Prerequisites and data preparations\n",
"\n",
"This chapter provides information on how to: run the notebook, lists necessary python libraries, and guides you through the process of data on data preparation: how to search and download the data via CDS API, and get it ready to be used.\n",
"\n",
"### 2.1 How to access the notebook\n",
"\n",
"This tutorial is in the form of a [Jupyter notebook](https://jupyter.org/). You will not need to install any software for the training as there are a number of free cloud-based services to create, edit, run and export Jupyter notebooks such as this. Here are some suggestions (simply click on one of the links below to run the notebook):"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<style>\n",
"td, th {\n",
" border: 1px solid white;\n",
" border-collapse: collapse;\n",
"}\n",
"</style>\n",
"<table align=\"left\">\n",
" <tr>\n",
" <th>Run the tutorial via free cloud platforms: </th>\n",
" <th><a href=\"https://kaggle.com/kernels/welcome?src=https://github.com/ecmwf-projects/copernicus-training-c3s/blob/main/ecv-notebooks/erb-outgoing-longwave-radiation.ipynb\">\n",
" <img src = \"https://kaggle.com/static/images/open-in-kaggle.svg\" alt = \"Kaggle\"></th>\n",
" <th><a href=\"https://mybinder.org/v2/gh/ecmwf-projects/copernicus-training-c3s/main?labpath=ecv-notebooks/erb-outgoing-longwave-radiation.ipynb\">\n",
" <img src = \"https://mybinder.org/badge.svg\" alt = \"Binder\"></th>\n",
" <th><a href=\"https://colab.research.google.com/github/ecmwf-projects/copernicus-training-c3s/blob/main/ecv-notebooks/erb-outgoing-longwave-radiation.ipynb\">\n",
" <img src = \"https://colab.research.google.com/assets/colab-badge.svg\" alt = \"Colab\"></th>\n",
" </tr>\n",
"</table>\n",
"\n",
"<br><br>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We are using `cdsapi` to download the data. This package is not yet included by default on most cloud platforms. You can use `pip` to install it:\n",
"\n",
"\n",
"```!pip install cdsapi``` \n",
"\n",
"\n",
"**Run the tutorial in local environment** \n",
"If you would like to run this notebook in your own environment, we suggest you install [Anaconda](https://docs.anaconda.com/anaconda/install/), which is the easiest way to get all libraries installed. \n",
"We will be working with data in NetCDF format. To best handle NetCDF data we will use libraries for working with multidimensional arrays, in particular [Numpy](https://numpy.org/doc/stable/), [Pandas](https://pandas.pydata.org/docs/) and [Xarray](http://xarray.pydata.org/en/stable/). We will also need libraries for plotting and viewing data, in this case, we will use [Matplotlib](https://matplotlib.org/stable/index.html) and [Cartopy](https://scitools.org.uk/cartopy/docs/latest/). Pylab and [urllib3](https://urllib3.readthedocs.io/en/stable/) change the plot style, and urllib for disabling warnings for data download via CDS API."
]
},
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-29T12:13:38.688540Z",
"start_time": "2018-11-29T12:13:38.497044Z"
}
},
"source": [
"### 2.2 Import libraries\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2022-07-14T12:55:43.443614Z",
"start_time": "2022-07-14T12:55:42.379733Z"
}
},
"outputs": [],
"source": [
"# CDS API library\n",
"import cdsapi\n",
"\n",
"# Libraries for working with multidimensional arrays\n",
"import xarray as xr\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Library to work with zip-archives, OS-functions and pattern expansion\n",
"import zipfile\n",
"import os\n",
"\n",
"# Libraries for plotting and visualising data\n",
"import matplotlib.pyplot as plt\n",
"import cartopy.crs as ccrs\n",
"import cartopy.feature as cfeature\n",
"\n",
"# Disable warnings for data download via API\n",
"import urllib3\n",
"urllib3.disable_warnings()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.3 Download data using CDS API\n",
"\n",
"This subsection describes the process of data search in the CDS catalogue, and data preparation for use in the use cases."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### ~Enter your CDS API key~\n",
"\n",
"~We will request data from the Climate Data Store (CDS) programmatically with the help of the CDS API. Let us make use of the option to manually set the CDS API credentials.~\n",
"\n",
"~First, you have to define two variables: `URL` and `KEY` which build together your CDS API key.~\n",
"\n",
"~The string of characters that make up your KEY include your personal User ID and CDS API key. To obtain these, first register or login to the CDS (https://cds-beta.climate.copernicus.eu), then visit https://cds-beta.climate.copernicus.eu/how-to-api and copy the string of characters listed after \"key:\". Replace the `#########` below with this string.~\n",
"\n",
"~URL = 'https://cds-beta.climate.copernicus.eu/api'~\n",
"\n",
"~KEY = 'xxx'~\n",
"\n",
"~Here we specify a data directory in which we will download our data and all output files that we will generate:~\n",
"\n",
"~DATADIR = './'~"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Filename for the zip file downloaded from the CDS\n",
"download_zip_file = os.path.join(DATADIR, 'olr-monthly_v02r07.zip')\n",
"# Filename for the netCDF file which contain the merged contents of the monthly files.\n",
"merged_netcdf_file = os.path.join(DATADIR, 'olr-monthly_v02r07_197901_202207.nc')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Search for data\n",
"\n",
"To search for data, visit the CDS website: https://cds-beta.climate.copernicus.eu/.\n",
"Here you can search for HIRS OLR data using the search bar. The data we need for this use case is the [Earth's Radiation Budget from 1979 to present derived from satellite observations](https://cds-beta.climate.copernicus.eu/datasets/satellite-earth-radiation-budget?tab=overview).\n",
"The Earth Radiation Budget (ERB) comprises the quantification of the incoming radiation from the Sun and the outgoing reflected shortwave and emitted longwave radiation. This catalogue entry comprises data from a number of sources."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Having selected the correct catalogue entry, we now need to specify what origin, variables, temporal and geographic coverage we are interested in. \n",
"These can all be selected in the **\"Download data\"** tab. In this tab a form appears in which we will select the following parameters to download:\n",
"\n",
"- Origin: `NOAA/NCEI HIRS`\n",
"- Variable: `Outgoing longwave radiation`\n",
"- Year: `1979 to present` (use \"Select all\" button)\n",
"- Month: `all`\n",
"- Geographical area: `Whole available region` \n",
"- Format: `Compressed zip file (.zip)`\n",
"\n",
"If you have not already done so, you will need to accept the **terms & conditions** of the data before you can download it.\n",
"\n",
"At the end of the download form, select **`Show API request`**. This will reveal a block of code, which you can simply copy and paste into a cell of your Jupyter Notebook (see cell below) ..."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"the code in tutorial is not working anymore\n",
"```\n",
"c = cdsapi.Client(url=URL, key=KEY)\n",
"c.retrieve(\n",
" 'satellite-earth-radiation-budget',\n",
" {\n",
" 'download_format': 'zip',\n",
" \n",
" 'data_format': 'netcdf_legacy',\n",
" 'origin': 'noaa_ncei_hirs',\n",
" 'variable': 'outgoing_longwave_radiation',\n",
" 'year': ['%04d' % (year) for year in range(1979, 2023)],\n",
" 'month': ['%02d' % (mnth) for mnth in range(1, 13)],\n",
" \"version\": [\"2_7_reprocessed\"]\n",
" },\n",
" download_zip_file\n",
")\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"fixed version for new data format"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client = cdsapi.Client(url=URL, key=KEY)\n",
"\n",
"zip_files = []\n",
"\n",
"for year in range (1979, 2023):\n",
" # Filename for the zip file downloaded from the CDS\n",
" download_zip_file_year = os.path.join(DATADIR, '%d_olr-monthly_v02r07.zip' % year)\n",
" zip_files.append(download_zip_file_year)\n",
" \n",
" # Filename for the netCDF file which contain the merged contents of the monthly files.\n",
"\n",
" dataset = 'satellite-earth-radiation-budget'\n",
" request = {\n",
" \"product_family\": \"hirs\",\n",
" \"origin\": \"noaa_ncei\",\n",
" 'data_format': 'netcdf_legacy',\n",
" \"variable\": [\"outgoing_longwave_radiation\"],\n",
" \"climate_data_record_type\": \"thematic_climate_data_record\",\n",
" \"time_aggregation\": \"monthly_mean\",\n",
" 'year': [year],\n",
" 'month': ['%02d' % (mnth) for mnth in range(1, 13)],\n",
" \"version\": [\"2_7_reprocessed\"]\n",
" }\n",
"\n",
" target = download_zip_file_year\n",
"\n",
" client.retrieve(dataset, request, target)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Unpack and merge data\n",
"\n",
"The zip files are now requested and downloaded to the data directory that we specified earlier.\n",
"For the purposes of this tutorial, we will unzip the archive and merge all files into one NetCDF file.\n",
"After that, we delete all individual files.\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# # Unzip the data. The dataset is split in monthly files.\n",
"# # Full HIRS dataset consists of more than 500 files.\n",
"# with zipfile.ZipFile(download_zip_file, 'r') as zip_ref:\n",
"# filelist = [os.path.join(DATADIR, f) for f in zip_ref.namelist()]\n",
"# zip_ref.extractall(DATADIR)\n",
"# # Ensure the filelist is in the correct order:\n",
"# filelist = sorted(filelist)\n",
"\n",
"# # Merge all unpacked files into one.\n",
"# ds = xr.open_mfdataset(filelist, concat_dim='time', combine='nested')\n",
"# ds.to_netcdf(merged_netcdf_file)\n",
"\n",
"# # Recursively delete unpacked data, using library glob,\n",
"# # that enables Unix style pathname pattern expansion\n",
"# for f in filelist:\n",
"# os.remove(f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# FIXED CODE FOR NEW DATA FORMAT\n",
"\n",
"# Initialize a list to hold the paths of all extracted files\n",
"filelist = []\n",
"\n",
"# Process each zip file listed in zip_files\n",
"for zip_file_path in zip_files:\n",
" # Define the extraction folder based on the zip file name (without .zip extension)\n",
" zip_file_name = os.path.basename(zip_file_path) # Extracts the filename from the full path\n",
" extract_folder = os.path.join(DATADIR, zip_file_name.replace(\".zip\", \"\"))\n",
"\n",
" # Ensure the extraction directory exists\n",
" os.makedirs(extract_folder, exist_ok=True)\n",
"\n",
" # Extract and collect file paths\n",
" with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:\n",
" # Extract all files into the specific directory for this zip\n",
" zip_ref.extractall(extract_folder)\n",
" \n",
" # Collect absolute paths of each extracted file\n",
" extracted_files = [os.path.join(extract_folder, f) for f in zip_ref.namelist() if os.path.isfile(os.path.join(extract_folder, f))]\n",
" filelist.extend(extracted_files) # Add these files to the main list\n",
"\n",
"# Sort the list of all extracted files\n",
"filelist = sorted(filelist)\n",
"\n",
"# Display the final list of all extracted file paths\n",
"filelist"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Merge all unpacked files into one.\n",
"ds = xr.open_mfdataset(filelist, concat_dim='time', combine='nested')\n",
"ds.to_netcdf(merged_netcdf_file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Recursively delete unpacked data, using library glob,\n",
"# that enables Unix style pathname pattern expansion\n",
"for f in filelist:\n",
" os.remove(f)"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"\n",
"\n",
"## 3. Use Cases\n",
"\n",
"\n",
"\n",
"### **Use case 1**: Climatology of the Outgoing Longwave Radiation (OLR)\n",
"\n",
"Firstly, we should get an overview of the parameter by plotting the time-averaged global distribution. \n",
"The data are stored in NetCDF format, and we will use Xarray library to work with the data. \n",
"We will then use Matplotlib and Catropy to visualise the data."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Load dataset, subselect and calculate temporal mean\n",
"\n",
"We load the NetCDF file with the library [xarray](https://docs.xarray.dev/en/stable/index.html) and the function `open_dataset()`. We receive the `xarray.Dataset` that has one data variable (OLR) and three dimensions: time: 480 steps, latitude: 72, and longitude: 144. This data type has also a number of attributes with auxiliary information about the data. <br>\n",
"Next, we want to select the specific time range for the plotting from January 1979 to December 2018. Xarray has a method `sel` that indexes the data and dimensions by the appropriate indexers.\n",
"We use the method `mean` to applying mean along the time dimension. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Read data and calculate the global mean.\n",
"xdf = xr.open_dataset(merged_netcdf_file, decode_times=True, mask_and_scale=True)\n",
"xdf = xdf.sel(time=slice('1979-01-01', '2019-01-01'))\n",
"\n",
"# calculate the mean along the time dimension\n",
"xdf_m = xdf.olr.mean(dim=['time'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Calculate temporal mean and convert longitude to [-180, 180] grid\n",
"\n",
"The code below shifts the longitude dimension from [1.25 to 358.75] to [-178.75, 178.75]. We also sort the longitude values in ascending order."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"xdf_m.coords['lon'] = ((xdf_m['lon'] + 180) % 360) - 180\n",
"xdf_m = xdf_m.loc[{'lon': sorted(xdf_m.coords['lon'].values)}]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Plot data\n",
"\n",
"First, we want to save objects `figure` and `axes` to use later. [Cartopy](https://scitools.org.uk/cartopy/docs/latest/) can transform data arrays on different geographic projections. We use Cartopy in combination with [Matplotlib](https://matplotlib.org/stable/index.html) to create a high-quality plot. `Pcolormesh` doesn't work with data arrays with NaNs. Before plotting we convert DataArray to the numpy MaskedArray."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2022-07-14T12:56:56.036409Z",
"start_time": "2022-07-14T12:56:49.932657Z"
}
},
"outputs": [],
"source": [
"fig1 = plt.figure(figsize=(16, 8))\n",
"ax1 = plt.subplot(1, 1, 1, projection=ccrs.PlateCarree())\n",
"\n",
"masked_average = xdf_m.to_masked_array() # mask the data because pcolormesh cannot plot nan-values\n",
"im = plt.pcolormesh(\n",
" xdf_m['lon'][:],\n",
" xdf_m['lat'][:],\n",
" masked_average,\n",
" cmap=plt.cm.get_cmap('YlOrRd'),\n",
" transform=ccrs.PlateCarree()\n",
")\n",
"\n",
"# Modify figure style; add lat/lon grid and labels\n",
"# NOTE: There is a known issue with the coastlines for the pip installation of cartopy,\n",
"# The following is a work-around, and not required if using cartopy\n",
"# ax1.coastlines(color='black')\n",
"ax1.add_feature(cfeature.LAND, edgecolor='black', facecolor=\"none\", linewidth=1, zorder=3)\n",
"\n",
"gl = ax1.gridlines(draw_labels=True, linewidth=1, color='gray', alpha=0.5, linestyle='--')\n",
"gl.top_labels = False # hide top and right labels\n",
"gl.right_labels = False\n",
"'''\n",
"Cartopy's matplotlib gridliner takes over the xlabel and ylabel and uses it to manage\n",
"grid lines and labels.\n",
"To add labels we need to add text to the Axes, such as:'''\n",
"ax1.text(\n",
" -0.07, 0.49, 'Latitude [ $^o$ ]', fontsize=17, va='bottom', ha='center',\n",
" rotation='vertical', rotation_mode='anchor', transform=ax1.transAxes)\n",
"ax1.text(\n",
" 0.5, -0.12, 'Longitude [ $^o$ ]', fontsize=17, va='bottom', ha='center',\n",
" rotation='horizontal', rotation_mode='anchor', transform=ax1.transAxes)\n",
"# Add colorbar\n",
"cb = fig1.colorbar(im, ax=ax1, label='W m$^{-2}$', fraction=0.045, pad=0.07, extend='both')\n",
"# Add title text\n",
"ax1.set_title(\n",
" '$\\\\bf{Mean\\ OLR\\ from\\ HIRS\\ Ed:2.7\\ (January\\ 1979-December\\ 2018)}$',\n",
" fontsize=20, pad=25)\n",
"\n",
"# and save the figure\n",
"fig1.savefig('./Example_1_HIRS_olr_mean.png', dpi=500, bbox_inches='tight')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Figure 1** shows the time-averaged Outgoing Longwave Radiation over the period January 1979 -\n",
"December 2018. The maximum values of the OLR are found in the tropics and it decreases toward the\n",
"poles. The relative minimum near the equator, in yellow colour, corresponds to the ITCZ and the\n",
"convection areas, where persistent and relatively high cloud cover reduces the LW radiation at the\n",
"TOA. Antarctica is cooler than the corresponding northern latitudes ([Harrison et al.](#harrison))."
]
},
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2022-07-14T12:11:22.446851Z",
"start_time": "2022-07-14T12:11:13.240311Z"
}
},
"source": [
"\n",
"\n",
"### **Use case 2**: Global time series of the OLR\n",
"\n",
"After looking at the time-averaged global distribution, in the next step, we further investigate the dataset. The OLR dataset is more than 40 years long, and another useful way of visualizing is the time series. We will calculate the global time series, plot it, and discuss the most important features."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We start by opening the combined data with Xarray `open_dataset` function \n",
"and subselect a time range from January 1979 to December 2018."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"xdf =xr.open_dataset(merged_netcdf_file, decode_times=True, mask_and_scale=True)\n",
"xdf"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"xdf = xdf.sel(time=slice('1979-01-01', '2019-01-01'))\n",
"xdf"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Apply weights and calculate the rolling mean\n",
"\n",
"Next, we need to to account for differences in area of grid cells for polar and equatorial regions. We give different weights for polar and equatorial regions, and the way to do this is to use the cosine of the latitude. We then calculate the global values by using `sum` method. And we calculate the 12-month rolling mean by using the `rolling` method.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# calculate normalized weights\n",
"_, lat_field = np.meshgrid(xdf.lon.values, xdf.lat.values)\n",
"weight = np.cos(np.pi * lat_field / 180)\n",
"data = np.ma.masked_invalid(xdf.olr)\n",
"weight[data.mask[0, :, :]] = np.nan\n",
"weight_normalized = weight / np.nansum(weight)\n",
"\n",
"# Add weights as second Xarray Data variable and apply weights to the OLR\n",
"xdf['weight_normalized'] = xr.DataArray(\n",
" weight_normalized, coords=[xdf.lat.values, xdf.lon.values], dims=['lat', 'lon'])\n",
"xdf['olr_norma'] = xdf.olr * xdf.weight_normalized\n",
"\n",
"# calculate the global values\n",
"xdf_m = xdf.olr_norma.sum(dim=['lat', 'lon'])\n",
"# calculate the 12-month rolling mean\n",
"xdf_mean_rolling = xdf_m.rolling(time=12, center=True).mean().dropna('time')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Plot data\n",
"\n",
"Now, let's visualize the time series. Xarray offers built-in matplotlib methods to plot Data Arrays. We use the method `plot` to get the plot.<br>\n",
"We also want to add custom xticks. Our x-axis is the time, and one of the ways to work with the time variable is to use library Pandas. This library can work with both date and time. We use method `date_range` to generate the ticks with frequency of two years."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plotting.\n",
"xdf_mean_rolling.plot(figsize=[16, 8])\n",
"\n",
"# Add title, x- and y-labels\n",
"plt.title('$\\\\bf{HIRS\\ OLR-Globe }$\\nYearly rolling mean')\n",
"plt.ylabel(\"OLR [ W m$^{-2}$ ]\", fontsize=17)\n",
"plt.xlabel(\"Year\", fontsize=17)\n",
"\n",
"# Custom x-ticks\n",
"dateStart = pd.to_datetime('1979-01-01', format='%Y-%m-%d')\n",
"date_End = pd.to_datetime('2019-01-01', format='%Y-%m-%d')\n",
"dates_rng = pd.date_range(dateStart, date_End, freq='2YS')\n",
"plt.xticks(dates_rng, [dtz.strftime('%Y') for dtz in dates_rng], rotation=45)\n",
"\n",
"plt.savefig('./Example_2_olr_timeserie_Globe.png', dpi=500, bbox_inches='tight')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Figure 2**, shows the evolution of the global mean of the OLR using a 12-month rolling mean. This long\n",
"CDR shows some interesting features such as the drop of OLR due to a global radiative perturbation\n",
"in response to volcanic eruptions, for instance, El Chichon in 1982 and Pinatubo in 1991. The main\n",
"atmospheric thermal effects of these two eruptions persist for about two years after the eruption.\n",
"The figure also shows some inter-annual variations that are suggested to be related to “El Niño” or\n",
"“La Niña” Southern Oscillation."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"\n",
"### **Use case 3**: OLR evolution in the Arctic\n",
"\n",
"In the final use case, we will calculate, plot and discuss the OLR time series in the Arctic region. We can re-use code from the previous use case, but we need to add one additional step: select the Arctic region from the original global dataset. <br>\n",
"\n",
"We start by reading the dataset with xarray `open_dataset` method. We have used `sel` method before to select the data based on time coordinate. This time, we will select the latitude coordinate label."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Read the dataset\n",
"xdf = xr.open_dataset(merged_netcdf_file, decode_times=True, mask_and_scale=True)\n",
"\n",
"# Select time and latitude range\n",
"xdf = xdf.sel(time=slice('1979-01-01', '2019-01-01'))\n",
"lat_slice = slice(70, 90)\n",
"xdf = xdf.sel(lat=lat_slice)\n",
"\n",
"# Calculate normalized weights\n",
"_, lat_field = np.meshgrid(xdf.lon.values, xdf.lat.values)\n",
"weight = np.cos(np.pi*lat_field/180)\n",
"data = np.ma.masked_invalid(xdf.olr)\n",
"weight[data.mask[0, :, :]] = np.nan\n",
"weight_normalized = weight / np.nansum(weight)\n",
"\n",
"# Add weights as second Xarray Data variable and apply weights to the OLR\n",
"xdf['weight_normalized'] = xr.DataArray(\n",
" weight_normalized, coords=[xdf.lat.values, xdf.lon.values], dims=['lat', 'lon'])\n",
"xdf['olr_norma'] = xdf.olr * xdf.weight_normalized\n",
"\n",
"# calculate the global values\n",
"xdf_m = xdf.olr_norma.sum(dim=['lat', 'lon'])\n",
"# calculate the 12-month rolling mean\n",
"xdf_Arctic_mean_rolling = xdf_m.rolling(time=12, center=True).mean().dropna('time')\n",
"\n",
"# Plotting.\n",
"xdf_Arctic_mean_rolling.plot(figsize=[16, 8])\n",
"\n",
"# Add title, x- and y-labels\n",
"plt.title('$\\\\bf{HIRS\\ OLR-Arctic\\ [70°N, 90°N] }$\\nYearly rolling mean')\n",
"plt.ylabel(\"OLR [ W m$^{-2}$ ]\", fontsize=17)\n",
"plt.xlabel(\"Year\", fontsize=17)\n",
"\n",
"# Custom x ticks\n",
"dateStart = pd.to_datetime('1979-01-01', format='%Y-%m-%d')\n",
"date_End = pd.to_datetime('2019-01-01', format='%Y-%m-%d')\n",
"dates_rng = pd.date_range(dateStart, date_End, freq='2YS')\n",
"plt.xticks(dates_rng, [dtz.strftime('%Y') for dtz in dates_rng], rotation=45)\n",
"\n",
"# Save figure to the disk \n",
"plt.savefig('./Example_3_olr_timeserie_Arctic.png', dpi=500, bbox_inches='tight')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Figure 3**, shows the evolution of the OLR in the Arctic region [70°N-90°N] for the period 1979-2019\n",
"also using a yearly rolling mean. From the image, we can infer an increase in the OLR in the Arctic\n",
"region with time. This increase is strongly marked as from 1998 and in line with the increase of\n",
"temperature in the Arctic ([Hansen et al, 2010](#hansen)), while the relative stability of the first part of the time\n",
"series is more difficult to interpret due to the major volcanic eruptions in this period."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get more information about Earth Radiation Budget:\n",
"\n",
"- [Earth's radiation budget from 1979 to present derived from satellite observations](https://cds-beta.climate.copernicus.eu/datasets/satellite-earth-radiation-budget?tab=overview)\n",
"- [Climate Data Store](https://cds-beta.climate.copernicus.eu/)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"\n",
"## References\n",
"\n",
"Hansen, J., Ruedy, R., Sato, M., & Lo, K., (2010). _Global surface temperature change_. Reviews of Geophysics, 48(4).\n",
"\n",
"Harrison, E.F., Gurney, R. J., Foster, J. L., Gurney, R. J., & Parkinson, C. L. (1993). _Atlas of satellite observations related to global change_. Chapter: Radiation Budget at the top of the atmosphere. Cambridge University Press."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[Back to top the Page](#page-top)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": true,
"toc_position": {
"height": "calc(100% - 180px)",
"left": "10px",
"top": "150px",
"width": "292.6px"
},
"toc_section_display": true,
"toc_window_display": true
},
"vscode": {
"interpreter": {
"hash": "d3b527990b178e211e62cc036a598e7ce4e291c47b05a4db650ff2ea28e5c4b6"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,431 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "0c6cb208-a187-47b9-84cd-c608be232e1b",
"metadata": {},
"source": [
"# 04 Parallezation and Advanced Topics"
]
},
{
"cell_type": "markdown",
"id": "99c1efb2-4561-4c25-8337-124c36119925",
"metadata": {},
"source": [
"This notebooks demonstrates different options for running ibicus bias adjustment on larger areas and larger computing environments using the built-in parallelization and integration with dask. In the second part it looks at some advanced topics: logging as well as extending the package with own methods.\n"
]
},
{
"cell_type": "markdown",
"id": "4578b9a2-8195-45db-8bc3-b0eee89d4755",
"metadata": {},
"source": [
"## 1. Running ibicus in larger environments: parallelization and dask"
]
},
{
"cell_type": "markdown",
"id": "2b759e72-6385-4213-8c21-a2129727e9ae",
"metadata": {},
"source": [
"ibicus comes with an integrated parallelization option building upon the `multiprocessing` module. It also integrates easily with dask to run in HPC environments. In this notebook, we demonstrate these options using a CDFt and QuantileMapping debiaser."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8bb4d6b0-f860-42b0-9453-f61290298591",
"metadata": {},
"outputs": [],
"source": [
"from ibicus.debias import CDFt, QuantileMapping"
]
},
{
"cell_type": "markdown",
"id": "c28b9a5d-4a77-442b-89b1-3017faffd977",
"metadata": {},
"source": [
"Let's get some testing data. For an explanation of the steps please refer to the \"Getting started\" notebook:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "40808b4e-c517-4490-9fae-1ebd86eba5d6",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"def get_data(variable, data_path = \"testing_data/\"):\n",
" # Load in the data \n",
" data = np.load(f\"{data_path}{variable}.npz\", allow_pickle = True)\n",
" # Return arrays\n",
" return data[\"obs\"], data[\"cm_hist\"], data[\"cm_future\"], {\"time_obs\": data[\"time_obs\"], \"time_cm_hist\": data[\"time_cm_hist\"], \"time_cm_future\": data[\"time_cm_future\"]}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "bcfdcd7b-f01c-4b04-99b6-343e4c5e9c35",
"metadata": {},
"outputs": [],
"source": [
"obs, cm_hist, cm_future, dates = get_data(\"tas\")"
]
},
{
"cell_type": "markdown",
"id": "4d0da871-005b-46c2-8187-b3aade373c00",
"metadata": {},
"source": [
"### 1.1. Parallelization"
]
},
{
"cell_type": "markdown",
"id": "b0b6d679-8bcf-4823-bb0c-2682179a735d",
"metadata": {},
"source": [
"Parallelization can be activated in the existing ibicus functionalities by simply specifying `parallel = True` in the `debiaser.apply`-function:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "5ec9cc9d-f009-4a98-b245-d7a14cce9f38",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/jakobwes/Desktop/ESoWC/ibicus/notebooks/own_testing_notebooks/../../ibicus/debias/_debiaser.py:535: UserWarning: progressbar argument is ignored when parallel = True.\n",
" warnings.warn(\"progressbar argument is ignored when parallel = True.\")\n"
]
}
],
"source": [
"debiaser = CDFt.from_variable(\"tas\")\n",
"debiased_cm_future = debiaser.apply(obs, cm_hist, cm_future, **dates, parallel = True, nr_processes = 8)"
]
},
{
"cell_type": "markdown",
"id": "4185668f-c046-4156-8310-3a9f9d4d99c1",
"metadata": {},
"source": [
"The number of processes that run in parallel can be controlled using the `nr_processes` option. The default option are 4 processes. For more details see the [ibicus API reference](https://ibicus.readthedocs.io/en/latest/reference/debias.html#ibicus.debias.Debiaser). Important to note: no progressbar is shown in parallelized execution. \n",
"\n",
"We recommend using parallelization if users are interested in speeding up the execution of bias adjustment on a single machine."
]
},
{
"cell_type": "markdown",
"id": "84347a9a-daf2-4ce5-a4eb-46432c729468",
"metadata": {},
"source": [
"### 1.2. Dask"
]
},
{
"cell_type": "markdown",
"id": "47d9b906-3738-4e32-a661-45194f7e7445",
"metadata": {},
"source": [
"For some problems the speedup provided by the simple parallelization presented above does not provide enough flexibility: for example if users are interested in scaling debiasing in an HPC environment on many machines or if the observation and climate model data does not fit into RAM. \n",
"\n",
"To address these issues, ibicus integrates easily with `dask`. `dask` is an open-source python library for parallel computing allowing users to easily scale their python code from multi-core machines to large clusters. It is integrated in both `xarray` and `iris` (see here for the [xarray dask integration](https://docs.xarray.dev/en/stable/user-guide/dask.html) and here for [the iris one](https://scitools-iris.readthedocs.io/en/latest/userguide/real_and_lazy_data.html)). In both both libraries, it is possible to extract the underlying dask arrays needed for computation. \n",
"\n",
"For a dask introduction see [here](https://tutorial.dask.org/00_overview.html) and for a practical introduction on how to use dask on a HPC cluster see [this tutorial](https://www.youtube.com/watch?v=FXsgmwpRExM&t=441s). We will only use the `dask.array` module here:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4f6c82e2-7724-4fa3-8e6c-f91c32c8e8f3",
"metadata": {},
"outputs": [],
"source": [
"import dask.array as da"
]
},
{
"cell_type": "markdown",
"id": "5e392f6c-de41-4fab-980f-1fcbe9a9435b",
"metadata": {},
"source": [
"Let's get some larger testing data:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "1aaca898-80e6-4111-bc58-c2473aca1b40",
"metadata": {},
"outputs": [],
"source": [
"obs = da.from_array(np.random.normal(270, 20, size = 50*50*10000).reshape((50, 50, 10000)), chunks=(5, 10, 10000))\n",
"cm_hist = da.from_array(np.random.normal(265, 15, size = 50*50*10000).reshape((50, 50, 10000)), chunks=(5, 10, 10000))\n",
"cm_future = da.from_array(np.random.normal(280, 30, size = 50*50*10000).reshape((50, 50, 10000)), chunks=(5, 10, 10000))"
]
},
{
"cell_type": "markdown",
"id": "76edda2e-92e7-408f-8cd6-d918ee28b3c2",
"metadata": {},
"source": [
"For our purposes it is crucial that the dask arrays are **chunked in the spatial dimension** meaning chunks can be defined in the first two dimensions, but always need to include the full time dimension at each location. This is required to calculate the climatology at each location.\n",
"\n",
"Given correctly chunked arrays applying dask is easily possible by just mapping the `debiaser.apply` function over all chunks using eg. `map_blocks`:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "de7b4668-7799-4bed-a9d4-c22d2f030eac",
"metadata": {},
"outputs": [],
"source": [
"debiaser = QuantileMapping.from_variable(\"tas\")\n",
"\n",
"collection = da.map_blocks(debiaser.apply, obs, cm_hist, cm_future, dtype=obs.dtype, progressbar = False, parallel = False)\n",
"debiased_cm_future = collection.compute(num_workers=8)"
]
},
{
"cell_type": "markdown",
"id": "5fedbaed-5f2d-44bf-aeb2-8e2ddb9b4323",
"metadata": {},
"source": [
"It is also possible to use other dask mapping functions such as `blockwise`. To use the ibicus `apply` function together with dask it is important to specify two arguments:\n",
"\n",
"- `progressbar = False` otherwise the progressbar output will fill the output log. A dask progressbar can be used by importing `dask.diagnostics.ProgressBar`.\n",
"- `parallel = False` (default) because otherwise ibicus parallelisation will interfere with the dask one. \n",
"\n",
"In the case of bias adjustment methods where the apply function requires additional information such as time/dates, this can be specified as keywords arguments to `map_blocks`. For very big runs it is also recommended to specify `failsafe = True` to make sure that if the debiaser fails at some locations the output for the other ones can still be saved. When doing so it is even more important to check the logs for any errors and to evaluate the output carefully.\n",
"\n",
"Dask itself provides a big variety of customization options and we recommend checking those out."
]
},
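  {
   "cell_type": "markdown",
   "id": "added-dask-progressbar-note",
   "metadata": {},
   "source": [
    "For example, a dask progress bar can be wrapped around the compute call. This is a minimal sketch of that option, reusing the `collection` defined above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "added-dask-progressbar-code",
   "metadata": {},
   "outputs": [],
   "source": [
    "from dask.diagnostics import ProgressBar\n",
    "\n",
    "# Let dask report progress instead of the (disabled) ibicus progressbar\n",
    "with ProgressBar():\n",
    "    debiased_cm_future = collection.compute(num_workers=8)"
   ]
  },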
{
"cell_type": "markdown",
"id": "f670440e-6217-491d-941c-e8cb3ab2c913",
"metadata": {},
"source": [
"## 2. What about logging and warnings?"
]
},
{
"cell_type": "markdown",
"id": "44705e77-c97d-40dd-ba3f-8aec31b17a62",
"metadata": {},
"source": [
"A brief note on logging and warnings: when ibicus encounters issues during code execution a warning or error message will be raised and the standard python tools to handle these can be used. ibicus also writes logs during the execution and logs errors during failsafe mode. The logs are written to the \"ibicus\" logger (`ibicus.utils.get_library_logger()`) and utils provides some [options to set the logging level for ibicus](). The logging outputs can be handled in the usual way as specified by the [logging library](https://docs.python.org/3/howto/logging.html#logging-basic-tutorial): they can be formatted, written to file, ignored, etc. "
]
},
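  {
   "cell_type": "markdown",
   "id": "logging-configuration-sketch",
   "metadata": {},
   "source": [
    "As a minimal sketch of configuring the ibicus logger with the standard `logging` library (assuming `get_library_logger` can be imported from `ibicus.utils` as referenced above; the level, filename and format below are illustrative choices, not ibicus defaults):\n",
    "\n",
    "```python\n",
    "import logging\n",
    "\n",
    "from ibicus.utils import get_library_logger\n",
    "\n",
    "logger = get_library_logger()\n",
    "logger.setLevel(logging.WARNING)  # only pass on warnings and errors from ibicus\n",
    "\n",
    "# Additionally write ibicus log messages to a file (hypothetical filename)\n",
    "file_handler = logging.FileHandler(\"ibicus_run.log\")\n",
    "file_handler.setFormatter(logging.Formatter(\"%(asctime)s %(levelname)s %(message)s\"))\n",
    "logger.addHandler(file_handler)\n",
    "```"
   ]
  },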
{
"cell_type": "markdown",
"id": "bceb0121-a8d1-480c-b5c9-737e2476dc72",
"metadata": {},
"source": [
"## 3. Creating your own bias adjustment methods"
]
},
{
"cell_type": "markdown",
"id": "2e457cfe-1c4a-4989-a471-538e51f77c7e",
"metadata": {},
"source": [
"By building upon the common framework and interface developed in the ibicus package, it is straightforward to implement your own bias adjustment methods when using the package. A new bias adjustment method can be set up as an attrs-child-class of the abstract `Debiaser`-class ([see here for the documentation](https://ibicus.readthedocs.io/en/latest/reference/debias.html#ibicus.debias.Debiaser)). A child class needs to include two functions:\n",
"\n",
"- an `apply_location()` function which applies an initialised debiaser at one location. Arguments are 1d-vectors of obs, cm_hist, and cm_future representing observations, and climate model values during the reference (cm_hist) and future period (cm_future). Additionally kwargs passed to the debiaser apply()-function are passed down to the apply_location()-function.\n",
"\n",
"- a `from_variable()` function which initialises a debiaser with default arguments given a climatic variable either as str or member of the `Variable`-class. kwargs are meant to overwrite default arguments for this variable. Given a dict of default arguments with variables of the `Variable` class as keys and dict of default arguments as values the `cls._from_variable()`-function can be used to automatically map variable arguments to default settings.\n",
"\n",
"Given these two functions are provided, the abstract debiaser class then takes care of setup, iterating the application of the method over locations, parallelization, input sanitization, etc.\n",
"\n",
"Alternatively a user can also create a subclass of the `RunningWindowDebiaser` class. This enables the user to apply the new method in a running window setting. This subclass needs specification of an `apply_on_window` function instead of an `apply_location`-function. Below is an example of how a new version of LinearScaling could be set up using the RunningWindowDebiaser:"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "27b0211d-2b80-4c09-9912-32b28bc05730",
"metadata": {},
"outputs": [],
"source": [
"import attrs\n",
"import numpy as np\n",
"\n",
"# Import the RunningWindowDebiaser from ibicus to subclass\n",
"from ibicus.debias import RunningWindowDebiaser\n",
"\n",
"# Define the new debiaser as an attrs-subclass. Slotted classes don't work well with inheritance so we use slots=False\n",
"@attrs.define(slots=False)\n",
"class LinearScaling(RunningWindowDebiaser):\n",
"\n",
" # Define an argument of the debiaser: the type of transformation used\n",
" delta_type: str = \"additive\"\n",
"\n",
" # Define the from_variable-method to initialize the debiaser. \n",
" @classmethod\n",
" def from_variable(cls, variable, delta_type, **kwargs):\n",
" return cls(variable = variable, delta_type = delta_type)\n",
" \n",
" # Define the apply_on_window method to apply the debiaser\n",
" def apply_on_window(self, obs, cm_hist, cm_future, **kwargs):\n",
" \n",
" # Depending on delta_type apply a different transformation\n",
" if self.delta_type == \"additive\":\n",
" return cm_future - (np.mean(cm_hist) - np.mean(obs))\n",
" \n",
" elif self.delta_type == \"multiplicative\":\n",
" return cm_future * (np.mean(obs) / np.mean(cm_hist))\n",
" \n",
" else:\n",
" raise ValueError('self.delta_type needs to be one of [\"additive\", \"multiplicative\"].')\n"
]
},
{
"cell_type": "markdown",
"id": "42e5ba74-f24d-40ff-b410-bd6439103190",
"metadata": {},
"source": [
"We can then instantiate and apply the class as follows over a grid of locations:"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "9a8e89dd-92cd-4cb4-805d-f36c6cda6f69",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████████| 9/9 [00:00<00:00, 5183.84it/s]\n"
]
}
],
"source": [
"debiaser = LinearScaling.from_variable(\"tas\", delta_type = \"additive\")\n",
"output = debiaser.apply(np.random.random((100, 3, 3))+280, np.random.random((100, 3, 3))+282, np.random.random((100, 3, 3))+284)"
]
},
{
"cell_type": "markdown",
"id": "0e51c8f7-00ab-4bd8-9222-3a4c2c815122",
"metadata": {},
"source": [
"Class-attributes such as the `delta_type` can also be set up as `attrs.field` attributes. This has the advantage of enabling the automatic checking and sanitization of inputs. For example we could write the `delta_type`-definition as:\n",
"\n",
"```python\n",
"delta_type: str = attrs.field(default=\"additive\", validator=attrs.validators.in_([\"additive\", \"multiplicative\"]))\n",
"```\n",
"\n",
"In this example, the objective can only be created if `delta_type` is either *additive* or *multiplicative*. Otherwise, an error is given."
]
},
{
"cell_type": "markdown",
"id": "4ec24bda",
"metadata": {},
"source": [
"Furthermore, the user can also define default settings and experimental default settings for different variables and use the `cls._from_variable()` function to map variable arguments to their settings when implementing a new method, as shown in the following example:"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "847f5e8a-d475-43da-8776-e00ebe959a6f",
"metadata": {},
"outputs": [],
"source": [
"from ibicus.variables import tas, pr, hurs, psl\n",
"\n",
"# Define default setting and experimental default settings:\n",
"default_settings = {tas: {\"delta_type\": \"additive\"}, pr: {\"delta_type\": \"multiplicative\"},}\n",
"experimental_default_settings = {hurs: {\"delta_type\": \"multiplicative\"}, psl: {\"delta_type\": \"additive\"}}\n",
"\n",
"@attrs.define(slots=False)\n",
"class LinearScaling(RunningWindowDebiaser):\n",
" delta_type: str = \"additive\"\n",
"\n",
" @classmethod\n",
" def from_variable(cls, variable, **kwargs):\n",
" # Use the cls._from_variable helper functions to map a variable onto it's settings\n",
" return cls._from_variable(cls, variable, default_settings, experimental_default_settings, **kwargs)\n",
" \n",
" def apply_on_window(self, obs, cm_hist, cm_future, **kwargs):\n",
" if self.delta_type == \"additive\":\n",
" return cm_future - (np.mean(cm_hist) - np.mean(obs))\n",
" elif self.delta_type == \"multiplicative\":\n",
" return cm_future * (np.mean(obs) / np.mean(cm_hist))\n",
" else:\n",
" raise ValueError('self.delta_type needs to be one of [\"additive\", \"multiplicative\"].')\n"
]
},
{
"cell_type": "markdown",
"id": "c65de448-be55-4950-ad91-51df4c5f24c4",
"metadata": {},
"source": [
"This allows instantiation and application as follows:"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "71d236c7-97bb-48ce-a9ef-4314da67210a",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_44291/79605568.py:14: UserWarning: The default settings for variable psl in debiaser LinearScaling are currently still experimental and may not have been evaluated in the peer-reviewed literature. Please review the results with care!\n",
" return cls._from_variable(cls, variable, default_settings, experimental_default_settings, **kwargs)\n",
"100%|███████████████████████████████████████████| 9/9 [00:00<00:00, 4026.96it/s]\n"
]
}
],
"source": [
"debiaser = LinearScaling.from_variable(\"psl\")\n",
"output = debiaser.apply(np.random.random((100, 3, 3))+1, np.random.random((100, 3, 3))+2, np.random.random((100, 3, 3))+3)"
]
},
{
"cell_type": "markdown",
"id": "77b77268-9c7d-4fb2-977a-00fd6aa30fc8",
"metadata": {},
"source": [
"The LinearScaling debiaser set up here includes a running window functionality. If this is not required then we could also subclass the `Debiaser` instead of `RunningWindowDebiaser` to set up a new debiaser."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}