Figure settings
Contents
#W2D1 Tutorial 2: Time series, global averages, and scenario comparison
Indented block
Indented block
Week 2, Day 1, Future Climate: The Physical Basis
Content creators: Brodie Pearson (Day Lead), Julius Busecke (Tutorial co-lead), Tom Nicholas (Tutorial co-lead)
Content reviewers: Jenna Pearson, Ohad Zivan
Content editors: TBD
Production editors: TBD
Our 2023 Sponsors: TBD
#Tutorial Objectives
Today’s tutorials demonstrate how to work with data from Earth System Models (ESMs) simulations conducted for the recent Climate Model Intercomparison Project (CMIP6)
By the end of today’s tutorials you will be able to:
Manipulate raw data from multiple CMIP6 models
Evaluate the spread of future projections from several CMIP6 models
Synthesize climate data from observations and models
#Setup
# #Imports
# !pip install condacolab &> /dev/null
# import condacolab
# condacolab.install()
# # Install all packages in one call (+ use mamba instead of conda)
# # hopefully this improves speed
# !mamba install xarray-datatree intake-esm gcsfs xmip aiohttp nc-time-axis cf_xarray xarrayutils &> /dev/null
import time
tic = time.time()
import intake
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
from xmip.preprocessing import combined_preprocessing
from xarrayutils.plotting import shaded_line_plot
from datatree import DataTree
from xmip.postprocessing import _parse_metric
Figure settings#
# @title Figure settings
import ipywidgets as widgets # interactive display
%config InlineBackend.figure_format = 'retina'
plt.style.use(
"https://raw.githubusercontent.com/ClimateMatchAcademy/course-content/main/cma.mplstyle"
)
# model_colors = {k:f"C{ki}" for ki, k in enumerate(source_ids)}
Plotting functions#
# @title Plotting functions
# You may have functions that plot results that aren't
# particularly interesting. You can add these here to hide them.
def plotting_z(z):
"""This function multiplies every element in an array by a provided value
Args:
z (ndarray): neural activity over time, shape (T, ) where T is number of timesteps
"""
fig, ax = plt.subplots()
ax.plot(z)
ax.set(xlabel="Time (s)", ylabel="Z", title="Neural activity over time")
Helper functions#
# @title Helper functions
# If any helper functions you want to hide for clarity (that has been seen before
# or is simple/uniformative), add here
# If helper code depends on libraries that aren't used elsewhere,
# import those libaries here, rather than in the main import cell
Tutorial 2: Time series, global averages, and scenario comparison#
Video 1: Video 1 Name#
# @title Video 1: Video 1 Name
# Tech team will add code to format and display the video
##Section 2.1: Load CMIP6 SST data from several models using xarray
Let’s expand on Tutorial 1 by loading five different CMIP6 models on last week’s Climate Modelling day.
col = intake.open_esm_datastore(
"https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
) # open an intake catalog containing the Pangeo CMIP cloud data
# pick our five example models
# There are many more to test out! Try executing `col.df['source_id'].unique()` to get a list of all available models
source_ids = ["IPSL-CM6A-LR", "GFDL-ESM4", "ACCESS-CM2", "MPI-ESM1-2-LR", "TaiESM1"]
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Cell In[7], line 1
----> 1 col = intake.open_esm_datastore(
2 "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
3 ) # open an intake catalog containing the Pangeo CMIP cloud data
5 # pick our five example models
6 # There are many more to test out! Try executing `col.df['source_id'].unique()` to get a list of all available models
7 source_ids = ["IPSL-CM6A-LR", "GFDL-ESM4", "ACCESS-CM2", "MPI-ESM1-2-LR", "TaiESM1"]
File ~/miniconda3/envs/climatematch/lib/python3.10/site-packages/intake_esm/core.py:107, in esm_datastore.__init__(self, obj, progressbar, sep, registry, read_csv_kwargs, columns_with_iterables, storage_options, **intake_kwargs)
105 self.esmcat = ESMCatalogModel.from_dict(obj)
106 else:
--> 107 self.esmcat = ESMCatalogModel.load(
108 obj, storage_options=self.storage_options, read_csv_kwargs=read_csv_kwargs
109 )
111 self.derivedcat = registry or default_registry
112 self._entries = {}
File ~/miniconda3/envs/climatematch/lib/python3.10/site-packages/intake_esm/cat.py:264, in ESMCatalogModel.load(cls, json_file, storage_options, read_csv_kwargs)
262 csv_path = f'{os.path.dirname(_mapper.root)}/{cat.catalog_file}'
263 cat.catalog_file = csv_path
--> 264 df = pd.read_csv(
265 cat.catalog_file,
266 storage_options=storage_options,
267 **read_csv_kwargs,
268 )
269 else:
270 df = pd.DataFrame(cat.catalog_dict)
File ~/miniconda3/envs/climatematch/lib/python3.10/site-packages/pandas/io/parsers/readers.py:912, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
899 kwds_defaults = _refine_defaults_read(
900 dialect,
901 delimiter,
(...)
908 dtype_backend=dtype_backend,
909 )
910 kwds.update(kwds_defaults)
--> 912 return _read(filepath_or_buffer, kwds)
File ~/miniconda3/envs/climatematch/lib/python3.10/site-packages/pandas/io/parsers/readers.py:577, in _read(filepath_or_buffer, kwds)
574 _validate_names(kwds.get("names", None))
576 # Create the parser.
--> 577 parser = TextFileReader(filepath_or_buffer, **kwds)
579 if chunksize or iterator:
580 return parser
File ~/miniconda3/envs/climatematch/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1407, in TextFileReader.__init__(self, f, engine, **kwds)
1404 self.options["has_index_names"] = kwds["has_index_names"]
1406 self.handles: IOHandles | None = None
-> 1407 self._engine = self._make_engine(f, self.engine)
File ~/miniconda3/envs/climatematch/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1661, in TextFileReader._make_engine(self, f, engine)
1659 if "b" not in mode:
1660 mode += "b"
-> 1661 self.handles = get_handle(
1662 f,
1663 mode,
1664 encoding=self.options.get("encoding", None),
1665 compression=self.options.get("compression", None),
1666 memory_map=self.options.get("memory_map", False),
1667 is_text=is_text,
1668 errors=self.options.get("encoding_errors", "strict"),
1669 storage_options=self.options.get("storage_options", None),
1670 )
1671 assert self.handles is not None
1672 f = self.handles.handle
File ~/miniconda3/envs/climatematch/lib/python3.10/site-packages/pandas/io/common.py:716, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
713 codecs.lookup_error(errors)
715 # open URLs
--> 716 ioargs = _get_filepath_or_buffer(
717 path_or_buf,
718 encoding=encoding,
719 compression=compression,
720 mode=mode,
721 storage_options=storage_options,
722 )
724 handle = ioargs.filepath_or_buffer
725 handles: list[BaseBuffer]
File ~/miniconda3/envs/climatematch/lib/python3.10/site-packages/pandas/io/common.py:373, in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options)
370 if content_encoding == "gzip":
371 # Override compression based on Content-Encoding header
372 compression = {"method": "gzip"}
--> 373 reader = BytesIO(req.read())
374 return IOArgs(
375 filepath_or_buffer=reader,
376 encoding=encoding,
(...)
379 mode=fsspec_mode,
380 )
382 if is_fsspec_url(filepath_or_buffer):
File ~/miniconda3/envs/climatematch/lib/python3.10/http/client.py:482, in HTTPResponse.read(self, amt)
480 else:
481 try:
--> 482 s = self._safe_read(self.length)
483 except IncompleteRead:
484 self._close_conn()
File ~/miniconda3/envs/climatematch/lib/python3.10/http/client.py:631, in HTTPResponse._safe_read(self, amt)
624 def _safe_read(self, amt):
625 """Read the number of bytes requested.
626
627 This function should be used when <amt> bytes "should" be present for
628 reading. If the bytes are truly not available (due to EOF), then the
629 IncompleteRead exception can be used to detect the problem.
630 """
--> 631 data = self.fp.read(amt)
632 if len(data) < amt:
633 raise IncompleteRead(data, amt-len(data))
File ~/miniconda3/envs/climatematch/lib/python3.10/socket.py:705, in SocketIO.readinto(self, b)
703 while True:
704 try:
--> 705 return self._sock.recv_into(b)
706 except timeout:
707 self._timeout_occurred = True
File ~/miniconda3/envs/climatematch/lib/python3.10/ssl.py:1274, in SSLSocket.recv_into(self, buffer, nbytes, flags)
1270 if flags != 0:
1271 raise ValueError(
1272 "non-zero flags not allowed in calls to recv_into() on %s" %
1273 self.__class__)
-> 1274 return self.read(nbytes, buffer)
1275 else:
1276 return super().recv_into(buffer, nbytes, flags)
File ~/miniconda3/envs/climatematch/lib/python3.10/ssl.py:1130, in SSLSocket.read(self, len, buffer)
1128 try:
1129 if buffer is not None:
-> 1130 return self._sslobj.read(len, buffer)
1131 else:
1132 return self._sslobj.read(len)
KeyboardInterrupt:
If the following cell crashes, run the cell a second time#
# from the full `col` object, create a subset using facet search
cat = col.search(
source_id=source_ids,
variable_id="tos",
member_id="r1i1p1f1",
table_id="Omon",
grid_label="gn",
experiment_id=["historical", "ssp126", "ssp585"],
require_all_on=[
"source_id"
], # make sure that we only get models which have all of the above experiments
)
# convert the sub-catalog into a datatree object, by opening each dataset into an xarray.Dataset (without loading the data)
kwargs = dict(
preprocess=combined_preprocessing, # apply xMIP fixes to each dataset
xarray_open_kwargs=dict(
use_cftime=True
), # ensure all datasets use the same time index
storage_options={
"token": "anon"
}, # anonymous/public authentication to google cloud storage
)
# hopefully we can implement https://github.com/intake/intake-esm/issues/562 before the
# actual tutorial, so this would be a lot cleaner
cat.esmcat.aggregation_control.groupby_attrs = ["source_id", "experiment_id"]
dt = cat.to_datatree(**kwargs)
###Coding Exercise 2.1: Load additional CMIP6 data sets
In the following tutorials we will be looking at the global mean sea surface temperature. To calculate this global mean, will need to know the horizontal area of every ocean grid cell in all the models we are using.
Write code to load this ocean-grid area data using the previously shown method for SST data, noting that:
We now need a variable called areacello (area of cells in the ocean)
This variable is stored in table_id Ofx (it is from the ocean model and is fixed/constant in time)
A model’s grid does not change between experiments so you only need to get grid data from the historical experiment for each model
#################################################
## TODO for students: details of what they should do ##
# Fill out function and remove
raise NotImplementedError("Student exercise: load the ocean-grid area data from the 5 CMIP6 models using the information above")
#################################################
cat_area = col.search(
source_id=source_ids,
# Add the appropriate variable_id
variable_id=...,
member_id='r1i1p1f1',
# Add the appropriate table_id
table_id=...,
grid_label='gn',
# Add the appropriate experiment_id
experiment_id = [...],
require_all_on = ['source_id']
)
# hopefully we can implement https://github.com/intake/intake-esm/issues/562 before the
# actual tutorial, so this would be a lot cleaner
cat_area.esmcat.aggregation_control.groupby_attrs = ['source_id', 'experiment_id']
dt_area = cat_area.to_datatree(**kwargs)
dt_with_area = DataTree()
for model,subtree in dt.items():
metric = dt_area[model]['historical'].ds['areacello']
dt_with_area[model] = subtree.map_over_subtree(_parse_metric,metric)
# to_remove solution
cat_area = col.search(
source_id=source_ids,
# Add the appropriate variable_id
variable_id="areacello",
member_id="r1i1p1f1",
# Add the appropriate table_id
table_id="Ofx",
grid_label="gn",
# Add the appropriate experiment_id
experiment_id=["historical"],
require_all_on=["source_id"],
)
# hopefully we can implement https://github.com/intake/intake-esm/issues/562 before the
# actual tutorial, so this would be a lot cleaner
cat_area.esmcat.aggregation_control.groupby_attrs = ["source_id", "experiment_id"]
dt_area = cat_area.to_datatree(**kwargs)
dt_with_area = DataTree()
for model, subtree in dt.items():
metric = dt_area[model]["historical"].ds["areacello"]
dt_with_area[model] = subtree.map_over_subtree(_parse_metric, metric)
###Coding Exercise 2.2: Calculate and plot projected global mean sea surface temperature
The data files above contain spatial maps of the sea surface temperature for every month of each experiment’s time period. For the rest of today’s tutorials, we’re going to focus on the global mean sea surface temperature, rather than maps, as a way to visualize the ocean’s changing temperature at a global scale*.
Complete the following code so that it calculates and plots a timeseries of global mean sea surface temperature from the TaiESM1 model for both the historical experiment and the two future projection experiments, SSP1-2.6 (low emissions) and SSP5-8.5 (high emissions).
As you complete this exercise this, consider the following questions:
In the first function, what xarray operation is the following line doing, and why is it neccessary?
return ds.weighted(ds.areacello.fillna(0)).mean(['x', 'y'], keep_attrs=True)
How would your time series plot might change if you instead used took a simple mean of all the sea surface temperatures across all grid cells? (Perhaps your previous maps could provide some help here)
*Note: we could alternatively look at ocean heat content, which depends on temperature at all depths, but it is a more intensive computation that would take too long to calculate in these tutorials.
#################################################
## TODO for students: details of what they should do ##
# Fill out function and remove
raise NotImplementedError("Student exercise: Calculate the global mean of each model, experiment, and time")
#################################################
%matplotlib inline
def global_mean(ds:xr.Dataset) -> xr.Dataset:
"""Global average, weighted by the cell area"""
return ds.weighted(ds.areacello.fillna(0)).mean(['x', 'y'], keep_attrs=True)
# average every dataset in the tree globally
dt_gm = dt_with_area.map_over_subtree(...)
for experiment in ['historical', 'ssp126', 'ssp585']:
da = dt_gm['TaiESM1'][experiment].ds.tos
da.plot(label=experiment)
plt.title('Global Mean SST from TaiESM1')
plt.ylabel('Global Mean SST [$^\circ$C]')
plt.xlabel('Year')
plt.legend()
plt.show()
# to_remove solution
%matplotlib inline
def global_mean(ds: xr.Dataset) -> xr.Dataset:
"""Global average, weighted by the cell area"""
return ds.weighted(ds.areacello.fillna(0)).mean(["x", "y"], keep_attrs=True)
# average every dataset in the tree globally
dt_gm = dt_with_area.map_over_subtree(global_mean)
with plt.xkcd():
for experiment in ["historical", "ssp126", "ssp585"]:
da = dt_gm["TaiESM1"][experiment].ds.tos
da.plot(label=experiment)
plt.title("Global Mean SST from TaiESM1")
plt.ylabel("Global Mean SST [$^\circ$C]")
plt.xlabel("Year")
plt.legend()
plt.show()
Post-figure questions#
Is anything about this plot interesting or surprising to you?
Do you find this plot easy to read? (BONUS TASK: If you do not find it easy to read, try improving this plot to address this)