Source code for esmvaltool.diag_scripts.ocean.diagnostic_model_vs_obs

"""
Model vs Observations maps Diagnostic.
======================================

Diagnostic to produce comparison of model and data.
The first kind of image shows four maps and the other shows a scatter plot.

The four pane image is a latitude vs longitude figures showing:

* Top left: model
* Top right: observations
* Bottom left: model minus observations
* Bottom right: model over observations


The scatter plots plot the matched model coordinate on the x axis, and the
observational dataset on the y coordinate, then performs a linear
regression of those data and plots the line of best fit on the plot.
The parameters of the fit are also shown on the figure.

Note that this diagnostic assumes that the preprocessors do the bulk of the
hard work, and that the cube received by this diagnostic (via the settings.yml
and metadata.yml files) has no time component, a small number of depth layers,
and a latitude and longitude coordinates.

An appropriate preprocessor for a 3D + time field would be::

  preprocessors:
    prep_map:
      extract_levels:
        levels:  [100., ]
        scheme: linear_extrap
      climate_statistics:
        operator: mean
      regrid:
        target_grid: 1x1
        scheme: linear

This tool is part of the ocean diagnostic tools package in the ESMValTool,
and was based on the plots produced by the Ocean Assess/Marine Assess toolkit.

Author: Lee de Mora (PML)
        ledm@pml.ac.uk
"""
import logging
import math
import os
import sys
from pathlib import Path

import iris
import iris.quickplot as qplt
import matplotlib.pyplot as plt
import numpy as np
import yaml
from matplotlib.colors import LogNorm
from scipy.stats import linregress

from esmvaltool.diag_scripts.ocean import diagnostic_tools as diagtools
from esmvaltool.diag_scripts.shared import ProvenanceLogger, run_diagnostic

# This part sends debug statements to stdout
logger = logging.getLogger(os.path.basename(__file__))
logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))


[docs]def add_map_subplot(subplot,
                    cube,
                    nspace,
                    title='',
                    cmap='',
                    extend='neither',
                    log=False):
    """Add a map subplot to the current pyplot figure.

    Parameters
    ----------
    subplot: int
        The matplotlib.pyplot subplot number. (ie 221)
    cube: iris.cube.Cube
        the iris cube to be plotted.
    nspace: numpy.array
        An array of the ticks of the colour part.
    title: str
        A string to set as the subplot title.
    cmap: str
        A string to describe the matplotlib colour map.
    extend: str
        Contourf-coloring of values outside the levels range
    log: bool
        Flag to plot the colour scale linearly (False) or
        logarithmically (True)
    """
    plt.subplot(subplot)
    logger.info('add_map_subplot: %s', subplot)
    if log:
        qplot = qplt.contourf(cube,
                              nspace,
                              linewidth=0,
                              cmap=plt.cm.get_cmap(cmap),
                              norm=LogNorm(),
                              zmin=nspace.min(),
                              zmax=nspace.max())
        qplot.colorbar.set_ticks([0.1, 1., 10.])
    else:
        qplot = iris.plot.contourf(cube,
                                   nspace,
                                   linewidth=0,
                                   cmap=plt.cm.get_cmap(cmap),
                                   extend=extend,
                                   zmin=nspace.min(),
                                   zmax=nspace.max())
        cbar = plt.colorbar(orientation='horizontal')
        cbar.set_ticks(
            [nspace.min(), (nspace.max() + nspace.min()) / 2.,
             nspace.max()])

    plt.gca().coastlines()
    plt.title(title)


[docs]def make_model_vs_obs_plots(cfg, metadata, model_filename, obs_filename):
    """Make a figure showing four maps and the other shows a scatter plot.

    The four pane image is a latitude vs longitude figures showing:

    * Top left: model
    * Top right: observations
    * Bottom left: model minus observations
    * Bottom right: model over observations

    Parameters
    ----------
    cfg: dict
        the opened global config dictionairy, passed by ESMValTool.
    metadata: dict
        the input files dictionairy
    model_filename: str
        the preprocessed model file.
    obs_filename: str
        the preprocessed observations file.
    """
    filenames = {'model': model_filename, 'obs': obs_filename}
    logger.debug('make_model_vs_obs_plots filenames: %s', filenames)
    # ####
    # Load the data for each layer as a separate cube
    input_file = None
    layers = {}
    cubes = {}
    for model_type, input_file in filenames.items():
        logger.debug('loading: \t%s, \t%s', model_type, input_file)
        cube = iris.util.squeeze(iris.load_cube(input_file))
        cube = diagtools.bgc_units(cube, metadata[input_file]['short_name'])
        cubes[model_type] = diagtools.make_cube_layer_dict(cube)
        for layer in cubes[model_type]:
            layers[layer] = True

    logger.debug('layers: %s', layers)
    logger.debug('cubes: %s', ', '.join(cubes.keys()))

    # ####
    # load names:
    model = metadata[filenames['model']]['dataset']
    obs = metadata[filenames['obs']]['dataset']

    long_name = cubes['model'][list(layers.keys())[0]].long_name
    units = str(cubes['model'][list(layers.keys())[0]].units)

    # Load image format extension
    image_extension = diagtools.get_image_format(cfg)

    # Make a plot for each layer
    for layer in layers:

        fig = plt.figure()
        fig.set_size_inches(9, 6)

        # Create the cubes
        cube221 = cubes['model'][layer]
        cube222 = cubes['obs'][layer]
        cube223 = cubes['model'][layer] - cubes['obs'][layer]
        cube224 = cubes['model'][layer] / cubes['obs'][layer]

        # create the z axis for plots 2, 3, 4.
        extend = 'neither'
        zrange12 = diagtools.get_cube_range([cube221, cube222])
        if 'maps_range' in metadata[input_file]:
            zrange12 = metadata[input_file]['maps_range']
            extend = 'both'
        zrange3 = diagtools.get_cube_range_diff([cube223])
        if 'diff_range' in metadata[input_file]:
            zrange3 = metadata[input_file]['diff_range']
            extend = 'both'

        cube224.data = np.ma.clip(cube224.data, 0.1, 10.)

        n_points = 12
        linspace12 = np.linspace(zrange12[0],
                                 zrange12[1],
                                 n_points,
                                 endpoint=True)
        linspace3 = np.linspace(zrange3[0],
                                zrange3[1],
                                n_points,
                                endpoint=True)
        logspace4 = np.logspace(-1., 1., 12, endpoint=True)

        # Add the sub plots to the figure.
        add_map_subplot(221,
                        cube221,
                        linspace12,
                        cmap='viridis',
                        title=model,
                        extend=extend)
        add_map_subplot(222,
                        cube222,
                        linspace12,
                        cmap='viridis',
                        title=' '.join([obs]),
                        extend=extend)
        add_map_subplot(223,
                        cube223,
                        linspace3,
                        cmap='bwr',
                        title=' '.join([model, 'minus', obs]),
                        extend=extend)
        if np.min(zrange12) > 0.:
            add_map_subplot(224,
                            cube224,
                            logspace4,
                            cmap='bwr',
                            title=' '.join([model, 'over', obs]),
                            log=True)

        caption = f'{long_name} [{units}]'
        # Add overall title
        fig.suptitle(caption, fontsize=14)

        # Determine image filename:
        fn_list = ['model_vs_obs', long_name, model, obs, str(layer), 'maps']
        path = diagtools.folder(cfg['plot_dir']) + '_'.join(fn_list)
        path = path.replace(' ', '') + image_extension

        # Saving files:
        logger.info('Saving plots to %s', path)
        plt.savefig(path, dpi=200)

        plt.close()

        provenance_record = _prepare_provenance_record(
            cfg,
            caption=caption,
            statistics=['mean', 'clim', 'diff'],
            domain=['global'],
            plot_type=['map'],
            ancestors=[model_filename, obs_filename],
        )

        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(path, provenance_record)


[docs]def rounds_sig(value, sig=3):
    """Round a float to sig significant digits & return it as a string.

    Parameters
    ----------
    value: float
       The float that is to be rounded.
    sig: int
        The number of significant figures.

    Returns
    ----------
    str:
        The rounded output string.
    """
    if value == 0.:
        return str(0.)
    if value < 0.:
        value = abs(value)
        return str(-1. *
                   round(value, sig - int(math.floor(math.log10(value))) - 1))
    return str(round(value, sig - int(math.floor(math.log10(value))) - 1))


[docs]def add_linear_regression(plot_axes,
                          arr_x,
                          arr_y,
                          showtext=True,
                          add_diagonal=False,
                          extent=None):
    """Add a straight line fit to an axis.

    Parameters
    ----------
    plot_axes: matplotlib.pyplot.axes
        The matplotlib axes on which to plot the linear regression.
    arr_x: numpy.array
        The data for the x coordinate.
    arr_y: numpy array
        The data for the y coordinate.
    showtext: bool
        A flag to turn on or off the result of the fit on the plot.
    add_diagonal: bool
        A flag to also add the 1:1 diagonal line to the figure
    extent: list of floats
        The extent of the plot axes.
    """
    beta1, beta0, r_value, p_value, std_err = linregress(arr_x, arr_y)
    texts = [
        r'$\^\beta_0$ = ' + rounds_sig(beta0),
        r'$\^\beta_1$ = ' + rounds_sig(beta1), r'R = ' + rounds_sig(r_value),
        r'P = ' + rounds_sig(p_value), r'N = ' + str(int(len(arr_x)))
    ]
    thetext = '\n'.join(texts)

    if showtext:
        plt.text(0.04,
                 0.96,
                 thetext,
                 horizontalalignment='left',
                 verticalalignment='top',
                 transform=plot_axes.transAxes)

    if extent is None:
        x_values = np.arange(arr_x.min(), arr_x.max(),
                             (arr_x.max() - arr_x.min()) / 20.)
        y_values = [beta0 + beta1 * a for a in x_values]
    else:
        minv = min(extent)
        maxv = max(extent)
        x_values = np.arange(minv, maxv, (maxv - minv) / 1000.)
        y_values = np.array([beta0 + beta1 * a for a in x_values])

        mask = (x_values < minv) + (y_values < minv) \
            + (x_values > maxv) + (y_values > maxv)
        x_values = np.ma.masked_where(mask, x_values)
        y_values = np.ma.masked_where(mask, y_values)

    plt.plot(x_values, y_values, 'k')

    if add_diagonal:
        axis = plt.gca().axis()
        step = (max(axis) - min(axis)) / 100.
        one_to_one = np.arange(min(axis), max(axis) + step, step)
        plt.plot(one_to_one, one_to_one, 'k--')


[docs]def make_scatter(cfg, metadata, model_filename, obs_filename):
    """Makes Scatter plots of model vs observational data.

    Make scatter plot showing the matched model and observational data with the
    model data as the x-axis coordinate and the observational data as the
    y-axis coordinate. A linear regression is also applied to the matched
    data and the result of the fit is shown on the figure.

    Parameters
    ----------
    cfg: dict
        the opened global config dictionairy, passed by ESMValTool.
    metadata: dict
        the input files dictionairy
    model_filename: str
        the preprocessed model file.
    obs_filename: str
        the preprocessed observations file.
    """
    filenames = {'model': model_filename, 'obs': obs_filename}
    logger.debug('make_model_vs_obs_plots: \t%s', filenames)
    # ####
    # Load the data for each layer as a separate cube
    layers = {}
    cubes = {}
    for model_type, input_file in filenames.items():
        logger.debug('loading: \t%s, \t%s', model_type, input_file)
        cube = iris.load_cube(input_file)
        cube = diagtools.bgc_units(cube, metadata[input_file]['short_name'])
        cubes[model_type] = diagtools.make_cube_layer_dict(cube)
        for layer in cubes[model_type]:
            layers[layer] = True

    logger.debug('layers: %s', layers)
    logger.debug('cubes: %s', ', '.join(cubes))

    # ####
    # load names:
    model = metadata[filenames['model']]['dataset']
    obs = metadata[filenames['obs']]['dataset']

    long_name = cubes['model'][list(layers.keys())[0]].long_name

    # Load image format extension
    image_extension = diagtools.get_image_format(cfg)

    # Make a plot for each layer
    for layer in layers:

        fig = plt.figure()
        fig.set_size_inches(7, 6)

        # Create the cubes
        model_data = cubes['model'][layer].data.squeeze()
        obs_data = cubes['obs'][layer].data.squeeze()

        mask = np.ma.getmask(model_data) + np.ma.getmask(obs_data)
        model_data = np.ma.masked_where(mask, model_data).compressed()
        obs_data = np.ma.masked_where(mask, obs_data).compressed()

        colours = 'gist_yarg'
        zrange = diagtools.get_array_range([model_data, obs_data])
        plotrange = [zrange[0], zrange[1], zrange[0], zrange[1]]

        x_scale = 'log'
        if np.min(zrange) * np.max(zrange) < -1:
            x_scale = 'linear'
        if np.min(zrange) < 0.:
            logger.info('Skip scatter for %s. Min is < 0', long_name)
            return

        plt.hexbin(
            model_data,
            obs_data,
            xscale=x_scale,
            yscale=x_scale,
            bins='log',
            # extent=np.log10(plotrange),
            gridsize=50,
            cmap=plt.get_cmap(colours),
            mincnt=0)
        cbar = plt.colorbar()
        cbar.set_label('log10(N)')

        plt.gca().set_aspect("equal")
        plt.axis(plotrange)

        add_linear_regression(plt.gca(),
                              model_data,
                              obs_data,
                              showtext=True,
                              add_diagonal=True,
                              extent=plotrange)

        plt.title(long_name)
        plt.xlabel(model)
        plt.ylabel(obs)

        # Determine image filename:
        fn_list = [
            'model_vs_obs', long_name, model, obs,
            str(layer), 'scatter'
        ]
        path = diagtools.folder(cfg['plot_dir']) + '_'.join(fn_list)
        path = path.replace(' ', '') + image_extension

        # Saving files:
        logger.info('Saving plots to %s', path)
        plt.savefig(path)

        plt.close()

        provenance_record = _prepare_provenance_record(
            cfg,
            caption=long_name,
            statistics=['mean', 'clim', 'diff'],
            domain=['global'],
            plot_type=['scatter'],
            ancestors=[model_filename, obs_filename],
        )

        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(path, provenance_record)


def _prepare_provenance_record(cfg, **provenance_record):
    recipe_path = Path(cfg['run_dir']).parents[1] / cfg['recipe']
    with recipe_path.open() as recipe_file:
        recipe = yaml.safe_load(recipe_file)

    doc = recipe['documentation']
    authors = doc.get('authors', [])
    authors += [
        maintainer for maintainer in doc.get('maintainer', [])
        if maintainer not in authors
    ]
    provenance_record['authors'] = authors
    for key in ['title', 'description', 'projects']:
        val = doc[key]
        if val:
            provenance_record[key] = val
    for key in ['realms', 'themes']:
        val = cfg.get(key)
        if val:
            provenance_record[key] = val
    return provenance_record


[docs]def main(cfg):
    """Load the config file, and send it to the plot maker.

    Parameters
    ----------
    cfg: dict
        the opened global config dictionairy, passed by ESMValTool.
    """
    for index, metadata_filename in enumerate(cfg['input_files']):
        logger.info(
            'metadata filename:\t%s, %s',
            index,
            metadata_filename,
        )
        metadatas = diagtools.get_input_files(cfg, index=index)

        model_type = 'observational_dataset'
        logger.debug(
            'model_type: %s, %s',
            index,
            model_type,
        )
        logger.debug(
            'metadatas:  %s, %s',
            index,
            metadatas,
        )
        obs_filename = diagtools.match_model_to_key('observational_dataset',
                                                    cfg[model_type], metadatas)
        for filename in sorted(metadatas.keys()):

            if filename == obs_filename:
                continue
            if not os.path.exists(obs_filename):
                continue
            logger.info('-----------------')
            logger.info(
                'model filenames:\t%s',
                filename,
            )

            # #####
            # model vs obs scatter plots
            make_scatter(cfg, metadatas, filename, obs_filename)

            # #####
            # model vs obs map plots
            make_model_vs_obs_plots(cfg, metadatas, filename, obs_filename)
    logger.info('Success')


if __name__ == '__main__':
    with run_diagnostic() as config:
        main(config)