
"""
Diagnostic tools
================

This module contains several Python tools used elsewhere by the ocean
diagnostics package.

This tool is part of the ocean diagnostic tools package in the ESMValTool.

Author: Lee de Mora (PML)
    ledm@pml.ac.uk
"""
import logging
import os
import sys
import iris
import iris.coord_categorisation

import numpy as np
import cftime
import matplotlib.pyplot as plt
import yaml

from esmvaltool.diag_scripts.shared._base import _get_input_data_files

# This part sends debug statements to stdout
logger = logging.getLogger(os.path.basename(__file__))
logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))


def get_obs_projects():
    """
    Return a list of strings with the names of observations projects.

    Please keep this list up to date, or replace it with something more
    sensible.

    Returns
    -------
    list
        Returns a list of strings of the various types of observational
        data.
    """
    obs_projects = [
        'obs4mips',
    ]
    return obs_projects


def folder(name):
    """
    Make a directory out of a string or a list of strings.

    Take a string or a list of strings, convert it to a directory-style
    path, then make the folder and return the string. The returned folder
    string always ends in the path separator, os.sep ('/').

    Arguments
    ---------
    name: list or string
        A list of nested directories, or a path to a directory.

    Returns
    -------
    str
        Returns a string of a full (potentially new) path of the directory.
    """
    sep = os.sep
    if isinstance(name, list):
        name = os.sep.join(name)
    if name[-1] != sep:
        name = name + sep
    if os.path.exists(name) is False:
        os.makedirs(name)
        logger.info('Making new directory:\t%s', str(name))
    return name


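# A minimal usage sketch for `folder`. The path below is hypothetical and
# this helper is illustrative only; it is not called by the package.
def _folder_example():
    # List form: nested directories are joined, created if needed, and the
    # returned string always ends in os.sep:
    target = folder(['plots', 'timeseries', 'ExampleModel'])
    # e.g. 'plots/timeseries/ExampleModel/'
    return target

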
def get_input_files(cfg, index=''):
    """
    Load the input configuration file as a dictionary.

    Get a dictionary with input files from the metadata.yml files.
    This is a wrapper for the _get_input_data_files function from
    diag_scripts.shared._base.

    Arguments
    ---------
    cfg: dict
        the opened global config dictionary, passed by ESMValTool.
    index: int
        the index of the file in the cfg file.

    Returns
    -------
    dict
        A dictionary of the input files and their linked details.
    """
    if isinstance(index, int):
        metadata_file = cfg['input_files'][index]
        with open(metadata_file) as input_file:
            metadata = yaml.safe_load(input_file)
        return metadata
    return _get_input_data_files(cfg)


def bgc_units(cube, name):
    """
    Convert the cubes into some friendlier units.

    This is because many CMIP standard units are not the standard units
    used by the BGC community (i.e. Celsius is preferred over Kelvin,
    etc.).

    Parameters
    ----------
    cube: iris.cube.Cube
        the opened dataset as a cube.
    name: str
        The string describing the data field.

    Returns
    -------
    iris.cube.Cube
        the cube with the new units.
    """
    new_units = ''
    if name in ['tos', 'thetao']:
        new_units = 'celsius'
    if name in ['no3', ]:
        new_units = 'mmol m-3'
    if name in ['chl', ]:
        new_units = 'mg m-3'
    if name in ['intpp', ]:
        new_units = 'mol m-2 d-1'
    if name in ['fgco2', ]:
        new_units = 'g m-2 d-1'
    if name in ['spco2', 'dpco2', ]:
        new_units = 'uatm'
    if name in ['mfo', 'amoc', 'msftmyz']:
        # A sverdrup is 1000000 m3 s-1, but mfo is in kg s-1.
        new_units = 'Tg s-1'
    if new_units != '':
        logger.info(' '.join(
            ["Changing units from",
             str(cube.units), 'to', new_units]))
        cube.convert_units(new_units)
    return cube


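# A hedged usage sketch for `bgc_units`: load a cube and convert it to the
# BGC-friendly units for its short_name. The filename is hypothetical; this
# helper is illustrative only and is not called by the package.
def _bgc_units_example():
    cube = iris.load_cube('example_thetao.nc')  # hypothetical file
    cube = bgc_units(cube, 'thetao')  # e.g. Kelvin -> celsius
    return cube

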
def match_model_to_key(
        model_type,
        cfg_dict,
        input_files_dict,
):
    """
    Match up model or observations dataset dictionaries from the config file.

    This function checks that the control_model, exper_model and
    observational_dataset dictionaries from the recipe are matched with the
    input file dictionary in the cfg metadata.

    Arguments
    ---------
    model_type: str
        The string model_type to match (only used in debugging).
    cfg_dict: dict
        the config dictionary item for this model type, parsed directly
        from the diagnostics/ scripts, part of the recipe.
    input_files_dict: dict
        The input file dictionary, loaded directly from the
        get_input_files() function, in diagnostics_tools.py.

    Returns
    -------
    str
        The matching input file name, or an empty string if no match is
        found.
    """
    for input_file, input_dict in input_files_dict.items():
        intersect_keys = input_dict.keys() & cfg_dict.keys()
        match = True
        for key in intersect_keys:
            if input_dict[key] == cfg_dict[key]:
                continue
            match = False
        if match:
            return input_file
    logger.warning("Unable to match model: %s", model_type)
    return ''


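# Hypothetical matching sketch (not called by the package): a recipe's
# observational_dataset entry is matched against the input metadata files.
# The key name and fallback value are invented for illustration.
def _match_example(cfg, input_files):
    obs_key = cfg.get('observational_dataset', {'dataset': 'ExampleObs'})
    obs_filename = match_model_to_key('observational_dataset', obs_key,
                                      input_files)
    return obs_filename

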
def cube_time_to_float(cube):
    """
    Convert from the time coordinate into decimal time.

    Takes an iris time coordinate and returns a list of floats.

    Parameters
    ----------
    cube: iris.cube.Cube
        the opened dataset as a cube.

    Returns
    -------
    list
        List of floats showing the time coordinate in decimal time.
    """
    times = cube.coord('time')
    datetime = guess_calendar_datetime(cube)

    dtimes = times.units.num2date(times.points)
    floattimes = []
    for dtime in dtimes:
        # TODO: it would be better to have a calendar dependent value
        # for daysperyear, as this is not accurate for 360 day calendars.
        daysperyear = 365.25

        try:
            dayofyr = dtime.dayofyr
        except AttributeError:
            time = datetime(dtime.year, dtime.month, dtime.day)
            time0 = datetime(dtime.year, 1, 1, 0, 0)
            dayofyr = (time - time0).days

        # Hours and minutes are added separately below, so they must not
        # appear in this initial expression (that would double count them).
        floattime = dtime.year + dayofyr / daysperyear
        if dtime.hour:
            floattime += dtime.hour / (24. * daysperyear)
        if dtime.minute:
            floattime += dtime.minute / (24. * 60. * daysperyear)
        floattimes.append(floattime)
    return floattimes


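# Illustrative sketch (not called anywhere): decimal times are convenient
# as a plotting axis. The cube is assumed to be a 1D time series cube.
def _plot_timeseries_example(cube):
    times = cube_time_to_float(cube)  # e.g. [1995.04, 1995.12, ...]
    plt.plot(times, cube.data)

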
def guess_calendar_datetime(cube):
    """
    Guess the cftime.datetime form to create datetimes.

    Parameters
    ----------
    cube: iris.cube.Cube
        the opened dataset as a cube.

    Returns
    -------
    cftime.datetime
        A datetime creator function from cftime, based on the cube's
        calendar.
    """
    time_coord = cube.coord('time')

    if time_coord.units.calendar in ['360_day', ]:
        datetime = cftime.Datetime360Day
    elif time_coord.units.calendar in ['365_day', 'noleap']:
        datetime = cftime.DatetimeNoLeap
    elif time_coord.units.calendar in ['julian', ]:
        datetime = cftime.DatetimeJulian
    elif time_coord.units.calendar in ['gregorian', ]:
        datetime = cftime.DatetimeGregorian
    elif time_coord.units.calendar in ['proleptic_gregorian', ]:
        datetime = cftime.DatetimeProlepticGregorian
    else:
        logger.warning('Calendar set to Gregorian, instead of %s',
                       time_coord.units.calendar)
        datetime = cftime.DatetimeGregorian
    return datetime


def get_decade(coord, value):
    """
    Determine the decade.

    Called by iris.coord_categorisation.add_categorised_coord.
    """
    date = coord.units.num2date(value)
    return date.year - date.year % 10


def decadal_average(cube):
    """
    Calculate the decadal average.

    Parameters
    ----------
    cube: iris.cube.Cube
        The input cube

    Returns
    -------
    iris.cube.Cube
        The decadal average of the input cube.
    """
    iris.coord_categorisation.add_categorised_coord(cube, 'decade', 'time',
                                                    get_decade)
    return cube.aggregated_by('decade', iris.analysis.MEAN)


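# Hypothetical sketch combining get_decade/decadal_average (not called by
# the package): a monthly cube is reduced to one mean value per decade.
def _decadal_average_example(cube):
    decadal_cube = decadal_average(cube)
    # The new 'decade' coordinate holds 1990, 2000, ... for each mean.
    return decadal_cube

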
def load_thresholds(cfg, metadata):
    """
    Load the thresholds for contour plots from the config files.

    Parameters
    ----------
    cfg: dict
        the opened global config dictionary, passed by ESMValTool.
    metadata: dict
        the metadata dictionary

    Returns
    -------
    list:
        List of thresholds
    """
    thresholds = set()

    if 'threshold' in cfg:
        # A single threshold is a lone float, so it must go through set.add
        # (set.update expects an iterable).
        thresholds.add(float(cfg['threshold']))

    if 'threshold' in metadata:
        thresholds.add(float(metadata['threshold']))

    if 'thresholds' in cfg:
        thresholds.update([float(thres) for thres in cfg['thresholds']])

    if 'thresholds' in metadata:
        thresholds.update([float(thres) for thres in metadata['thresholds']])

    return sorted(list(thresholds))


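# A small sketch of how `load_thresholds` merges the two config sources.
# The values are invented for illustration; the helper is not called.
def _load_thresholds_example():
    cfg = {'threshold': 0.5}
    metadata = {'thresholds': [1., 2., 5.]}
    return load_thresholds(cfg, metadata)  # -> [0.5, 1.0, 2.0, 5.0]

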
def get_colour_from_cmap(number, total, cmap='jet'):
    """
    Get colour `number` of `total` from a colour map.

    This function is used when several lines are created evenly along a
    colour map.

    Parameters
    ----------
    number: int, float
        The index of this line along the colour map.
    total: int
        The total number of lines along the colour map.
    cmap: string, plt.cm
        A colour map, either by name (string) or from matplotlib.
    """
    if isinstance(cmap, str):
        cmap = plt.get_cmap(cmap)

    if number > total:
        raise ValueError('`number` cannot be larger than the total length '
                         'of the list, ie: {} > {}'.format(number, total))

    if total > 1:
        colour = cmap(float(number) / float(total - 1.))
    else:
        colour = cmap(0.)
    return colour


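# Illustrative sketch (not called by the package): draw several lines
# coloured evenly along a colour map. The data are invented.
def _cmap_lines_example():
    total = 5
    for number in range(total):
        colour = get_colour_from_cmap(number, total, cmap='viridis')
        plt.plot([0, 1], [number, number + 1], c=colour)

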
def add_legend_outside_right(plot_details, ax1, column_width=0.1, loc='right'):
    """
    Add a legend outside the plot, to the right.

    plot_details is a 2 level dict, where the first level is some key (which
    is hidden) and the 2nd level contains the keys:
        'c': color
        'lw': line width
        'label': label for the legend.
    ax1 is the axis where the plot was drawn.

    Parameters
    ----------
    plot_details: dict
        A dictionary of the plot details (color, linestyle, linewidth,
        label)
    ax1: matplotlib.pyplot.axes
        The pyplot axes to add the legend to.
    column_width: float
        The width of the legend column. This is used to adjust for longer
        words in the legends.
    loc: string
        Location of the legend. Options are "right" and "below".
    """
    # ####
    # Create dummy axes:
    legend_size = len(plot_details) + 1
    box = ax1.get_position()
    if loc.lower() == 'right':
        nrows = 25
        ncols = int(legend_size / nrows) + 1
        ax1.set_position([
            box.x0, box.y0, box.width * (1. - column_width * ncols),
            box.height
        ])

    if loc.lower() == 'below':
        ncols = 4
        nrows = int(legend_size / ncols) + 1
        ax1.set_position([
            box.x0, box.y0 + (nrows * column_width), box.width,
            box.height - (nrows * column_width)
        ])

    # Add empty plots to dummy axis.
    for index in sorted(plot_details):
        colour = plot_details[index]['c']
        linewidth = plot_details[index].get('lw', 1)
        linestyle = plot_details[index].get('ls', '-')
        label = plot_details[index].get('label', str(index))
        plt.plot([], [], c=colour, lw=linewidth, ls=linestyle, label=label)

    if loc.lower() == 'right':
        legd = ax1.legend(
            loc='center left',
            ncol=ncols,
            prop={'size': 10},
            bbox_to_anchor=(1., 0.5))
    if loc.lower() == 'below':
        legd = ax1.legend(
            loc='upper center',
            ncol=ncols,
            prop={'size': 10},
            bbox_to_anchor=(0.5, -2. * column_width))
    legd.draw_frame(False)
    legd.get_frame().set_alpha(0.)


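# A hypothetical plot_details dict for `add_legend_outside_right`. The
# dataset names and data are invented; this helper is not called anywhere.
def _legend_example():
    plot_details = {
        'model_a': {'c': 'red', 'lw': 1, 'label': 'Model A'},
        'obs': {'c': 'black', 'ls': '--', 'label': 'Observations'},
    }
    for details in plot_details.values():
        plt.plot([0, 1], [0, 1], c=details['c'], label=details['label'])
    add_legend_outside_right(plot_details, plt.gca(), loc='right')

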
def get_image_format(cfg, default='png'):
    """
    Load the image format from the global config file.

    Currently tested options are svg and png.

    The cfg is the opened global config. The default format is used if no
    specific format is requested. The default is set in the user
    config.yml. Individual diagnostics can set their own format, which
    will supersede the main config.yml.

    Arguments
    ---------
    cfg: dict
        the opened global config dictionary, passed by ESMValTool.

    Returns
    -------
    str
        The image format extension.
    """
    image_extension = default

    # Load format from config.yml and set it as default:
    if 'output_file_type' in cfg:
        image_extension = cfg['output_file_type']

    # A diagnostic-specific format supersedes the default:
    if 'image_format' in cfg:
        image_extension = cfg['image_format']

    matplotlib_image_formats = plt.gcf().canvas.get_supported_filetypes()
    if image_extension not in matplotlib_image_formats:
        logger.warning(' '.join([
            'Image format', image_extension, 'not in matplotlib:',
            ', '.join(matplotlib_image_formats)
        ]))

    image_extension = '.' + image_extension
    image_extension = image_extension.replace('..', '.')
    return image_extension


def get_image_path(
        cfg,
        metadata,
        prefix='diag',
        suffix='image',
        metadata_id_list='default',
):
    """
    Produce a path to the final location of the image.

    The cfg is the opened global config, metadata is the metadata
    dictionary (for the individual dataset file).

    Arguments
    ---------
    cfg: dict
        the opened global config dictionary, passed by ESMValTool.
    metadata: dict
        The metadata dictionary for a specific model.
    prefix: str
        A string to prepend to the image basename.
    suffix: str
        A string to append to the image basename.
    metadata_id_list: list
        A list of strings to add to the file path. It loads these from
        the cfg.

    Returns
    -------
    str
        The ultimate image path.
    """
    #####
    if metadata_id_list == 'default':
        metadata_id_list = [
            'project', 'dataset', 'mip', 'exp', 'ensemble', 'field',
            'short_name', 'preprocessor', 'diagnostic', 'start_year',
            'end_year',
        ]

    path = folder(cfg['plot_dir'])
    if prefix:
        path += prefix + '_'
    # Check that the keys are in the dict.
    intersection = [va for va in metadata_id_list if va in metadata]
    path += '_'.join([str(metadata[b]) for b in intersection])
    if suffix:
        path += '_' + suffix

    image_extension = get_image_format(cfg)

    if path.find(image_extension) == -1:
        path += image_extension

    path = path.replace(' ', '_')

    logger.info("Image path will be: %s", path)
    return path


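# Sketch of a typical save path built from the metadata (illustrative only;
# not called by the package, and the resulting name is an assumption):
def _image_path_example(cfg, metadata):
    # With the default metadata_id_list this yields something like:
    # <plot_dir>/diag_CMIP5_ExampleModel_..._image.png
    return get_image_path(cfg, metadata, prefix='diag', suffix='image')

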
def make_cube_layer_dict(cube):
    """
    Take a cube and return a dictionary of layer: cube.

    Each item in the dict is a layer with a separate cube for each layer,
    ie: cubes[depth] = cube from specific layer.

    Cubes with no depth component are returned as a dict, where the dict
    key is a blank empty string, and the value is the cube.

    Parameters
    ----------
    cube: iris.cube.Cube
        the opened dataset as a cube.

    Returns
    -------
    dict
        A dictionary of layer name: layer cube.
    """
    #####
    # Check layering:
    coords = cube.coords()
    layers = []
    for coord in coords:
        if coord.standard_name in ['depth', 'region']:
            layers.append(coord)

    cubes = {}
    if layers == []:
        cubes[''] = cube
        return cubes

    # if len(layers) > 1:
    #     # This field has a strange number of layer dimensions.
    #     # depth and regions?
    #     print(cube)
    #     raise ValueError('This cube has both `depth` & `region` '
    #                      'coordinates: %s', layers)

    # iris stores coords as a list with one entry:
    layer_dim = layers[0]
    if len(layer_dim.points) in [1, ]:
        cubes[''] = cube
        return cubes

    if layer_dim.standard_name == 'depth':
        coord_dim = cube.coord_dims('depth')[0]
        for layer_index, layer in enumerate(layer_dim.points):
            slices = [slice(None) for index in cube.shape]
            slices[coord_dim] = layer_index
            cubes[layer] = cube[tuple(slices)]

    if layer_dim.standard_name == 'region':
        coord_dim = cube.coord_dims('region')[0]
        for layer_index, layer in enumerate(layer_dim.points):
            slices = [slice(None) for index in cube.shape]
            slices[coord_dim] = layer_index
            layer = layer.replace('_', ' ').title()
            cubes[layer] = cube[tuple(slices)]
    return cubes


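# Illustrative iteration over the layer dict (not called by the package):
# a depth-resolved cube yields one lower-dimensional cube per depth level.
def _layer_dict_example(cube):
    for layer, layer_cube in make_cube_layer_dict(cube).items():
        logger.info('layer %s: shape %s', layer, layer_cube.shape)

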
def get_cube_range(cubes):
    """
    Determine the minimum and maximum values of a list of cubes.

    Parameters
    ----------
    cubes: list of iris.cube.Cube
        A list of cubes.

    Returns
    -------
    list:
        A list of two values: the overall minimum and maximum values of
        the list of cubes.
    """
    mins = []
    maxs = []
    for cube in cubes:
        mins.append(cube.data.min())
        maxs.append(cube.data.max())
    return [np.min(mins), np.max(maxs), ]


def get_cube_range_diff(cubes):
    """
    Determine the largest deviation from zero in a list of cubes.

    Parameters
    ----------
    cubes: list of iris.cube.Cube
        A list of cubes.

    Returns
    -------
    list:
        A list of two values: the maximum deviation from zero and its
        opposite.
    """
    ranges = []
    for cube in cubes:
        ranges.append(np.abs(cube.data.min()))
        ranges.append(np.abs(cube.data.max()))
    return [-1. * np.max(ranges), np.max(ranges)]


def get_array_range(arrays):
    """
    Determine the minimum and maximum values of a list of arrays.

    Parameters
    ----------
    arrays: list of numpy.array
        A list of numpy.array.

    Returns
    -------
    list:
        A list of two values: the overall minimum and maximum values of
        the list of arrays.
    """
    mins = []
    maxs = []
    for arr in arrays:
        mins.append(arr.min())
        maxs.append(arr.max())
    logger.info('get_array_range: %s, %s', np.min(mins), np.max(maxs))
    return [np.min(mins), np.max(maxs), ]