"""Apply automatic fixes for known errors in cmorized data.

All functions in this module will work even if no fixes are available
for the given dataset. Therefore, it is recommended to apply them to all
variables to be sure that all known errors are fixed.
"""
import logging
from collections import defaultdict
from pathlib import Path
from iris.cube import CubeList
from ._fixes.fix import Fix
from .check import CheckLevels, _get_cmor_checker
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def fix_file(
    file: Path,
    short_name: str,
    project: str,
    dataset: str,
    mip: str,
    output_dir: Path,
    add_unique_suffix: bool = False,
    **extra_facets,
) -> Path:
    """Fix files before ESMValTool can load them.

    These fixes are only for issues that prevent iris from loading the cube
    or that cannot be fixed after the cube is loaded.

    Original files are not overwritten.

    Parameters
    ----------
    file: Path
        Path to the original file.
    short_name: str
        Variable's short name.
    project: str
        Project of the dataset.
    dataset: str
        Name of the dataset.
    mip: str
        Variable's MIP.
    output_dir: Path
        Output directory for fixed files.
    add_unique_suffix: bool, optional (default: False)
        Adds a unique suffix to `output_dir` for thread safety.
    **extra_facets: dict, optional
        Extra facets are mainly used for data outside of the big projects
        like CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`.

    Returns
    -------
    Path:
        Path to the fixed file.

    """
    # Merge the facets given as regular arguments into extra_facets so that
    # the fix lookup receives the complete variable description.
    extra_facets.update(
        short_name=short_name,
        project=project,
        dataset=dataset,
        mip=mip,
    )

    applicable_fixes = Fix.get_fixes(
        project=project,
        dataset=dataset,
        mip=mip,
        short_name=short_name,
        extra_facets=extra_facets,
    )

    # Each fix receives the path returned by the previous one.
    fixed_file = file
    for file_fix in applicable_fixes:
        fixed_file = file_fix.fix_file(
            fixed_file, output_dir, add_unique_suffix=add_unique_suffix
        )
    return fixed_file
def _get_single_cube(cube_list, short_name, project, dataset):
    """Select the cube holding variable ``short_name`` from ``cube_list``.

    Parameters
    ----------
    cube_list:
        Sequence of cubes. Only its length, indexing, iteration and the
        ``var_name`` attribute of its elements are used.
    short_name: str
        Variable's short name, compared against each cube's ``var_name``.
    project: str
        Project of the dataset (used in messages only).
    dataset: str
        Name of the dataset (used in messages only).

    Returns
    -------
    The only cube of ``cube_list`` if it has exactly one element (its
    ``var_name`` is not checked in that case); otherwise the first cube
    whose ``var_name`` equals ``short_name``.

    Raises
    ------
    ValueError
        If ``cube_list`` is empty, or if it holds several cubes and none of
        their ``var_name`` attributes equals ``short_name``.

    """
    # An empty list used to fall through to the "More than one cube found"
    # error below, which misdescribed the problem.
    if not cube_list:
        raise ValueError(
            f'No cubes found for variable {short_name} in '
            f'{project}:{dataset}.'
        )

    if len(cube_list) == 1:
        return cube_list[0]

    # First cube whose var_name matches, or None if there is no such cube.
    cube = next(
        (candidate for candidate in cube_list
         if candidate.var_name == short_name),
        None,
    )

    # Compare against None explicitly so that a matching cube is never
    # rejected merely because the object evaluates as falsy.
    if cube is None:
        raise ValueError(
            f'More than one cube found for variable {short_name} in '
            f'{project}:{dataset} but none of their var_names match the '
            f'expected.\nFull list of cubes encountered: {cube_list}'
        )

    logger.warning(
        'Found variable %s in %s:%s, but there were other present in '
        'the file. Those extra variables are usually metadata '
        '(cell area, latitude descriptions) that was not saved '
        'according to CF-conventions. It is possible that errors appear '
        'further on because of this. \nFull list of cubes encountered: %s',
        short_name, project, dataset, cube_list)
    return cube
def fix_data(cube,
             short_name,
             project,
             dataset,
             mip,
             frequency=None,
             check_level=CheckLevels.DEFAULT,
             **extra_facets):
    """Fix cube data if fixes are present and check it anyway.

    This method assumes that metadata is already fixed and checked.

    This method collects all the relevant fixes for a given variable,
    applies them and checks the resulting cube (or the original if no fixes
    were needed) to ensure that it complies with the standards of its
    project CMOR tables.

    Parameters
    ----------
    cube: iris.cube.Cube
        Cube to fix.
    short_name: str
        Variable's short name.
    project: str
        Project of the dataset.
    dataset: str
        Name of the dataset.
    mip: str
        Variable's MIP.
    frequency: str, optional
        Variable's data frequency, if available.
    check_level: CheckLevels
        Level of strictness of the checks. Set to default.
    **extra_facets: dict, optional
        Extra facets are mainly used for data outside of the big projects
        like CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`.

    Returns
    -------
    iris.cube.Cube:
        Fixed and checked cube.

    Raises
    ------
    CMORCheckError
        If the checker detects errors in the data that it can not fix.

    """
    # Merge the facets given as regular arguments into extra_facets so that
    # the fix lookup receives the complete variable description.
    extra_facets.update(
        short_name=short_name,
        project=project,
        dataset=dataset,
        mip=mip,
        frequency=frequency,
    )

    data_fixes = Fix.get_fixes(
        project=project,
        dataset=dataset,
        mip=mip,
        short_name=short_name,
        extra_facets=extra_facets,
    )
    # Each fix receives the cube returned by the previous one.
    for data_fix in data_fixes:
        cube = data_fix.fix_data(cube)

    # The check runs whether or not any fix was applied.
    cmor_checker = _get_cmor_checker(
        frequency=frequency,
        table=project,
        mip=mip,
        short_name=short_name,
        fail_on_error=False,
        automatic_fixes=True,
        check_level=check_level,
    )
    return cmor_checker(cube).check_data()