"""Module containing mappings from our names to ESGF names."""
from esmvalcore.esgf._logon import get_connection
# Per project: facet name as used in recipes (key) -> facet name as used by
# ESGF (value). create_dataset_map() looks up the 'dataset' entry of every
# project listed here, so each project needs to define one.
FACETS = {
    'CMIP3': {
        'dataset': 'model',
        'ensemble': 'ensemble',
        'exp': 'experiment',
        'frequency': 'time_frequency',
        'short_name': 'variable',
    },
    'CMIP5': {
        'dataset': 'model',
        'ensemble': 'ensemble',
        'exp': 'experiment',
        'mip': 'cmor_table',
        'product': 'product',
        'short_name': 'variable',
    },
    'CMIP6': {
        'dataset': 'source_id',
        'ensemble': 'variant_label',
        'exp': 'experiment_id',
        'grid': 'grid_label',
        'mip': 'table_id',
        'short_name': 'variable',
    },
    'CORDEX': {
        'dataset': 'rcm_name',
        'driver': 'driving_model',
        'domain': 'domain',
        'ensemble': 'ensemble',
        'exp': 'experiment',
        'frequency': 'time_frequency',
        'short_name': 'variable',
    },
    'obs4MIPs': {
        'dataset': 'source_id',
        'frequency': 'time_frequency',
        'short_name': 'variable',
    },
}
"""Mapping between the recipe and ESGF facet names."""
# Per project: dataset name as used in recipes / on the filesystem (key) ->
# dataset name as used by ESGF (value). Only CMIP5 has entries at present;
# the other projects are kept as empty dicts so that a lookup by project
# name still finds a mapping.
DATASET_MAP = {
    'CMIP3': {},
    'CMIP5': {
        'ACCESS1-0': 'ACCESS1.0',
        'ACCESS1-3': 'ACCESS1.3',
        'bcc-csm1-1': 'BCC-CSM1.1',
        'bcc-csm1-1-m': 'BCC-CSM1.1(m)',
        'CESM1-BGC': 'CESM1(BGC)',
        'CESM1-CAM5': 'CESM1(CAM5)',
        'CESM1-CAM5-1-FV2': 'CESM1(CAM5.1,FV2)',
        'CESM1-FASTCHEM': 'CESM1(FASTCHEM)',
        'CESM1-WACCM': 'CESM1(WACCM)',
        'CSIRO-Mk3-6-0': 'CSIRO-Mk3.6.0',
        'fio-esm': 'FIO-ESM',
        'GFDL-CM2p1': 'GFDL-CM2.1',
        'inmcm4': 'INM-CM4',
        'MRI-AGCM3-2H': 'MRI-AGCM3.2H',
        'MRI-AGCM3-2S': 'MRI-AGCM3.2S',
    },
    'CMIP6': {},
    'CORDEX': {},
    'obs4MIPs': {},
}
"""Cache for the mapping between recipe/filesystem and ESGF dataset names."""
def create_dataset_map():
    """Create the DATASET_MAP from recipe datasets to ESGF dataset names.

    Run `python -m esmvalcore.esgf.facets` to print an up-to-date map.

    For every project in ``FACETS`` the values of the project's dataset
    facet are listed, and one search result is fetched per value. When the
    facet value does not occur literally in the ``dataset_id`` of that
    result, the dot-separated component of the id at the project-specific
    position is recorded as an alias for the facet value.

    Progress is reported with ``print``.

    Returns
    -------
    dict
        For each project in ``FACETS``, a dict mapping the name found in
        the dataset id to the dataset facet value. Projects for which no
        deviating names were found map to an empty dict.
    """
    connection = get_connection()
    dataset_map = {}
    # Position of the dataset name among the dot-separated components of a
    # dataset id, per project.
    indices = {
        'CMIP3': 2,
        'CMIP5': 3,
        'CMIP6': 3,
        'CORDEX': 7,
        'obs4MIPs': 2,
    }

    for project in FACETS:
        dataset_map[project] = {}
        dataset_key = FACETS[project]['dataset']
        ctx = connection.new_context(
            project=project,
            facets=[dataset_key],
            fields=['id'],
            latest=True,
        )
        available_datasets = sorted(ctx.facet_counts[dataset_key])
        print(f"The following datasets are available for project {project}:")
        for dataset in available_datasets:
            print(dataset)

        # Figure out the ESGF name of the requested dataset
        n_available = len(available_datasets)
        for i, dataset in enumerate(available_datasets, 1):
            print(f"Looking for dataset name of facet name"
                  f" {dataset} ({i} of {n_available})")
            query = {dataset_key: dataset}
            # If the search unexpectedly yields no result, skip this facet
            # value instead of aborting the whole run with a bare
            # StopIteration.
            results = iter(ctx.search(batch_size=1, **query))
            dataset_result = next(results, None)
            if dataset_result is None:
                print(f"No search results found for facet '{dataset}',"
                      " skipping it")
                continue
            dataset_id = dataset_result.dataset_id
            print(f"Dataset id: {dataset_id}")
            # Only names that differ between the facet and the dataset id
            # need an entry in the map.
            if dataset not in dataset_id:
                idx = indices[project]
                dataset_alias = dataset_id.split('.')[idx]
                print(f"Found dataset name '{dataset_alias}'"
                      f" for facet '{dataset}',")
                dataset_map[project][dataset_alias] = dataset

    return dataset_map
if __name__ == '__main__':
    # Executed as a script: build a fresh dataset map and print it, so the
    # output can be used to bring DATASET_MAP up to date.
    fresh_dataset_map = create_dataset_map()
    print(fresh_dataset_map)