# Source code for esmvalcore.esgf.facets

"""Module containing mappings from our names to ESGF names."""

import pyesgf.search

from ..config._esgf_pyclient import get_esgf_config

# Per-project translation table for facet names.
#
# Outer keys are project names. Each inner dict maps the facet name used in
# a recipe (key) to the facet name used by ESGF (value). Not every project
# defines every facet, but all of them define "dataset", which
# `create_dataset_map` looks up for each project.
FACETS = {
    "CMIP3": {
        "dataset": "model",
        "ensemble": "ensemble",
        "exp": "experiment",
        "frequency": "time_frequency",
        "short_name": "variable",
    },
    "CMIP5": {
        "dataset": "model",
        "ensemble": "ensemble",
        "exp": "experiment",
        "frequency": "time_frequency",
        "institute": "institute",
        "mip": "cmor_table",
        "product": "product",
        "short_name": "variable",
    },
    # CMIP6 uses different ESGF facet names than the older CMIP projects
    # above (e.g. "source_id" rather than "model" for the dataset).
    "CMIP6": {
        "activity": "activity_drs",
        "dataset": "source_id",
        "ensemble": "member_id",
        "exp": "experiment_id",
        "institute": "institution_id",
        "grid": "grid_label",
        "mip": "table_id",
        "short_name": "variable",
    },
    "CORDEX": {
        "dataset": "rcm_name",
        "driver": "driving_model",
        "domain": "domain",
        "ensemble": "ensemble",
        "exp": "experiment",
        "frequency": "time_frequency",
        "institute": "institute",
        "product": "product",
        "short_name": "variable",
    },
    "obs4MIPs": {
        "dataset": "source_id",
        "frequency": "time_frequency",
        "institute": "institute",
        "short_name": "variable",
    },
}
"""Mapping between the recipe and ESGF facet names."""

# Per-project table of dataset names that differ between the recipe /
# filesystem spelling (key) and the ESGF dataset facet value (value).
# An empty dict means no differing names are recorded for that project.
# The function docstring of `create_dataset_map` describes how to print an
# up to date version: run `python -m esmvalcore.esgf.facets`.
DATASET_MAP = {
    "CMIP3": {},
    "CMIP5": {
        "ACCESS1-0": "ACCESS1.0",
        "ACCESS1-3": "ACCESS1.3",
        "bcc-csm1-1": "BCC-CSM1.1",
        "bcc-csm1-1-m": "BCC-CSM1.1(m)",
        "CESM1-BGC": "CESM1(BGC)",
        "CESM1-CAM5": "CESM1(CAM5)",
        "CESM1-CAM5-1-FV2": "CESM1(CAM5.1,FV2)",
        "CESM1-FASTCHEM": "CESM1(FASTCHEM)",
        "CESM1-WACCM": "CESM1(WACCM)",
        "CSIRO-Mk3-6-0": "CSIRO-Mk3.6.0",
        "fio-esm": "FIO-ESM",
        "GFDL-CM2p1": "GFDL-CM2.1",
        "inmcm4": "INM-CM4",
        "MRI-AGCM3-2H": "MRI-AGCM3.2H",
        "MRI-AGCM3-2S": "MRI-AGCM3.2S",
    },
    "CMIP6": {},
    "CORDEX": {},
    "obs4MIPs": {},
}
"""Cache for the mapping between recipe/filesystem and ESGF dataset names."""


def create_dataset_map():
    """Build a fresh map from recipe dataset names to ESGF dataset names.

    For every project in ``FACETS``, all values of that project's dataset
    facet are requested from the first configured ESGF search URL. One
    search result is then fetched per facet value. When the facet value
    does not occur literally in the result's dataset id, the component of
    the dot-separated id at the project-specific position is recorded as
    the alias for that facet value.

    Progress is reported on standard output with ``print``.

    Run `python -m esmvalcore.esgf.facets` to print an up to date map.

    Returns
    -------
    dict
        ``{project: {name taken from the dataset id: dataset facet value}}``
        with one (possibly empty) inner dict per project in ``FACETS``.
    """
    esgf_config = get_esgf_config()
    # Copy the settings so that popping "urls" leaves the config untouched.
    connection_kwargs = dict(esgf_config["search_connection"])
    first_url = connection_kwargs.pop("urls")[0]
    connection = pyesgf.search.SearchConnection(
        url=first_url,
        **connection_kwargs,
    )

    # Position of the dataset name within the dot-separated dataset id.
    name_position = {
        "CMIP3": 2,
        "CMIP5": 3,
        "CMIP6": 3,
        "CORDEX": 7,
        "obs4MIPs": 2,
    }

    dataset_map = {}
    for project, facet_names in FACETS.items():
        aliases = dataset_map[project] = {}
        facet = facet_names["dataset"]
        context = connection.new_context(
            project=project,
            facets=[facet],
            fields=["id"],
            latest=True,
        )
        facet_values = sorted(context.facet_counts[facet])
        print(f"The following datasets are available for project {project}:")
        for dataset in facet_values:
            print(dataset)

        # Figure out the ESGF name of the requested dataset
        n_available = len(facet_values)
        for i, dataset in enumerate(facet_values, start=1):
            print(
                f"Looking for dataset name of facet name"
                f" {dataset} ({i} of {n_available})"
            )
            # A single hit is enough to learn how the id spells the name.
            hits = context.search(batch_size=1, **{facet: dataset})
            dataset_result = next(iter(hits))
            print(f"Dataset id: {dataset_result.dataset_id}")
            full_id = dataset_result.dataset_id
            if dataset in full_id:
                # Facet value and id agree; no alias needed.
                continue
            dataset_alias = full_id.split(".")[name_position[project]]
            print(
                f"Found dataset name '{dataset_alias}'"
                f" for facet '{dataset}',"
            )
            aliases[dataset_alias] = dataset

    return dataset_map
if __name__ == "__main__":
    # Executed as a script: build an up to date DATASET_MAP and print it.
    up_to_date_map = create_dataset_map()
    print(up_to_date_map)