src.diagnostic module

class src.diagnostic.PodDataFileFormat(value)

Bases: src.util.basic.MDTFEnum

An enumeration of the netCDF file format options for POD data files; this is the type of the format setting.

ANY_NETCDF = 1
ANY_NETCDF_CLASSIC = 2
ANY_NETCDF3 = 3
NETCDF3_CLASSIC = 4
NETCDF_64BIT_OFFSET = 5
NETCDF_64BIT_DATA = 6
ANY_NETCDF4 = 7
NETCDF4_CLASSIC = 8
NETCDF4 = 9
class src.diagnostic._VarlistGlobalSettings(format: src.diagnostic.PodDataFileFormat = <PodDataFileFormat.ANY_NETCDF_CLASSIC>, rename_variables: bool = False, multi_file_ok: bool = False, dimensions_ordered: bool = False)[source]

Bases: object

format: src.diagnostic.PodDataFileFormat = 2
rename_variables: bool = False
multi_file_ok: bool = False
dimensions_ordered: bool = False
class src.diagnostic._VarlistTimeSettings(frequency: src.util.datelabel.DateFrequency = sentinel.NotSet, min_frequency: src.util.datelabel.DateFrequency = sentinel.NotSet, max_frequency: src.util.datelabel.DateFrequency = sentinel.NotSet, min_duration: str = sentinel.NotSet, max_duration: str = sentinel.NotSet)[source]

Bases: object

frequency: src.util.datelabel.DateFrequency = sentinel.NotSet
min_frequency: src.util.datelabel.DateFrequency = sentinel.NotSet
max_frequency: src.util.datelabel.DateFrequency = sentinel.NotSet
min_duration: str = sentinel.NotSet
max_duration: str = sentinel.NotSet
class src.diagnostic.VarlistSettings(frequency: src.util.datelabel.DateFrequency = sentinel.NotSet, min_frequency: src.util.datelabel.DateFrequency = sentinel.NotSet, max_frequency: src.util.datelabel.DateFrequency = sentinel.NotSet, min_duration: str = sentinel.NotSet, max_duration: str = sentinel.NotSet, format: src.diagnostic.PodDataFileFormat = <PodDataFileFormat.ANY_NETCDF_CLASSIC>, rename_variables: bool = False, multi_file_ok: bool = False, dimensions_ordered: bool = False)[source]

Bases: src.diagnostic._VarlistGlobalSettings, src.diagnostic._VarlistTimeSettings

Class to describe options affecting all variables requested by this POD. Corresponds to the “data” section of the POD’s settings.jsonc file.

property global_settings
property time_settings
class src.diagnostic.VarlistCoordinateMixin(need_bounds: bool = False)[source]

Bases: object

Base class to describe a single dimension (in the netcdf data model sense) used by one or more variables. Corresponds to list entries in the “dimensions” section of the POD’s settings.jsonc file.

need_bounds: bool = False
class src.diagnostic.VarlistCoordinate(need_bounds: bool = False, standard_name: str = sentinel.Mandatory, units: src.units.Units = sentinel.Mandatory, axis: str = 'OTHER', bounds_var: src.data_model.AbstractDMCoordinateBounds = None, value: Union[int, float] = None, name: str = sentinel.Mandatory)[source]

Bases: src.data_model.DMCoordinate, src.diagnostic.VarlistCoordinateMixin

class src.diagnostic.VarlistLongitudeCoordinate(need_bounds: bool = False, standard_name: str = 'longitude', units: src.units.Units = 'degrees_east', axis: str = 'X', bounds_var: src.data_model.AbstractDMCoordinateBounds = None, value: Union[int, float] = None, name: str = 'lon', range: tuple = None)[source]

Bases: src.data_model.DMLongitudeCoordinate, src.diagnostic.VarlistCoordinateMixin

range: tuple = None
class src.diagnostic.VarlistLatitudeCoordinate(need_bounds: bool = False, standard_name: str = 'latitude', units: src.units.Units = 'degrees_north', axis: str = 'Y', bounds_var: src.data_model.AbstractDMCoordinateBounds = None, value: Union[int, float] = None, name: str = 'lat', range: tuple = None)[source]

Bases: src.data_model.DMLatitudeCoordinate, src.diagnostic.VarlistCoordinateMixin

range: tuple = None
class src.diagnostic.VarlistVerticalCoordinate(need_bounds: bool = False, standard_name: str = sentinel.Mandatory, units: src.units.Units = '1', axis: str = 'Z', bounds_var: src.data_model.AbstractDMCoordinateBounds = None, value: Union[int, float] = None, name: str = sentinel.Mandatory, positive: str = sentinel.Mandatory)[source]

Bases: src.data_model.DMVerticalCoordinate, src.diagnostic.VarlistCoordinateMixin

class src.diagnostic.VarlistPlaceholderTimeCoordinate(need_bounds: bool = False, standard_name: str = 'time', units: src.units.Units = '', axis: str = 'T', bounds_var: src.data_model.AbstractDMCoordinateBounds = None, value: Union[int, float] = None, name: str = 'time', calendar: str = '', range: Any = None, frequency: Any = '', min_frequency: Any = '', max_frequency: Any = '', min_duration: Any = 'any', max_duration: Any = 'any')[source]

Bases: src.data_model.DMGenericTimeCoordinate, src.diagnostic.VarlistCoordinateMixin

frequency: Any = ''
min_frequency: Any = ''
max_frequency: Any = ''
min_duration: Any = 'any'
max_duration: Any = 'any'
standard_name = 'time'
axis = 'T'
class src.diagnostic.VarlistTimeCoordinate(need_bounds: bool = False, standard_name: str = 'time', units: src.units.Units = sentinel.Mandatory, axis: str = 'T', bounds_var: src.data_model.AbstractDMCoordinateBounds = None, value: Union[int, float] = None, name: str = sentinel.Mandatory, calendar: str = '', range: src.util.datelabel.AbstractDateRange = None, frequency: src.util.datelabel.DateFrequency = sentinel.NotSet, min_frequency: src.util.datelabel.DateFrequency = sentinel.NotSet, max_frequency: src.util.datelabel.DateFrequency = sentinel.NotSet, min_duration: str = sentinel.NotSet, max_duration: str = sentinel.NotSet)[source]

Bases: src.diagnostic._VarlistTimeSettings, src.data_model.DMTimeCoordinate, src.diagnostic.VarlistCoordinateMixin

class src.diagnostic.VarlistEntryRequirement(value)

Bases: src.util.basic.MDTFEnum

util.MDTFEnum used to track whether the DataSource is required to provide data for the VarlistEntry.

REQUIRED = 1
OPTIONAL = 2
ALTERNATE = 3
AUX_COORDINATE = 4
class src.diagnostic.VarlistEntryStage(value)

Bases: src.util.basic.MDTFIntEnum

util.MDTFIntEnum used to track the stages of processing of a VarlistEntry carried out by the DataSource.

NOTSET = 1
INITED = 2
QUERIED = 3
FETCHED = 4
PREPROCESSED = 5
class src.diagnostic.VarlistEntry(*args, **kwargs)[source]

Bases: src.core.MDTFObjectBase, src.data_model.DMVariable, src.diagnostic._VarlistGlobalSettings, src.util.logs.VarlistEntryLoggerMixin

Class to describe data for a single variable requested by a POD. Corresponds to list entries in the “varlist” section of the POD’s settings.jsonc file.

Two VarlistEntries are equal (as determined by the __eq__ method, which compares fields without compare=False) if they specify the same data product, i.e., if the same output file from the preprocessor can be symlinked to two different locations.

Attributes
  • use_exact_name – see docs

  • env_var – Name of env var which is set to the variable’s name in the provided dataset.

  • path_variable – Name of env var containing path to local data.

  • dest_path – Path to local data.

  • alternates – List of lists of VarlistEntries.

  • translation – core.TranslatedVarlistEntry, populated by DataSource.

  • data – dict mapping experiment_keys to DataKeys. Populated by DataSource.

use_exact_name: bool = False
env_var: str = ''
path_variable: str = ''
dest_path: str = ''
requirement: src.diagnostic.VarlistEntryRequirement = 1
alternates: list
translation: Any = None
data: util.ConsistentDict
stage: src.diagnostic.VarlistEntryStage = 1
_deactivation_log_level = 20
property _children

Iterable of child objects associated with this object.

property name_in_model
classmethod from_struct(global_settings_d, dims_d, name, parent, **kwargs)[source]

Instantiate from a struct in the varlist section of a POD’s settings.jsonc.

iter_alternates()[source]

Breadth-first traversal of “sets” of alternate VarlistEntries, alternates for those alternates, etc. (“Sets” is in quotes because they’re implemented as lists here, since VarlistEntries aren’t immutable.)

This is a “deep” iterator, yielding alternates of alternates, alternates of those, … etc. until variables with no alternates are encountered or all variables have been yielded. In addition, it yields the “sets” of alternates and not the VarlistEntries themselves.

static alternates_str(alt_list)[source]
debug_str()[source]

String representation with more debugging information.

iter_data_keys(status=None, status_neq=None)[source]

Yield DataKeyBases from v’s data dict, filtering out those DataKeys that have been eliminated via previous failures in fetching or preprocessing.

deactivate_data_key(d_key, exc)[source]

When a DataKey (d_key) has been deactivated during query or fetch, log a message and delete our record of it if we were using it, and deactivate ourselves if we don’t have any viable DataKeys left.

We can’t just use the status attribute on the DataKey, because the VarlistEntry-DataKey relationship is many-to-many.

property local_data

Return sorted list of local file paths corresponding to the selected experiment.

query_attrs(key_synonyms=None)[source]

Returns a dict of attributes relevant for DataSource.query_dataset() (i.e., attributes which describe the variable itself and aren’t specific to the MDTF implementation).

property env_vars

Get env var definitions for:

  • The path to the preprocessed data file for this variable,

  • The name for this variable in that data file,

  • The names for all of this variable’s coordinate axes in that file,

  • The names of the bounds variables for all of those coordinate dimensions, if provided by the data.

_abc_impl = <_abc_data object>
class src.diagnostic.Varlist(coords: dataclasses.InitVar = None, contents: dataclasses.InitVar = sentinel.Mandatory)[source]

Bases: src.data_model.DMDataSet

Class to perform bookkeeping for the model variables requested by a single POD.

classmethod from_struct(d, parent)[source]

Parse the “dimensions”, “data” and “varlist” sections of the POD’s settings.jsonc file when instantiating a new Diagnostic() object.

Parameters

d (dict) – Contents of the POD’s settings.jsonc file.

Returns

dict, keys are names of the dimensions in POD’s convention, values are PodDataDimension objects.

find_var(v)[source]

If a variable matching v is already present in the Varlist, return (a reference to) it (so that we don’t try to add duplicates), otherwise return None.

vars
coord_bounds
aux_coords
class src.diagnostic.Diagnostic(*args, **kwargs)[source]

Bases: src.core.MDTFObjectBase, src.util.logs.PODLoggerMixin

Class holding configuration for a diagnostic script. Object attributes are read from entries in the settings section of the POD’s settings.jsonc file upon initialization.

See settings file documentation for documentation on attributes.

long_name: str = ''
description: str = ''
convention: str = 'CF'
realm: str = ''
driver: str = ''
program: str = ''
runtime_requirements: dict
pod_env_vars: util.ConsistentDict
log_file: io.IOBase = None
varlist: src.diagnostic.Varlist = None
preprocessor: Any = None
POD_CODE_DIR = ''
POD_OBS_DATA = ''
POD_WK_DIR = ''
POD_OUT_DIR = ''
_deactivation_log_level = 40
_interpreters = {'.R': 'Rscript', '.ncl': 'ncl', '.py': 'python'}
property _log_name
classmethod from_struct(pod_name, d, parent, **kwargs)[source]

Instantiate a Diagnostic object from the JSON format used in its settings.jsonc file.

classmethod from_config(pod_name, parent)[source]

Usual method of instantiating Diagnostic objects, from the contents of its settings.jsonc file as stored in the ConfigManager.

property _children

Iterable of child objects associated with this object.

child_deactivation_handler(failed_v, failed_v_exc)[source]

Update the status of which VarlistEntries are “active” (not failed somewhere in the query/fetch process) based on new information. If the process has failed for a VarlistEntry, try to find a set of alternate VarlistEntries. If successful, activate them; if not, raise a PodDataError.

close_log_file(log=True)[source]
setup(data_source)[source]

Configuration set by the DataSource on the POD (after the POD is initialized, but before pre-run checks.)

setup_pod_directories()[source]

Check and create directories specific to this POD.

set_entry_point()[source]

Locate the top-level driver script for the POD.

Raises: PodRuntimeError if driver script can’t be found.

set_interpreter()[source]

Determine what executable should be used to run the driver script.

Note

Existence of the program on the environment’s $PATH isn’t checked until just before the POD runs (see src.environment_manager).

pre_run_setup()[source]

Perform filesystem operations and checks prior to running the POD.

In order, this 1) sets environment variables specific to the POD, 2) creates POD-specific working directories, and 3) checks for the existence of the POD’s driver script.

Note

The existence of data files is checked with data_manager.DataManager.fetchData() and the runtime environment is validated separately as a function of environment_manager.EnvironmentManager.run(). This is because each POD is run in a subprocess (due to the necessity of supporting multiple languages) so the validation must take place in that subprocess.

Raises: PodRuntimeError if requirements aren’t met. This is re-raised from the set_entry_point() and _check_for_varlist_files() subroutines.

set_pod_env_vars()[source]

Sets all environment variables for the POD: paths and names of each variable and coordinate. Raises a WormKeyError if any of these definitions conflict.

_abc_impl = <_abc_data object>