Source code for gwgen.evaluation

# -*- coding: utf-8 -*-
"""Evaluation module of the gwgen module"""
from __future__ import division
import os.path as osp
import six
from collections import namedtuple
from psyplot.compat.pycompat import OrderedDict
from itertools import chain, starmap, repeat
import numpy as np
from scipy import stats
import pandas as pd
from gwgen.utils import docstrings
import gwgen.utils as utils
import logging



class Evaluator(utils.TaskBase):
    """Abstract base class for evaluation tasks

    Evaluation tasks should incorporate a run method that is called by the
    :meth:`gwgen.main.GWGENOrganizer.evaluate` method"""

    _registry = []

    @property
    def task_data_dir(self):
        """The directory where to store data"""
        return self.eval_dir


_PreparationConfigBase = namedtuple(
    '_PreparationConfigBase',
    ['setup_raw', 'raw2db', 'raw2csv', 'reference', 'input_path'])

_PreparationConfigBase = utils.append_doc(
    _PreparationConfigBase, docstrings.get_sections("""
Parameters
----------
setup_raw: { 'scratch' | 'file' | 'db' | None }
    The method how to setup the raw data from GHCN and EECRA

    ``'scratch'``
        To set up the task from the raw data
    ``'file'``
        Set up the task from an existing file
    ``'db'``
        Set up the task from a database
    ``None``
        If the datafile of this task exists, use it; otherwise, if a
        database is provided, use that; otherwise set the task up from
        scratch
raw2db: bool
    If True, the raw data from GHCN and EECRA is stored in a postgres
    database
raw2csv: bool
    If True, the raw data from GHCN and EECRA is stored in a csv file
reference: str
    The path of the file where to store the reference data. If None and not
    already set in the configuration, it will default to
    ``'evaluation/reference.csv'``
input_path: str
    The path of the file where to store the model input. If None and not
    already set in the configuration, it will default to
    ``'inputdir/input.csv'`` where *inputdir* is the path to the input
    directory (by default, *input* in the experiment directory)
""", '_PreparationConfigBase'))

PreparationConfig = utils.enhanced_config(_PreparationConfigBase,
                                          'PreparationConfig')


@docstrings.dedent
def default_preparation_config(
        setup_raw=None, raw2db=False, raw2csv=False, reference=None,
        input_path=None, *args, **kwargs):
    """
    The default configuration for :class:`EvaluationPreparation` instances.
    See also the :attr:`EvaluationPreparation.default_config` attribute

    Parameters
    ----------
    %(PreparationConfig.parameters)s"""
    return PreparationConfig(
        setup_raw, raw2db, raw2csv, reference, input_path,
        *utils.default_config(*args, **kwargs))
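

# Illustrative sketch (not part of gwgen): the ``default_config`` properties
# of the tasks below merge such namedtuple configurations via the standard
# ``_replace(**other._asdict())`` pattern, so that the fields of the base
# configuration overwrite the task-specific defaults. All names in this
# helper are hypothetical.
def _example_merge_configs():
    """Demonstrate the ``_replace``/``_asdict`` merging used below"""
    Base = namedtuple('Base', ['to_csv', 'to_db'])
    Prep = namedtuple('Prep', ['setup_raw', 'to_csv', 'to_db'])
    base = Base(to_csv=True, to_db=False)
    # the fields shared with `base` are overwritten, `setup_raw` is kept
    return Prep(None, False, False)._replace(**base._asdict())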


class EvaluationPreparation(Evaluator):
    """Evaluation task to prepare the evaluation"""

    name = 'prepare'

    summary = 'Prepare the experiment for evaluation'

    http_inventory = (
        'ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt')

    _datafile = ['reference.csv', 'input.csv']

    dbname = ['reference', 'input']

    has_run = False

    @property
    def default_config(self):
        ret = default_preparation_config()._replace(
            **super(EvaluationPreparation, self).default_config._asdict())
        return ret._replace(**dict(reference=self.reference_path,
                                   input_path=self.input_path))

    @property
    def datafile(self):
        """The paths to reference and input file"""
        return [self.reference_path, self.input_path]

    @property
    def ghcnd_inventory_file(self):
        return osp.join(self.data_dir, 'ghcn', 'ghcnd-inventory.txt')

    @property
    def station_list(self):
        fname = self.ghcnd_inventory_file
        station_ids, lat, lon, variables, first, last = np.loadtxt(
            fname, dtype='S11', unpack=True).astype(np.str_)
        return pd.DataFrame({'id': station_ids,
                             'lat': lat.astype(float),
                             'lon': lon.astype(float),
                             'vname': variables,
                             'firstyr': first.astype(int),
                             'lastyr': last.astype(int)},
                            index=np.arange(len(station_ids)))

    @property
    def reference_data(self):
        """The reference :class:`~pandas.DataFrame`"""
        return self.data[0]

    @reference_data.setter
    def reference_data(self, data):
        self.data[0] = data

    @property
    def input_data(self):
        """The input :class:`~pandas.DataFrame`"""
        return self.data[1]

    @input_data.setter
    def input_data(self, data):
        self.data[1] = data

    @classmethod
    def _modify_parser(cls, parser):
        parser.setup_args(default_preparation_config)
        # disable plot_output, etc.
        parser, setup_grp, run_grp = super(
            EvaluationPreparation, cls)._modify_parser(parser)
        parser.update_arg('setup_raw', short='fr', long='raw-from',
                          group=setup_grp)
        parser.update_arg('input_path', short='i', long='input',
                          group=setup_grp)
        parser.update_arg('reference', short='r', group=setup_grp)
        parser.update_arg('raw2db', group=setup_grp)
        parser.update_arg('raw2csv', group=setup_grp)
        return parser, setup_grp, run_grp

    def write2file(self, *args, **kwargs):
        """Reimplemented to sort the data according to the index"""
        for data in self.data:
            data.sort_index(inplace=True)
        return super(EvaluationPreparation, self).write2file(*args, **kwargs)

    def write2db(self, *args, **kwargs):
        """Reimplemented to sort the data according to the index"""
        for data in self.data:
            data.sort_index(inplace=True)
        return super(EvaluationPreparation, self).write2db(*args, **kwargs)

    def setup_from_db(self, *args, **kwargs):
        kwargs['index_col'] = [
            ['id', 'year', 'month', 'day'],         # reference
            ['id', 'lon', 'lat', 'year', 'month']]  # input
        super(EvaluationPreparation, self).setup_from_db(*args, **kwargs)

    def setup_from_file(self, *args, **kwargs):
        kwargs['index_col'] = [
            ['id', 'year', 'month', 'day'],         # reference
            ['id', 'lon', 'lat', 'year', 'month']]  # input
        super(EvaluationPreparation, self).setup_from_file(*args, **kwargs)

    def __init__(self, *args, **kwargs):
        self._manager = kwargs.pop('manager', None)
        super(EvaluationPreparation, self).__init__(*args, **kwargs)
        if self.task_config.to_csv:
            # update the configuration
            self.reference_path = self.reference_path
            self.input_path = self.input_path

    def __reduce__(self):
        """Reimplemented to also provide the manager"""
        ret = list(super(EvaluationPreparation, self).__reduce__())
        if len(ret) < 3:
            ret.append({})
        ret[2]['_manager'] = self._manager
        return tuple(ret)

    def init_from_scratch(self):
        """Initialize the setup via the parameterization classes"""
        from gwgen.parameterization import (
            Parameterizer, YearlyCompleteDailyCloud,
            YearlyCompleteDailyGHCNData, YearlyCompleteMonthlyCloud,
            YearlyCompleteMonthlyGHCNData)
        classes = [
            YearlyCompleteDailyCloud, YearlyCompleteDailyGHCNData,
            YearlyCompleteMonthlyCloud, YearlyCompleteMonthlyGHCNData]
        config = self.config.copy()
        config['paramdir'] = self.eval_dir
        kws = dict(config=config, project_config=self.project_config,
                   to_csv=self.task_config.raw2csv,
                   to_db=self.task_config.raw2db,
                   setup_from=self.task_config.setup_raw)
        base_kws = {cls.name: kws for cls in classes}
        self._manager = Parameterizer.get_manager(
            config=self.global_config.copy())
        self._manager.initialize_tasks(self.stations, task_kws=base_kws)
        if not self.global_config.get('serial'):
            import multiprocessing as mp
            for task in self._manager.tasks:
                utils._db_locks[task.dbname] = mp.Lock()
                utils._file_locks[task.datafile] = mp.Lock()
        # make sure the lat-lon information is available
        self.download_src()

    def download_src(self, force=False):
        fname = self.ghcnd_inventory_file
        if force or not osp.exists(fname):
            self.logger.info("Downloading station file from %s to %s",
                             self.http_inventory, fname)
            utils.download_file(self.http_inventory, fname)

    def setup_from_scratch(self):
        from gwgen.parameterization import (
            YearlyCompleteDailyCloud, YearlyCompleteDailyGHCNData,
            YearlyCompleteMonthlyCloud, YearlyCompleteMonthlyGHCNData)

        def get(name):
            return next(t for t in tasks if t.name == name)
        # force serial setup because we might already be in a parallel setup
        self._manager.config['serial'] = True
        self._manager.setup(self.stations, to_return='all')
        tasks = self._manager.tasks
        # save in the right order for the FORTRAN code
        order = ['tmin', 'tmax', 'mean_cloud', 'wind', 'prcp', 'wet_day']
        # daily reference
        cday = get(YearlyCompleteDailyGHCNData.name).data
        ccday = get(YearlyCompleteDailyCloud.name).data
        try:
            reference = cday.merge(
                ccday[['mean_cloud', 'wind']], left_index=True,
                right_index=True, how='left')
        except TypeError:  # indices do not match
            reference = cday.ix[1:0]  # create empty data frame
            reference['mean_cloud'] = np.array(
                [], dtype=ccday.mean_cloud.dtype)
            reference['wind'] = np.array([], dtype=ccday.wind.dtype)
        reference['wet_day'] = (reference.prcp > 0).astype(int)
        self.reference_data = reference[order].sort_index()
        # monthly input
        cmonth = get(YearlyCompleteMonthlyGHCNData.name).data
        ccmonth = get(YearlyCompleteMonthlyCloud.name).data
        try:
            exp_input = cmonth.merge(
                ccmonth[['mean_cloud', 'wind']], left_index=True,
                right_index=True, how='left')
            # set cloud and wind to 0 where we have no reference
            exp_input.ix[exp_input.mean_cloud.isnull(),
                         ['mean_cloud', 'wind']] = 0
        except TypeError:  # indices do not match
            exp_input = cmonth.ix[1:0]  # create empty data frame
            exp_input['mean_cloud'] = np.array(
                [], dtype=ccmonth.mean_cloud.dtype)
            exp_input['wind'] = np.array([], dtype=ccmonth.wind.dtype)
        inventory = self.station_list
        exp_input = exp_input.reset_index().merge(
            inventory[inventory.vname == 'PRCP'][['id', 'lon', 'lat']],
            on='id')
        exp_input.set_index(['id', 'lon', 'lat', 'year', 'month'],
                            inplace=True)
        self.input_data = exp_input[order].sort_index()
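

# Illustrative sketch (not part of gwgen): ``setup_from_scratch`` above joins
# the daily GHCN records with the daily cloud/wind records on their shared
# (station, date) index and derives the binary ``wet_day`` flag from
# precipitation. A minimal stand-alone version with made-up data:
def _example_build_reference():
    """Demonstrate the index-based merge and the ``wet_day`` flag"""
    idx = pd.MultiIndex.from_tuples(
        [('GME00102380', 2000, 1, 1), ('GME00102380', 2000, 1, 2)],
        names=['id', 'year', 'month', 'day'])
    ghcn = pd.DataFrame({'tmin': [-1.0, 0.5], 'tmax': [4.0, 6.0],
                         'prcp': [0.0, 2.3]}, index=idx)
    cloud = pd.DataFrame({'mean_cloud': [0.5, 0.8], 'wind': [3.2, 4.1]},
                         index=idx)
    ref = ghcn.merge(cloud, left_index=True, right_index=True, how='left')
    ref['wet_day'] = (ref.prcp > 0).astype(int)  # 1 on days with rain
    return ref.sort_index()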


class OutputTask(Evaluator):
    """Task to provide all the data for input and output"""

    # the last will be ignored!
    _datafile = 'output.csv'

    dbname = 'output'

    name = 'output'

    summary = 'Load the output of the model'

    has_run = False

    @property
    def datafile(self):
        return [self.output_path]

    def write2file(self, *args, **kwargs):
        """Not implemented since the output file is generated by the model!"""
        self.logger.warn(
            "Writing to file of %s task is disabled because this is done "
            "by the model!", self.name)

    def setup_from_db(self, *args, **kwargs):
        kwargs['index_col'] = ['id', 'year', 'month', 'day']
        super(OutputTask, self).setup_from_db(*args, **kwargs)

    def setup_from_file(self, *args, **kwargs):
        kwargs['index_col'] = ['id', 'year', 'month', 'day']
        super(OutputTask, self).setup_from_file(*args, **kwargs)

    def setup_from_scratch(self):
        raise ValueError(
            "Cannot set up the %s task from scratch because it requires the "
            "output of the model!" % self.name)
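

# Illustrative sketch (not part of gwgen): ``setup_from_file`` above boils
# down to reading the csv with a :class:`~pandas.MultiIndex` over station id
# and date, which is what the ``index_col`` keyword requests. The function
# name is hypothetical.
def _example_read_output(path):
    """Read an output csv indexed the way ``OutputTask`` expects it"""
    return pd.read_csv(path, index_col=['id', 'year', 'month', 'day'])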


_KSConfig = namedtuple('_KSConfig',
                       ['no_rounding', 'names', 'transform_wind'])

_KSConfig = utils.append_doc(_KSConfig, docstrings.get_sections("""
Parameters
----------
no_rounding: bool
    Do not round the simulation to the inferred precision of the
    reference. The inferred precision is the minimum difference between
    two values within the entire data
names: list of str
    The list of variables to use for the calculation. If None, all
    variables will be used
transform_wind: bool
    If True, the square root of the wind is evaluated (as this is also
    simulated in the weather generator)""", '_KSConfig'))

KSConfig = utils.enhanced_config(_KSConfig, 'KSConfig')


@docstrings.dedent
def default_ks_config(
        no_rounding=False, names=None, transform_wind=False,
        *args, **kwargs):
    """
    The default configuration for :class:`KSEvaluation` instances.
    See also the :attr:`KSEvaluation.default_config` attribute

    Parameters
    ----------
    %(KSConfig.parameters)s"""
    return KSConfig(no_rounding, names, transform_wind,
                    *utils.default_config(*args, **kwargs))


_QuantileConfig = namedtuple(
    '_QuantileConfig', ['quantiles'] + list(_KSConfig._fields))

_QuantileConfig = utils.append_doc(_QuantileConfig, docstrings.get_sections(
    docstrings.dedents("""
Parameters
----------
%(_KSConfig.parameters)s
quantiles: list of floats
    The quantiles to use for calculating the percentiles
"""), '_QuantileConfig'))

QuantileConfig = utils.enhanced_config(_QuantileConfig, 'QuantileConfig')


@docstrings.dedent
def default_quantile_config(
        quantiles=[1, 5, 10, 25, 50, 75, 90, 95, 99, 100],
        *args, **kwargs):
    """
    The default configuration for :class:`QuantileEvaluation` instances.
    See also the :attr:`QuantileEvaluation.default_config` attribute

    Parameters
    ----------
    %(QuantileConfig.parameters)s"""
    return QuantileConfig(quantiles, *default_ks_config(*args, **kwargs))
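

# Illustrative sketch (not part of gwgen): ``QuantileEvaluation.calc`` below
# reduces each variable to these percentiles with :func:`numpy.percentile`,
# after dropping NaNs and, for precipitation, the dry days. A stand-alone
# version of that reduction (function name and data are made up):
def _example_percentiles(arr, quantiles=(1, 5, 10, 25, 50, 75, 90, 95, 99,
                                         100), is_prcp=False):
    """Compute the percentiles of `arr` the way the quantile task does"""
    arr = np.asarray(arr, dtype=float)
    arr = arr[~np.isnan(arr)]
    if is_prcp:
        arr = arr[arr > 0]  # only wet days enter the precipitation quantiles
    if len(arr) == 0:
        return np.full(len(quantiles), np.nan)
    return np.percentile(arr, list(quantiles))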


class QuantileEvaluation(Evaluator):
    """Evaluator to evaluate specific quantiles"""

    name = 'quants'

    summary = 'Compare the quantiles of simulation and observation'

    names = OrderedDict([
        ('prcp', {'long_name': 'Precipitation', 'units': 'mm'}),
        ('tmin', {'long_name': 'Min. Temperature', 'units': 'degC'}),
        ('tmax', {'long_name': 'Max. Temperature', 'units': 'degC'}),
        ('mean_cloud', {'long_name': 'Cloud fraction', 'units': '-'}),
        ('wind', {'long_name': 'Wind Speed', 'units': 'm/s'}),
        ])

    @property
    def all_variables(self):
        return [[v + '_ref', v + '_sim'] for v in self.names]

    setup_requires = ['prepare', 'output']

    has_run = True

    _datafile = 'quantile_evaluation.csv'

    dbname = 'quantile_evaluation'

    #: default formatoptions for the
    #: :class:`psyplot.plotter.linreg.DensityRegPlotter` plotter
    fmt = kwargs = dict(
        legend={'loc': 'upper left'},
        cmap='w_Reds',
        title='%(pctl)sth percentile',
        xlabel='%(type)s {desc}',
        ylabel='%(type)s {desc}',
        xrange=(['minmax', 1], ['minmax', 99]),
        yrange=(['minmax', 1], ['minmax', 99]),
        legendlabels=['$R^2$ = %(rsquared)s'],
        bounds=['minmax', 11, 0, 99],
        cbar='',
        bins=10,
        ideal=[0, 1],
        id_color='r',
        sym_lims='max',
        )

    @property
    def default_config(self):
        return default_quantile_config()._replace(
            **super(QuantileEvaluation, self).default_config._asdict())

    @property
    def ds(self):
        """The dataset of the quantiles"""
        import xarray as xr
        data = self.data.reset_index()
        ds = xr.Dataset.from_dataframe(
            data.set_index('pctl', append=True).swaplevel())
        full = xr.Dataset.from_dataframe(data).drop(list(chain(
            self.data.index.names, ['pctl'])))
        idx_name = full[next(v for v in self.names) + '_sim'].dims[-1]
        full.rename({idx_name: 'full_index'}, inplace=True)
        for vref, vsim in self.all_variables:
            full.rename({vref: 'all_' + vref, vsim: 'all_' + vsim},
                        inplace=True)
        ds.merge(full, inplace=True)
        for orig, attrs, (vref, vsim) in zip(
                self.names, self.names.values(), self.all_variables):
            for prefix in ['', 'all_']:
                ds[prefix + vsim].attrs.update(attrs)
                ds[prefix + vref].attrs.update(attrs)
                ds[prefix + vsim].attrs['standard_name'] = orig
                ds[prefix + vref].attrs['standard_name'] = orig
                ds[prefix + vref].attrs['type'] = 'observed'
                ds[prefix + vsim].attrs['type'] = 'simulated'
            ds['all_' + vsim].attrs['pctl'] = 'All'
            ds['all_' + vref].attrs['pctl'] = 'All'
        ds.pctl.attrs['long_name'] = 'Percentile'
        return ds

    def __init__(self, *args, **kwargs):
        super(QuantileEvaluation, self).__init__(*args, **kwargs)
        names = self.task_config.names
        if names is not None:
            self.names = OrderedDict(
                t for t in self.names.items() if t[0] in names)

    def setup_from_file(self, *args, **kwargs):
        kwargs['index_col'] = ['id', 'year']
        return super(QuantileEvaluation, self).setup_from_file(
            *args, **kwargs)

    def setup_from_db(self, *args, **kwargs):
        kwargs['index_col'] = ['id', 'year']
        return super(QuantileEvaluation, self).setup_from_db(*args, **kwargs)

    def setup_from_scratch(self):
        df_ref = self.prepare.reference_data
        dfi = self.prepare.input_data.reset_index(['lon', 'lat'])
        # create simulation dataframe
        df_sim = self.output.data
        if len(df_ref) == 0 or len(df_sim) == 0:
            self.logger.debug(
                'Skipping %s because reference data contains no information!',
                self.name)
            return
        names = self.names
        # round the simulation to the precision inferred from the
        # observations (unless disabled via the `no_rounding` option)
        if not self.task_config.no_rounding:
            for name in names:
                df_sim[name].values[:] = self.round_to_ref_prec(
                    df_ref[name].values, df_sim[name].values)
        # merge reference and simulation into one single dataframe
        df = df_ref.merge(df_sim, left_index=True, right_index=True,
                          suffixes=['_ref', '_sim'])
        if {'mean_cloud', 'wind'}.intersection(names):
            df.reset_index('day', inplace=True)
            df = df.merge(dfi[['mean_cloud', 'wind']], left_index=True,
                          right_index=True)
            # mask out non-complete months for cloud validation and months
            # with 0 or 1 cloud fraction
            if 'mean_cloud' in names:
                df.ix[df['mean_cloud_ref'].isnull().values |
                      (df['mean_cloud'] == 0.0) |
                      (df['mean_cloud'] == 1.0),
                      ['mean_cloud_sim', 'mean_cloud_ref']] = np.nan
            # mask out non-complete months for wind validation and months
            # with a mean wind speed of 0
            if 'wind' in names:
                df.ix[df['wind_ref'].isnull().values |
                      (df['wind'] == 0.0),
                      ['wind_sim', 'wind_ref']] = np.nan
            df.drop(['mean_cloud', 'wind'], 1, inplace=True)
            df.set_index('day', append=True, inplace=True)
        # transform wind
        if self.task_config.transform_wind and 'wind' in names:
            df['wind_ref'] **= 0.5
            df['wind_sim'] **= 0.5
        # calculate the percentiles for each station and year
        g = df.sort_index().groupby(level=['id', 'year'])
        self.logger.debug('Done with basic setup')
        data = g.apply(self.calc)
        if len(data):
            data.index = data.index.droplevel(2)
        self.data = data

    @classmethod
    def _modify_parser(cls, parser):
        parser.setup_args(default_ks_config)
        parser.setup_args(default_quantile_config)
        parser, setup_grp, run_grp = super(
            QuantileEvaluation, cls)._modify_parser(parser)
        parser.update_arg(
            'quantiles', short='q', group=run_grp, type=utils.str_ranges,
            metavar='f1[,f21[-f22[-f23]]]',
            help=docstrings.dedents("""
                The quantiles to use for calculating the percentiles.
                %(str_ranges.s_help)s."""))
        parser.pop_key('quantiles', 'nargs', None)
        parser.update_arg('no_rounding', short='nr', group=run_grp)
        parser.update_arg('names', short='n', group=setup_grp, nargs='+',
                          metavar='variable', choices=list(cls.names))
        parser.update_arg('transform_wind', short='tw', group=setup_grp)
        return parser, setup_grp, run_grp

    def create_project(self, ds):
        import psyplot.project as psy
        import seaborn as sns
        sns.set_style('white')
        for name, (vref, vsim) in zip(self.names, self.all_variables):
            self.logger.debug('Creating plots of %s', vsim)
            kwargs = dict(precision=0.1) if vref.startswith('prcp') else {}
            psy.plot.densityreg(
                ds, name='all_' + vsim, coord='all_' + vref, fmt=self.fmt,
                title='All percentiles', arr_names=['%s_all' % name],
                **kwargs)
            arr_names = ['%s_%1.2f' % (name, p) for p in ds.pctl.values]
            psy.plot.densityreg(
                ds, name=vsim, coord=vref, fmt=self.fmt,
                arr_names=arr_names, pctl=range(ds.pctl.size), **kwargs)
        return psy.gcp(True)[:]

    def make_run_config(self, sp, info):
        for orig in self.names:
            info[orig] = d = OrderedDict()
            for plotter in sp(standard_name=orig).plotters:
                d[plotter.data.pctl if plotter.data.name.startswith('all')
                  else int(plotter.data.pctl.values)] = pctl_d = OrderedDict()
                for key in ['rsquared', 'slope', 'intercept']:
                    val = plotter.plot_data[1].attrs.get(key)
                    if val is not None:
                        pctl_d[key] = float(val)
        return info

    def calc(self, group):
        def calc_percentiles(vname):
            arr = group[vname].values
            arr = arr[~np.isnan(arr)]
            if vname.startswith('prcp'):
                arr = arr[arr > 0]
            if len(arr) == 0:
                return np.array([np.nan] * len(quantiles))
            else:
                return np.percentile(arr, quantiles)
        quantiles = self.task_config.quantiles
        df = pd.DataFrame.from_dict(dict(zip(
            chain(*self.all_variables),
            map(calc_percentiles, chain(*self.all_variables)))))
        df['pctl'] = quantiles
        df.set_index('pctl')
        return df

    @staticmethod
    def round_to_ref_prec(ref, sim, func=np.ceil):
        """Round one array to the precision of another

        Parameters
        ----------
        ref: np.ndarray
            The reference array to get the precision from
        sim: np.ndarray
            The simulated array to round
        func: function
            The rounding function to use

        Returns
        -------
        np.ndarray
            Rounded `sim`"""
        ref_sorted = np.unique(ref)
        if len(ref_sorted) < 2:
            return sim
        precision = (ref_sorted[1:] - ref_sorted[:-1]).min()
        return func(sim / precision) * precision
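

# Illustrative sketch (not part of gwgen): the precision in
# ``round_to_ref_prec`` above is inferred as the smallest gap between two
# distinct reference values. With temperatures reported in tenths of a
# degree, simulated values are then rounded up to that 0.1 grid:
def _example_round_to_ref_prec():
    """Demonstrate the precision inference of ``round_to_ref_prec``"""
    ref = np.array([10.0, 10.1, 10.3])  # reported in tenths, precision 0.1
    sim = np.array([10.04, 10.16])
    # ~[10.1, 10.2] (up to floating point noise)
    return QuantileEvaluation.round_to_ref_prec(ref, sim)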


class KSEvaluation(QuantileEvaluation):
    """Evaluation using a Kolmogorov-Smirnov test"""

    name = 'ks'

    summary = 'Perform a Kolmogorov-Smirnov test'

    requires = ['prepare', 'output']

    _datafile = 'kolmogorov_evaluation.csv'

    dbname = 'kolmogorov_evaluation'

    @property
    def default_config(self):
        return default_ks_config()._replace(
            **super(QuantileEvaluation, self).default_config._asdict())

    @staticmethod
    def calc(group):
        def calc(v1, v2, name):
            if len(v1) <= 10 or len(v2) <= 10:
                return {
                    name + '_stat': [np.nan], name + '_p': [np.nan],
                    name: [None], 'n' + name + '_sim': [np.nan],
                    'n' + name + '_ref': [np.nan]}
            statistic, p_value = stats.ks_2samp(v1, v2)
            n1 = len(v1)
            n2 = len(v2)
            n = np.sqrt((n1 + n2) / (n1 * n2))
            # if statistic > 1.36 * n, we reject the null hypothesis
            # (alpha = 0.05)
            return {
                name + '_stat': [statistic], name + '_p': [p_value],
                name: [statistic > 1.36 * n],
                'n' + name + '_sim': [n1], 'n' + name + '_ref': [n2]}
        prcp_sim = group.prcp_sim.values[group.prcp_sim.values > 0]
        prcp_ref = group.prcp_ref.values[group.prcp_ref.values > 0]
        tmin_sim = group.tmin_sim.values[
            (group.tmin_sim.values < 100) & (group.tmin_sim.values > -100) &
            (~np.isnan(group.tmin_sim.values))]
        tmin_ref = group.tmin_ref.values[
            (group.tmin_ref.values < 100) & (group.tmin_ref.values > -100) &
            (~np.isnan(group.tmin_ref.values))]
        tmax_sim = group.tmax_sim.values[
            (group.tmax_sim.values < 100) & (group.tmax_sim.values > -100) &
            (~np.isnan(group.tmax_sim.values))]
        tmax_ref = group.tmax_ref.values[
            (group.tmax_ref.values < 100) & (group.tmax_ref.values > -100) &
            (~np.isnan(group.tmax_ref.values))]
        cloud_sl = group.mean_cloud_ref.notnull().values
        cloud_sim = group.mean_cloud_sim.values[cloud_sl]
        cloud_ref = group.mean_cloud_ref.values[cloud_sl]
        wind_sl = group.wind_ref.notnull().values
        wind_sim = group.wind_sim.values[wind_sl]
        wind_ref = group.wind_ref.values[wind_sl]
        return pd.DataFrame.from_dict(dict(chain(*map(
            six.iteritems, starmap(calc, [
                (prcp_sim, prcp_ref, 'prcp'),
                (tmin_sim, tmin_ref, 'tmin'),
                (tmax_sim, tmax_ref, 'tmax'),
                (cloud_sim, cloud_ref, 'mean_cloud'),
                (wind_sim, wind_ref, 'wind')])))))

    def significance_fractions(self, series):
        """The percentage of stations with no significant difference"""
        return 100. - (len(series.ix[series.notnull() & (series)]) /
                       series.count()) * 100.

    def run(self, info):
        """Run the evaluation

        Parameters
        ----------
        info: dict
            The configuration dictionary"""
        logger = self.logger
        logger.info('Calculating %s evaluation', self.name)
        df = self.data
        names = list(self.names)
        for name in names:
            info[name] = float(self.significance_fractions(df[name]))
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('Done. Stations without significant difference:')
            for name, val in info.items():
                logger.debug(' %s: %6.3f %%' % (name, val))
        try:
            import cartopy.crs as ccrs
        except ImportError:
            self.logger.warn(
                "Cartopy is not installed, skipping plot of %s task",
                self.name)
        else:
            self.plot_map()
            info['plot_file'] = self.pdf_file

    def plot_map(self):
        from matplotlib.backends.backend_pdf import PdfPages
        import cartopy.crs as ccrs
        import matplotlib.pyplot as plt
        names = list(self.names)
        df = self.data[names]
        for n in names:
            df[n] = df.pop(n).astype(float)
        g = df.groupby(level='id')
        df_fract = g.agg(dict(zip(
            names, repeat(self.significance_fractions))))
        df_fract = df_fract.merge(
            g.agg(dict(zip(names, repeat('sum')))), left_index=True,
            right_index=True, suffixes=['', '_sum'])
        df_lola = EvaluationPreparation.from_task(self).station_list
        df_lola = df_lola.ix[~df_lola.duplicated('id').values]
        df_lola.set_index('id', inplace=True)
        df_plot = df_lola.merge(df_fract, how='right', left_index=True,
                                right_index=True)
        pdf = PdfPages(self.pdf_file)
        for name in names:
            fig = plt.figure()
            ax = plt.axes(projection=ccrs.PlateCarree())
            ax.set_title(self.names[name]['long_name'])
            ax.coastlines()
            ax.background_patch.set_facecolor('0.95')
            ax.set_global()
            df_plot.sort_values(name, ascending=False).plot.scatter(
                'lon', 'lat', c=name, ax=ax, colorbar=False,
                transform=ccrs.PlateCarree(), cmap='Reds_r', s=5,
                edgecolor='none')
            cbar = fig.colorbar(ax.collections[-1],
                                orientation='horizontal')
            cbar.set_label(
                'Percentage of years not differing significantly [%]')
            pdf.savefig(fig, bbox_inches='tight')
            fig = plt.figure()
            ax = plt.axes(projection=ccrs.PlateCarree())
            ax.set_title(self.names[name]['long_name'])
            ax.coastlines()
            ax.background_patch.set_facecolor('0.95')
            ax.set_global()
            df_plot.sort_values(name, ascending=False).plot.scatter(
                'lon', 'lat', c=name + '_sum', ax=ax, colorbar=False,
                transform=ccrs.PlateCarree(), cmap='Reds', s=5,
                edgecolor='none')
            cbar = fig.colorbar(ax.collections[-1],
                                orientation='horizontal')
            cbar.set_label('Significantly differing years')
            pdf.savefig(fig, bbox_inches='tight')
        pdf.close()
        if self.task_config.close:
            plt.close('all')

    @classmethod
    def _modify_parser(cls, parser):
        parser.setup_args(default_ks_config)
        parser, setup_grp, run_grp = super(
            QuantileEvaluation, cls)._modify_parser(parser)
        parser.update_arg('no_rounding', short='nr', group=run_grp)
        parser.update_arg('names', short='n', group=setup_grp, nargs='+',
                          metavar='variable', choices=list(cls.names))
        parser.update_arg('transform_wind', short='tw', group=setup_grp)
        parser.pop_arg('nc_output', None)
        parser.pop_arg('project_output', None)
        parser.pop_arg('new_project', None)
        parser.pop_arg('project', None)
        return parser, setup_grp, run_grp
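

# Illustrative sketch (not part of gwgen): the decision rule in
# ``KSEvaluation.calc`` above rejects equality of the two samples (at
# alpha = 0.05) when the KS statistic exceeds the asymptotic critical value
# 1.36 * sqrt((n1 + n2) / (n1 * n2)). Stand-alone, with hypothetical names:
def _example_ks_decision(sim, ref, alpha_coefficient=1.36):
    """Return True if `sim` and `ref` differ significantly (alpha = 0.05)"""
    statistic, p_value = stats.ks_2samp(sim, ref)
    n1, n2 = len(sim), len(ref)
    critical = alpha_coefficient * np.sqrt((n1 + n2) / (n1 * n2))
    return statistic > critical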


_QualityConfig = namedtuple('_QualityConfig', ['quantiles'])

_QualityConfig = utils.append_doc(_QualityConfig, docstrings.get_sections("""
Parameters
----------
quantiles: list of floats
    The quantiles to use for the quality analysis""", '_QualityConfig'))

QualityConfig = utils.enhanced_config(_QualityConfig, 'QualityConfig')


@docstrings.dedent
def default_quality_config(quantiles=None, *args, **kwargs):
    """
    The default configuration for :class:`SimulationQuality` instances.
    See also the :attr:`SimulationQuality.default_config` attribute

    Parameters
    ----------
    %(QualityConfig.parameters)s"""
    return QualityConfig(quantiles, *utils.default_config(*args, **kwargs))


class SimulationQuality(Evaluator):
    """Evaluator to provide one value characterizing the quality of the
    experiment

    The applied metric is

    .. math::

        m = \\left<\\left<\{R^2_q\}_{q\in Q}\\right>,
            \\left<\{1 - |1 - a_q|\}_{q\in Q}\\right>, ks\\right>

    where :math:`\\left<\\right>` denotes the mean of the enclosed set,
    :math:`q\in Q` are the quantiles from the quantile evaluation,
    :math:`R^2_q` the corresponding coefficient of determination and
    :math:`a_q` the slope of quantile :math:`q`. :math:`ks` denotes the
    fraction of stations that do not differ significantly from the
    observations according to the KS test.

    In other words, this quality estimate is the mean of

    1. the coefficients of determination,
    2. the closeness of the slopes to the ideal slope (:math:`a_q = 1`) and
    3. the fraction of stations that do not differ significantly.

    Hence, a value of 1 means high quality, a value of 0 low quality"""

    name = 'quality'

    summary = 'Estimate simulation quality using ks and quantile evaluation'

    has_run = True

    @classmethod
    def _modify_parser(cls, parser):
        parser.add_argument(
            '-q', '--quantiles', metavar='q1[,q1[,q2[,...]]]',
            help="The quantiles to use for the quality analysis",
            type=lambda s: list(map(float, s.split(','))))
        return parser, None, None

    @property
    def default_config(self):
        return default_quality_config()._replace(
            **super(SimulationQuality, self).default_config._asdict())

    def setup_from_scratch(self):
        """Only sets an empty dataframe"""
        self.data = pd.DataFrame([])

    def run(self, info):
        logger = self.logger
        logger.info('Calculating %s evaluation', self.name)
        quants_info = self.config['evaluation'].get('quants')
        ks_info = self.config['evaluation'].get('ks')
        missing = []
        if quants_info is None:
            missing.append('quants task')
        if ks_info is None:
            missing.append('ks task')
        if missing:
            raise ValueError("%s requires that %s has been run before!" % (
                self.name, ' and '.join(missing)))
        quantiles = self.task_config.quantiles
        if quantiles is None:
            quantiles = slice(None)
        possible_names = {'wind', 'prcp', 'tmin', 'tmax', 'mean_cloud',
                          'cloud'}
        for v, v_ks in ks_info.items():
            if v not in quants_info or v not in possible_names:
                continue
            # dataframe with intercept, rsquared and slope on the index and
            # quantiles as columns
            df = pd.DataFrame(quants_info[v]).loc[:, quantiles]
            try:
                del df['All']
            except KeyError:
                pass
            slope = float((1 - np.abs(1 - df.loc['slope'].values)).mean())
            rsquared = float(df.loc['rsquared'].values.mean())
            info[v] = OrderedDict([
                ('rsquared', rsquared), ('slope', slope),
                ('ks', v_ks / 100.),
                ('quality', float(np.mean([rsquared, slope, v_ks / 100.])))])
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('Simulation quality:')
            for name, val in info.items():
                logger.debug(' %s: %6.3f' % (name, val['quality']))
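

# Illustrative sketch (not part of gwgen): a worked instance of the quality
# metric of :class:`SimulationQuality` with made-up numbers. For R^2 values
# of (0.9, 0.8), slopes of (1.1, 0.95) and 90 % of the stations passing the
# KS test, the metric is
# mean(mean(0.9, 0.8), mean(1 - |1 - 1.1|, 1 - |1 - 0.95|), 0.9):
def _example_quality():
    """Compute the quality metric for hypothetical evaluation results"""
    rsquared = np.mean([0.9, 0.8])                           # 0.85
    slope = np.mean(1 - np.abs(1 - np.array([1.1, 0.95])))   # 0.925
    ks = 90 / 100.                   # fraction of non-differing stations
    return float(np.mean([rsquared, slope, ks]))             # ~0.892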