# -*- coding: utf-8 -*-
"""Evaluation module of the gwgen module"""
from __future__ import division
import os.path as osp
import six
from collections import namedtuple
from psyplot.compat.pycompat import OrderedDict
from itertools import chain, starmap, repeat
import numpy as np
from scipy import stats
import pandas as pd
from gwgen.utils import docstrings
import gwgen.utils as utils
import logging
class Evaluator(utils.TaskBase):
    """Abstract base class for evaluation tasks

    Concrete evaluation tasks are expected to provide a ``run`` method,
    which is invoked by :meth:`gwgen.main.GWGENOrganizer.evaluate`."""

    #: registry of the evaluation task classes (filled by the framework)
    _registry = []

    @property
    def task_data_dir(self):
        """The directory where to store data"""
        # all evaluation tasks store their data in the evaluation directory
        return self.eval_dir
_PreparationConfigBase = namedtuple('_PreparationConfigBase',
['setup_raw', 'raw2db', 'raw2csv',
'reference', 'input_path'])
_PreparationConfigBase = utils.append_doc(
_PreparationConfigBase, docstrings.get_sections("""
Parameters
----------
setup_raw: { 'scratch' | 'file' | 'db' | None }
The method how to setup the raw data from GHCN and EECRA
``'scratch'``
To set up the task from the raw data
``'file'``
Set up the task from an existing file
``'db'``
Set up the task from a database
``None``
If the file name of this this task exists, use this one,
otherwise a database is provided, use this one, otherwise go
from scratch
raw2db: bool
If True, the raw data from GHCN and EECRA is stored in a postgres database
raw2csv: bool
If True, the raw data from GHCN and EECRA is stored in a csv file
reference: str
The path of the file where to store the reference data. If None and not
already set in the configuration, it will default to
``'evaluation/reference.csv'``
input_path: str
The path of the file where to store the model input. If None, and not
already set in the configuration, it will default to
``'inputdir/input.csv'`` where *inputdir* is the path to the input
directory (by default, *input* in the experiment directory)
""", '_PreparationConfigBase'))
PreparationConfig = utils.enhanced_config(_PreparationConfigBase,
'PreparationConfig')
@docstrings.dedent
def default_preparation_config(
        setup_raw=None, raw2db=False, raw2csv=False, reference=None,
        input_path=None, *args, **kwargs):
    """
    The default configuration for :class:`EvaluationPreparation` instances.
    See also the :attr:`EvaluationPreparation.default_config` attribute
    Parameters
    ----------
    %(PreparationConfig.parameters)s"""
    # NOTE: the %(...)s placeholder above is filled in by the
    # ``docstrings.dedent`` decorator -- keep the docstring intact.
    # Remaining positional/keyword arguments fill the base task config.
    return PreparationConfig(setup_raw, raw2db, raw2csv, reference, input_path,
                             *utils.default_config(*args, **kwargs))
class EvaluationPreparation(Evaluator):
    """Evaluation task to prepare the evaluation"""

    #: name of this task in the task framework
    name = 'prepare'

    #: one-line summary for the command line help
    #: (word order fixed: was 'Prepare the for experiment for evaluation')
    summary = 'Prepare the experiment for evaluation'

    #: URL of the GHCN-Daily station inventory downloaded by
    #: :meth:`download_src`
    http_inventory = (
        'ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt')

    #: default basenames of the two data files of this task
    _datafile = ['reference.csv', 'input.csv']

    #: database table names for the reference and input data
    dbname = ['reference', 'input']

    #: this task only prepares data, it does not implement ``run``
    has_run = False
@property
def default_config(self):
    """Default :class:`PreparationConfig` using the configured paths"""
    ret = default_preparation_config()._replace(
        **super(EvaluationPreparation, self).default_config._asdict())
    return ret._replace(**dict(reference=self.reference_path,
                               input_path=self.input_path))

@property
def datafile(self):
    """The paths to reference and input file"""
    return [self.reference_path, self.input_path]

@property
def ghcnd_inventory_file(self):
    """Local path of the downloaded GHCN-Daily station inventory"""
    return osp.join(self.data_dir, 'ghcn', 'ghcnd-inventory.txt')

@property
def station_list(self):
    """:class:`pandas.DataFrame` of the GHCN inventory (one row per entry)

    Columns: station ``id``, ``lat``, ``lon``, variable name ``vname`` and
    the first/last year of the record (``firstyr``/``lastyr``)."""
    fname = self.ghcnd_inventory_file
    # read every column as an 11-character string first, convert below
    station_ids, lat, lon, variables, first, last = np.loadtxt(
        fname, dtype='S11', unpack=True).astype(np.str_)
    return pd.DataFrame({'id': station_ids, 'lat': lat.astype(float),
                         'lon': lon.astype(float), 'vname': variables,
                         'firstyr': first.astype(int),
                         'lastyr': last.astype(int)},
                        index=np.arange(len(station_ids)))
@property
def reference_data(self):
    """The reference :class:`~pandas.DataFrame`"""
    # the reference frame is stored first in the ``data`` list
    return self.data[0]

@reference_data.setter
def reference_data(self, data):
    self.data[0] = data

@property
def input_data(self):
    """The input :class:`~pandas.DataFrame`"""
    # the model input frame is stored second in the ``data`` list
    return self.data[1]

@input_data.setter
def input_data(self, data):
    self.data[1] = data
@classmethod
def _modify_parser(cls, parser):
    """Set up the command line arguments of the preparation task"""
    parser.setup_args(default_preparation_config)
    # disable plot_output, etc.
    parser, setup_grp, run_grp = super(
        EvaluationPreparation, cls)._modify_parser(parser)
    parser.update_arg('setup_raw', short='fr', long='raw-from',
                      group=setup_grp)
    parser.update_arg('input_path', short='i', long='input',
                      group=setup_grp)
    parser.update_arg('reference', short='r', group=setup_grp)
    parser.update_arg('raw2db', group=setup_grp)
    parser.update_arg('raw2csv', group=setup_grp)
    return parser, setup_grp, run_grp
def write2file(self, *args, **kwargs):
    """Reimplemented to sort both data frames by index before saving"""
    for frame in self.data:
        frame.sort_index(inplace=True)
    return super(EvaluationPreparation, self).write2file(*args, **kwargs)

def write2db(self, *args, **kwargs):
    """Reimplemented to sort both data frames by index before saving"""
    for frame in self.data:
        frame.sort_index(inplace=True)
    return super(EvaluationPreparation, self).write2db(*args, **kwargs)
def setup_from_db(self, *args, **kwargs):
    # one index list per data frame: reference data is indexed by station
    # and day, input data by station, coordinates and month
    kwargs['index_col'] = [['id', 'year', 'month', 'day'],  # reference
                           ['id', 'lon', 'lat', 'year', 'month']]  # input
    super(EvaluationPreparation, self).setup_from_db(*args, **kwargs)

def setup_from_file(self, *args, **kwargs):
    # one index list per data frame: reference data is indexed by station
    # and day, input data by station, coordinates and month
    kwargs['index_col'] = [['id', 'year', 'month', 'day'],  # reference
                           ['id', 'lon', 'lat', 'year', 'month']]  # input
    super(EvaluationPreparation, self).setup_from_file(*args, **kwargs)
def __init__(self, *args, **kwargs):
    # the task manager is received through a keyword so that it can also be
    # restored on unpickling (see :meth:`__reduce__`)
    self._manager = kwargs.pop('manager', None)
    super(EvaluationPreparation, self).__init__(*args, **kwargs)
    if self.task_config.to_csv:  # update the configuration
        # NOTE(review): the self-assignments presumably trigger property
        # setters that record the paths in the configuration -- verify in
        # utils.TaskBase
        self.reference_path = self.reference_path
        self.input_path = self.input_path

def __reduce__(self):
    """Reimplemented to also provide the manager"""
    ret = list(super(EvaluationPreparation, self).__reduce__())
    if len(ret) < 3:
        # make sure a state dict exists to put the manager into
        ret.append({})
    ret[2]['_manager'] = self._manager
    return tuple(ret)
def init_from_scratch(self):
    """Initialize the setup via the parameterization classes"""
    from gwgen.parameterization import (
        Parameterizer, YearlyCompleteDailyCloud,
        YearlyCompleteDailyGHCNData, YearlyCompleteMonthlyCloud,
        YearlyCompleteMonthlyGHCNData)
    # the raw data is assembled by the "complete" parameterization tasks
    classes = [
        YearlyCompleteDailyCloud, YearlyCompleteDailyGHCNData,
        YearlyCompleteMonthlyCloud, YearlyCompleteMonthlyGHCNData]
    config = self.config.copy()
    config['paramdir'] = self.eval_dir
    kws = dict(config=config, project_config=self.project_config,
               to_csv=self.task_config.raw2csv,
               to_db=self.task_config.raw2db,
               setup_from=self.task_config.setup_raw)
    # the same keywords for every parameterization task
    base_kws = {cls.name: kws for cls in classes}
    self._manager = Parameterizer.get_manager(
        config=self.global_config.copy())
    self._manager.initialize_tasks(self.stations, task_kws=base_kws)
    if not self.global_config.get('serial'):
        # guard concurrent database/file access of the tasks with locks
        import multiprocessing as mp
        for task in self._manager.tasks:
            utils._db_locks[task.dbname] = mp.Lock()
            utils._file_locks[task.datafile] = mp.Lock()
    # make sure the lat-lon information is available
    self.download_src()
def download_src(self, force=False):
    """Download the GHCN-Daily station inventory if necessary

    Parameters
    ----------
    force: bool
        If True, download even if the file already exists"""
    fname = self.ghcnd_inventory_file
    # nothing to do when the inventory is already there
    if not force and osp.exists(fname):
        return
    self.logger.info("Downloading station file from %s to %s",
                     self.http_inventory, fname)
    utils.download_file(self.http_inventory, fname)
def setup_from_scratch(self):
    """Assemble reference and input data from the parameterization tasks"""
    from gwgen.parameterization import (
        YearlyCompleteDailyCloud, YearlyCompleteDailyGHCNData,
        YearlyCompleteMonthlyCloud, YearlyCompleteMonthlyGHCNData)
    def get(name):
        # pick the manager task with the given name
        return next(t for t in tasks if t.name == name)
    # force serial setup because we might already be in a parallel setup
    self._manager.config['serial'] = True
    self._manager.setup(self.stations, to_return='all')
    tasks = self._manager.tasks
    # save in the right order for the FORTRAN code
    order = ['tmin', 'tmax', 'mean_cloud', 'wind', 'prcp', 'wet_day']
    # daily reference
    cday = get(YearlyCompleteDailyGHCNData.name).data
    ccday = get(YearlyCompleteDailyCloud.name).data
    try:
        reference = cday.merge(
            ccday[['mean_cloud', 'wind']], left_index=True,
            right_index=True, how='left')
    except TypeError:  # indices do not match
        reference = cday.ix[1:0]  # create empty data frame
        reference['mean_cloud'] = np.array([],
                                           dtype=ccday.mean_cloud.dtype)
        reference['wind'] = np.array([],
                                     dtype=ccday.wind.dtype)
    # a wet day is a day with non-zero precipitation
    reference['wet_day'] = (reference.prcp > 0).astype(int)
    self.reference_data = reference[order].sort_index()
    # monthly input
    cmonth = get(YearlyCompleteMonthlyGHCNData.name).data
    ccmonth = get(YearlyCompleteMonthlyCloud.name).data
    try:
        exp_input = cmonth.merge(
            ccmonth[['mean_cloud', 'wind']], left_index=True,
            right_index=True, how='left')
        # set cloud and wind to 0 where we have no reference
        exp_input.ix[exp_input.mean_cloud.isnull(),
                     ['mean_cloud', 'wind']] = 0
    except TypeError:  # indices do not match
        exp_input = cmonth.ix[1:0]  # create empty data frame
        exp_input['mean_cloud'] = np.array([],
                                           dtype=ccmonth.mean_cloud.dtype)
        exp_input['wind'] = np.array([],
                                     dtype=ccmonth.wind.dtype)
    # attach lon/lat from the GHCN inventory; the 'PRCP' rows presumably
    # cover all stations with precipitation data -- TODO confirm
    inventory = self.station_list
    exp_input = exp_input.reset_index().merge(
        inventory[inventory.vname == 'PRCP'][['id', 'lon', 'lat']],
        on='id')
    exp_input.set_index(['id', 'lon', 'lat', 'year', 'month'],
                        inplace=True)
    self.input_data = exp_input[order].sort_index()
class OutputTask(Evaluator):
    """Task to provide all the data for input and output"""

    # the last will be ignored!
    _datafile = 'output.csv'
    dbname = 'output'
    name = 'output'
    summary = 'Load the output of the model'
    # the data of this task is produced by the model itself, nothing to run
    has_run = False

    @property
    def datafile(self):
        """Path of the model output file of the experiment"""
        return [self.output_path]
def write2file(self, *args, **kwargs):
    """Not implemented since the output file is generated by the model!"""
    self.logger.warn(
        "Writing to file of %s task is disabled because this is done "
        "by the model!", self.name)

def setup_from_db(self, *args, **kwargs):
    # model output is daily data indexed by station and date
    kwargs['index_col'] = ['id', 'year', 'month', 'day']
    super(OutputTask, self).setup_from_db(*args, **kwargs)

def setup_from_file(self, *args, **kwargs):
    # model output is daily data indexed by station and date
    kwargs['index_col'] = ['id', 'year', 'month', 'day']
    super(OutputTask, self).setup_from_file(*args, **kwargs)
def setup_from_scratch(self):
    """Raise an error, the data can only come from the model itself

    Raises
    ------
    ValueError
        Always, because the output of this task is produced by running
        the weather generator and cannot be recreated here."""
    # Bug fix: the message contained a ``%s`` placeholder but no format
    # argument, so the task name was never inserted.
    raise ValueError(
        "Cannot set up the %s task from scratch because it requires the "
        "output of the model!" % self.name)
# Configuration fields shared by the KS and quantile evaluation tasks.
# NOTE: the string below is runtime data for the docstring substitution.
_KSConfig = namedtuple('_KSConfig', ['no_rounding', 'names', 'transform_wind'])
_KSConfig = utils.append_doc(_KSConfig, docstrings.get_sections("""
Parameters
----------
no_rounding: bool
    Do not round the simulation to the infered precision of the
    reference. The inferred precision is the minimum difference between
    two values with in the entire data
names: list of str
    The list of variables use for calculation. If None, all variables will be
    used
transform_wind: bool
    If True, the square root of the wind is evaluated (as this is also
    simulated in the weather generator)""", '_KSConfig'))

# the configuration class that is actually used by the KS task
KSConfig = utils.enhanced_config(_KSConfig, 'KSConfig')


@docstrings.dedent
def default_ks_config(
        no_rounding=False, names=None, transform_wind=False, *args, **kwargs):
    """
    The default configuration for :class:`KSEvaluation` instances.
    See also the :attr:`KSEvaluation.default_config` attribute
    Parameters
    ----------
    %(KSConfig.parameters)s"""
    # remaining positional/keyword arguments fill the base task config
    return KSConfig(no_rounding, names, transform_wind,
                    *utils.default_config(*args, **kwargs))
# Quantile task configuration: the KS fields plus the list of quantiles
_QuantileConfig = namedtuple(
    '_QuantileConfig',
    ['quantiles'] + list(_KSConfig._fields))
_QuantileConfig = utils.append_doc(_QuantileConfig, docstrings.get_sections(
    docstrings.dedents("""
Parameters
----------
%(_KSConfig.parameters)s
quantiles: list of floats
    The quantiles to use for calculating the percentiles
"""), '_QuantileConfig'))

QuantileConfig = utils.enhanced_config(_QuantileConfig, 'QuantileConfig')


@docstrings.dedent
def default_quantile_config(
        quantiles=[1, 5, 10, 25, 50, 75, 90, 95, 99, 100], *args, **kwargs):
    """
    The default configuration for :class:`QuantileEvaluation` instances.
    See also the :attr:`QuantileEvaluation.default_config` attribute
    Parameters
    ----------
    %(QuantileConfig.parameters)s"""
    # NOTE(review): the mutable list default is introspected by the command
    # line parser (``parser.setup_args``) and is never mutated here, so it
    # is deliberately kept as is
    return QuantileConfig(quantiles, *default_ks_config(*args, **kwargs))
class QuantileEvaluation(Evaluator):
    """Evaluator to evaluate specific quantiles"""

    name = 'quants'

    summary = 'Compare the quantiles of simulation and observation'

    #: meta data (long_name/units) of the variables that can be evaluated
    names = OrderedDict([
        ('prcp', {'long_name': 'Precipitation',
                  'units': 'mm'}),
        ('tmin', {'long_name': 'Min. Temperature',
                  'units': 'degC'}),
        ('tmax', {'long_name': 'Max. Temperature',
                  'units': 'degC'}),
        ('mean_cloud', {'long_name': 'Cloud fraction',
                        'units': '-'}),
        ('wind', {'long_name': 'Wind Speed',
                  'units': 'm/s'})
        ])

    @property
    def all_variables(self):
        """Pairs of reference/simulation column names for each variable"""
        return [[v + '_ref', v + '_sim'] for v in self.names]

    #: this task needs the prepared reference/input data and the model output
    setup_requires = ['prepare', 'output']

    has_run = True

    _datafile = 'quantile_evaluation.csv'

    dbname = 'quantile_evaluation'

    #: default formatoptions for the
    #: :class:`psyplot.plotter.linreg.DensityRegPlotter` plotter
    # NOTE(review): this statement binds *both* ``fmt`` and ``kwargs`` to the
    # same dict -- the ``kwargs`` alias looks accidental; confirm before
    # removing it
    fmt = kwargs = dict(
        legend={'loc': 'upper left'},
        cmap='w_Reds',
        title='%(pctl)sth percentile',
        xlabel='%(type)s {desc}',
        ylabel='%(type)s {desc}',
        xrange=(['minmax', 1], ['minmax', 99]),
        yrange=(['minmax', 1], ['minmax', 99]),
        legendlabels=['$R^2$ = %(rsquared)s'],
        bounds=['minmax', 11, 0, 99],
        cbar='',
        bins=10,
        ideal=[0, 1],
        id_color='r',
        sym_lims='max',
        )

    @property
    def default_config(self):
        # extend the base default configuration by the quantile settings
        return default_quantile_config()._replace(
            **super(QuantileEvaluation, self).default_config._asdict())
@property
def ds(self):
    """The dataset of the quantiles

    Returns an :class:`xarray.Dataset` with the per-percentile variables
    (indexed by the ``pctl`` coordinate) plus ``all_*`` variables holding
    the full, percentile-independent data."""
    import xarray as xr
    data = self.data.reset_index()
    ds = xr.Dataset.from_dataframe(
        data.set_index('pctl', append=True).swaplevel())
    # percentile-independent copy of the data
    full = xr.Dataset.from_dataframe(data).drop(list(chain(
        self.data.index.names, ['pctl'])))
    idx_name = full[next(v for v in self.names) + '_sim'].dims[-1]
    full.rename({idx_name: 'full_index'}, inplace=True)
    for vref, vsim in self.all_variables:
        full.rename({vref: 'all_' + vref, vsim: 'all_' + vsim},
                    inplace=True)
    ds.merge(full, inplace=True)
    for orig, attrs, (vref, vsim) in zip(
            self.names, self.names.values(), self.all_variables):
        for prefix in ['', 'all_']:
            ds[prefix + vsim].attrs.update(attrs)
            ds[prefix + vref].attrs.update(attrs)
            ds[prefix + vsim].attrs['standard_name'] = orig
            ds[prefix + vref].attrs['standard_name'] = orig
            ds[prefix + vref].attrs['type'] = 'observed'
            ds[prefix + vsim].attrs['type'] = 'simulated'
        # Bug fix: the 'pctl' attribute was set twice on the simulated
        # variable and never on the reference variable
        ds['all_' + vsim].attrs['pctl'] = 'All'
        ds['all_' + vref].attrs['pctl'] = 'All'
    ds.pctl.attrs['long_name'] = 'Percentile'
    return ds
def __init__(self, *args, **kwargs):
    """Reimplemented to restrict :attr:`names` to the configured variables"""
    super(QuantileEvaluation, self).__init__(*args, **kwargs)
    selected = self.task_config.names
    if selected is not None:
        # keep only the requested variables (preserving their order)
        self.names = OrderedDict(
            (key, attrs) for key, attrs in self.names.items()
            if key in selected)
def setup_from_file(self, *args, **kwargs):
    # the quantile data is indexed by station and year
    kwargs['index_col'] = ['id', 'year']
    return super(QuantileEvaluation, self).setup_from_file(*args, **kwargs)

def setup_from_db(self, *args, **kwargs):
    # the quantile data is indexed by station and year
    kwargs['index_col'] = ['id', 'year']
    return super(QuantileEvaluation, self).setup_from_db(*args, **kwargs)
def setup_from_scratch(self):
    """Merge reference data and model output and compute the percentiles"""
    df_ref = self.prepare.reference_data
    # monthly input data with lon/lat moved from the index into columns
    dfi = self.prepare.input_data.reset_index(['lon', 'lat'])
    # create simulation dataframe
    df_sim = self.output.data
    if len(df_ref) == 0 or len(df_sim) == 0:
        self.logger.debug(
            'Skipping %s because reference data contains no information!',
            self.name)
        return
    names = self.names
    # load observed precision
    # Bug fix: the condition was inverted (``if no_rounding:``), which
    # rounded exactly when the user asked *not* to round (the config
    # documents ``no_rounding`` as "Do not round the simulation ...")
    if not self.task_config.no_rounding:
        for name in names:
            df_sim[name].values[:] = self.round_to_ref_prec(
                df_ref[name].values, df_sim[name].values)
    # merge reference and simulation into one single dataframe
    df = df_ref.merge(df_sim, left_index=True, right_index=True,
                      suffixes=['_ref', '_sim'])
    if {'mean_cloud', 'wind'}.intersection(names):
        # attach the monthly means of cloud and wind by merging on the
        # (id, year, month) index
        df.reset_index('day', inplace=True)
        df = df.merge(dfi[['mean_cloud', 'wind']], left_index=True,
                      right_index=True)
        # mask out non-complete months for cloud validation and months with
        # 0 or 1 cloud fraction
        if 'mean_cloud' in names:
            df.ix[df['mean_cloud_ref'].isnull().values |
                  (df['mean_cloud'] == 0.0) |
                  (df['mean_cloud'] == 1.0),
                  ['mean_cloud_sim', 'mean_cloud_ref']] = np.nan
        # mask out non-complete wind for wind validation and months with
        # a mean wind speed of 0
        if 'wind' in names:
            df.ix[df['wind_ref'].isnull().values | (df['wind'] == 0.0),
                  ['wind_sim', 'wind_ref']] = np.nan
        df.drop(['mean_cloud', 'wind'], 1, inplace=True)
        df.set_index('day', append=True, inplace=True)
    # transform wind
    if self.task_config.transform_wind and 'wind' in names:
        df['wind_ref'] **= 0.5
        df['wind_sim'] **= 0.5
    # calculate the percentiles for each station and month
    g = df.sort_index().groupby(level=['id', 'year'])
    self.logger.debug('Done with basic setup')
    data = g.apply(self.calc)
    if len(data):
        # drop the extra index level introduced by the groupby-apply
        data.index = data.index.droplevel(2)
    self.data = data
@classmethod
def _modify_parser(cls, parser):
    """Add the KS/quantile specific command line arguments"""
    parser.setup_args(default_ks_config)
    parser.setup_args(default_quantile_config)
    parser, setup_grp, run_grp = super(
        QuantileEvaluation, cls)._modify_parser(parser)
    # quantiles may be given as comma separated values and ranges
    parser.update_arg(
        'quantiles', short='q', group=run_grp, type=utils.str_ranges,
        metavar='f1[,f21[-f22[-f23]]]', help=docstrings.dedents("""
            The quantiles to use for calculating the percentiles.
            %(str_ranges.s_help)s."""))
    parser.pop_key('quantiles', 'nargs', None)
    parser.update_arg('no_rounding', short='nr', group=run_grp)
    parser.update_arg('names', short='n', group=setup_grp,
                      nargs='+', metavar='variable',
                      choices=list(cls.names))
    parser.update_arg('transform_wind', short='tw', group=setup_grp)
    return parser, setup_grp, run_grp
def create_project(self, ds):
    """Create the psyplot project with the density regression plots

    Parameters
    ----------
    ds: xarray.Dataset
        The dataset as returned by the :attr:`ds` property"""
    import psyplot.project as psy
    import seaborn as sns
    sns.set_style('white')
    for name, (vref, vsim) in zip(self.names, self.all_variables):
        self.logger.debug('Creating plots of %s', vsim)
        # precipitation gets an explicit precision for the density plot
        kwargs = dict(precision=0.1) if vref.startswith('prcp') else {}
        # one plot over the full data ...
        psy.plot.densityreg(ds, name='all_' + vsim, coord='all_' + vref,
                            fmt=self.fmt, title='All percentiles',
                            arr_names=['%s_all' % name],
                            **kwargs)
        # ... and one plot per percentile
        arr_names = ['%s_%1.2f' % (name, p) for p in ds.pctl.values]
        psy.plot.densityreg(ds, name=vsim, coord=vref, fmt=self.fmt,
                            arr_names=arr_names, pctl=range(ds.pctl.size),
                            **kwargs)
    return psy.gcp(True)[:]
def make_run_config(self, sp, info):
    """Store the regression results of the plots in the configuration

    Parameters
    ----------
    sp: psyplot.project.Project
        The project with the plots of :meth:`create_project`
    info: dict
        The dictionary to fill with one mapping per variable from
        percentile to ``rsquared``/``slope``/``intercept``"""
    for orig in self.names:
        info[orig] = d = OrderedDict()
        for plotter in sp(standard_name=orig).plotters:
            # key is the percentile, or the 'All' label for the plot over
            # the full data
            d[plotter.data.pctl if plotter.data.name.startswith('all') else
              int(plotter.data.pctl.values)] = pctl_d = OrderedDict()
            for key in ['rsquared', 'slope', 'intercept']:
                val = plotter.plot_data[1].attrs.get(key)
                if val is not None:
                    pctl_d[key] = float(val)
    return info
def calc(self, group):
    """Calculate the percentiles for one (station, year) group

    Parameters
    ----------
    group: pandas.DataFrame
        The data of one station and year with the ``*_ref``/``*_sim``
        columns of :attr:`all_variables`

    Returns
    -------
    pandas.DataFrame
        One row per quantile in ``task_config.quantiles`` with the
        percentile value per column and the quantile in the ``pctl``
        column"""
    def calc_percentiles(vname):
        # percentiles of the non-NaN values; precipitation only considers
        # wet days (prcp > 0)
        arr = group[vname].values
        arr = arr[~np.isnan(arr)]
        if vname.startswith('prcp'):
            arr = arr[arr > 0]
        if len(arr) == 0:
            return np.array([np.nan] * len(quantiles))
        else:
            return np.percentile(arr, quantiles)
    quantiles = self.task_config.quantiles
    df = pd.DataFrame.from_dict(dict(zip(
        chain(*self.all_variables), map(calc_percentiles,
                                        chain(*self.all_variables)))))
    df['pctl'] = quantiles
    # NOTE: a former ``df.set_index('pctl')`` call here discarded its result
    # and therefore had no effect; downstream code (setup_from_scratch and
    # the :attr:`ds` property) expects 'pctl' as a regular column, so the
    # dead statement was removed rather than made in-place.
    return df
@staticmethod
[docs] def round_to_ref_prec(ref, sim, func=np.ceil):
"""Round one array to the precision of another
Parameters
----------
ref: np.ndarray
The reference array to get the precision from
sim: np.ndarray
The simulated array to round
func: function
The rounding function to use
Returns
-------
np.ndarray
Rounded `sim`"""
ref_sorted = np.unique(ref)
if len(ref_sorted) < 2:
return sim
precision = (ref_sorted[1:] - ref_sorted[:-1]).min()
return func((sim / precision) * precision)
class KSEvaluation(QuantileEvaluation):
    """Evaluation using a Kolmogorov-Smirnoff test"""

    name = 'ks'

    summary = 'Perform a kolmogorov smirnoff test'

    requires = ['prepare', 'output']

    _datafile = 'kolmogorov_evaluation.csv'

    dbname = 'kolmogorov_evaluation'

    @property
    def default_config(self):
        # NOTE: deliberately skips QuantileEvaluation.default_config (note
        # ``super(QuantileEvaluation, self)``) because the KS test takes no
        # quantiles in its configuration
        return default_ks_config()._replace(
            **super(QuantileEvaluation, self).default_config._asdict())
@staticmethod
def calc(group):
    """Run the two-sample KS test for one (station, year) group

    Parameters
    ----------
    group: pandas.DataFrame
        The ``*_ref``/``*_sim`` columns of a single group

    Returns
    -------
    pandas.DataFrame
        One row with, per variable ``<name>``, the test statistic
        (``<name>_stat``), the p-value (``<name>_p``), the rejection flag
        (``<name>``) and the sample sizes (``n<name>_sim``,
        ``n<name>_ref``)"""
    def calc(v1, v2, name):
        # with 10 or fewer samples the test result is not meaningful
        if len(v1) <= 10 or len(v2) <= 10:
            return {
                name + '_stat': [np.nan],
                name + '_p': [np.nan],
                name: [None],
                'n' + name + '_sim': [np.nan],
                'n' + name + '_ref': [np.nan]}
        statistic, p_value = stats.ks_2samp(v1, v2)
        n1 = len(v1)
        n2 = len(v2)
        n = np.sqrt((n1 + n2) / (n1 * n2))
        # if statistic > 1.36 * n, we reject the null hypothesis
        # (alpha = 0.05)
        return {
            name + '_stat': [statistic],
            name + '_p': [p_value],
            name: [statistic > 1.36 * n],
            'n' + name + '_sim': [n1],
            'n' + name + '_ref': [n2]}
    # precipitation: wet days only
    prcp_sim = group.prcp_sim.values[group.prcp_sim.values > 0]
    prcp_ref = group.prcp_ref.values[group.prcp_ref.values > 0]
    # temperatures: drop NaNs and implausible values (|T| >= 100)
    tmin_sim = group.tmin_sim.values[
        (group.tmin_sim.values < 100) & (group.tmin_sim.values > -100) &
        (~np.isnan(group.tmin_sim.values))]
    tmin_ref = group.tmin_ref.values[
        (group.tmin_ref.values < 100) & (group.tmin_ref.values > -100) &
        (~np.isnan(group.tmin_ref.values))]
    tmax_sim = group.tmax_sim.values[
        (group.tmax_sim.values < 100) & (group.tmax_sim.values > -100) &
        (~np.isnan(group.tmax_sim.values))]
    tmax_ref = group.tmax_ref.values[
        (group.tmax_ref.values < 100) & (group.tmax_ref.values > -100) &
        (~np.isnan(group.tmax_ref.values))]
    # cloud and wind: restrict both samples to rows with a valid reference
    cloud_sl = group.mean_cloud_ref.notnull().values
    cloud_sim = group.mean_cloud_sim.values[cloud_sl]
    cloud_ref = group.mean_cloud_ref.values[cloud_sl]
    wind_sl = group.wind_ref.notnull().values
    wind_sim = group.wind_sim.values[wind_sl]
    wind_ref = group.wind_ref.values[wind_sl]
    # run the test for every variable and combine the result dicts into
    # one single-row data frame
    return pd.DataFrame.from_dict(dict(chain(*map(six.iteritems, starmap(
        calc, [(prcp_sim, prcp_ref, 'prcp'),
               (tmin_sim, tmin_ref, 'tmin'),
               (tmax_sim, tmax_ref, 'tmax'),
               (cloud_sim, cloud_ref, 'mean_cloud'),
               (wind_sim, wind_ref, 'wind')])))))
def significance_fractions(self, series):
    """The percentage of stations with no significant difference"""
    # rows where the test was performed and found a significant difference
    significant = series.notnull() & (series)
    n_significant = len(series[significant])
    # count() ignores the None entries of skipped tests
    return 100. - (n_significant / series.count()) * 100.
def run(self, info):
    """Run the evaluation

    Parameters
    ----------
    info: dict
        The configuration dictionary"""
    logger = self.logger
    logger.info('Calculating %s evaluation', self.name)
    df = self.data
    names = list(self.names)
    # per variable: percentage of stations without significant difference
    for name in names:
        info[name] = float(self.significance_fractions(df[name]))
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug('Done. Stations without significant difference:')
        for name, val in info.items():
            logger.debug('    %s: %6.3f %%' % (name, val))
    # the map plots need cartopy, which is an optional dependency
    try:
        import cartopy.crs as ccrs
    except ImportError:
        self.logger.warn(
            "Cartopy is not installed, skipping plot of %s task",
            self.name)
    else:
        self.plot_map()
        info['plot_file'] = self.pdf_file
def plot_map(self):
    """Plot the KS results per station on global maps

    For each variable two maps are created: the percentage of years
    without significant difference and the total number of significantly
    differing years. All figures are appended to the pdf at
    :attr:`pdf_file`."""
    from matplotlib.backends.backend_pdf import PdfPages
    import cartopy.crs as ccrs
    import matplotlib.pyplot as plt
    names = list(self.names)
    df = self.data[names]
    # convert the rejection flags (True/False/None) to float for
    # aggregation
    for n in names:
        df[n] = df.pop(n).astype(float)
    g = df.groupby(level='id')
    # per station: percentage of years without significant difference ...
    df_fract = g.agg(dict(zip(
        names, repeat(self.significance_fractions))))
    # ... and the number of significantly differing years (sum of flags)
    df_fract = df_fract.merge(
        g.agg(dict(zip(names, repeat('sum')))), left_index=True,
        right_index=True, suffixes=['', '_sum'])
    # station coordinates from the GHCN inventory (deduplicated by id)
    df_lola = EvaluationPreparation.from_task(self).station_list
    df_lola = df_lola.ix[~df_lola.duplicated('id').values]
    df_lola.set_index('id', inplace=True)
    df_plot = df_lola.merge(df_fract, how='right', left_index=True,
                            right_index=True)
    pdf = PdfPages(self.pdf_file)
    for name in names:
        # map of the percentage of non-differing years
        fig = plt.figure()
        ax = plt.axes(projection=ccrs.PlateCarree())
        ax.set_title(self.names[name]['long_name'])
        ax.coastlines()
        ax.background_patch.set_facecolor('0.95')
        ax.set_global()
        df_plot.sort_values(name, ascending=False).plot.scatter(
            'lon', 'lat', c=name, ax=ax, colorbar=False,
            transform=ccrs.PlateCarree(), cmap='Reds_r', s=5,
            edgecolor='none')
        cbar = fig.colorbar(ax.collections[-1], orientation='horizontal')
        cbar.set_label(
            'Percentage of years not differing significantly[%]')
        pdf.savefig(fig, bbox_inches='tight')
        # map of the number of significantly differing years
        fig = plt.figure()
        ax = plt.axes(projection=ccrs.PlateCarree())
        ax.set_title(self.names[name]['long_name'])
        ax.coastlines()
        ax.background_patch.set_facecolor('0.95')
        ax.set_global()
        df_plot.sort_values(name, ascending=False).plot.scatter(
            'lon', 'lat', c=name + '_sum', ax=ax, colorbar=False,
            transform=ccrs.PlateCarree(), cmap='Reds', s=5,
            edgecolor='none')
        cbar = fig.colorbar(ax.collections[-1], orientation='horizontal')
        cbar.set_label('Significantly differing years')
        pdf.savefig(fig, bbox_inches='tight')
    pdf.close()
    if self.task_config.close:
        plt.close('all')
@classmethod
def _modify_parser(cls, parser):
    """Set up the KS specific command line arguments"""
    parser.setup_args(default_ks_config)
    # NOTE: skips QuantileEvaluation._modify_parser on purpose (note
    # ``super(QuantileEvaluation, cls)``) to avoid the quantiles argument
    parser, setup_grp, run_grp = super(
        QuantileEvaluation, cls)._modify_parser(parser)
    parser.update_arg('no_rounding', short='nr', group=run_grp)
    parser.update_arg('names', short='n', group=setup_grp,
                      nargs='+', metavar='variable',
                      choices=list(cls.names))
    parser.update_arg('transform_wind', short='tw', group=setup_grp)
    # the generic project/output arguments do not apply to this task
    parser.pop_arg('nc_output', None)
    parser.pop_arg('project_output', None)
    parser.pop_arg('new_project', None)
    parser.pop_arg('project', None)
    return parser, setup_grp, run_grp
# Configuration of the simulation quality task.
# NOTE: the string below is runtime data for the docstring substitution.
_QualityConfig = namedtuple('_QualityConfig', ['quantiles'])
_QualityConfig = utils.append_doc(_QualityConfig, docstrings.get_sections("""
Parameters
----------
quantiles: list of floats
    The quantiles to use for the quality analysis""", '_QualityConfig'))

QualityConfig = utils.enhanced_config(_QualityConfig, 'QualityConfig')


@docstrings.dedent
def default_quality_config(
        quantiles=None, *args, **kwargs):
    """
    The default configuration for :class:`SimulationQuality` instances.
    See also the :attr:`SimulationQuality.default_config` attribute
    Parameters
    ----------
    %(QualityConfig.parameters)s"""
    # remaining positional/keyword arguments fill the base task config
    return QualityConfig(quantiles, *utils.default_config(*args, **kwargs))
class SimulationQuality(Evaluator):
    """Evaluator to provide one value characterizing the quality of the
    experiment
    The applied metric is the mean of
    .. math::
        m = \\left<\{\\left<\{R^2_q\}_{q\in Q}\\right>,
        \\left<\{1 - |1 - a_q|\}_{q\in Q}\\right>,
        \{ks\}\}\\right>
    where :math:`\\left<\\right>` denotes the mean of the enclosed set,
    :math:`q\in Q` are the quantiles from the quantile evaluation,
    :math:`R^2_q` the corresponding coefficient of determination and
    :math:`a_q` the slope of quantile :math:`q`. :math:`ks` denotes the
    fraction of stations that do not differ significantly from the observations
    according to the ks test.
    In other words, this quality estimate is the mean of the
    1. coefficients of determination
    2. the deviation from the ideal slope (:math:`a_q == 1`) and
    3. the fraction of stations that do not differ significantly
    Hence, a value of 1 mean high quality, a value of 0 low quality"""

    name = 'quality'

    summary = 'Estimate simulation quality using ks and quantile evaluation'

    has_run = True

    @classmethod
    def _modify_parser(cls, parser):
        # this task only takes the quantiles option (no setup/run groups)
        parser.add_argument(
            '-q', '--quantiles', metavar='q1[,q1[,q2[,...]]]',
            help="The quantiles to use for the quality analysis",
            type=lambda s: list(map(float, s.split(','))))
        return parser, None, None

    @property
    def default_config(self):
        # extend the base default configuration by the quality settings
        return default_quality_config()._replace(
            **super(SimulationQuality, self).default_config._asdict())
def setup_from_scratch(self):
    """Only sets an empty dataframe"""
    # the actual information is computed in :meth:`run` from the stored
    # results of the 'quants' and 'ks' tasks
    self.data = pd.DataFrame([])

def run(self, info):
    """Estimate the simulation quality from the quants and ks results

    Parameters
    ----------
    info: dict
        The dictionary to fill with one entry per variable containing
        ``rsquared``, ``slope``, ``ks`` and the combined ``quality``

    Raises
    ------
    ValueError
        If the 'quants' or the 'ks' task has not been run before"""
    logger = self.logger
    logger.info('Calculating %s evaluation', self.name)
    quants_info = self.config['evaluation'].get('quants')
    ks_info = self.config['evaluation'].get('ks')
    missing = []
    if quants_info is None:
        missing.append('quants task')
    if ks_info is None:
        missing.append('ks task')
    if missing:
        raise ValueError("%s requires that %s has been run before!" % (
            self.name, ' and '.join(missing)))
    quantiles = self.task_config.quantiles
    if quantiles is None:
        quantiles = slice(None)  # use all quantiles
    possible_names = {'wind', 'prcp', 'tmin', 'tmax', 'mean_cloud',
                      'cloud'}
    for v, v_ks in ks_info.items():
        # skip non-variable entries (e.g. 'plot_file') and variables
        # without quantile results
        if v not in quants_info or v not in possible_names:
            continue
        #: Dataframe with intercept, rsquared and slope on index and
        #: quantiles as columns
        df = pd.DataFrame(quants_info[v]).loc[:, quantiles]
        try:
            # the 'All' column belongs to the full-data plot, not to a
            # specific quantile
            del df['All']
        except KeyError:
            pass
        # mean deviation of the regression slope from the ideal slope of 1
        slope = float((1 - np.abs(1 - df.loc['slope'].values)).mean())
        rsquared = float(df.loc['rsquared'].values.mean())
        info[v] = OrderedDict([
            ('rsquared', rsquared), ('slope', slope), ('ks', v_ks / 100.),
            ('quality', float(np.mean([rsquared, slope, v_ks / 100.])))])
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug('Simulation quality:')
        for name, val in info.items():
            logger.debug('    %s: %6.3f' % (name, val['quality']))