Source code for abcpy.output

import pickle
import warnings

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde

from abcpy.utils import wass_dist


[docs]class Journal:
    """The journal holds information created by the run of inference schemes.

    It can be configured to even hold intermediate.

    Attributes
    ----------
    parameters : numpy.array
        a nxpxt matrix
    weights : numpy.array
        a nxt matrix
    opt_value : numpy.array
        nxp matrix containing for each parameter the evaluated objective function for every time step
    configuration : Python dictionary
        dictionary containing the schemes configuration parameters

    """

[docs]    def __init__(self, type):
        """
        Initializes a new output journal of given type.

        Parameters
        ----------
        type: int (identifying type)
            type=0 only logs final parametersa and weight (production use);
            type=1 logs all generated information (reproducibily use).
        """

        self.accepted_parameters = []
        self.names_and_parameters = []
        self.weights = []
        self.ESS = []
        self.distances = []
        self.opt_values = []
        self.configuration = {}

        if type not in [0, 1]:
            raise ValueError("Parameter type has not valid value.")
        else:
            self._type = type

        self.number_of_simulations = []

[docs]    @classmethod
    def fromFile(cls, filename):
        """This method reads a saved journal from disk an returns it as an object.

        Notes
        -----
        To store a journal use Journal.save(filename).

        Parameters
        ----------
        filename: string
            The string representing the location of a file
            
        Returns
        -------
        abcpy.output.Journal
            The journal object serialized in <filename>


        Example
        --------
        >>> jnl = Journal.fromFile('example_output.jnl')

        """

        with open(filename, 'rb') as input:
            journal = pickle.load(input)
        return journal

[docs]    def add_user_parameters(self, names_and_params):
        """
        Saves the provided parameters and names of the probabilistic models corresponding to them. If type==0, old
        parameters get overwritten.

        Parameters
        ----------
        names_and_params: list
            Each entry is a tuple, where the first entry is the name of the probabilistic model, and the second entry is
            the parameters associated with this model.
        """
        if self._type == 0:
            self.names_and_parameters = [dict(names_and_params)]
        else:
            self.names_and_parameters.append(dict(names_and_params))

[docs]    def add_accepted_parameters(self, accepted_parameters):
        """
        FIX THIS!
        Saves provided accepted parameters by appending them to the journal. If type==0, old accepted parameters get
        overwritten.

        Parameters
        ----------
        accepted_parameters: list
        """

        if self._type == 0:
            self.accepted_parameters = [accepted_parameters]

        if self._type == 1:
            self.accepted_parameters.append(accepted_parameters)

[docs]    def add_weights(self, weights):
        """
        Saves provided weights by appending them to the journal. If type==0, old weights get overwritten.

        Parameters
        ----------
        weights: numpy.array
            vector containing n weigths
        """

        if self._type == 0:
            self.weights = [weights]

        if self._type == 1:
            self.weights.append(weights)

[docs]    def add_distances(self, distances):
        """
        Saves provided distances by appending them to the journal. If type==0, old weights get overwritten.

        Parameters
        ----------
        distances: numpy.array
            vector containing n distances
        """

        if self._type == 0:
            self.distances = [distances]

        if self._type == 1:
            self.distances.append(distances)

[docs]    def add_opt_values(self, opt_values):
        """
        Saves provided values of the evaluation of the schemes objective function. If type==0, old values get
        overwritten

        Parameters
        ----------
        opt_value: numpy.array
            vector containing n evaluations of the schemes objective function
        """

        if self._type == 0:
            self.opt_values = [opt_values]

        if self._type == 1:
            self.opt_values.append(opt_values)

[docs]    def add_ESS_estimate(self, weights):
        """
        Computes and saves Effective Sample Size (ESS) estimate starting from provided weights; ESS is estimated as sum
        the inverse of sum of squared normalized weights. The provided weights are normalized before computing ESS.
        If type==0, old ESS estimate gets overwritten.

        Parameters
        ----------
        weights: numpy.array
            vector containing n weigths
        """

        # normalize weights:
        normalized_weights = weights / np.sum(weights)

        ESS = 1 / sum(sum(pow(normalized_weights, 2)))

        if self._type == 0:
            self.ESS = [ESS]

        if self._type == 1:
            self.ESS.append(ESS)

[docs]    def save(self, filename):
        """
        Stores the journal to disk.

        Parameters
        ----------
        filename: string
            the location of the file to store the current object to.
        """

        with open(filename, 'wb') as output:
            pickle.dump(self, output, -1)

[docs]    def get_parameters(self, iteration=None):
        """
        Returns the parameters from a sampling scheme.

        For intermediate results, pass the iteration.

        Parameters
        ----------
        iteration: int
            specify the iteration for which to return parameters

        Returns
        -------
        names_and_parameters: dictionary
            Samples from the specified iteration (last, if not specified) returned as a disctionary with names of the
            random variables
        """

        if iteration is None:
            endp = len(self.names_and_parameters) - 1
            params = self.names_and_parameters[endp]
            return params
        else:
            return self.names_and_parameters[iteration]

[docs]    def get_accepted_parameters(self, iteration=None):
        """
        Returns the accepted parameters from a sampling scheme.

        For intermediate results, pass the iteration.

        Parameters
        ----------
        iteration: int
            specify the iteration for which to return parameters

        Returns
        -------
        accepted_parameters: dictionary
            Samples from the specified iteration (last, if not specified) returned as a disctionary with names of the
            random variables
        """

        if iteration is None:
            return self.accepted_parameters[-1]

        else:
            return self.accepted_parameters[iteration]

[docs]    def get_weights(self, iteration=None):
        """
        Returns the weights from a sampling scheme.

        For intermediate results, pass the iteration.

        Parameters
        ----------
        iteration: int
            specify the iteration for which to return weights
        """

        if iteration is None:
            end = len(self.weights) - 1
            return self.weights[end]
        else:
            return self.weights[iteration]

[docs]    def get_distances(self, iteration=None):
        """
        Returns the distances from a sampling scheme.

        For intermediate results, pass the iteration.

        Parameters
        ----------
        iteration: int
            specify the iteration for which to return distances
        """

        if iteration is None:
            end = len(self.distances) - 1
            return self.distances[end]
        else:
            return self.distances[iteration]

[docs]    def get_ESS_estimates(self, iteration=None):
        """
        Returns the estimate of Effective Sample Size (ESS) from a sampling scheme.

        For intermediate results, pass the iteration.

        Parameters
        ----------
        iteration: int
            specify the iteration for which to return ESS
        """
        if iteration is None:
            iteration = -1

        return self.ESS[iteration]

[docs]    def posterior_mean(self, iteration=None):
        """
        Computes posterior mean from the samples drawn from posterior distribution

        For intermediate results, pass the iteration.

        Parameters
        ----------
        iteration: int
            specify the iteration for which to return posterior mean

        Returns
        -------
        posterior mean: dictionary
            Posterior mean from the specified iteration (last, if not specified) returned as a disctionary with names of
            the random variables
        """

        if iteration is None:
            endp = len(self.names_and_parameters) - 1
            params = self.names_and_parameters[endp]
            weights = self.weights[endp]
        else:
            params = self.names_and_parameters[iteration]
            weights = self.weights[iteration]

        return_value = []
        for keyind in params.keys():
            return_value.append((keyind,
                                 np.average(np.array(params[keyind]).squeeze(), weights=weights.reshape(len(weights), ),
                                            axis=0)))

        return dict(return_value)

[docs]    def posterior_cov(self, iteration=None):
        """
        Computes posterior covariance from the samples drawn from posterior distribution

        Returns
        -------
        np.ndarray
            posterior covariance
        dic
            order of the variables in the covariance matrix
        """

        if iteration is None:
            endp = len(self.names_and_parameters) - 1
            params = self.names_and_parameters[endp]
            weights = self.weights[endp]
        else:
            params = self.names_and_parameters[iteration]
            weights = self.weights[iteration]

        joined_params = []
        for keyind in params.keys():
            joined_params.append(np.array(params[keyind]).squeeze(axis=1))

        return np.cov(np.transpose(np.hstack(joined_params)), aweights=weights.reshape(len(weights), )), params.keys()

[docs]    def posterior_histogram(self, iteration=None, n_bins=10):
        """
        Computes a weighted histogram of multivariate posterior samples
        and returns histogram H and a list of p arrays describing the bin
        edges for each dimension.
        
        Returns
        -------
        python list 
            containing two elements (H = np.ndarray, edges = list of p arrays)
        """
        if iteration is None:
            endp = len(self.names_and_parameters) - 1
            params = self.names_and_parameters[endp]
            weights = self.weights[endp]
        else:
            params = self.names_and_parameters[iteration]
            weights = self.weights[iteration]

        joined_params = []
        for keyind in params.keys():
            joined_params.append(np.array(params[keyind]).squeeze(axis=1))

        H, edges = np.histogramdd(np.hstack(joined_params), bins=n_bins, weights=weights.reshape(len(weights), ))
        return [H, edges]

    # TODO this does not work for multidimensional parameters
[docs]    def plot_posterior_distr(self, parameters_to_show=None, ranges_parameters=None, iteration=None, show_samples=None,
                             single_marginals_only=False, double_marginals_only=False, write_posterior_mean=True,
                             show_posterior_mean=True, true_parameter_values=None, contour_levels=14, figsize=None,
                             show_density_values=True, bw_method=None, path_to_save=None):
        """
        Produces a visualization of the posterior distribution of the parameters of the model.

        A Gaussian kernel density estimate (KDE) is used to approximate the density starting from the sampled
        parameters. Specifically, it produces a scatterplot matrix, where the diagonal contains single parameter
        marginals, while the off diagonal elements contain the contourplot for the paired marginals for each possible
        pair of parameters.

        This visualization is not satisfactory for parameters that take on discrete values, specially in the case where
        the number of values it can assume are small, as it obtains the posterior by KDE in this case as well. We need
        to improve on that, considering histograms.

        Parameters
        ----------
        parameters_to_show : list, optional
            a list of the parameters for which you want to plot the posterior distribution. For each parameter, you need
            to provide the name string as it was defined in the model. For instance,
            `jrnl.plot_posterior_distr(parameters_to_show=["mu"])` will only plot the posterior distribution for the
            parameter named "mu" in the list of parameters.
            If `None`, then all parameters will be displayed.
        ranges_parameters : Python dictionary, optional
            a dictionary in which you can optionally provide the plotting range for the parameters that you chose to
            display. You can use this even if `parameters_to_show=None`. The dictionary key is the name of parameter,
            and the range needs to be an array-like of the form [lower_limit, upper_limit]. For instance:
            {"theta" : [0,2]} specifies that you want to plot the posterior distribution for the parameter "theta" in
            the range [0,2].
        iteration : int, optional
            specify the iteration for which to plot the posterior distribution, in the case of a sequential algorithm.
            If `None`, then the last iteration will be used.
        show_samples : boolean, optional
            specifies if you want to show the posterior samples overimposed to the contourplots of the posterior
            distribution. If `None`, the default behaviour is the following: if the posterior samples are associated
            with importance weights, then the samples are not showed (in fact, the KDE for the posterior distribution
            takes into account the weights, and showing the samples may be misleading). Otherwise, if the posterior
            samples are not associated with weights, they are displayed by default.
        single_marginals_only : boolean, optional
            if `True`, the method does not show the paired marginals but only the single parameter marginals;
            otherwise, it shows the paired marginals as well. Default to `False`.
        double_marginals_only : boolean, optional
            if `True`, the method shows the contour plot for the marginal posterior for each possible pair of parameters
            in the parameters that have to be shown (all parameters of the model if `parameters_to_show` is None).
            Default to `False`.
        write_posterior_mean : boolean, optional
            Whether to write or not the posterior mean on the single marginal plots. Default to `True`.
        show_posterior_mean: boolean, optional
            Whether to display a line corresponding to the posterior mean value in the plot. Default to `True`.
        true_parameter_values: array-like, optional
            you can provide here the true values of the parameters, if known, and that will be displayed in the
            posterior plot. It has to be an array-like of the same length of `parameters_to_show` (if that is provided),
            otherwise of length equal to the number of parameters in the model, and with entries corresponding to the
            true value of that parameter (in case `parameters_to_show` is not provided, the order of the parameters is
            the same order the model `forward_simulate` step takes.
        contour_levels: integer, optional
            The number of levels to be used in the contour plots. Default to 14.
        figsize: float, optional
            Denotes the size (in inches) of the smaller dimension of the plot; the other dimension is automatically
            determined. If None, then figsize is chosen automatically. Default to `None`.
        show_density_values: boolean, optional
            If `True`, the method displays the value of the density at each contour level in the contour plot. Default
            to `True`.
        bw_method: str, scalar or callable, optional
            The parameter of the `scipy.stats.gaussian_kde` defining the method used to calculate the bandwith in the
            Gaussian kernel density estimator. Please refer to the documentation therein for details. Default to `None`.
        path_to_save : string, optional
            if provided, save the figure in png format in the specified path.

        Returns
        -------
        tuple
            a tuple containing the matplotlib "fig, axes" objects defining the plot. Can be useful for further
            modifications.
        """

        # if you do not pass any name, then we plot all parameters.
        # you can get the list of parameters from the journal file as:
        if parameters_to_show is None:
            parameters_to_show = list(self.names_and_parameters[0].keys())
        else:
            for param_name in parameters_to_show:
                if param_name not in self.names_and_parameters[0].keys():
                    raise KeyError("Parameter '{}' is not a parameter of the model.".format(param_name))

        if single_marginals_only and double_marginals_only:
            raise RuntimeError("You cannot ask to produce only plots for single marginals and double marginals only at "
                               "the same time. Either or both of `single_marginal_only` or `double_marginal_only` have "
                               "to be False.")

        if len(parameters_to_show) == 1 and double_marginals_only:
            raise RuntimeError("It is not possible to plot a bivariate marginal if only one parameter is in the "
                               "`parameters_to_show` list")

        if true_parameter_values is not None:
            if len(true_parameter_values) != len(parameters_to_show):
                raise RuntimeError("You need to provide values for all the parameters to be shown.")

        meanpost = np.array([self.posterior_mean(iteration=iteration)[x] for x in parameters_to_show])

        post_samples_dict = {name: np.concatenate(self.get_parameters(iteration)[name]) for name in parameters_to_show}

        weights = np.concatenate(self.get_weights(iteration))
        all_weights_are_1 = all([weights[i] == 1 for i in range(len(weights))])

        if show_samples is None:
            # by default, we display samples if the weights are all 1. Otherwise, we choose not to display them by
            # default as they may be misleading
            if all_weights_are_1:
                show_samples = True
            else:
                show_samples = False

        elif not all_weights_are_1 and show_samples is True:
            # in this case, the user required to show the sample points but importance weights are present. Then warn
            # the user about this
            warnings.warn(
                "You requested to show the sample points; however, note that the algorithm generated posterior samples "
                "associated with weights, and the kernel density estimate used to produce the density plots consider "
                "those. Therefore, the resulting plot may be misleading. ", RuntimeWarning)

        data = np.hstack(list(post_samples_dict.values()))
        datat = data.transpose()

        # now set up the range of parameters. If the range for a given parameter is not given, use the min and max
        # value of posterior samples.

        if ranges_parameters is None:
            ranges_parameters = {}

        # check if the user provided range for some parameters that are not requested to be displayed:
        if not all([key in post_samples_dict for key in ranges_parameters.keys()]):
            warnings.warn("You provided range for a parameter that was not requested to be displayed (or it may be "
                          "that you misspelled something). This will be ignored.", RuntimeWarning)

        for name in parameters_to_show:
            if name in ranges_parameters:
                # then check the format is correct
                if isinstance(ranges_parameters[name], list):
                    if not (len(ranges_parameters[name]) == 2 and isinstance(ranges_parameters[name][0], (int, float))):
                        raise TypeError(
                            "The range for the parameter {} should be an array-like with two numbers.".format(name))
                elif isinstance(ranges_parameters[name], np.ndarray):
                    if not (ranges_parameters[name].shape == 2 or ranges_parameters[name].shape == (2, 1)):
                        raise TypeError(
                            "The range for the parameter {} should be an array-like with two numbers.".format(name))
            else:
                # add as new range the min and max values. We add also a 5% of the whole range on both sides to have
                # better visualization
                difference = np.max(post_samples_dict[name]) - np.min(post_samples_dict[name])
                ranges_parameters[name] = [np.min(post_samples_dict[name]) - 0.05 * difference,
                                           np.max(post_samples_dict[name]) + 0.05 * difference]

        def write_post_mean_function(ax, post_mean, size):
            ax.text(0.15, 0.06, r"Post. mean = {:.2f}".format(post_mean), size=size,
                    transform=ax.transAxes)

        def scatterplot_matrix(data, meanpost, names, single_marginals_only=False, **kwargs):
            """
            Plots a scatterplot matrix of subplots.  Each row of "data" is plotted
            against other rows, resulting in a nrows by nrows grid of subplots with the
            diagonal subplots labeled with "parameters_to_show".  Additional keyword arguments are
            passed on to matplotlib's "plot" command. Returns the matplotlib figure
            object containg the subplot grid.
            """
            if figsize is None:
                figsize_actual = 4 * len(names)
            else:
                figsize_actual = figsize
            numvars, numdata = data.shape
            if single_marginals_only:
                fig, axes = plt.subplots(nrows=1, ncols=numvars, figsize=(figsize_actual, figsize_actual / len(names)))
            else:
                fig, axes = plt.subplots(nrows=numvars, ncols=numvars, figsize=(figsize_actual, figsize_actual))
                fig.subplots_adjust(hspace=0.08, wspace=0.08)

            # if we have to plot 1 single parameter value, envelop the ax in an array, so that it gives no troubles:
            if len(names) == 1:
                if not single_marginals_only:
                    axes = np.array([[axes]])
                else:
                    axes = np.array([axes])

            if not single_marginals_only:
                if len(names) > 1:
                    for ax in axes.flat:
                        # Hide all ticks and labels
                        ax.xaxis.set_visible(False)
                        ax.yaxis.set_visible(False)

                        # Set up ticks only on one side for the "edge" subplots...
                        if ax.is_first_col():
                            ax.yaxis.set_ticks_position('left')
                        if ax.is_last_col():
                            ax.yaxis.set_ticks_position('right')
                        if ax.is_first_row():
                            ax.xaxis.set_ticks_position('top')
                        if ax.is_last_row():
                            ax.xaxis.set_ticks_position('bottom')

                # off diagonal plots:
                for x in range(numvars):
                    for y in range(numvars):
                        if x is not y:
                            # this plots the posterior samples
                            if show_samples:
                                axes[x, y].plot(data[y], data[x], '.k', markersize='1')

                            xmin, xmax = ranges_parameters[names[y]]
                            ymin, ymax = ranges_parameters[names[x]]

                            # then you need to perform the KDE and plot the contour of posterior
                            X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
                            positions = np.vstack([X.ravel(), Y.ravel()])
                            values = np.vstack([data[y].T, data[x].T])
                            kernel = gaussian_kde(values, weights=weights, bw_method=bw_method)
                            Z = np.reshape(kernel(positions).T, X.shape)
                            # axes[x, y].plot(meanpost[y], meanpost[x], 'r+', markersize='10')
                            if show_posterior_mean:
                                axes[x, y].plot([xmin, xmax], [meanpost[x], meanpost[x]], "red", markersize='20',
                                                linestyle='solid')
                                axes[x, y].plot([meanpost[y], meanpost[y]], [ymin, ymax], "red", markersize='20',
                                                linestyle='solid')
                            if true_parameter_values is not None:
                                axes[x, y].plot([xmin, xmax], [true_parameter_values[x], true_parameter_values[x]],
                                                "green",
                                                markersize='20', linestyle='dashed')
                                axes[x, y].plot([true_parameter_values[y], true_parameter_values[y]], [ymin, ymax],
                                                "green",
                                                markersize='20', linestyle='dashed')

                            CS = axes[x, y].contour(X, Y, Z, contour_levels, linestyles='solid')
                            if show_density_values:
                                axes[x, y].clabel(CS, fontsize=figsize_actual / len(names) * 2.25, inline=1)
                            axes[x, y].set_xlim([xmin, xmax])
                            axes[x, y].set_ylim([ymin, ymax])

            # diagonal plots

            if not single_marginals_only:
                diagonal_axes = np.array([axes[i, i] for i in range(len(axes))])
            else:
                diagonal_axes = axes
            label_size = figsize_actual / len(names) * 4
            title_size = figsize_actual / len(names) * 4.25
            post_mean_size = figsize_actual / len(names) * 4
            ticks_size = figsize_actual / len(names) * 3

            for i, label in enumerate(names):
                xmin, xmax = ranges_parameters[names[i]]
                positions = np.linspace(xmin, xmax, 100)
                gaussian_kernel = gaussian_kde(data[i], weights=weights, bw_method=bw_method)
                diagonal_axes[i].plot(positions, gaussian_kernel(positions), color='k', linestyle='solid', lw="1",
                                      alpha=1, label="Density")
                values = gaussian_kernel(positions)
                # axes[i, i].plot([positions[np.argmax(values)], positions[np.argmax(values)]], [0, np.max(values)])
                if show_posterior_mean:
                    diagonal_axes[i].plot([meanpost[i], meanpost[i]], [0, 1.1 * np.max(values)], "red", alpha=1,
                                          label="Posterior mean")
                if true_parameter_values is not None:
                    diagonal_axes[i].plot([true_parameter_values[i], true_parameter_values[i]],
                                          [0, 1.1 * np.max(values)], "green",
                                          alpha=1, label="True value")
                if write_posterior_mean:
                    write_post_mean_function(diagonal_axes[i], meanpost[i], size=post_mean_size)
                diagonal_axes[i].set_xlim([xmin, xmax])
                diagonal_axes[i].set_ylim([0, 1.1 * np.max(values)])

            # labels and ticks:
            if not single_marginals_only:
                for j, label in enumerate(names):
                    axes[0, j].set_title(label, size=title_size)

                    if len(names) > 1:
                        axes[j, 0].set_ylabel(label, size=label_size)
                        axes[-1, j].set_xlabel(label, size=label_size)
                    else:
                        axes[j, 0].set_ylabel("Density", size=label_size)

                    axes[j, 0].tick_params(axis='both', which='major', labelsize=ticks_size)
                    axes[j, 0].ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
                    axes[j, 0].yaxis.offsetText.set_fontsize(ticks_size)
                    axes[j, 0].yaxis.set_visible(True)

                    axes[-1, j].tick_params(axis='both', which='major', labelsize=ticks_size)
                    axes[-1, j].ticklabel_format(style='sci', axis='x')  # , scilimits=(0, 0))
                    axes[-1, j].xaxis.offsetText.set_fontsize(ticks_size)
                    axes[-1, j].xaxis.set_visible(True)
                    axes[j, -1].tick_params(axis='both', which='major', labelsize=ticks_size)
                    axes[j, -1].yaxis.offsetText.set_fontsize(ticks_size)
                    axes[j, -1].ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
                    axes[j, -1].yaxis.set_visible(True)

            else:
                for j, label in enumerate(names):
                    axes[j].set_title(label, size=figsize_actual / len(names) * 4.25)
                axes[0].set_ylabel("Density")

            return fig, axes

        def double_marginals_plot(data, meanpost, names, **kwargs):
            """
            Plots contour plots of all possible pairs of samples. Additional keyword arguments are
            passed on to matplotlib's "plot" command. Returns the matplotlib figure
            object containg the subplot grid.
            """
            numvars, numdata = data.shape
            numplots = np.int(numvars * (numvars - 1) / 2)
            if figsize is None:
                figsize_actual = 4 * numplots
            else:
                figsize_actual = figsize

            fig, axes = plt.subplots(nrows=1, ncols=numplots, figsize=(figsize_actual, figsize_actual / numplots))

            if numplots == 1:  # in this case you will only have one plot. Envelop it to avoid problems.
                axes = [axes]

            # if we have to plot 1 single parameter value, envelop the ax in an array, so that it gives no troubles:

            ax_counter = 0

            for x in range(numvars):
                for y in range(0, x):
                    # this plots the posterior samples
                    if show_samples:
                        axes[ax_counter].plot(data[y], data[x], '.k', markersize='1')

                    xmin, xmax = ranges_parameters[names[y]]
                    ymin, ymax = ranges_parameters[names[x]]

                    # then you need to perform the KDE and plot the contour of posterior
                    X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
                    positions = np.vstack([X.ravel(), Y.ravel()])
                    values = np.vstack([data[y].T, data[x].T])
                    kernel = gaussian_kde(values, weights=weights, bw_method=bw_method)
                    Z = np.reshape(kernel(positions).T, X.shape)
                    # axes[x, y].plot(meanpost[y], meanpost[x], 'r+', markersize='10')
                    if show_posterior_mean:
                        axes[ax_counter].plot([xmin, xmax], [meanpost[x], meanpost[x]], "red", markersize='20',
                                              linestyle='solid')
                        axes[ax_counter].plot([meanpost[y], meanpost[y]], [ymin, ymax], "red", markersize='20',
                                              linestyle='solid')
                    if true_parameter_values is not None:
                        axes[ax_counter].plot([xmin, xmax], [true_parameter_values[x], true_parameter_values[x]],
                                              "green",
                                              markersize='20', linestyle='dashed')
                        axes[ax_counter].plot([true_parameter_values[y], true_parameter_values[y]], [ymin, ymax],
                                              "green",
                                              markersize='20', linestyle='dashed')

                    CS = axes[ax_counter].contour(X, Y, Z, contour_levels, linestyles='solid')
                    if show_density_values:
                        axes[ax_counter].clabel(CS, fontsize=figsize_actual / numplots * 2.25, inline=1)
                    axes[ax_counter].set_xlim([xmin, xmax])
                    axes[ax_counter].set_ylim([ymin, ymax])

                    label_size = figsize_actual / numplots * 4
                    ticks_size = figsize_actual / numplots * 3
                    axes[ax_counter].set_ylabel(names[x], size=label_size)
                    axes[ax_counter].set_xlabel(names[y], size=label_size)

                    axes[ax_counter].tick_params(axis='both', which='major', labelsize=ticks_size)
                    axes[ax_counter].ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
                    axes[ax_counter].yaxis.offsetText.set_fontsize(ticks_size)
                    axes[ax_counter].yaxis.set_visible(True)
                    axes[ax_counter].ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
                    axes[ax_counter].yaxis.offsetText.set_fontsize(ticks_size)
                    axes[ax_counter].xaxis.set_visible(True)

                    ax_counter += 1

            return fig, axes

        if not double_marginals_only:
            fig, axes = scatterplot_matrix(datat, meanpost, parameters_to_show,
                                           single_marginals_only=single_marginals_only)
        else:
            fig, axes = double_marginals_plot(datat, meanpost, parameters_to_show)

        if path_to_save is not None:
            plt.savefig(path_to_save, bbox_inches="tight")

        return fig, axes

[docs]    def plot_ESS(self):
        """
        Produces a plot showing the evolution of the estimated ESS (from sample weights) across iterations; it also
        shows as a baseline the maximum possible ESS which can be achieved, corresponding to the case of independent
        samples, which is equal to the total number of samples.

        Returns
        -------
        tuple
            a tuple containing the matplotlib "fig, ax" objects defining the plot. Can be useful for further
            modifications.
        """

        if self._type == 0:
            raise RuntimeError("ESS plot is available only if the journal was created with full_output=1; otherwise, "
                               "ESS is saved only for the last iteration.")

        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))

        ax.scatter(np.arange(len(self.ESS)) + 1, self.ESS, label="Estimated ESS")
        ax.set_xlabel("Iteration")
        ax.set_ylabel("ESS")
        # put horizontal line at the largest value ESS can get:
        ax.axhline(len(self.weights[-1]), label="Max theoretical value", ls="dashed")
        ax.legend()
        ax.set_xticks(np.arange(len(self.ESS)) + 1)

        return fig, ax

[docs]    def Wass_convergence_plot(self, num_iter_max=1e8, **kwargs):
        """
        Computes the Wasserstein distance between the empirical distribution at subsequent iterations to see whether
        the approximation of the posterior is converging. Then, it produces a plot displaying that. The approximation of
        the posterior is converging if the Wass distance between subsequent iterations decreases with iteration and gets
        close to 0, as that means there is no evolution of the posterior samples. The Wasserstein distance is estimated
        using the POT library).

        This method only works when the Journal stores results from all the iterations (ie it was generated with
        full_output=1). Moreover, this only works when all the parameters in the model are univariate.

        Parameters
        ----------
        num_iter_max : integer, optional
            The maximum number of iterations in the linear programming algorithm to estimate the Wasserstein distance.
            Default to 1e8.
        kwargs
            Additional arguments passed to the wass_dist calculation function.

        Returns
        -------
        tuple
            a tuple containing the matplotlib "fig, ax" objects defining the plot and the list of the computed
            Wasserstein distances. "fig" and "ax" can be useful for further modifying the plot.
        """
        if self._type == 0:
            raise RuntimeError("Wasserstein distance convergence test is available only if the journal was created with"
                               " full_output=1; in fact, this works by comparing the saved empirical distribution at "
                               "different iterations, and the latter is saved only if full_output=1.")

        if len(self.weights) == 1:
            raise RuntimeError("Only a set of posterior samples has been saved, corresponding to either running a "
                               "sequential algorithm for one iteration only or to using non-sequential algorithms (as"
                               "RejectionABC). Wasserstein distance convergence test requires at least samples from at "
                               "least 2 iterations.")
        if self.get_accepted_parameters().dtype == "object":
            raise RuntimeError("This error was probably raised due to the parameters in your model having different "
                               "dimenions (and specifically not being univariate). For now, Wasserstein distance"
                               " convergence test is available only if the different parameters have the same "
                               "dimension.")

        wass_dist_lists = [None] * (len(self.weights) - 1)

        for i in range(len(self.weights) - 1):
            params_1 = self.get_accepted_parameters(i)
            params_2 = self.get_accepted_parameters(i + 1)
            weights_1 = self.get_weights(i)
            weights_2 = self.get_weights(i + 1)
            if len(params_1.shape) == 1:  # we assume that the dimension of parameters is 1
                params_1 = params_1.reshape(-1, 1)
            else:
                params_1 = params_1.reshape(params_1.shape[0], -1)
            if len(params_2.shape) == 1:  # we assume that the dimension of parameters is 1
                params_2 = params_2.reshape(-1, 1)
            else:
                params_2 = params_2.reshape(params_2.shape[0], -1)

            if len(weights_1.shape) == 2:  # it can be that the weights have shape (-1,1); reshape therefore
                weights_1 = weights_1.reshape(-1)
            if len(weights_2.shape) == 2:  # it can be that the weights have shape (-1,1); reshape therefore
                weights_2 = weights_2.reshape(-1)

            wass_dist_lists[i] = wass_dist(samples_1=params_1, samples_2=params_2, weights_1=weights_1,
                                           weights_2=weights_2, num_iter_max=num_iter_max, **kwargs)

        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))
        ax.scatter(np.arange(len(self.weights) - 1) + 1, wass_dist_lists,
                   label="Estimated Wass. distance\nbetween iteration i and i+1")
        ax.set_xlabel("Iteration")
        ax.set_ylabel("Wasserstein distance")
        ax.legend()
        # put horizontal line at the largest value ESS can get:
        ax.set_xticks(np.arange(len(self.weights) - 1) + 1)

        return fig, ax, wass_dist_lists