Source code for abcpy.summaryselections

from abc import ABCMeta, abstractmethod

from abcpy.graphtools import GraphTools
from abcpy.acceptedparametersmanager import *

import numpy as np
from sklearn import linear_model


[docs]class Summaryselections(metaclass=ABCMeta):
    """This abstract base class defines a way to choose the summary statistics.
    """

[docs]    @abstractmethod
    def __init__(self, model, statistics_calc, backend, n_samples=1000, seed=None):
        """The constructor of a sub-class must accept a non-optional model, statistics calculator and 
        backend which are stored to self.model, self.statistics_calc and self.backend. Further it 
        accepts two optional parameters n_samples and seed defining the number of simulated dataset
        used for the pilot to decide the summary statistics and the integer to initialize the random 
        number generator.
    
        Parameters
        ----------
        model: abcpy.models.Model
            Model object that conforms to the Model class.
        statistics_cal: abcpy.statistics.Statistics
            Statistics object that conforms to the Statistics class.
        backend: abcpy.backends.Backend
            Backend object that conforms to the Backend class.
        n_samples: int, optional
            The number of (parameter, simulated data) tuple generated to learn the summary statistics in pilot step. 
            The default value is 1000.
        n_samples_per_param: int, optional
            Number of data points in each simulated data set.
        seed: integer, optional
            Optional initial seed for the random number generator. The default value is generated randomly.    
        """
        raise NotImplementedError

    def __getstate__(self):
        state = self.__dict__.copy()
        del state['backend']
        return state

[docs]    @abstractmethod
    def transformation(self, statistics):
        raise NotImplementedError


[docs]class Semiautomatic(Summaryselections, GraphTools):
    """This class implements the semi automatic summary statistics choice described in Fearnhead and Prangle [1].
    
    [1] Fearnhead P., Prangle D. 2012. Constructing summary statistics for approximate
    Bayesian computation: semi-automatic approximate Bayesian computation. J. Roy. Stat. Soc. B 74:419–474.    
    """

[docs]    def __init__(self, model, statistics_calc, backend, n_samples=1000, n_samples_per_param = 1, seed=None):
        self.model = model
        self.statistics_calc = statistics_calc
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        self.n_samples_per_param = n_samples_per_param

        # An object managing the bds objects
        self.accepted_parameters_manager = AcceptedParametersManager(self.model)
        self.accepted_parameters_manager.broadcast(self.backend, [])

        # main algorithm
        seed_arr = self.rng.randint(1, n_samples * n_samples, size=n_samples, dtype=np.int32)
        rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr])
        rng_pds = self.backend.parallelize(rng_arr)

        sample_parameters_statistics_pds = self.backend.map(self._sample_parameter_statistics, rng_pds)

        sample_parameters_and_statistics = self.backend.collect(sample_parameters_statistics_pds)
        sample_parameters, sample_statistics = [list(t) for t in zip(*sample_parameters_and_statistics)]
        sample_parameters = np.array(sample_parameters)
        sample_statistics = np.concatenate(sample_statistics)

        self.coefficients_learnt = np.zeros(shape=(sample_parameters.shape[1], sample_statistics.shape[1]))
        regr = linear_model.LinearRegression(fit_intercept=True)
        for ind in range(sample_parameters.shape[1]):
            regr.fit(sample_statistics, sample_parameters[:, ind])
            self.coefficients_learnt[ind, :] = regr.coef_

[docs]    def transformation(self, statistics):
        if not statistics.shape[1] == self.coefficients_learnt.shape[1]:
            raise ValueError('Mismatch in dimension of summary statistics')
        return np.dot(statistics, np.transpose(self.coefficients_learnt))

    def _sample_parameter_statistics(self, rng=np.random.RandomState()):
        """
        Samples a single model parameter and simulates from it until
        distance between simulated outcome and the observation is
        smaller than eplison.

        Parameters
        ----------
        seed: int
            value of a seed to be used for reseeding
        Returns
        -------
        np.array
            accepted parameter
        """

        self.sample_from_prior(rng=rng)
        parameter = self.get_parameters()
        y_sim = self.simulate(self.n_samples_per_param, rng=rng)
        if y_sim is not None:
            statistics = self.statistics_calc.statistics(y_sim)
        return (parameter, statistics)