Source code for vip_hci.fm.negfc_nested

#! /usr/bin/env python
"""
Module with functions for posterior sampling of the NEGFC parameters using
nested sampling (``nestle``).

.. [BAR13]
   | K. Barbary 2013
   | **nestle**
   | *GitHub repository*
   | `https://github.com/kbarbary/nestle
     <https://github.com/kbarbary/nestle>`_

.. [FER09]
   | Feroz et al. 2009
   | **MULTINEST: an efficient and robust Bayesian inference tool for cosmology
     and particle physics**
   | *MNRAS, Volume 398, Issue 4, pp. 1601-1614*
   | `https://arxiv.org/abs/0809.3437
     <https://arxiv.org/abs/0809.3437>`_

.. [MUK06]
   | Mukherjee et al. 2006
   | **A Nested Sampling Algorithm for Cosmological Model Selection**
   | *ApJL, Volume 638, Issue 2, pp. 51-54*
   | `https://arxiv.org/abs/astro-ph/0508461
     <https://arxiv.org/abs/astro-ph/0508461>`_

.. [SKI04]
   | Skilling 2004
   | **Bayesian Inference and Maximum Entropy Methods in Science and Engineering:
     24th International Workshop on Bayesian Inference and Maximum Entropy
     Methods in Science and Engineering**
   | *American Institute of Physics Conference Series, Volume 735, pp. 395-405*
   | `https://ui.adsabs.harvard.edu/abs/2004AIPC..735..395S
     <https://ui.adsabs.harvard.edu/abs/2004AIPC..735..395S>`_

"""


__author__ = 'Carlos Alberto Gomez Gonzalez, V. Christiaens',
__all__ = ['nested_negfc_sampling',
           'nested_sampling_results']

import nestle
import corner
import numpy as np
from matplotlib import pyplot as plt
from ..config import time_ini, timing
from .negfc_mcmc import lnlike, confidence, show_walk_plot
from .negfc_fmerit import get_mu_and_sigma
from ..psfsub import pca_annulus



[docs]
def nested_negfc_sampling(init, cube, angs, psfn, fwhm, mu_sigma=True,
                          sigma='spe+pho', fmerit='sum', annulus_width=8,
                          aperture_radius=1, ncomp=10, scaling=None,
                          svd_mode='lapack', cube_ref=None, collapse='median',
                          algo=pca_annulus, delta_rot=1, algo_options={},
                          weights=None, w=(5, 5, 200), method='single',
                          npoints=100, dlogz=0.1, decline_factor=None,
                          rstate=None, verbose=True):
    """ Runs a nested sampling algorithm with ``nestle`` [BAR13]_ in order to
    determine the position and the flux of the planet using the 'Negative Fake
    Companion' technique. The result of this procedure is a ``nestle`` object
    containing the samples from the posterior distributions of each of the 3
    parameters. It provides good results (value plus error bars) compared to a
    more CPU intensive Monte Carlo approach with the affine invariant sampler
    (``emcee``).

    Parameters
    ----------
    init: numpy ndarray or tuple of length 3
        The first guess for the position and flux of the planet, respectively.
        It serves for generating the bounds of the log prior function (uniform
        in a bounded interval).
    cube: 3d or 4d numpy ndarray
        Input ADI or ADI+IFS cube.
    angs: numpy.array
        The parallactic angle vector.
    psfn: numpy 2D or 3D array
        Normalised PSF template used for negative fake companion injection.
        The PSF must be centered and the flux in a 1xFWHM aperture must equal 1
        (use ``vip_hci.metrics.normalize_psf``).
        If the input cube is 3D and a 3D array is provided, the first dimension
        must match for both cubes. This can be useful if the star was
        unsaturated and conditions were variable.
        If the input cube is 4D, psfn must be either 3D or 4D. In either cases,
        the first dimension(s) must match those of the input cube.
    fwhm : float
        The FWHM in pixels.
    mu_sigma: tuple of 2 floats or bool, opt
        If set to None: not used, and falls back to original version of the
        algorithm, using fmerit [WER17]_.
        If a tuple of 2 elements: should be the mean and standard deviation of
        pixel intensities in an annulus centered on the location of the
        companion candidate, excluding the area directly adjacent to the CC.
        If set to anything else, but None/False/tuple: will compute said mean
        and standard deviation automatically.
        These values will then be used in the log-probability of the MCMC.
    sigma: str, opt
        Sets the type of noise to be included as sigma^2 in the log-probability
        expression. Choice between 'pho' for photon (Poisson) noise, 'spe' for
        residual (mostly whitened) speckle noise, or 'spe+pho' for both.
    force_rPA: bool, optional
        Whether to only search for optimal flux, provided (r,PA).
    fmerit : {'sum', 'stddev'}, string optional
        If mu_sigma is not provided nor set to True, this parameter determines
        which figure of merit to be used among the 2 possibilities implemented
        in [WER17]_. 'stddev' may work well for point like sources
        surrounded by extended signals.
    annulus_width: float, optional
        The width in pixel of the annulus on which the PCA is performed.
    aperture_radius: float, optional
        The radius of the circular aperture in FWHM.
    ncomp: int optional
        The number of principal components.
    scaling : {None, "temp-mean", spat-mean", "temp-standard",
        "spat-standard"}, None or str optional
        Pixel-wise scaling mode using ``sklearn.preprocessing.scale``
        function. If set to None, the input matrix is left untouched. Otherwise:

        * ``temp-mean``: temporal px-wise mean is subtracted.

        * ``spat-mean``: spatial mean is subtracted.

        * ``temp-standard``: temporal mean centering plus scaling pixel values
          to unit variance (temporally).

        * ``spat-standard``: spatial mean centering plus scaling pixel values
          to unit variance (spatially).

        DISCLAIMER: Using ``temp-mean`` or ``temp-standard`` scaling can improve
        the speckle subtraction for ASDI or (A)RDI reductions. Nonetheless, this
        involves a sort of c-ADI preprocessing, which (i) can be dangerous for
        datasets with low amount of rotation (strong self-subtraction), and (ii)
        should probably be referred to as ARDI (i.e. not RDI stricto sensu).
    svd_mode : {'lapack', 'randsvd', 'eigen', 'arpack'}, str optional
        Switch for different ways of computing the SVD and selected PCs.
    cube_ref: numpy ndarray, 3d, optional
        Reference library cube. For Reference Star Differential Imaging.
    collapse : {'median', 'mean', 'sum', 'trimmean', None}, str or None, opt
        Sets the way of collapsing the frames for producing a final image. If
        None then the cube of residuals is used when measuring the function of
        merit (instead of a single final frame).
    algo_options: dict, opt
        Dictionary with additional parameters for the algorithm (e.g. tol,
        min_frames_lib, max_frames_lib)
    weights : 1d array, optional
        If provided, the negative fake companion fluxes will be scaled according
        to these weights before injection in the cube. Can reflect changes in
        the observing conditions throughout the sequence.
    w : tuple of length 3
        The size of the bounds (around the initial state ``init``) for each
        parameter.
    method : {"single", "multi", "classic"}, str optional
        Flavor of nested sampling. Single ellipsoid works well for the NEGFC and
        is the default.
    npoints : int optional
        Number of active points. At least ndim+1 (4 will produce bad results).
        For problems with just a few parameters (<=5) like the NEGFC, good
        results are obtained with 100 points (default).
    dlogz : Estimated remaining evidence
        Iterations will stop when the estimated contribution of the remaining
        prior volume to the total evidence falls below this threshold.
        Explicitly, the stopping criterion is log(z + z_est) - log(z) < dlogz
        where z is the current evidence from all saved samples, and z_est is the
        estimated contribution from the remaining volume. This option and
        decline_factor are mutually exclusive. If neither is specified, the
        default is dlogz=0.5.
    decline_factor : float, optional
        If supplied, iteration will stop when the weight (likelihood times prior
        volume) of newly saved samples has been declining for
        decline_factor * nsamples consecutive samples. A value of 1.0 seems to
        work pretty well.
    rstate : random instance, optional
        RandomState instance. If not given, the global random state of the
        numpy.random module will be used.

    Returns
    -------
    res : nestle object
        ``Nestle`` object with the nested sampling results, including the
        posterior samples.

    Note
    ----
    Nested Sampling is a computational approach for integrating posterior
    probability in order to compare models in Bayesian statistics. It is similar
    to Markov Chain Monte Carlo (MCMC) in that it generates samples that can be
    used to estimate the posterior probability distribution. Unlike MCMC, the
    nature of the sampling also allows one to calculate the integral of the
    distribution. It also happens to be a pretty good method for robustly
    finding global maxima.

    Nestle documentation:
    http://kbarbary.github.io/nestle/

    **Convergence**:
    http://kbarbary.github.io/nestle/stopping.html
    Nested sampling has no well-defined stopping point. As iterations continue,
    the active points sample a smaller and smaller region of prior space.
    This can continue indefinitely. Unlike typical MCMC methods, we don't gain
    any additional precision on the results by letting the algorithm run longer;
    the precision is determined at the outset by the number of active points.
    So, we want to stop iterations as soon as we think the active points are
    doing a pretty good job sampling the remaining prior volume - once we've
    converged to the highest-likelihood regions such that the likelihood is
    relatively flat within the remaining prior volume.

    **Method**:
    The trick in nested sampling is to, at each step in the algorithm,
    efficiently choose a new point in parameter space drawn with uniform
    probability from the parameter space with likelihood greater than the
    current likelihood constraint. The different methods all use the
    current set of active points as an indicator of where the target
    parameter space lies, but differ in how they select new points from it:

        - "classic" is close to the method described in [SKI04]_.
        - "single", [MUK06]_, Determines a single ellipsoid that bounds all
          active points, enlarges the ellipsoid by a user-settable factor,
          and selects a new point at random from within the ellipsoid.
        - "multiple", [FER09]_ (Multinest). In cases where the posterior is
          multi-modal, the single-ellipsoid method can be extremely
          inefficient. In such situations, there are clusters of active
          points on separate high-likelihood regions separated by regions of
          lower likelihood. Bounding all points in a single ellipsoid means
          that the ellipsoid includes the lower-likelihood regions we wish to
          avoid sampling from. The solution is to detect these clusters and
          bound them in separate ellipsoids. For this, we use a recursive
          process where we perform K-means clustering with K=2. If the
          resulting two ellipsoids have a significantly lower total volume
          than the parent ellipsoid (less than half), we accept the split and
          repeat the clustering and volume test on each of the two subset of
          points. This process continues recursively. Alternatively, if the
          total ellipse volume is significantly greater than expected (based
          on the expected density of points) this indicates that there may be
          more than two clusters and that K=2 was not an appropriate cluster
          division. We therefore still try to subdivide the clusters
          recursively. However, we still only accept the final split into N
          clusters if the total volume decrease is significant.

    """

    # calculate mu_sigma
    mu_sig = get_mu_and_sigma(cube, angs, ncomp, annulus_width, aperture_radius,
                              fwhm, init[0], init[1], cube_ref=cube_ref,
                              svd_mode=svd_mode, scaling=scaling, algo=algo,
                              delta_rot=delta_rot, collapse=collapse,
                              algo_options=algo_options)
    # Measure mu and sigma once in the annulus (instead of each MCMC step)
    if isinstance(mu_sigma, tuple):
        if len(mu_sigma) != 2:
            raise TypeError("if a tuple, mu_sigma should have 2 elements")

    elif mu_sigma:
        mu_sigma = mu_sig
        if verbose:
            msg = "The mean and stddev in the annulus at the radius of the "
            msg += "companion (excluding the PA area directly adjacent to it)"
            msg += " are {:.2f} and {:.2f} respectively."
            print(msg.format(mu_sigma[0], mu_sigma[1]))
    else:
        mu_sigma = mu_sig[0]  # just take mean

    def prior_transform(x):
        """
        Computes the transformation from the unit distribution `[0, 1]` to
        parameter space.

        The default prior bounds are
            radius: (r - w[0], r + w[0])
            theta: (theta - w[1], theta + w[1])
            flux: (f - w[2], f + w[3])
        The default distributions used are
        radius: Uniform distribution transformed into polar coordinates
            This distribution assumes uniform distribution for the (x,y)
            coordinates transformed to polar coordinates.
        theta: Uniform distribution
            This distribution is derived the same as the radial distribution,
            but there is no change on the prior for theta after the
            change-of-variables transformation.
        flux: Poisson-invariant scale distribution
            This distribution is the Jeffrey's prior for Poisson data

        Note
        ----
        The prior transform function is used to specify the Bayesian prior for
        the problem, in a round-about way. It is a transformation from a space
        where variables are independently and uniformly distributed between 0
        and 1 to the parameter space of interest. For independent parameters,
        this would be the product of the inverse cumulative distribution
        function (also known as the percent point function or quantile function)
        for each parameter.
        http://kbarbary.github.io/nestle/prior.html
        """
        rmin = init[0] - w[0]
        rmax = init[0] + w[0]
        r = np.sqrt((rmax**2 - rmin**2) * x[0] + rmin**2)

        tmin = init[1] - w[1]
        tmax = init[1] + w[1]
        t = x[1] * (tmax - tmin) + tmin

        fmin = init[2] - w[2]
        fmax = init[2] + w[2]
        f = (x[2] * (np.sqrt(fmax) - np.sqrt(fmin)) + np.sqrt(fmin))**2

        return np.array([r, t, f])

    def f(param):
        return lnlike(param=param, cube=cube, angs=angs, psf_norm=psfn,
                      fwhm=fwhm, annulus_width=annulus_width,
                      aperture_radius=aperture_radius, initial_state=init,
                      cube_ref=cube_ref, svd_mode=svd_mode, scaling=scaling,
                      algo=algo, delta_rot=delta_rot, fmerit=fmerit,
                      mu_sigma=mu_sigma, sigma=sigma, ncomp=ncomp,
                      collapse=collapse, algo_options=algo_options,
                      weights=weights)

    # -------------------------------------------------------------------------
    if verbose:
        start = time_ini()

    if verbose:
        print('Prior bounds on parameters:')
        print('Radius [{},{}]'.format(init[0] - w[0], init[0] + w[0], ))
        print('Theta [{},{}]'.format(init[1] - w[1], init[1] + w[1]))
        print('Flux [{},{}]'.format(init[2] - w[2], init[2] + w[2]))
        print('\nUsing {} active points'.format(npoints))

    res = nestle.sample(f, prior_transform, ndim=3, method=method,
                        npoints=npoints, rstate=rstate, dlogz=dlogz,
                        decline_factor=decline_factor)

    # if verbose:  print; timing(start)
    if verbose:
        print('\nTotal running time:')
        timing(start)
    return res




[docs]
def nested_sampling_results(ns_object, burnin=0.4, bins=None, cfd=68.27,
                            save=False, output_dir='/', plot=False):
    """ Shows the results of the Nested Sampling, summary, parameters with
    errors, walk and corner plots.

    Parameters
    ----------
    ns_object: numpy.array
        The nestle object returned from `nested_negfc_sampling`.
    burnin: float, default: 0
        The fraction of a walker we want to discard.
    bins: int, optional
        The number of bins used to sample the posterior distributions.
    cfd: float, optional
        The confidence level given in percentage.
    save: boolean, default: False
        If True, a pdf file is created.
    output_dir: str, optional
        The name of the output directory which contains the output files in the
        case  ``save`` is True.
    plot: bool, optional
        Whether to show the plots (instead of saving them).

    Returns
    -------
    final_res: numpy ndarray
         Best-fit parameters and uncertainties (corresponding to 68% confidence
         interval). Dimensions: nparams x 2.

    """
    res = ns_object
    nsamples = res.samples.shape[0]
    indburnin = int(np.percentile(np.array(range(nsamples)), burnin * 100))

    print(res.summary())

    print(
        '\nNatural log of prior volume and Weight corresponding to each sample')
    if save or plot:
        plt.figure(figsize=(12, 4))
        plt.subplot(1, 2, 1)
        plt.plot(res.logvol, '.', alpha=0.5, color='gray')
        plt.xlabel('samples')
        plt.ylabel('logvol')
        plt.vlines(indburnin, res.logvol.min(), res.logvol.max(),
                   linestyles='dotted')
        plt.subplot(1, 2, 2)
        plt.plot(res.weights, '.', alpha=0.5, color='gray')
        plt.xlabel('samples')
        plt.ylabel('weights')
        plt.vlines(indburnin, res.weights.min(), res.weights.max(),
                   linestyles='dotted')
        if save:
            plt.savefig(output_dir+'Nested_results.pdf')
        if plot:
            plt.show()

        print("\nWalk plots before the burnin")
        show_walk_plot(np.expand_dims(res.samples, axis=0))
        if burnin > 0:
            print("\nWalk plots after the burnin")
            show_walk_plot(np.expand_dims(res.samples[indburnin:], axis=0))
        if save:
            plt.savefig(output_dir+'Nested_walk_plots.pdf')
        if plot:
            plt.show()

    mean, cov = nestle.mean_and_cov(res.samples[indburnin:],
                                    res.weights[indburnin:])
    print("\nWeighted mean +- sqrt(covariance)")
    print("Radius = {:.3f} +/- {:.3f}".format(mean[0], np.sqrt(cov[0, 0])))
    print("Theta = {:.3f} +/- {:.3f}".format(mean[1], np.sqrt(cov[1, 1])))
    print("Flux = {:.3f} +/- {:.3f}".format(mean[2], np.sqrt(cov[2, 2])))

    if save:
        with open(output_dir+'Nested_sampling.txt', "w") as f:
            f.write('#################################\n')
            f.write('####   CONFIDENCE INTERVALS   ###\n')
            f.write('#################################\n')
            f.write(' \n')
            f.write('Results of the NESTED SAMPLING fit\n')
            f.write('----------------------------------\n ')
            f.write(' \n')
            f.write("\nWeighted mean +- sqrt(covariance)\n")
            f.write(
                "Radius = {:.3f} +/- {:.3f}\n".format(mean[0], np.sqrt(cov[0, 0])))
            f.write(
                "Theta = {:.3f} +/- {:.3f}\n".format(mean[1], np.sqrt(cov[1, 1])))
            f.write(
                "Flux = {:.3f} +/- {:.3f}\n".format(mean[2], np.sqrt(cov[2, 2])))

    if bins is None:
        bins = int(np.sqrt(res.samples[indburnin:].shape[0]))
        print("\nHist bins =", bins)

    if save or plot:
        ranges = None
        fig = corner.corner(res.samples[indburnin:], bins=bins,
                            labels=["$r$", r"$\theta$", "$f$"],
                            weights=res.weights[indburnin:], range=ranges,
                            plot_contours=True)
        fig.set_size_inches(8, 8)
    if save:
        plt.savefig(output_dir+'Nested_corner.pdf')

    print('\nConfidence intervals')
    if save or plot:
        _ = confidence(res.samples[indburnin:], cfd=68, bins=bins,
                       weights=res.weights[indburnin:],
                       gaussian_fit=True, verbose=True, save=False)

    if save:
        plt.savefig(output_dir+'Nested_confi_hist_flux_r_theta_gaussfit.pdf')

    final_res = np.array([[mean[0], np.sqrt(cov[0, 0])],
                          [mean[1], np.sqrt(cov[1, 1])],
                          [mean[2], np.sqrt(cov[2, 2])]])
    return final_res