#! /usr/bin/env python
"""
Module with the MCMC (``emcee``) sampling for NEGFC parameter estimation.

.. [CHR21]
   | Christiaens et al. 2021
   | **A faint companion around CrA-9: protoplanet or obscured binary?**
   | *MNRAS, Volume 502, Issue 4, pp. 6117-6139*
   | `https://arxiv.org/abs/2102.10288
     <https://arxiv.org/abs/2102.10288>`_

.. [FOR13]
   | Foreman-Mackey et al. 2013
   | **emcee: The MCMC Hammer**
   | *PASP, Volume 125, Issue 925, p. 306*
   | `https://arxiv.org/abs/1202.3665
     <https://arxiv.org/abs/1202.3665>`_

.. [QUA15]
   | Quanz et al. 2015
   | **Confirmation and Characterization of the Protoplanet HD 100546 b—Direct
   Evidence for Gas Giant Planet Formation at 50 AU**
   | *The Astrophysical Journal, Volume 807, p. 64*
   | `https://arxiv.org/abs/1412.5173
     <https://arxiv.org/abs/1412.5173>`_

.. [WER17]
   | Wertz et al. 2017
   | **VLT/SPHERE robust astrometry of the HR8799 planets at milliarcsecond
   level accuracy. Orbital architecture analysis with PyAstrOFit**
   | *Astronomy & Astrophysics, Volume 598, p. 83*
   | `https://arxiv.org/abs/1610.04014
     <https://arxiv.org/abs/1610.04014>`_

.. [GOO10]
   | Goodman & Weare 2010
   | **Ensemble samplers with affine invariance**
   | *Comm. App. Math. Comp. Sci., Vol. 5, Issue 1, pp. 65-80.*
   | `https://ui.adsabs.harvard.edu/abs/2010CAMCS...5...65G
     <https://ui.adsabs.harvard.edu/abs/2010CAMCS...5...65G>`_

"""
__author__ = 'O. Wertz, Carlos Alberto Gomez Gonzalez, V. Christiaens'
__all__ = ['mcmc_negfc_sampling',
           'chain_zero_truncated',
           'show_corner_plot',
           'show_walk_plot',
           'confidence']

import datetime
import inspect
import os
import pickle
import warnings
from multiprocessing import cpu_count

import corner
import emcee
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator
from scipy.stats import norm

from ..config import time_ini, timing
from ..config.utils_conf import sep
from ..fits import write_fits
from ..fm import cube_inject_companions
from ..psfsub import pca_annulus
from .negfc_fmerit import get_values_optimize, get_mu_and_sigma
from .utils_mcmc import gelman_rubin, autocorr_test
from .utils_negfc import find_nearest

warnings.filterwarnings("ignore", category=DeprecationWarning)


def lnprior(param, bounds, force_rPA=False):
    """Define the prior log-function.

    Parameters
    ----------
    param: tuple
        The model parameters.
    bounds: list
        The bounds for each model parameter.
        Ex: bounds = [(10,20),(0,360),(0,5000)]
    force_rPA: bool, optional
        Whether to only search for the optimal flux, with (r, PA) kept fixed.

    Returns
    -------
    out: float
        0 if all the model parameters satisfy the prior conditions defined
        here, -np.inf if at least one model parameter is out of bounds.
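
    Examples
    --------
    A minimal doctest-style sketch, with hypothetical parameter values and
    bounds:

    >>> lnprior((12.0, 154.0, 230.0), [(10, 20), (0, 360), (0, 5000)])
    0.0
    >>> lnprior((25.0, 154.0, 230.0), [(10, 20), (0, 360), (0, 5000)])
    -inf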

    """
    if not force_rPA:
        try:
            _ = param[0]
            _ = param[1]
            _ = param[2:]
        except TypeError:
            print('param must be a tuple, {} given'.format(type(param)))

        try:
            _ = bounds[0]
            _ = bounds[1]
            _ = bounds[2:]
        except TypeError:
            msg = 'bounds must be a list of tuple, {} given'.format(
                type(bounds))
            print(msg)

    cond = True
    for i in range(len(param)):
        if bounds[i][0] <= param[i] <= bounds[i][1]:
            pass
        else:
            cond = False

    if cond:
        return 0.0
    else:
        return -np.inf


def lnlike(param, cube, angs, psf_norm, fwhm, annulus_width, ncomp,
           aperture_radius, initial_state, cube_ref=None, svd_mode='lapack',
           scaling=None, algo=pca_annulus, delta_rot=1, fmerit='sum',
           imlib='vip-fft', interpolation='lanczos4', collapse='median',
           algo_options={}, weights=None, transmission=None, mu_sigma=True,
           sigma='spe+pho', force_rPA=False, debug=False):
    """Define the log-likelihood function.

    Parameters
    ----------
    param: tuple
        The model parameters, typically (r, theta, flux).
    cube: 3d or 4d numpy ndarray
        Input ADI or ADI+IFS cube.
    angs: numpy.array
        The parallactic angle vector.
    psf_norm: numpy.array
        The normalized PSF template used for the negative fake companion
        injection (see ``vip_hci.metrics.normalize_psf``).
    fwhm : float
        The FWHM in pixels.
    annulus_width: float
        The width of the annulus of interest in pixels.
    ncomp: int or None
        The number of principal components for PCA-based algorithms.
    aperture_radius: float
        The radius of the circular aperture in terms of the FWHM.
    initial_state: numpy.array
        The initial guess for the position and the flux of the planet.
    cube_ref : 3d or 4d numpy ndarray, or list of 3d ndarray, optional
        Reference library cube for Reference Star Differential Imaging. Should
        be 3d, except if the input cube is 4d, in which case it can either be a
        4d array or a list of 3d ndarray (reference cube for each wavelength).
    svd_mode : {'lapack', 'randsvd', 'eigen', 'arpack'}, str optional
        Switch for different ways of computing the SVD and selected PCs.
    scaling : {None, "temp-mean", "spat-mean", "temp-standard",
        "spat-standard"}, None or str optional
        Pixel-wise scaling mode using ``sklearn.preprocessing.scale``
        function. If set to None, the input matrix is left untouched. Otherwise:

        * ``temp-mean``: temporal px-wise mean is subtracted.

        * ``spat-mean``: spatial mean is subtracted.

        * ``temp-standard``: temporal mean centering plus scaling pixel values
          to unit variance (temporally).

        * ``spat-standard``: spatial mean centering plus scaling pixel values
          to unit variance (spatially).

        DISCLAIMER: Using ``temp-mean`` or ``temp-standard`` scaling can improve
        the speckle subtraction for ASDI or (A)RDI reductions. Nonetheless, this
        involves a sort of c-ADI preprocessing, which (i) can be dangerous for
        datasets with low amount of rotation (strong self-subtraction), and (ii)
        should probably be referred to as ARDI (i.e. not RDI stricto sensu).
    algo: vip function, optional {pca_annulus, pca_annular}
        Post-processing algorithm used.
    delta_rot: float, optional
        If algo is set to pca_annular, delta_rot is the angular threshold used
        to select frames in the PCA library (see description of pca_annular).
    fmerit : {'sum', 'stddev', 'hessian'}, string optional
        If mu_sigma is not provided nor set to True, this parameter determines
        which figure of merit is used:

            * ``sum``: minimizes the sum of absolute residual intensities in
              the aperture defined with `initial_state` and `aperture_radius`.
              More details in [WER17]_.

            * ``stddev``: minimizes the standard deviation of residual
              intensities in the aperture defined with `initial_state` and
              `aperture_radius`. More details in [WER17]_.

            * ``hessian``: minimizes the sum of absolute values of the
              determinant of the Hessian matrix calculated for each of the 4
              pixels encompassing the first guess location defined with
              `initial_state`. More details in [QUA15]_.

        From experience: ``sum`` is more robust for high SNR companions (but
        rather consider setting mu_sigma=True), while ``stddev`` tends to be
        more reliable in the presence of strong residual speckle noise.
        ``hessian`` is expected to be more reliable in the presence of extended
        signals around the companion location.
    imlib : str, optional
        See the documentation of the ``vip_hci.preproc.frame_shift`` function.
    interpolation : str, optional
        See the documentation of the ``vip_hci.preproc.frame_shift`` function.
    collapse : {'median', 'mean', 'sum', 'trimmean', 'wmean'}, str, optional
        Sets the way of collapsing the frames for producing a final image. If
        None then the cube of residuals is used when measuring the function of
        merit (instead of a single final frame).
    algo_options: dict, opt
        Dictionary with additional parameters related to the algorithm
        (e.g. tol, min_frames_lib, max_frames_lib). If 'algo' is not a vip
        routine, this dict should contain all necessary arguments apart from
        the cube and derotation angles. Note: arguments such as ncomp, svd_mode,
        scaling, imlib, interpolation or collapse can also be included in this
        dict (the latter are also kept as function arguments for consistency
        with older versions of vip).
    weights : 1d array, optional
        If provided, the negative fake companion fluxes will be scaled according
        to these weights before injection in the cube. Can reflect changes in
        the observing conditions throughout the sequence.
    transmission: numpy array, optional
        Radial transmission of the coronagraph, if any. Array of shape
        (2, n_rad) or (1+n_ch, n_rad), where the first row contains the radial
        separations in pixels, and the remaining row(s) the corresponding
        off-axis transmission (between 0 and 1), either for all spectral
        channels at once or for each of them (the latter only relevant for a
        4D input cube).
    mu_sigma: tuple of 2 floats or None, opt
        If set to None, this is not used and the algorithm falls back to its
        original version, based on ``fmerit``. Otherwise, it should be a tuple
        of 2 elements, containing the mean and standard deviation of pixel
        intensities in an annulus centered on the location of the companion,
        excluding the area directly adjacent to the companion.
    sigma: str, opt
        Sets the type of noise to be included as sigma^2 in the log-probability
        expression. Choice between 'pho' for photon (Poisson) noise, 'spe' for
        residual (mostly whitened) speckle noise, or 'spe+pho' for both.
    force_rPA: bool, optional
        Whether to only search for the optimal flux, with (r, PA) kept fixed.
    debug: boolean
        If True, the cube with the injected negative fake companion is
        returned along with the log-likelihood value.

    Returns
    -------
    out: float
        The log of the likelihood.

    """
    # set imlib for rotation and shift
    if imlib == 'opencv':
        imlib_rot = imlib
        imlib_sh = imlib
    elif imlib == 'skimage' or imlib == 'ndimage-interp':
        imlib_rot = 'skimage'
        imlib_sh = 'ndimage-interp'
    elif imlib == 'vip-fft' or imlib == 'ndimage-fourier':
        imlib_rot = 'vip-fft'
        imlib_sh = 'ndimage-fourier'
    else:
        raise TypeError("Interpolation not recognized.")

    if force_rPA:
        r0 = initial_state[0]
        theta0 = initial_state[1]
        if len(param) > 1:
            flux = -np.array(param)
        else:
            flux = -param[0]
    else:
        r0 = param[0]
        theta0 = param[1]
        if len(param) > 3:
            flux = -np.array(param[2:])
        else:
            flux = -param[2]

    # Apply the weights, if provided, to the injected flux(es)
    norm_weights = None
    if weights is not None:
        if np.isscalar(flux):
            flux = flux*weights
        else:
            flux = np.outer(flux, weights)

    # Create the cube with the negative fake companion injected
    cube_negfc = cube_inject_companions(cube, psf_norm, angs, flevel=flux,
                                        rad_dists=[r0], n_branches=1,
                                        theta=theta0, imlib=imlib_sh,
                                        interpolation=interpolation,
                                        transmission=transmission,
                                        verbose=False)
    # Perform PCA and extract the zone of interest
    values = get_values_optimize(cube_negfc, angs, ncomp, annulus_width,
                                 aperture_radius, fwhm, initial_state[0],
                                 initial_state[1], cube_ref=cube_ref,
                                 svd_mode=svd_mode, scaling=scaling,
                                 algo=algo, delta_rot=delta_rot, imlib=imlib_rot,
                                 interpolation=interpolation, collapse=collapse,
                                 algo_options=algo_options,
                                 weights=norm_weights)
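    # Figure of merit (see [CHR21]_): when mu_sigma is a (mu, sigma) tuple,
    # the log-likelihood below is -0.5 * sum((values - mu)**2 / sigma_tot**2),
    # where sigma_tot**2 can include the residual speckle variance ('spe')
    # and/or a photon-noise-like term proportional to |values - mu| ('pho').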

    if isinstance(mu_sigma, tuple):
        mu = mu_sigma[0]
        sigma2 = mu_sigma[1]**2
        num = np.power(mu-values, 2)
        denom = 0
        if 'spe' in sigma:
            denom += sigma2
        if 'pho' in sigma:
            denom += np.abs(values-mu)
        lnlikelihood = -0.5 * np.sum(num/denom)
    else:
        mu = mu_sigma
        # original version of the algorithm, based on fmerit
        if fmerit == 'sum':
            lnlikelihood = -0.5 * np.sum(np.abs(values-mu))
        elif fmerit == 'stddev':
            values = values[values != 0]
            lnlikelihood = -np.std(values, ddof=1)*values.size
        else:
            raise RuntimeError('fmerit choice not recognized.')

    if debug:
        return lnlikelihood, cube_negfc
    else:
        return lnlikelihood


def lnprob(param, bounds, cube, angs, psf_norm, fwhm, annulus_width, ncomp,
           aperture_radius, initial_state, cube_ref=None, svd_mode='lapack',
           scaling=None, algo=pca_annulus, delta_rot=1, fmerit='sum',
           imlib='vip-fft', interpolation='lanczos4', collapse='median',
           algo_options={}, weights=None, transmission=None, mu_sigma=True,
           sigma='spe+pho', force_rPA=False, display=False):
    """Define the log-probability function as the sum between the prior and\
    log-likelihood functions.

    Parameters
    ----------
    param: tuple
        The model parameters.
    bounds: list
        The bounds for each model parameter.
        Ex: bounds = [(10,20),(0,360),(0,5000)]
    cube: 3d or 4d numpy ndarray
        Input ADI or ADI+IFS cube.
    angs: numpy.array
        The parallactic angle vector.
    psf_norm: numpy 2D or 3D array
        Normalised PSF template used for negative fake companion injection.
        The PSF must be centered and the flux in a 1xFWHM aperture must equal 1
        (use ``vip_hci.metrics.normalize_psf``).
        If the input cube is 3D and a 3D array is provided, the first dimension
        must match for both cubes. This can be useful if the star was
        unsaturated and conditions were variable.
        If the input cube is 4D, psfn must be either 3D or 4D. In either cases,
        the first dimension(s) must match those of the input cube.
    fwhm : float
        The FWHM in pixels.
    annulus_width: float
        The width in pixels of the annulus on which the PCA is performed.
    ncomp: int or None
        The number of principal components for PCA-based algorithms.
    aperture_radius: float
        The radius of the circular aperture in FWHM.
    initial_state: numpy.array
        The initial guess for the position and the flux of the planet.
    cube_ref : 3d or 4d numpy ndarray, or list of 3d ndarray, optional
        Reference library cube for Reference Star Differential Imaging. Should
        be 3d, except if the input cube is 4d, in which case it can either be a
        4d array or a list of 3d ndarray (reference cube for each wavelength).
    svd_mode : {'lapack', 'randsvd', 'eigen', 'arpack'}, str optional
        Switch for different ways of computing the SVD and selected PCs.
    scaling : {None, "temp-mean", "spat-mean", "temp-standard",
        "spat-standard"}, None or str optional
        Pixel-wise scaling mode using ``sklearn.preprocessing.scale``
        function. If set to None, the input matrix is left untouched. Otherwise:

        * ``temp-mean``: temporal px-wise mean is subtracted.

        * ``spat-mean``: spatial mean is subtracted.

        * ``temp-standard``: temporal mean centering plus scaling pixel values
          to unit variance (temporally).

        * ``spat-standard``: spatial mean centering plus scaling pixel values
          to unit variance (spatially).

        DISCLAIMER: Using ``temp-mean`` or ``temp-standard`` scaling can improve
        the speckle subtraction for ASDI or (A)RDI reductions. Nonetheless, this
        involves a sort of c-ADI preprocessing, which (i) can be dangerous for
        datasets with low amount of rotation (strong self-subtraction), and (ii)
        should probably be referred to as ARDI (i.e. not RDI stricto sensu).
    fmerit : {'sum', 'stddev', 'hessian'}, string optional
        If mu_sigma is not provided nor set to True, this parameter determines
        which figure of merit is used:

            * ``sum``: minimizes the sum of absolute residual intensities in
              the aperture defined with `initial_state` and `aperture_radius`.
              More details in [WER17]_.

            * ``stddev``: minimizes the standard deviation of residual
              intensities in the aperture defined with `initial_state` and
              `aperture_radius`. More details in [WER17]_.

            * ``hessian``: minimizes the sum of absolute values of the
              determinant of the Hessian matrix calculated for each of the 4
              pixels encompassing the first guess location defined with
              `initial_state`. More details in [QUA15]_.

        From experience: ``sum`` is more robust for high SNR companions (but
        rather consider setting mu_sigma=True), while ``stddev`` tends to be
        more reliable in the presence of strong residual speckle noise.
        ``hessian`` is expected to be more reliable in the presence of extended
        signals around the companion location.
    imlib : str, optional
        See the documentation of the ``vip_hci.preproc.frame_rotate`` function.
    interpolation : str, optional
        See the documentation of the ``vip_hci.preproc.frame_rotate`` function.
    algo_options: dict, opt
        Dictionary with additional parameters related to the algorithm
        (e.g. tol, min_frames_lib, max_frames_lib). If 'algo' is not a vip
        routine, this dict should contain all necessary arguments apart from
        the cube and derotation angles. Note: arguments such as ncomp, svd_mode,
        scaling, imlib, interpolation or collapse can also be included in this
        dict (the latter are also kept as function arguments for consistency
        with older versions of vip).
    collapse : {'median', 'mean', 'sum', 'trimmean', 'wmean'}, str, optional
        Sets the way of collapsing the frames for producing a final image. If
        None then the cube of residuals is used when measuring the function of
        merit (instead of a single final frame).
    weights : 1d array, optional
        If provided, the negative fake companion fluxes will be scaled according
        to these weights before injection in the cube. Can reflect changes in
        the observing conditions throughout the sequence.
    transmission: numpy array, optional
        Radial transmission of the coronagraph, if any. Array of shape
        (2, n_rad) or (1+n_ch, n_rad), where the first row contains the radial
        separations in pixels, and the remaining row(s) the corresponding
        off-axis transmission (between 0 and 1), either for all spectral
        channels at once or for each of them (the latter only relevant for a
        4D input cube).
    mu_sigma: tuple of 2 floats or None, opt
        If set to None, this is not used and the algorithm falls back to its
        original version, based on ``fmerit``. Otherwise, it should be a tuple
        of 2 elements, containing the mean and standard deviation of pixel
        intensities in an annulus centered on the location of the companion,
        excluding the area directly adjacent to the companion.
    sigma: str, opt
        Sets the type of noise to be included as sigma^2 in the log-probability
        expression. Choice between 'pho' for photon (Poisson) noise, 'spe' for
        residual (mostly whitened) speckle noise, or 'spe+pho' for both.
    force_rPA: bool, optional
        Whether to only search for the optimal flux, with (r, PA) kept fixed.
    display: boolean
        If True, the cube is displayed with ds9.

    Returns
    -------
    out: float
        The log-probability of the model parameters (log-prior +
        log-likelihood).

    """
    if initial_state is None:
        initial_state = param

    lp = lnprior(param, bounds, force_rPA)

    if np.isinf(lp):
        return -np.inf

    return lp + lnlike(param, cube, angs, psf_norm, fwhm, annulus_width,
                       ncomp, aperture_radius, initial_state, cube_ref,
                       svd_mode, scaling, algo, delta_rot, fmerit, imlib,
                       interpolation, collapse, algo_options, weights,
                       transmission, mu_sigma, sigma, force_rPA)


[docs] def mcmc_negfc_sampling(cube, angs, psfn, initial_state, algo=pca_annulus, ncomp=1, annulus_width=8, aperture_radius=1, fwhm=4, mu_sigma=True, sigma='spe+pho', force_rPA=False, fmerit='sum', cube_ref=None, svd_mode='lapack', scaling=None, delta_rot=1, imlib='vip-fft', interpolation='lanczos4', collapse='median', algo_options={}, wedge=None, weights=None, transmission=None, nwalkers=100, bounds=None, a=2.0, burnin=0.3, rhat_threshold=1.01, rhat_count_threshold=1, niteration_min=10, niteration_limit=10000, niteration_supp=0, check_maxgap=20, conv_test='ac', ac_c=50, ac_count_thr=3, nproc=1, output_dir='results/', output_file=None, display=False, verbosity=0, save=False): r"""Run an affine invariant mcmc sampling algorithm to determine position and flux of a companion using the 'Negative Fake Companion' technique. The result of this procedure is a chain with the samples from the posterior distributions of each of the 3 parameters. This technique can be summarized as follows: 1) We inject a negative fake companion (one candidate) at a given position and characterized by a given flux, both close to the expected values. 2) We run PCA on an full annulus which pass through the initial guess, regardless of the position of the candidate. 3) We extract the intensity values of all the pixels contained in a circular aperture centered on the initial guess. 4) We calculate a function of merit :math:`\chi^2` (see below). The steps 1) to 4) are then looped. At each iteration, the candidate model parameters are defined by the ``emcee`` Affine Invariant algorithm [FOR13]_. There are different possibilities for the figure of merit (step 4): - ``mu_sigma=None``; ``fmerit='sum'`` (as in [WER17]_):\ :math:`\chi^2 = \sum(\|I_j\|)` - ``mu_sigma=None``; ``fmerit='stddev'`` (likely more appropriate than 'sum' when residual speckle noise is still significant):\ :math:`\chi^2 = N \sigma_{I_j}(values,ddof=1)*values.size` - ``mu_sigma=True`` or a tuple (as in [CHR21]_, new default):\ :math:`\chi^2 = \sum\frac{(I_j- mu)^2}{\sigma^2}` where :math:`j \in {1,...,N}` with N the total number of pixels contained in the circular aperture, :math:`\sigma_{I_j}` is the standard deviation of :math:`I_j` values, and :math:`\mu` is the mean pixel intensity in a truncated annulus at the radius of the companion candidate (i.e. excluding the cc region). See description of ``mu_sigma`` and ``sigma`` for more details on :math:`\sigma` considered in the last equation. Speed tricks: - crop your input cube to a size such as to just include the annulus on which the PCA is performed; - set ``imlib='opencv'`` (much faster image rotation, at the expense of flux conservation); - increase ``nproc`` (if your machine allows); - reduce ``ac_c`` (or increase ``rhat_threshold`` if ``conv_test='gb'``) for a faster convergence); - reduce ``niteration_limit`` to force the sampler to stop even if it has not reached convergence. Parameters ---------- cube: 3d or 4d numpy ndarray Input ADI or ADI+IFS cube. angs: numpy.array The parallactic angle vector. psfn: numpy 2D or 3D array Normalised PSF template used for negative fake companion injection. The PSF must be centered and the flux in a 1xFWHM aperture must equal 1 (use ``vip_hci.metrics.normalize_psf``). If the input cube is 3D and a 3D array is provided, the first dimension must match for both cubes. This can be useful if the star was unsaturated and conditions were variable. If the input cube is 4D, psfn must be either 3D or 4D. 
In either cases, the first dimension(s) must match those of the input cube. initial_state: tuple or 1d numpy ndarray The first guess for the position and flux(es) of the planet, in that order. In the case of a 3D input cube, it should have a length of 3, while in the case of a 4D input cube, the length should be 2 + number of spectral channels (one flux estimate per wavelength). Each walker will start in a small ball around this preferred position. algo : python routine, optional Post-processing algorithm used to model and subtract the star. First 2 arguments must be input cube and derotation angles. Must return a post-processed 2d frame. ncomp: int or None, optional The number of principal components for PCA-based algorithms. annulus_width: float, optional The width in pixels of the annulus on which the PCA is performed. aperture_radius: float, optional The radius in FWHM of the circular aperture. fwhm : float The FHWM in pixels. mu_sigma: tuple of 2 floats or bool, opt If set to None: not used, and falls back to original version of the algorithm, using ``fmerit`` [WER17]_. If a tuple of 2 elements: should be the mean and standard deviation of pixel intensities in an annulus centered on the location of the companion candidate, excluding the area directly adjacent to the CC. If set to anything else, but None/False/tuple: will compute said mean and standard deviation automatically. These values will then be used in the log-probability of the MCMC. sigma: str, opt Sets the type of noise to be included as sigma^2 in the log-probability expression. Choice between 'pho' for photon (Poisson) noise, 'spe' for residual (mostly whitened) speckle noise, or 'spe+pho' for both. force_rPA: bool, optional Whether to only search for optimal flux, provided (r,PA). fmerit : {'sum', 'stddev', 'hessian'}, string optional If mu_sigma is not provided nor set to True, this parameter determines which figure of merit to be used: * ``sum``: minimizes the sum of absolute residual intensities in the aperture defined with `initial_state` and `aperture_radius`. More details in [WER17]_. * ``stddev``: minimizes the standard deviation of residual intensities in the aperture defined with `initial_state` and `aperture_radius`. More details in [WER17]_. * ``hessian``: minimizes the sum of absolute values of the determinant of the Hessian matrix calculated for each of the 4 pixels encompassing the first guess location defined with `initial_state`. More details in [QUA15]_. From experience: ``sum`` is more robust for high SNR companions (but rather consider setting mu_sigma=True), while ``stddev`` tend to be more reliable in presence of strong residual speckle noise. ``hessian`` is expected to be more reliable in presence of extended signals around the companion location. cube_ref : 3d or 4d numpy ndarray, or list of 3d ndarray, optional Reference library cube for Reference Star Differential Imaging. Should be 3d, except if the input cube is 4d, in which case it can either be a 4d array or a list of 3d ndarray (reference cube for each wavelength). svd_mode : {'lapack', 'randsvd', 'eigen', 'arpack'}, str optional Switch for different ways of computing the SVD and selected PCs. 'randsvd' is not recommended for the negative fake companion technique. scaling : {None, "temp-mean", spat-mean", "temp-standard", "spat-standard"}, None or str optional Pixel-wise scaling mode using ``sklearn.preprocessing.scale`` function. If set to None, the input matrix is left untouched. 
Otherwise: * ``temp-mean``: temporal px-wise mean is subtracted. * ``spat-mean``: spatial mean is subtracted. * ``temp-standard``: temporal mean centering plus scaling pixel values to unit variance (temporally). * ``spat-standard``: spatial mean centering plus scaling pixel values to unit variance (spatially). DISCLAIMER: Using ``temp-mean`` or ``temp-standard`` scaling can improve the speckle subtraction for ASDI or (A)RDI reductions. Nonetheless, this involves a sort of c-ADI preprocessing, which (i) can be dangerous for datasets with low amount of rotation (strong self-subtraction), and (ii) should probably be referred to as ARDI (i.e. not RDI stricto sensu). delta_rot: float, optional If algo is set to pca_annular, delta_rot is the angular threshold used to select frames in the PCA library (see description of pca_annular). Increases processing time. imlib : str, optional Imlib used for both image rotation and sub-px shift: - "opencv": will use it for both; - "skimage" or "ndimage-interp" will use scikit-image and scipy.ndimage for rotation and shift resp.; - "ndimage-fourier" or "vip-fft" will use Fourier transform based methods for both. interpolation : str, optional Interpolation order. See the documentation of the ``vip_hci.preproc.frame_rotate`` function. Note that the interpolation options are identical for rotation and shift within each of the 3 imlib cases above. collapse : {'median', 'mean', 'sum', 'trimmean', 'wmean'}, str, optional Sets the way of collapsing the frames for producing a final image. If None then the cube of residuals is used when measuring the function of merit (instead of a single final frame). algo_options: dict, opt Dictionary with additional parameters related to the algorithm (e.g. tol, min_frames_lib, max_frames_lib). If 'algo' is not a vip routine, this dict should contain all necessary arguments apart from the cube and derotation angles. Note: arguments such as ncomp, svd_mode, scaling, imlib, interpolation or collapse can also be included in this dict (the latter are also kept as function arguments for consistency with older versions of vip). wedge: tuple, opt Range in theta where the mean and standard deviation are computed in an annulus defined in the PCA image. If None, it will be calculated automatically based on initial guess and derotation angles to avoid negative ADI side lobes. If some disc signal is present elsewhere in the annulus, it is recommended to provide wedge manually. The provided range should be continuous and >0. E.g. provide (270, 370) to consider a PA range between [-90,+10]. weights : 1d array, optional If provided, the negative fake companion fluxes will be scaled according to these weights before injection in the cube. Can reflect changes in the observing conditions throughout the sequence. transmission: numpy array, optional Radial transmission of the coronagraph, if any. Array with either 2 x n_rad, 1+n_ch x n_rad columns. The first column should contain the radial separation in pixels, while the other column(s) are the corresponding off-axis transmission (between 0 and 1), for either all, or each spectral channel (only relevant for a 4D input cube). nwalkers: int optional The number of [GOO10]_ 'walkers'. bounds: numpy.array or list, default=None, optional The prior knowledge on the model parameters. If None, large bounds will be automatically estimated from the initial state. a: float, default=2.0 The proposal scale parameter. See Note. burnin: float, default=0.3 The fraction of a walker chain which is discarded. 
Note: only used for Gelman-Rubin convergence test - the chains are returned full. rhat_threshold: float, default=0.01 The Gelman-Rubin threshold used for the test for nonconvergence. rhat_count_threshold: int, optional The Gelman-Rubin test must be satisfied 'rhat_count_threshold' times in a row before claiming that the chain has converged. conv_test: str, optional {'gb','ac'} Method to check for convergence: - 'gb' for gelman-rubin test (http://digitalassets.lib.berkeley.edu/sdtr/ucb/text/305.pdf) - 'ac' for autocorrelation analysis (https://emcee.readthedocs.io/en/stable/tutorials/autocorr/) ac_c: float, optional If the convergence test is made using the auto-correlation, this is the value of C such that tau/N < 1/C is the condition required for tau to be considered a reliable auto-correlation time estimate (for N number of samples). Recommended: C>50. More details here: https://emcee.readthedocs.io/en/stable/tutorials/autocorr/ ac_count_thr: int, optional The auto-correlation test must be satisfied ac_count_thr times in a row before claiming that the chain has converged. niteration_min: int, optional Steps per walker lower bound. The simulation will run at least this number of steps per walker. niteration_limit: int, optional Steps per walker upper bound. If the simulation runs up to 'niteration_limit' steps without having reached the convergence criterion, the run is stopped. niteration_supp: int, optional Number of iterations to run after having "reached the convergence". check_maxgap: int, optional Maximum number of steps per walker between two Gelman-Rubin test. nproc: int or None, optional The number of processes to use for parallelization. If None, will be set automatically to half the number of CPUs available. output_dir: str, optional The name of the output directory which contains the output files in the case ``save`` is True. output_file: str, optional The name of the output file which contains the MCMC results in the case ``save`` is True. display: bool, optional If True, the walk plot is displayed at each evaluation of the Gelman- Rubin test. verbosity: 0, 1, 2 or 3, optional Verbosity level. 0 for no output and 3 for full information. (only difference between 2 and 3 is that 3 also writes intermediate pickles containing the state of the chain at convergence tests; these can end up taking a lot of space). save: bool, optional If True, the MCMC results are pickled. Returns ------- out : numpy.array The MCMC chain. Note ---- The parameter ``a`` must be > 1. For more theoretical information concerning this parameter, see [GOO10]_. The parameter ``rhat_threshold`` can be a numpy.array with individual threshold value for each model parameter. """ if verbosity > 0: start_time = time_ini() print(" MCMC sampler for the NEGFC technique ") print(sep) # If required, one create the output folder. 
if save: output_file_tmp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") if output_dir[-1] == '/': output_dir = output_dir[:-1] try: os.makedirs(output_dir) except OSError as exc: if exc.errno == 17 and os.path.isdir(output_dir): # errno.EEXIST == 17 -> File exists pass else: raise if cube.ndim != 4 and cube.ndim != 3: raise ValueError('`cube` must be a 3D or 4D numpy array') if cube_ref is not None: if cube.ndim == 3: if cube_ref.ndim != 3: raise ValueError('`cube_ref` must be a 3D numpy array') elif cube.ndim == 4: if cube_ref[0].ndim != 3: msg = '`cube_ref` must be a 4D array or a list of 3D arrays' raise ValueError(msg) if weights is not None: if not len(weights) == cube.shape[-3]: msg = "Weights should have same length as temporal cube axis" raise TypeError(msg) norm_weights = None # weights/np.sum(weights) else: norm_weights = weights if psfn.ndim > 2: if psfn.shape[0] != cube.shape[0]: msg = "If PSF is 3D or 4D, first dimension should match that of " msg += "input cube" raise TypeError(msg) if 'spe' not in sigma and 'pho' not in sigma: raise ValueError("sigma not recognized") # set imlib for rotation and shift if imlib == 'opencv': imlib_rot = imlib elif imlib == 'skimage' or imlib == 'ndimage-interp': imlib_rot = 'skimage' elif imlib == 'vip-fft' or imlib == 'ndimage-fourier': imlib_rot = 'vip-fft' else: raise TypeError("Interpolation not recognized.") if nproc is None: nproc = cpu_count() // 2 # Hyper-threading doubles the # of cores # ######################################################################### # Initialization of the variables # ######################################################################### dim = len(initial_state) if force_rPA: dim -= 2 itermin = niteration_min limit = niteration_limit supp = niteration_supp maxgap = check_maxgap initial_state = np.array(initial_state) if initial_state[1] == 0: initial_state[1] = 360 # for appropriate scaling of initial ball mu_sig = get_mu_and_sigma(cube, angs, ncomp, annulus_width, aperture_radius, fwhm, initial_state[0], initial_state[1], cube_ref=cube_ref, wedge=wedge, svd_mode=svd_mode, scaling=scaling, algo=algo, delta_rot=delta_rot, imlib=imlib_rot, interpolation=interpolation, collapse=collapse, weights=norm_weights, algo_options=algo_options) # Measure mu and sigma once in the annulus (instead of each MCMC step) if isinstance(mu_sigma, tuple): if len(mu_sigma) != 2: raise TypeError("if a tuple, mu_sigma should have 2 elements") elif mu_sigma: mu_sigma = mu_sig if verbosity > 0: msg = "The mean and stddev in the annulus at the radius of the " msg += "companion (excluding the PA area directly adjacent to it)" msg += " are {:.2f} and {:.2f} respectively." 
print(msg.format(mu_sigma[0], mu_sigma[1])) else: mu_sigma = mu_sig[0] # just take mean if itermin > limit: itermin = 0 fraction = 0.3 geom = 0 lastcheck = 0 konvergence = np.inf rhat_count = 0 ac_count = 0 chain = np.empty([nwalkers, 1, dim]) nIterations = limit + supp rhat = np.zeros(dim) stop = np.inf if bounds is None: bounds = [] d0 = 0 if not force_rPA: # angle subtended by aperture_radius/2 or fwhm at r=initial_state[0] dr = min(annulus_width/2, aperture_radius*fwhm/2) dth = 360./(2*np.pi*initial_state[0]/(aperture_radius*fwhm/2)) bounds = [(initial_state[0] - dr, initial_state[0] + dr), # radius (initial_state[1] - dth, initial_state[1] + dth)] # angle d0 = 2 for i in range(dim-d0): bounds.append((0, 5 * initial_state[d0+i])) # flux # size of ball of parameters for MCMC initialization scal = abs(bounds[0][0]-initial_state[0])/initial_state[0] for i in range(dim): for j in range(2): test_scal = abs(bounds[i][j]-initial_state[i])/initial_state[i] if test_scal < scal: scal = test_scal if force_rPA: init = initial_state[2:] if len(init) > 1: labels = [] for i in range(len(init)): labels.append(r"$f{:.0f}$".format(i)) else: labels = [r"$f$"] else: init = initial_state if len(init) > 3: labels = ["$r$", r"$\theta$"] for i in range(len(init)-2): labels.append(r"$f{:.0f}$".format(i)) else: labels = ["$r$", r"$\theta$", "$f$"] pos = init*(1+np.random.normal(0, scal/50., (nwalkers, dim))) # divided by 7 to not have any walker initialized out of bounds if verbosity > 0: print('Beginning emcee Ensemble sampler...') # deactivate multithreading os.environ["MKL_NUM_THREADS"] = "1" os.environ["NUMEXPR_NUM_THREADS"] = "1" os.environ["OMP_NUM_THREADS"] = "1" sampler = emcee.EnsembleSampler(nwalkers, dim, lnprob, a=a, args=([bounds, cube, angs, psfn, fwhm, annulus_width, ncomp, aperture_radius, initial_state, cube_ref, svd_mode, scaling, algo, delta_rot, fmerit, imlib, interpolation, collapse, algo_options, weights, transmission, mu_sigma, sigma, force_rPA]), threads=nproc) if verbosity > 0: print('emcee Ensemble sampler successful') start = datetime.datetime.now() # ######################################################################### # Affine Invariant MCMC run # ######################################################################### if verbosity > 1: print('\nStart of the MCMC run ...') print('Step | Duration/step (sec) | Remaining Estimated Time (sec)') for k, res in enumerate(sampler.sample(pos, iterations=nIterations)): elapsed = (datetime.datetime.now()-start).total_seconds() if verbosity > 1: if k == 0: q = 0.5 else: q = 1 print('{}\t\t{:.5f}\t\t\t{:.5f}'.format(k, elapsed * q, elapsed * (limit-k-1) * q), flush=True) start = datetime.datetime.now() # --------------------------------------------------------------------- # Store the state manually in order to handle with dynamical sized chain # --------------------------------------------------------------------- # Check if the size of the chain is long enough. s = chain.shape[1] if k+1 > s: # if not, one doubles the chain length empty = np.zeros([nwalkers, 2*s, dim]) chain = np.concatenate((chain, empty), axis=1) # Store the state of the chain chain[:, k] = res[0] # --------------------------------------------------------------------- # If k meets the criterion, one tests the non-convergence. 
# --------------------------------------------------------------------- criterion = int(np.amin([np.ceil(itermin*(1+fraction)**geom), lastcheck+np.floor(maxgap)])) if k == criterion: if verbosity > 1: print('\n {} convergence test in progress...'.format(conv_test)) geom += 1 lastcheck = k if display: show_walk_plot(chain, labels=labels) if save and verbosity == 3: fname = '{d}/{f}_temp_k{k}'.format(d=output_dir, f=output_file_tmp, k=k) data = {'chain': sampler.chain, 'lnprob': sampler.lnprobability, 'AR': sampler.acceptance_fraction} with open(fname, 'wb') as fileSave: pickle.dump(data, fileSave) # We only test the rhat if we have reached the min # of steps if (k+1) >= itermin and konvergence == np.inf: if conv_test == 'gb': thr0 = int(np.floor(burnin*k)) thr1 = int(np.floor((1-burnin)*k*0.25)) # We calculate the rhat for each model parameter. for j in range(dim): part1 = chain[:, thr0:thr0 + thr1, j].reshape(-1) part2 = chain[:, thr0 + 3 * thr1:thr0 + 4 * thr1, j ].reshape(-1) series = np.vstack((part1, part2)) rhat[j] = gelman_rubin(series) if verbosity > 0: print(' r_hat = {}'.format(rhat)) cond = rhat <= rhat_threshold print(' r_hat <= threshold = {} \n'.format(cond)) # We test the rhat. if (rhat <= rhat_threshold).all(): rhat_count += 1 if rhat_count < rhat_count_threshold: if verbosity > 0: msg = "Gelman-Rubin test OK {}/{}" print(msg.format(rhat_count, rhat_count_threshold)) elif rhat_count >= rhat_count_threshold: if verbosity > 0: print('... ==> convergence reached') konvergence = k stop = konvergence + supp else: rhat_count = 0 elif conv_test == 'ac': # We calculate the auto-corr test for each model parameter. if save: chain_name = "TMP_test_chain{:.0f}.fits".format(k) write_fits(output_dir+'/'+chain_name, chain[:, :k]) for j in range(dim): rhat[j] = autocorr_test(chain[:, :k, j]) thr = 1./ac_c if verbosity > 0: print('Auto-corr tau/N = {}'.format(rhat)) print('tau/N <= {} = {} \n'.format(thr, rhat < thr)) if (rhat <= thr).all(): ac_count += 1 if verbosity > 0: msg = "Auto-correlation test passed for all params!" msg += "{}/{}".format(ac_count, ac_count_thr) print(msg) if ac_count >= ac_count_thr: msg = '\n ... ==> convergence reached' print(msg) stop = k else: ac_count = 0 else: raise ValueError('conv_test value not recognized') # append the autocorrelation factor to file for easy reading if save: with open(output_dir + '/MCMC_results_tau.txt', 'a') as f: f.write(str(rhat) + '\n') # We have reached the maximum number of steps for our Markov chain. if k+1 >= stop: if verbosity > 0: print('We break the loop because we have reached convergence') break if k == nIterations-1: if verbosity > 0: print("We have reached the limit # of steps without convergence") if save: frame = inspect.currentframe() args, _, _, values = inspect.getargvalues(frame) input_parameters = {j: values[j] for j in args[1:]} output = {'chain': chain_zero_truncated(chain), 'input_parameters': input_parameters, 'AR': sampler.acceptance_fraction, 'lnprobability': sampler.lnprobability} if output_file is None: output_file = 'MCMC_results' with open(output_dir+'/'+output_file, 'wb') as fileSave: pickle.dump(output, fileSave) msg = "\nThe file MCMC_results has been stored in the folder {}" print(msg.format(output_dir+'/')) if verbosity > 0: timing(start_time) # reactivate multithreading ncpus = cpu_count() os.environ["MKL_NUM_THREADS"] = str(ncpus) os.environ["NUMEXPR_NUM_THREADS"] = str(ncpus) os.environ["OMP_NUM_THREADS"] = str(ncpus) return chain_zero_truncated(chain)
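

# Example usage (an illustrative sketch only: the data loading, array shapes
# and parameter values below are hypothetical and not part of this module):
#
#     import numpy as np
#     from vip_hci.fm import mcmc_negfc_sampling
#
#     # cube: ADI cube (nframes, ny, nx); angs: parallactic angles (nframes,);
#     # psfn: normalized PSF template (see ``vip_hci.metrics.normalize_psf``)
#     init = np.array([27.5, 154.0, 230.0])  # first guess: r [px], PA [deg], flux
#     chain = mcmc_negfc_sampling(cube, angs, psfn, initial_state=init,
#                                 ncomp=5, annulus_width=8, aperture_radius=2,
#                                 fwhm=4.8, nwalkers=100, niteration_limit=500,
#                                 conv_test='ac', nproc=4, verbosity=1)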


def chain_zero_truncated(chain):
    """Return the Markov chain with the dimension: walkers x steps* x\
    parameters, where steps* is the last step before having 0 (not yet
    constructed chain).

    Parameters
    ----------
    chain: numpy.array
        The MCMC chain.

    Returns
    -------
    out: numpy.array
        The truncated MCMC chain, that is to say, the chain which only
        contains relevant information.
    """
    try:
        idxzero = np.where(chain[0, :, 0] == 0.0)[0][0]
    except BaseException:
        idxzero = chain.shape[1]
    return chain[:, 0:idxzero, :]
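

# Example (a sketch with a hypothetical zero-padded chain):
#
#     >>> import numpy as np
#     >>> chain = np.zeros((100, 500, 3))
#     >>> chain[:, :200, :] = 1.0   # pretend only 200 steps were actually run
#     >>> chain_zero_truncated(chain).shape
#     (100, 200, 3)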


def show_walk_plot(chain, save=False, output_dir='', **kwargs):
    """Display or save figure showing the path of each walker during the MCMC\
    run.

    Parameters
    ----------
    chain: numpy.array
        The Markov chain. The shape of chain must be nwalkers x length x dim.
        If a part of the chain is filled with zero values, the method will
        discard these steps.
    save: boolean, default: False
        If True, a pdf file is created.
    output_dir: str, optional
        The name of the output directory which contains the output files in
        the case ``save`` is True.
    kwargs:
        Additional attributes are passed to the matplotlib plot method.

    Returns
    -------
    Display the figure or create a pdf file named walk_plot.pdf in the working
    directory.

    """
    temp = np.where(chain[0, :, 0] == 0.0)[0]
    if len(temp) != 0:
        chain = chain[:, :temp[0], :]

    labels = kwargs.pop('labels', ["$r$", r"$\theta$", "$f$"])
    npar = len(labels)
    y = 2*npar
    if npar > 1:
        fig, axes = plt.subplots(npar, 1, sharex=True,
                                 figsize=kwargs.pop('figsize', (8, y)))
        axes[-1].set_xlabel(kwargs.pop('xlabel', 'step number'))
        axes[-1].set_xlim(kwargs.pop('xlim', [0, chain.shape[1]]))
        color = kwargs.pop('color', 'k')
        alpha = kwargs.pop('alpha', 0.4)
        for j in range(npar):
            axes[j].plot(chain[:, :, j].T, color=color, alpha=alpha, **kwargs)
            axes[j].yaxis.set_major_locator(MaxNLocator(5))
            axes[j].set_ylabel(labels[j])
    else:
        fig = plt.figure(figsize=kwargs.pop('figsize', (8, y)))
        plt.xlabel(kwargs.pop('xlabel', 'step number'))
        plt.xlim(kwargs.pop('xlim', [0, chain.shape[1]]))
        color = kwargs.pop('color', 'k')
        alpha = kwargs.pop('alpha', 0.4)
        plt.plot(chain[:, :, 0].T, color=color, alpha=alpha, **kwargs)
        plt.locator_params(axis='y', tight=True, nbins=5)
        plt.ylabel(labels[0])
    fig.tight_layout(h_pad=0)
    if save:
        plt.savefig(output_dir+'walk_plot.pdf')
        plt.close(fig)


def show_corner_plot(chain, burnin=0.5, save=False, output_dir='', **kwargs):
    """Display or save a figure showing the corner plot (pdfs + correlation\
    plots).

    Parameters
    ----------
    chain: numpy.array
        The Markov chain. The shape of chain must be nwalkers x length x dim.
        If a part of the chain is filled with zero values, the method will
        discard these steps.
    burnin: float, default: 0.5
        The fraction of a walker chain we want to discard.
    save: boolean, default: False
        If True, a pdf file is created.
    output_dir: str, optional
        The name of the output directory which contains the output files in
        the case ``save`` is True.
    kwargs:
        Additional attributes passed to the corner.corner() method.

    Returns
    -------
    None
        (Displays the figure or creates a pdf file named corner_plot.pdf in
        the working directory).

    Raises
    ------
    ImportError

    """
    if chain.shape[0] == 0:
        msg = "It seems the chain is empty. Have you already run the MCMC?"
        raise ValueError(msg)

    labels = kwargs.pop('labels', ["$r$", r"$\theta$", "$f$"])
    try:
        npar = len(labels)
        temp = np.where(chain[0, :, 0] == 0.0)[0]
        if len(temp) != 0:
            chain = chain[:, :temp[0], :]
        length = chain.shape[1]
        indburn = int(np.floor(burnin*(length-1)))
        chain = chain[:, indburn:length, :].reshape((-1, npar))
    except IndexError:
        pass

    fig = corner.corner(chain, labels=labels, **kwargs)

    if save:
        plt.savefig(output_dir+'corner_plot.pdf')
        plt.close(fig)
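

# Example (illustrative sketch; ``chain`` is assumed to be the output of
# ``mcmc_negfc_sampling`` for a 3D input cube):
#
#     show_walk_plot(chain)
#     show_corner_plot(chain, burnin=0.3)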
[docs] def confidence(isamples, cfd=68.27, bins=100, gaussian_fit=False, weights=None, verbose=True, save=False, output_dir='', force=False, output_file='confidence.txt', title=None, ndig=1, plsc=None, labels=['r', 'theta', 'f'], gt=None, **kwargs): r""" Determine the highly probable value for each model parameter, as well as the 1-sigma confidence interval. Parameters ---------- isamples: numpy.array The independent samples for each model parameter. cfd: float, optional The confidence level given in percentage. bins: int, optional The number of bins used to sample the posterior distributions. gaussian_fit: boolean, optional If True, a gaussian fit is performed in order to determine (:math:`\mu, \sigma`). weights : (n, ) numpy ndarray or None, optional An array of weights for each sample. verbose: boolean, optional Display information in the shell. save: boolean, optional If "True", a txt file with the results is saved in the output repository. output_dir: str, optional If save is True, this is the full path to a directory where the results are saved. force: bool, optional If set to True, force the confidence interval estimate even if too many samples fall in a single bin (unreliable CI estimates). If False, an error message is raised if the percentile of samples falling in a single bin is larger than cfd, suggesting to increase number of bins. output_file: str, optional If save is True, name of the text file in which the results are saved. title: bool or str, optional If not None, will print labels and parameter values on top of each plot. If a string, will print that label in front of the parameter values. ndig: int or list of int, optional If title is not None, this sets the number of significant digits to be used when printing the best estimate for each parameter. If int, the same number of digits is used for all parameters. If a list of int, the length should match the number of parameters. plsc: float, optional If save is True, this is used to convert pixels to arcsec when writing results for r. labels: list of strings, optional Labels to be used for both plots and dictionaries. Length should match the number of free parameters. gt: list or 1d numpy array, optional Ground truth values (when known). If provided, will also be plotted for reference. 
Returns ------- out: A 2 elements tuple with either: [gaussian_fit=False] i) the highly probable solutions (dictionary), ii) the respective confidence interval (dict.); [gaussian_fit=True] i) the center of the best-fit 1d Gaussian distributions (tuple of 3 floats), and ii) their standard deviation, for each parameter """ try: l = isamples.shape[1] if l == 1: isamples = isamples[:, 0] except BaseException: l = 1 if not l == len(labels): raise ValueError("Length of labels different to number of parameters") if gt is not None: if len(gt) != l: msg = "If provided, the length of ground truth values should match" msg += " number of parameters" raise TypeError(msg) if np.isscalar(ndig): ndig = [ndig]*l else: if len(ndig) != l: msg = "Length of ndig list different to number of parameters" raise ValueError(msg) pKey = labels label_file = labels confidenceInterval = {} val_max = {} if cfd == 100: cfd = 99.9 ####################################### # Determine the confidence interval # ####################################### if gaussian_fit: mu = np.zeros(l) sigma = np.zeros_like(mu) if gaussian_fit: nrows = 2*max(int(np.ceil(l/4)), 1) fig, ax = plt.subplots(nrows, min(4, l), figsize=(12, 4*nrows)) else: nrows = max(int(np.ceil(l/4)), 1) fig, ax = plt.subplots(nrows, min(4, l), figsize=(12, 4*nrows)) for j in range(l): if nrows > 1: if l > 1: ax0_tmp = ax[j//4][j % 4] if gaussian_fit: ax1_tmp = ax[nrows//2+j//4][j % 4] else: ax0_tmp = ax[j//4] if gaussian_fit: ax1_tmp = ax[nrows//2+j//4] elif l > 1 and not gaussian_fit: ax0_tmp = ax[j] else: ax0_tmp = ax if l > 1: if gaussian_fit: n, bin_vertices, _ = ax0_tmp.hist(isamples[:, j], bins=bins, weights=weights, histtype='step', edgecolor='gray') else: n, bin_vertices, _ = ax0_tmp.hist(isamples[:, j], bins=bins, weights=weights, histtype='step', edgecolor='gray') else: if gaussian_fit: n, bin_vertices, _ = ax0_tmp.hist(isamples[:], bins=bins, weights=weights, histtype='step', edgecolor='gray') else: n, bin_vertices, _ = ax0_tmp.hist(isamples[:], bins=bins, weights=weights, histtype='step', edgecolor='gray') bins_width = np.mean(np.diff(bin_vertices)) surface_total = np.sum(np.ones_like(n)*bins_width * n) n_arg_sort = np.argsort(n)[::-1] test = 0 pourcentage = 0 for k, jj in enumerate(n_arg_sort): test = test + bins_width*n[int(jj)] pourcentage = test/surface_total*100 if pourcentage > cfd: if verbose: msg = 'percentage for {}: {}%' print(msg.format(label_file[j], pourcentage)) break if k == 0: msg = "WARNING: Percentile reached in a single bin. " msg += "This may be due to outliers or a small sample." msg += "Uncertainties will be unreliable. Try one of these:" msg += "increase bins, or trim outliers, or decrease cfd." if force: raise ValueError(msg) else: print(msg) n_arg_min = int(n_arg_sort[:k+1].min()) n_arg_max = int(n_arg_sort[:k+1].max()) if n_arg_min == 0: n_arg_min += 1 if n_arg_max == bins: n_arg_max -= 1 val_max[pKey[j]] = bin_vertices[int(n_arg_sort[0])]+bins_width/2. 
confidenceInterval[pKey[j]] = np.array([bin_vertices[n_arg_min-1], bin_vertices[n_arg_max+1]] - val_max[pKey[j]]) if title is not None: if isinstance(title, str): lab = title else: lab = pKey[j] if l > 1: arg = (isamples[:, j] >= bin_vertices[n_arg_min - 1]) * \ (isamples[:, j] <= bin_vertices[n_arg_max + 1]) if gaussian_fit: ax0_tmp.hist(isamples[arg, j], bins=bin_vertices, facecolor='gray', edgecolor='darkgray', histtype='stepfilled', alpha=0.5) ax0_tmp.set_xlabel(labels[j]) if j == 0: ax0_tmp.set_ylabel('Counts') if title is not None: fmt = "{{:.{0}f}}".format(ndig[j]).format msg = r"${{{0}}}_{{{1}}}^{{+{2}}}$" tit = msg.format(fmt(val_max[pKey[j]]), fmt(confidenceInterval[pKey[j]][0]), fmt(confidenceInterval[pKey[j]][1])) if gt is not None: tit += r" (gt: ${{{0}}}$)".format(fmt(gt[j])) ax0_tmp.set_title("{0}: {1}".format(lab, tit), fontsize=10) if gt is not None: x_close = find_nearest(bin_vertices, gt[j]) ax0_tmp.vlines(gt[j], 0, n[x_close], label='gt', linestyles='dashed', color='blue') label = 'estimate' else: label = None ax0_tmp.vlines(val_max[pKey[j]], 0, n[int(n_arg_sort[0])], linestyles='dashed', color='red', label=label) mu[j], sigma[j] = norm.fit(isamples[:, j]) n_fit, bins_fit = np.histogram(isamples[:, j], bins, density=1, weights=weights) ax1_tmp.hist(isamples[:, j], bins, density=1, weights=weights, facecolor='gray', edgecolor='darkgray', histtype='step') y = norm.pdf(bins_fit, mu[j], sigma[j]) ax1_tmp.plot(bins_fit, y, 'g-', linewidth=2, alpha=0.7) ax1_tmp.set_xlabel(labels[j]) if j == 0: ax1_tmp.set_ylabel('Counts') if title is not None: fmt = "{{:.{0}f}}".format(ndig[j]).format msg = r"$\mu$ = {0}, $\sigma$ = {1}" tit = msg.format(fmt(mu[j]), fmt(sigma[j])) if gt is not None: tit += r" (gt: ${{{0}}}$)".format(fmt(gt[j])) ax1_tmp.set_title("{0}: {1}".format(lab, tit), fontsize=10) if gt is not None: x_close = find_nearest(bins_fit, gt[j]) ax1_tmp.vlines(gt[j], 0, y[x_close], linestyles='dashed', color='blue', label='gt') label = r'estimate ($\mu$)' else: label = None ax1_tmp.vlines(mu[j], 0, np.amax(y), linestyles='dashed', color='green', label=label) if gt is not None: ax0_tmp.legend() ax1_tmp.legend() else: ax0_tmp.hist(isamples[arg, j], bins=bin_vertices, facecolor='gray', edgecolor='darkgray', histtype='stepfilled', alpha=0.5) ax0_tmp.vlines(val_max[pKey[j]], 0, n[int(n_arg_sort[0])], linestyles='dashed', color='red') ax0_tmp.set_xlabel(labels[j]) if j == 0: ax0_tmp.set_ylabel('Counts') if title is not None: fmt = "{{:.{0}f}}".format(ndig[j]).format msg = r"${{{0}}}_{{{1}}}^{{+{2}}}$" tit = msg.format(fmt(val_max[pKey[j]]), fmt(confidenceInterval[pKey[j]][0]), fmt(confidenceInterval[pKey[j]][1])) if gt is not None: tit += r" (gt: ${{{0}}}$)".format(fmt(gt[j])) ax0_tmp.set_title("{0}: {1}".format(lab, tit), fontsize=10) if gt is not None: x_close = find_nearest(bin_vertices, gt[j]) ax0_tmp.vlines(gt[j], 0, n[x_close], label='gt', linestyles='dashed', color='blue') label = 'estimate' else: label = None ax0_tmp.vlines(val_max[pKey[j]], 0, n[int(n_arg_sort[0])], linestyles='dashed', color='red', label=label) if gt is not None: ax0_tmp.legend() else: arg = (isamples[:] >= bin_vertices[n_arg_min - 1]) * \ (isamples[:] <= bin_vertices[n_arg_max + 1]) if gaussian_fit: ax0_tmp.hist(isamples[arg], bins=bin_vertices, facecolor='gray', edgecolor='darkgray', histtype='stepfilled', alpha=0.5) ax0_tmp.set_xlabel(labels[j]) if j == 0: ax0_tmp.set_ylabel('Counts') if gt is not None: x_close = find_nearest(bin_vertices, gt[j]) ax0_tmp.vlines(gt[j], 0, n[x_close], label='gt', 
linestyles='dashed', color='blue') label = 'estimate' else: label = None ax0_tmp.vlines(val_max[pKey[j]], 0, n[int(n_arg_sort[0])], linestyles='dashed', color='red', label=label) if title is not None: fmt = "{{:.{0}f}}".format(ndig[j]).format msg = r"${{{0}}}_{{{1}}}^{{+{2}}}$" tit = msg.format(fmt(val_max[pKey[j]]), fmt(confidenceInterval[pKey[j]][0]), fmt(confidenceInterval[pKey[j]][1])) if gt is not None: tit += r" (gt: ${{{0}}}$)".format(fmt(gt[j])) ax0_tmp.set_title("{0}: {1}".format(lab, tit), fontsize=10) mu[j], sigma[j] = norm.fit(isamples[:]) n_fit, bins_fit = np.histogram(isamples[:], bins, density=1, weights=weights) ax1_tmp.hist(isamples[:], bins, density=1, weights=weights, facecolor='gray', edgecolor='darkgray', histtype='step') y = norm.pdf(bins_fit, mu[j], sigma[j]) ax1_tmp.plot(bins_fit, y, 'g-', linewidth=2, alpha=0.7) ax1_tmp.set_xlabel(labels[j]) if j == 0: ax1_tmp.set_ylabel('Counts') if title is not None: fmt = "{{:.{0}f}}".format(ndig[j]).format msg = r"$\mu$ = {{{0}}}, $\sigma$ = {{{1}}}" tit = msg.format(fmt(mu[j]), fmt(sigma[j])) if gt is not None: tit += r" (gt: ${{{0}}}$)".format(fmt(gt[j])) ax1_tmp.set_title("{0}: {1}".format(lab, tit), fontsize=10) if gt is not None: x_close = find_nearest(bins_fit, gt[j]) ax1_tmp.vlines(gt[j], 0, y[x_close], label='gt', linestyles='dashed', color='blue') label = r'estimate ($\mu$)' else: label = None ax1_tmp.vlines(mu[j], 0, np.amax(y), linestyles='dashed', color='green', label=label) if gt is not None: ax0_tmp.legend() ax1_tmp.legend() else: ax0_tmp.hist(isamples[arg], bins=bin_vertices, facecolor='gray', edgecolor='darkgray', histtype='stepfilled', alpha=0.5) ax0_tmp.set_xlabel(labels[j]) if j == 0: ax0_tmp.set_ylabel('Counts') if title is not None: fmt = "{{:.{0}f}}".format(ndig[j]).format msg = r"${{{0}}}_{{{1}}}^{{+{2}}}$" tit = msg.format(fmt(val_max[pKey[j]]), fmt(confidenceInterval[pKey[j]][0]), fmt(confidenceInterval[pKey[j]][1])) if gt is not None: tit += r" (gt: ${{{0}}}$)".format(fmt(gt[j])) ax0_tmp.set_title("{0}: {1}".format(lab, tit), fontsize=10) if gt is not None: x_close = find_nearest(bin_vertices, gt[j]) ax0_tmp.vlines(gt[j], 0, n[x_close], label='gt', linestyles='dashed', color='blue') label = 'estimate' else: label = None ax0_tmp.vlines(val_max[pKey[j]], 0, n[int(n_arg_sort[0])], linestyles='dashed', color='red', label=label) if gt is not None: ax0_tmp.legend() plt.tight_layout(w_pad=0.1) if save: if gaussian_fit: plt.savefig(output_dir+'confi_hist_gaussfit.pdf') else: plt.savefig(output_dir+'confi_hist.pdf') if verbose: print('\n\nConfidence intervals:') for i, lab in enumerate(labels): print('{}: {} [{},{}]'.format(lab, val_max[lab], confidenceInterval[lab][0], confidenceInterval[lab][1])) if gaussian_fit: print() print('Gaussian fit results:') for i, lab in enumerate(labels): print('{}: {} +-{}'.format(lab, mu[i], sigma[i])) ############################################ # Write inference results in a text file # ############################################ if save: with open(output_dir+output_file, "w") as f: f.write('###########################\n') f.write('#### INFERENCE TEST ###\n') f.write('###########################\n') f.write(' \n') f.write('Results of the MCMC fit\n') f.write('----------------------- \n') f.write(' \n') f.write('>> Position and flux of the planet (highly probable):\n') f.write('{} % confidence interval\n'.format(cfd)) f.write(' \n') for i in range(l): confidenceMax = confidenceInterval[pKey[i]][1] confidenceMin = -confidenceInterval[pKey[i]][0] if i == 2 or l == 1: text = '{}: 
\t\t\t{:.3f} \t-{:.3f} \t+{:.3f}\n' else: text = '{}: \t\t\t{:.3f} \t\t-{:.3f} \t\t+{:.3f}\n' f.write(text.format(pKey[i], val_max[pKey[i]], confidenceMin, confidenceMax)) if l > 1 and plsc is not None and 'r' in labels: f.write(' ') f.write('Platescale = {} mas\n'.format(plsc*1000)) f.write('r (mas): \t\t{:.2f} \t\t-{:.2f} \t\t+{:.2f}\n'.format( val_max[pKey[0]]*plsc*1000, -confidenceInterval[pKey[0]][0]*plsc*1000, confidenceInterval[pKey[0]][1]*plsc*1000)) if gaussian_fit: return mu, sigma else: return val_max, confidenceInterval
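

# Example (illustrative sketch; ``chain`` is assumed to be the output of
# ``mcmc_negfc_sampling``, and the burn-in fraction is a hypothetical choice):
#
#     burnin = 0.3
#     nwalkers, nsteps, npar = chain.shape
#     isamples = chain[:, int(burnin * nsteps):, :].reshape((-1, npar))
#     val_max, conf_int = confidence(isamples, cfd=68.27, bins=100,
#                                    gaussian_fit=False,
#                                    labels=['r', 'theta', 'f'])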