Source code for vip_hci.fm.negfc_nested

#! /usr/bin/env python
"""
Module with functions for posterior sampling of the NEGFC parameters using
nested sampling (``nestle``).

.. [BAR13]
   | K. Barbary 2013
   | **nestle**
   | *GitHub repository*
   | `https://github.com/kbarbary/nestle
     <https://github.com/kbarbary/nestle>`_

.. [FER09]
   | Feroz et al. 2009
   | **MULTINEST: an efficient and robust Bayesian inference tool for cosmology
     and particle physics**
   | *MNRAS, Volume 398, Issue 4, pp. 1601-1614*
   | `https://arxiv.org/abs/0809.3437
     <https://arxiv.org/abs/0809.3437>`_

.. [MUK06]
   | Mukherjee et al. 2006
   | **A Nested Sampling Algorithm for Cosmological Model Selection**
   | *ApJL, Volume 638, Issue 2, pp. L51-L54*
   | `https://arxiv.org/abs/astro-ph/0508461
     <https://arxiv.org/abs/astro-ph/0508461>`_

.. [SKI04]
   | Skilling 2004
   | **Nested Sampling**
   | *Bayesian Inference and Maximum Entropy Methods in Science and
     Engineering: 24th International Workshop, American Institute of Physics
     Conference Series, Volume 735, pp. 395-405*
   | `https://ui.adsabs.harvard.edu/abs/2004AIPC..735..395S
     <https://ui.adsabs.harvard.edu/abs/2004AIPC..735..395S>`_
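
.. [WER17]
   | Wertz et al. 2017
   | **VLT/SPHERE robust astrometry of the HR8799 planets at
     milliarcsecond-level accuracy. Orbital architecture analysis with
     PyAstrOFit**
   | *A&A, Volume 598, p. A83*
   | `https://arxiv.org/abs/1610.04014
     <https://arxiv.org/abs/1610.04014>`_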

"""


__author__ = 'Carlos Alberto Gomez Gonzalez, V. Christiaens'
__all__ = ['nested_negfc_sampling',
           'nested_sampling_results']

import nestle
import corner
import numpy as np
from matplotlib import pyplot as plt
from ..config import time_ini, timing
from .negfc_mcmc import lnlike, confidence, show_walk_plot
from .negfc_fmerit import get_mu_and_sigma
from ..psfsub import pca_annulus


def nested_negfc_sampling(init, cube, angs, psfn, fwhm, mu_sigma=True,
                          sigma='spe+pho', fmerit='sum', annulus_width=8,
                          aperture_radius=1, ncomp=10, scaling=None,
                          svd_mode='lapack', cube_ref=None, collapse='median',
                          algo=pca_annulus, delta_rot=1, algo_options={},
                          weights=None, w=(5, 5, 200), method='single',
                          npoints=100, dlogz=0.1, decline_factor=None,
                          rstate=None, verbose=True):
    """ Runs a nested sampling algorithm with ``nestle`` [BAR13]_ in order to
    determine the position and the flux of the planet using the 'Negative
    Fake Companion' technique. The result of this procedure is a ``nestle``
    object containing the samples from the posterior distributions of each of
    the 3 parameters. It provides good results (value plus error bars)
    compared to a more CPU-intensive Monte Carlo approach with the affine
    invariant sampler (``emcee``).

    Parameters
    ----------
    init: numpy ndarray or tuple of length 3
        The first guess for the position and flux of the planet,
        respectively. It serves for generating the bounds of the log prior
        function (uniform in a bounded interval).
    cube: 3d or 4d numpy ndarray
        Input ADI or ADI+IFS cube.
    angs: numpy.array
        The parallactic angle vector.
    psfn: numpy 2D or 3D array
        Normalised PSF template used for negative fake companion injection.
        The PSF must be centered and the flux in a 1xFWHM aperture must equal
        1 (use ``vip_hci.metrics.normalize_psf``). If the input cube is 3D
        and a 3D array is provided, the first dimension must match for both
        cubes. This can be useful if the star was unsaturated and conditions
        were variable. If the input cube is 4D, psfn must be either 3D or 4D.
        In either case, the first dimension(s) must match those of the input
        cube.
    fwhm : float
        The FWHM in pixels.
    mu_sigma: tuple of 2 floats or bool, opt
        If set to None: not used, and falls back to the original version of
        the algorithm, using ``fmerit`` [WER17]_.
        If a tuple of 2 elements: should be the mean and standard deviation
        of pixel intensities in an annulus centered on the location of the
        companion candidate, excluding the area directly adjacent to the CC.
        If set to anything else but None/False/tuple: the mean and standard
        deviation are computed automatically. These values are then used in
        the log-probability of the sampler.
    sigma: str, opt
        Sets the type of noise to be included as sigma^2 in the
        log-probability expression. Choice between 'pho' for photon (Poisson)
        noise, 'spe' for residual (mostly whitened) speckle noise, or
        'spe+pho' for both.
    fmerit : {'sum', 'stddev'}, string optional
        If mu_sigma is not provided nor set to True, this parameter
        determines which figure of merit to use among the 2 possibilities
        implemented in [WER17]_. 'stddev' may work well for point-like
        sources surrounded by extended signals.
    annulus_width: float, optional
        The width in pixels of the annulus on which the PCA is performed.
    aperture_radius: float, optional
        The radius of the circular aperture in FWHM.
    ncomp: int, optional
        The number of principal components.
    scaling : {None, "temp-mean", "spat-mean", "temp-standard",
        "spat-standard"}, None or str optional
        Pixel-wise scaling mode using ``sklearn.preprocessing.scale``
        function. If set to None, the input matrix is left untouched.
        Otherwise:

        * ``temp-mean``: temporal px-wise mean is subtracted.

        * ``spat-mean``: spatial mean is subtracted.

        * ``temp-standard``: temporal mean centering plus scaling pixel
          values to unit variance (temporally).
        * ``spat-standard``: spatial mean centering plus scaling pixel values
          to unit variance (spatially).

        DISCLAIMER: Using ``temp-mean`` or ``temp-standard`` scaling can
        improve the speckle subtraction for ASDI or (A)RDI reductions.
        Nonetheless, this involves a sort of c-ADI preprocessing, which (i)
        can be dangerous for datasets with a low amount of rotation (strong
        self-subtraction), and (ii) should probably be referred to as ARDI
        (i.e. not RDI stricto sensu).
    svd_mode : {'lapack', 'randsvd', 'eigen', 'arpack'}, str optional
        Switch for different ways of computing the SVD and selected PCs.
    cube_ref: numpy ndarray, 3d, optional
        Reference library cube. For Reference Star Differential Imaging.
    collapse : {'median', 'mean', 'sum', 'trimmean', None}, str or None, opt
        Sets the way of collapsing the frames for producing a final image. If
        None then the cube of residuals is used when measuring the function
        of merit (instead of a single final frame).
    algo: python routine, optional
        Post-processing algorithm used to model and subtract the stellar PSF.
        By default ``vip_hci.psfsub.pca_annulus``.
    delta_rot: float, optional
        Factor for adjusting the parallactic angle threshold, expressed in
        FWHM. Default is 1.
    algo_options: dict, opt
        Dictionary with additional parameters for the algorithm (e.g. tol,
        min_frames_lib, max_frames_lib).
    weights : 1d array, optional
        If provided, the negative fake companion fluxes will be scaled
        according to these weights before injection in the cube. Can reflect
        changes in the observing conditions throughout the sequence.
    w : tuple of length 3
        The size of the bounds (around the initial state ``init``) for each
        parameter.
    method : {"single", "multi", "classic"}, str optional
        Flavor of nested sampling. The single-ellipsoid method works well for
        the NEGFC and is the default.
    npoints : int, optional
        Number of active points. At least ndim+1 (4 will produce bad
        results). For problems with just a few parameters (<=5) like the
        NEGFC, good results are obtained with 100 points (default).
    dlogz : float, optional
        Estimated remaining evidence. Iterations will stop when the estimated
        contribution of the remaining prior volume to the total evidence
        falls below this threshold. Explicitly, the stopping criterion is
        log(z + z_est) - log(z) < dlogz, where z is the current evidence from
        all saved samples, and z_est is the estimated contribution from the
        remaining volume. This option and ``decline_factor`` are mutually
        exclusive. If neither is specified, ``nestle`` defaults to
        dlogz=0.5.
    decline_factor : float, optional
        If supplied, iteration will stop when the weight (likelihood times
        prior volume) of newly saved samples has been declining for
        decline_factor * nsamples consecutive samples. A value of 1.0 seems
        to work pretty well.
    rstate : random instance, optional
        RandomState instance. If not given, the global random state of the
        numpy.random module will be used.

    Returns
    -------
    res : nestle object
        ``Nestle`` object with the nested sampling results, including the
        posterior samples.

    Note
    ----
    Nested Sampling is a computational approach for integrating posterior
    probability in order to compare models in Bayesian statistics. It is
    similar to Markov Chain Monte Carlo (MCMC) in that it generates samples
    that can be used to estimate the posterior probability distribution.
    Unlike MCMC, the nature of the sampling also allows one to calculate the
    integral of the distribution. It also happens to be a pretty good method
    for robustly finding global maxima.

    Nestle documentation:
    http://kbarbary.github.io/nestle/

    **Convergence**:
    http://kbarbary.github.io/nestle/stopping.html
    Nested sampling has no well-defined stopping point. As iterations
    continue, the active points sample a smaller and smaller region of prior
    space. This can continue indefinitely.
    Unlike typical MCMC methods, we don't gain any additional precision on
    the results by letting the algorithm run longer; the precision is
    determined at the outset by the number of active points. So, we want to
    stop iterations as soon as we think the active points are doing a pretty
    good job sampling the remaining prior volume - once we've converged to
    the highest-likelihood regions such that the likelihood is relatively
    flat within the remaining prior volume.

    **Method**:
    The trick in nested sampling is to, at each step in the algorithm,
    efficiently choose a new point in parameter space drawn with uniform
    probability from the parameter space with likelihood greater than the
    current likelihood constraint. The different methods all use the current
    set of active points as an indicator of where the target parameter space
    lies, but differ in how they select new points from it:

    - "classic" is close to the method described in [SKI04]_.
    - "single" [MUK06]_ determines a single ellipsoid that bounds all active
      points, enlarges the ellipsoid by a user-settable factor, and selects a
      new point at random from within the ellipsoid.
    - "multi" [FER09]_ (MultiNest). In cases where the posterior is
      multi-modal, the single-ellipsoid method can be extremely inefficient.
      In such situations, there are clusters of active points on separate
      high-likelihood regions separated by regions of lower likelihood.
      Bounding all points in a single ellipsoid means that the ellipsoid
      includes the lower-likelihood regions we wish to avoid sampling from.
      The solution is to detect these clusters and bound them in separate
      ellipsoids. For this, we use a recursive process where we perform
      K-means clustering with K=2. If the resulting two ellipsoids have a
      significantly lower total volume than the parent ellipsoid (less than
      half), we accept the split and repeat the clustering and volume test on
      each of the two subsets of points. This process continues recursively.
      Alternatively, if the total ellipsoid volume is significantly greater
      than expected (based on the expected density of points), this indicates
      that there may be more than two clusters and that K=2 was not an
      appropriate cluster division. We therefore still try to subdivide the
      clusters recursively. However, we only accept the final split into N
      clusters if the total volume decrease is significant.

    """
    # Measure mu and sigma once in the annulus (instead of at each sampling
    # step)
    mu_sig = get_mu_and_sigma(cube, angs, ncomp, annulus_width,
                              aperture_radius, fwhm, init[0], init[1],
                              cube_ref=cube_ref, svd_mode=svd_mode,
                              scaling=scaling, algo=algo, delta_rot=delta_rot,
                              collapse=collapse, algo_options=algo_options)

    if isinstance(mu_sigma, tuple):
        if len(mu_sigma) != 2:
            raise TypeError("if a tuple, mu_sigma should have 2 elements")
    elif mu_sigma:
        mu_sigma = mu_sig
        if verbose:
            msg = "The mean and stddev in the annulus at the radius of the "
            msg += "companion (excluding the PA area directly adjacent to it)"
            msg += " are {:.2f} and {:.2f} respectively."
            print(msg.format(mu_sigma[0], mu_sigma[1]))
    else:
        mu_sigma = mu_sig[0]  # just take the mean

    def prior_transform(x):
        """
        Compute the transformation from the unit distribution [0, 1] to the
        parameter space.
        The default prior bounds are:
            radius: (r - w[0], r + w[0])
            theta: (theta - w[1], theta + w[1])
            flux: (f - w[2], f + w[2])

        The default distributions used are:
            radius: uniform distribution transformed into polar coordinates.
                This assumes a uniform distribution for the (x, y)
                coordinates transformed to polar coordinates.
            theta: uniform distribution. This distribution is derived in the
                same way as the radial distribution, but there is no change
                to the prior for theta after the change-of-variables
                transformation.
            flux: Poisson-invariant scale distribution. This distribution is
                the Jeffreys prior for Poisson data.

        Note
        ----
        The prior transform function is used to specify the Bayesian prior
        for the problem, in a round-about way. It is a transformation from a
        space where variables are independently and uniformly distributed
        between 0 and 1 to the parameter space of interest. For independent
        parameters, this would be the product of the inverse cumulative
        distribution function (also known as the percent point function or
        quantile function) for each parameter.
        http://kbarbary.github.io/nestle/prior.html
        """
        rmin = init[0] - w[0]
        rmax = init[0] + w[0]
        r = np.sqrt((rmax**2 - rmin**2) * x[0] + rmin**2)

        tmin = init[1] - w[1]
        tmax = init[1] + w[1]
        t = x[1] * (tmax - tmin) + tmin

        fmin = init[2] - w[2]
        fmax = init[2] + w[2]
        f = (x[2] * (np.sqrt(fmax) - np.sqrt(fmin)) + np.sqrt(fmin))**2

        return np.array([r, t, f])

    def f(param):
        return lnlike(param=param, cube=cube, angs=angs, psf_norm=psfn,
                      fwhm=fwhm, annulus_width=annulus_width,
                      aperture_radius=aperture_radius, initial_state=init,
                      cube_ref=cube_ref, svd_mode=svd_mode, scaling=scaling,
                      algo=algo, delta_rot=delta_rot, fmerit=fmerit,
                      mu_sigma=mu_sigma, sigma=sigma, ncomp=ncomp,
                      collapse=collapse, algo_options=algo_options,
                      weights=weights)

    # -------------------------------------------------------------------
    if verbose:
        start = time_ini()

    if verbose:
        print('Prior bounds on parameters:')
        print('Radius [{},{}]'.format(init[0] - w[0], init[0] + w[0]))
        print('Theta [{},{}]'.format(init[1] - w[1], init[1] + w[1]))
        print('Flux [{},{}]'.format(init[2] - w[2], init[2] + w[2]))
        print('\nUsing {} active points'.format(npoints))

    res = nestle.sample(f, prior_transform, ndim=3, method=method,
                        npoints=npoints, rstate=rstate, dlogz=dlogz,
                        decline_factor=decline_factor)

    if verbose:
        print('\nTotal running time:')
        timing(start)
    return res
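
# A quick worked example of what ``prior_transform`` above does (the values
# are hypothetical, chosen for illustration only): with init = (30, 120, 400)
# and w = (5, 5, 200), the corners of the unit cube map exactly onto the
# prior bounds,
#     x = (0, 0, 0)  ->  (r, theta, f) = (25, 115, 200)
#     x = (1, 1, 1)  ->  (r, theta, f) = (35, 125, 600)
# while the interior is non-uniform for r and f: x[0] = 0.5 gives
# r = sqrt((35**2 - 25**2) / 2 + 25**2) ~ 30.4 (above the midpoint 30), as
# expected for a prior uniform in the (x, y) plane, and x[2] = 0.5 gives
# f = ((sqrt(600) + sqrt(200)) / 2)**2 ~ 373 (below the midpoint 400), as
# expected for the sqrt-based (Jeffreys-like) flux prior.
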
def nested_sampling_results(ns_object, burnin=0.4, bins=None, cfd=68.27,
                            save=False, output_dir='/', plot=False):
    """ Shows the results of the Nested Sampling: summary, parameters with
    errors, walk and corner plots.

    Parameters
    ----------
    ns_object: nestle object
        The nestle object returned from ``nested_negfc_sampling``.
    burnin: float, default: 0.4
        The fraction of the samples we want to discard as burn-in.
    bins: int, optional
        The number of bins used to sample the posterior distributions.
    cfd: float, optional
        The confidence level given in percentage.
    save: boolean, default: False
        If True, a pdf file is created.
    output_dir: str, optional
        The name of the output directory which contains the output files in
        the case ``save`` is True.
    plot: bool, optional
        Whether to show the plots (instead of saving them).

    Returns
    -------
    final_res: numpy ndarray
        Best-fit parameters and uncertainties (corresponding to the 68%
        confidence interval). Dimensions: nparams x 2.

    """
    res = ns_object
    nsamples = res.samples.shape[0]
    indburnin = int(np.percentile(np.array(range(nsamples)), burnin * 100))

    print(res.summary())

    print('\nNatural log of prior volume and weight corresponding to each '
          'sample')
    if save or plot:
        plt.figure(figsize=(12, 4))

        plt.subplot(1, 2, 1)
        plt.plot(res.logvol, '.', alpha=0.5, color='gray')
        plt.xlabel('samples')
        plt.ylabel('logvol')
        plt.vlines(indburnin, res.logvol.min(), res.logvol.max(),
                   linestyles='dotted')

        plt.subplot(1, 2, 2)
        plt.plot(res.weights, '.', alpha=0.5, color='gray')
        plt.xlabel('samples')
        plt.ylabel('weights')
        plt.vlines(indburnin, res.weights.min(), res.weights.max(),
                   linestyles='dotted')

        if save:
            plt.savefig(output_dir + 'Nested_results.pdf')
        if plot:
            plt.show()

        print("\nWalk plots before the burnin")
        show_walk_plot(np.expand_dims(res.samples, axis=0))
        if burnin > 0:
            print("\nWalk plots after the burnin")
            show_walk_plot(np.expand_dims(res.samples[indburnin:], axis=0))
        if save:
            plt.savefig(output_dir + 'Nested_walk_plots.pdf')
        if plot:
            plt.show()

    mean, cov = nestle.mean_and_cov(res.samples[indburnin:],
                                    res.weights[indburnin:])
    print("\nWeighted mean +- sqrt(covariance)")
    print("Radius = {:.3f} +/- {:.3f}".format(mean[0], np.sqrt(cov[0, 0])))
    print("Theta = {:.3f} +/- {:.3f}".format(mean[1], np.sqrt(cov[1, 1])))
    print("Flux = {:.3f} +/- {:.3f}".format(mean[2], np.sqrt(cov[2, 2])))

    if save:
        with open(output_dir + 'Nested_sampling.txt', "w") as f:
            f.write('#################################\n')
            f.write('####   CONFIDENCE INTERVALS   ###\n')
            f.write('#################################\n')
            f.write(' \n')
            f.write('Results of the NESTED SAMPLING fit\n')
            f.write('----------------------------------\n ')
            f.write(' \n')
            f.write("\nWeighted mean +- sqrt(covariance)\n")
            f.write("Radius = {:.3f} +/- {:.3f}\n".format(mean[0],
                                                          np.sqrt(cov[0, 0])))
            f.write("Theta = {:.3f} +/- {:.3f}\n".format(mean[1],
                                                         np.sqrt(cov[1, 1])))
            f.write("Flux = {:.3f} +/- {:.3f}\n".format(mean[2],
                                                        np.sqrt(cov[2, 2])))

    if bins is None:
        bins = int(np.sqrt(res.samples[indburnin:].shape[0]))
        print("\nHist bins =", bins)

    if save or plot:
        ranges = None
        fig = corner.corner(res.samples[indburnin:], bins=bins,
                            labels=["$r$", r"$\theta$", "$f$"],
                            weights=res.weights[indburnin:], range=ranges,
                            plot_contours=True)
        fig.set_size_inches(8, 8)
    if save:
        plt.savefig(output_dir + 'Nested_corner.pdf')

    print('\nConfidence intervals')
    if save or plot:
        _ = confidence(res.samples[indburnin:], cfd=cfd, bins=bins,
                       weights=res.weights[indburnin:], gaussian_fit=True,
                       verbose=True, save=False)
    if save:
        plt.savefig(output_dir + 'Nested_confi_hist_flux_r_theta_gaussfit.pdf')
    final_res = np.array([[mean[0], np.sqrt(cov[0, 0])],
                          [mean[1], np.sqrt(cov[1, 1])],
                          [mean[2], np.sqrt(cov[2, 2])]])
    return final_res
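
if __name__ == '__main__':
    # Minimal usage sketch. The file names, FWHM value and initial guess
    # below are hypothetical placeholders: any calibrated ADI cube,
    # parallactic angle vector and normalized PSF (see
    # ``vip_hci.metrics.normalize_psf``) can be substituted.
    cube = np.load('cube.npy')   # (n_frames, ny, nx) ADI cube (assumed)
    angs = np.load('angs.npy')   # parallactic angles in degrees (assumed)
    psfn = np.load('psfn.npy')   # normalized, centered PSF (assumed)

    # First guess (r [px], theta [deg], flux) for the companion candidate
    init = np.array([30.0, 120.0, 400.0])

    res = nested_negfc_sampling(init, cube, angs, psfn, fwhm=4.0,
                                annulus_width=8, aperture_radius=1, ncomp=10,
                                w=(5, 5, 200), method='single', npoints=100,
                                dlogz=0.1, verbose=True)

    # Weighted mean +/- sqrt(diagonal of covariance) after a 40% burn-in,
    # with walk and corner plots
    final_res = nested_sampling_results(res, burnin=0.4, plot=True)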