Source code for Corrfunc.theory.vpf

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Python wrapper around the C extension for the counts-in-cells
for 3-D real space. Corresponding C codes are in ``theory/vpf``
while the python wrapper is in :py:mod:`Corrfunc.theory.vpf`.
"""

from __future__ import (division, print_function, absolute_import,
                        unicode_literals)

__author__ = ('Manodeep Sinha')
__all__ = ('vpf', )



[docs]
def vpf(rmax, nbins, nspheres, numpN, seed,
        X, Y, Z,
        verbose=False, periodic=True, boxsize=None,
        xbin_refine_factor=1, ybin_refine_factor=1,
        zbin_refine_factor=1, max_cells_per_dim=100,
        copy_particles=True, c_api_timer=False, isa=r'fastest'):
    """
    Function to compute the counts-in-cells on 3-D real-space points.

    Returns a numpy structured array containing the probability of a
    sphere of radius up to ``rmax`` containing [0, numpN-1] galaxies.

    Parameters
    -----------

    rmax: double
        Maximum radius of the sphere to place on the particles

    nbins: integer
        Number of bins in the counts-in-cells. Radius of first shell
        is rmax/nbins

    nspheres: integer (>= 0)
        Number of random spheres to place within the particle distribution.
        For a small number of spheres, the error is larger in the measured
        pN's.

    numpN: integer (>= 1)
        Governs how many unique pN's are to returned. If ``numpN`` is set to 1,
        then only the vpf (p0) is returned. For ``numpN=2``, p0 and p1 are
        returned.

        More explicitly, the columns in the results look like the following:

         ======   ==========================
         numpN    Columns in output
         ======   ==========================
            1      p0
            2      p0      p1
            3      p0      p1     p2
            4      p0      p1     p2     p3
         ======   ==========================

        and so on...

        Note: ``p0`` is the vpf

    seed: unsigned integer
        Random number seed for the underlying GSL random number generator. Used
        to draw centers of the spheres.

    X/Y/Z: arraytype, real (float/double)
        Particle positions in the 3 axes. Must be within [0, boxsize]
        and specified in the same units as ``rp_bins`` and boxsize. All
        3 arrays must be of the same floating-point type.

        Calculations will be done in the same precision as these arrays,
        i.e., calculations will be in floating point if XYZ are single
        precision arrays (C float type); or in double-precision if XYZ
        are double precision arrays (C double type).

    verbose: boolean (default false)
        Boolean flag to control output of informational messages

    periodic: boolean
        Boolean flag to indicate periodic boundary conditions.

    boxsize: double or 3-tuple of double, required if ``periodic=True``
        The (X,Y,Z) side lengths of the spatial domain. Present to facilitate
        exact calculations for periodic wrapping. A scalar ``boxsize`` will
        be broadcast to a 3-tuple. If the boxsize in a dimension is 0., then
        then that dimension's wrap is done based on the extent of the particle
        distribution. If the boxsize in a dimension is -1., then periodicity
        is disabled for that dimension.

        .. versionchanged:: 2.4.0
           Required if ``periodic=True``.

        .. versionchanged:: 2.5.0
           Accepts a 3-tuple of side lengths.

    (xyz)bin_refine_factor: integer, default is (1,1,1); typically within [1-3]
        Controls the refinement on the cell sizes. Can have up to a 20% impact
        on runtime.

        Note: Since the counts in spheres calculation is symmetric
        in all 3 dimensions, the defaults are different from the clustering
        routines.

    max_cells_per_dim: integer, default is 100, typical values in [50-300]
        Controls the maximum number of cells per dimension. Total number of
        cells can be up to (max_cells_per_dim)^3. Only increase if ``rmax`` is
        too small relative to the boxsize (and increasing helps the runtime).

    copy_particles: boolean (default True)
        Boolean flag to make a copy of the particle positions
        If set to False, the particles will be re-ordered in-place

        .. versionadded:: 2.3.0

    c_api_timer: boolean (default false)
        Boolean flag to measure actual time spent in the C libraries. Here
        to allow for benchmarking and scaling studies.

    isa: string (default ``fastest``)
        Controls the runtime dispatch for the instruction set to use. Options
        are: [``fastest``, ``avx512f``, ``avx``, ``sse42``, ``fallback``]

        Setting isa to ``fastest`` will pick the fastest available instruction
        set on the current computer. However, if you set ``isa`` to, say,
        ``avx`` and ``avx`` is not available on the computer, then the code
        will revert to using ``fallback`` (even though ``sse42`` might be
        available).  Unless you are benchmarking the different instruction
        sets, you should always leave ``isa`` to the default value. And if
        you *are* benchmarking, then the string supplied here gets translated
        into an ``enum`` for the instruction set defined in ``utils/defs.h``.

    Returns
    --------

    results: Numpy structured array

        A numpy structured array containing [rmax, pN[numpN]] with ``nbins``
        elements. Each row contains the maximum radius of the sphere and the
        ``numpN`` elements in the ``pN`` array. Each element of this array
        contains the probability that a sphere of radius ``rmax`` contains
        *exactly* ``N`` galaxies. For example, pN[0] (p0, the void probibility
        function) is the probability that a sphere of radius ``rmax`` contains
        zero galaxies.

        if ``c_api_timer`` is set, then the return value is a tuple containing
        (results, api_time). ``api_time`` measures only the time spent within
        the C library and ignores all python overhead.

    Example
    --------

    >>> from __future__ import print_function
    >>> import numpy as np
    >>> from Corrfunc.theory.vpf import vpf
    >>> rmax = 10.0
    >>> nbins = 10
    >>> nspheres = 10000
    >>> numpN = 5
    >>> seed = -1
    >>> N = 100000
    >>> boxsize = 420.0
    >>> seed = 42
    >>> np.random.seed(seed)
    >>> X = np.random.uniform(0, boxsize, N)
    >>> Y = np.random.uniform(0, boxsize, N)
    >>> Z = np.random.uniform(0, boxsize, N)
    >>> results = vpf(rmax, nbins, nspheres, numpN, seed, X, Y, Z,
    ...               boxsize=boxsize, periodic=True)
    >>> for r in results:
    ...     print("{0:10.1f} ".format(r[0]), end="")
    ...     # doctest: +NORMALIZE_WHITESPACE
    ...     for pn in r[1]:
    ...         print("{0:10.3f} ".format(pn), end="")
    ...         # doctest: +NORMALIZE_WHITESPACE
    ...     print("") # doctest: +NORMALIZE_WHITESPACE
           1.0      0.995      0.005      0.000      0.000      0.000
           2.0      0.955      0.044      0.001      0.000      0.000
           3.0      0.858      0.129      0.012      0.001      0.000
           4.0      0.696      0.252      0.047      0.005      0.001
           5.0      0.493      0.347      0.127      0.028      0.005
           6.0      0.295      0.363      0.219      0.091      0.026
           7.0      0.141      0.285      0.265      0.178      0.085
           8.0      0.056      0.159      0.227      0.229      0.161
           9.0      0.019      0.066      0.135      0.191      0.193
          10.0      0.003      0.019      0.054      0.105      0.150

    """

    try:
        from Corrfunc._countpairs import countspheres_vpf as vpf_extn
    except ImportError:
        msg = "Could not import the C extension for the Counts-in-Cells "\
              " (vpf)"
        raise ImportError(msg)

    import numpy as np
    from future.utils import bytes_to_native_str
    from Corrfunc.utils import translate_isa_string_to_enum,\
        convert_to_native_endian, sys_pipes
    from math import pi

    if numpN <= 0:
        msg = "Number of counts-in-cells wanted must be at least 1"
        raise ValueError(msg)

    if periodic and boxsize is None:
        raise ValueError("Must specify a boxsize if periodic=True")

    boxsize = np.atleast_1d(boxsize)
    if len(boxsize) == 1:
        boxsize = (boxsize[0], boxsize[0], boxsize[0])
    boxsize = tuple(boxsize)

    kwargs = {}
    if boxsize is not None:
        kwargs['boxsize'] = boxsize

    # Ensure all input arrays are native endian
    X, Y, Z = [convert_to_native_endian(arr, warn=True)
               for arr in [X, Y, Z]]

    integer_isa = translate_isa_string_to_enum(isa)
    with sys_pipes():
      extn_results = vpf_extn(rmax, nbins,
                              nspheres,
                              numpN,
                              seed,
                              X, Y, Z,
                              verbose=verbose,
                              periodic=periodic,
                              xbin_refine_factor=xbin_refine_factor,
                              ybin_refine_factor=ybin_refine_factor,
                              zbin_refine_factor=zbin_refine_factor,
                              max_cells_per_dim=max_cells_per_dim,
                              copy_particles=copy_particles,
                              c_api_timer=c_api_timer,
                              isa=integer_isa,
                              **kwargs)
    if extn_results is None:
        msg = "RuntimeError occurred"
        raise RuntimeError(msg)
    else:
        extn_results, api_time = extn_results

    results_dtype = np.dtype([(bytes_to_native_str(b'rmax'), np.float64),
                              (bytes_to_native_str(b'pN'),
                               (np.float64, numpN))])
    nbin = len(extn_results)
    results = np.zeros(nbin, dtype=results_dtype)

    for ii, r in enumerate(extn_results):
        results['rmax'][ii] = r[0]
        if numpN == 1:
            results['pN'] = r[1]
        else:
            for j in range(numpN):
                results['pN'][ii][j] = r[1 + j]

    if not c_api_timer:
        return results
    else:
        return results, api_time



if __name__ == '__main__':
    import doctest
    doctest.testmod()