tmp
/
pip-install-ghxuqwgs
/numpy_78e94bf2b6094bf9a1f3d92042f9bf46
/numpy
/random
/mtrand
/mtrand.pyx
# mtrand.pyx -- A Pyrex wrapper of Jean-Sebastien Roy's RandomKit | |
# | |
# Copyright 2005 Robert Kern ([email protected]) | |
# | |
# Permission is hereby granted, free of charge, to any person obtaining a | |
# copy of this software and associated documentation files (the | |
# "Software"), to deal in the Software without restriction, including | |
# without limitation the rights to use, copy, modify, merge, publish, | |
# distribute, sublicense, and/or sell copies of the Software, and to | |
# permit persons to whom the Software is furnished to do so, subject to | |
# the following conditions: | |
# | |
# The above copyright notice and this permission notice shall be included | |
# in all copies or substantial portions of the Software. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
include "Python.pxi" | |
include "numpy.pxd" | |
cdef extern from "math.h":
    # Standard C math routines made available to the Cython code in this
    # module (listed alphabetically).
    double cos(double x)
    double exp(double x)
    double floor(double x)
    double log(double x)
    double sin(double x)
cdef extern from "mtrand_py_helper.h":
    # NOTE(review): judging by its use in RandomState.bytes, this returns a
    # new bytes object of `length` bytes and stores a pointer to its writable
    # buffer in `*buffer` -- confirm against the header.
    object empty_py_bytes(npy_intp length, void **buffer)
cdef extern from "randomkit.h":

    # Generator state: 624 key words, a position into them, and a cached
    # Gaussian value flagged by `has_gauss` (see get_state / set_state).
    ctypedef struct rk_state:
        unsigned long key[624]
        int pos
        int has_gauss
        double gauss

    # Error codes returned by the seeding / device-fill routines.
    ctypedef enum rk_error:
        RK_NOERR = 0
        RK_ENODEV = 1
        RK_ERR_MAX = 2

    char *rk_strerror[2]

    # 0xFFFFFFFFUL
    unsigned long RK_MAX

    # Routines declared without `nogil`: call them with the GIL held.
    void rk_seed(unsigned long seed, rk_state *state)
    rk_error rk_randomseed(rk_state *state)
    unsigned long rk_random(rk_state *state)
    rk_error rk_devfill(void *buffer, size_t size, int strong)

    # Routines that may be called with the GIL released.
    long rk_long(rk_state *state) nogil
    unsigned long rk_ulong(rk_state *state) nogil
    unsigned long rk_interval(unsigned long max, rk_state *state) nogil
    double rk_double(rk_state *state) nogil
    void rk_fill(void *buffer, size_t size, rk_state *state) nogil
    rk_error rk_altfill(void *buffer, size_t size, int strong,
                        rk_state *state) nogil
    double rk_gauss(rk_state *state) nogil
cdef extern from "distributions.h":
    # These samplers do not need the GIL, but they DO need a lock on the
    # rk_state they are given.

    # --- Continuous distributions (return double) ---
    double rk_normal(rk_state *state, double loc, double scale) nogil
    double rk_standard_exponential(rk_state *state) nogil
    double rk_exponential(rk_state *state, double scale) nogil
    double rk_uniform(rk_state *state, double loc, double scale) nogil
    double rk_standard_gamma(rk_state *state, double shape) nogil
    double rk_gamma(rk_state *state, double shape, double scale) nogil
    double rk_beta(rk_state *state, double a, double b) nogil
    double rk_chisquare(rk_state *state, double df) nogil
    double rk_noncentral_chisquare(rk_state *state, double df,
                                   double nonc) nogil
    double rk_f(rk_state *state, double dfnum, double dfden) nogil
    double rk_noncentral_f(rk_state *state, double dfnum, double dfden,
                           double nonc) nogil
    double rk_standard_cauchy(rk_state *state) nogil
    double rk_standard_t(rk_state *state, double df) nogil
    double rk_vonmises(rk_state *state, double mu, double kappa) nogil
    double rk_pareto(rk_state *state, double a) nogil
    double rk_weibull(rk_state *state, double a) nogil
    double rk_power(rk_state *state, double a) nogil
    double rk_laplace(rk_state *state, double loc, double scale) nogil
    double rk_gumbel(rk_state *state, double loc, double scale) nogil
    double rk_logistic(rk_state *state, double loc, double scale) nogil
    double rk_lognormal(rk_state *state, double mode, double sigma) nogil
    double rk_rayleigh(rk_state *state, double mode) nogil
    double rk_wald(rk_state *state, double mean, double scale) nogil
    double rk_triangular(rk_state *state, double left, double mode,
                         double right) nogil

    # --- Discrete distributions (return long) ---
    long rk_binomial(rk_state *state, long n, double p) nogil
    long rk_binomial_btpe(rk_state *state, long n, double p) nogil
    long rk_binomial_inversion(rk_state *state, long n, double p) nogil
    long rk_negative_binomial(rk_state *state, double n, double p) nogil
    long rk_poisson(rk_state *state, double lam) nogil
    long rk_poisson_mult(rk_state *state, double lam) nogil
    long rk_poisson_ptrs(rk_state *state, double lam) nogil
    long rk_zipf(rk_state *state, double a) nogil
    long rk_geometric(rk_state *state, double p) nogil
    long rk_hypergeometric(rk_state *state, long good, long bad,
                           long sample) nogil
    long rk_logseries(rk_state *state, double p) nogil
# Function-pointer types for the samplers, named after their parameter
# lists.  The array helpers below take one of these as `func`.

# Samplers returning a double, with 0 to 3 double parameters.
ctypedef double (*rk_cont0)(rk_state *state) nogil
ctypedef double (*rk_cont1)(rk_state *state, double a) nogil
ctypedef double (*rk_cont2)(rk_state *state, double a, double b) nogil
ctypedef double (*rk_cont3)(rk_state *state, double a, double b,
                            double c) nogil

# Samplers returning a long.
ctypedef long (*rk_disc0)(rk_state *state) nogil
ctypedef long (*rk_discd)(rk_state *state, double a) nogil
ctypedef long (*rk_discnp)(rk_state *state, long n, double p) nogil
ctypedef long (*rk_discdd)(rk_state *state, double n, double p) nogil
ctypedef long (*rk_discnmN)(rk_state *state, long n, long m, long N) nogil
cdef extern from "initarray.h":
    # Seeds `state` from an array of `key_length` unsigned longs
    # (used by RandomState.seed for array-like seeds).
    void init_by_array(rk_state *state, unsigned long *init_key,
                       npy_intp key_length)
# Initialize numpy | |
import_array() | |
import numpy as np | |
import operator | |
import warnings | |
from threading import Lock | |
cdef object cont0_array(rk_state *state, rk_cont0 func, object size,
                        object lock):
    # Draw samples from a parameterless continuous sampler.
    #
    # state : generator state passed to every `func` call.
    # func  : sampler returning one double per call.
    # size  : None for a single Python float, otherwise anything accepted
    #         by ``np.empty`` as a shape.
    # lock  : context manager guarding `state`.
    #
    # Returns a float (size is None) or a float64 ndarray of shape `size`.
    cdef double *array_data
    cdef ndarray array "arrayObject"
    cdef npy_intp length
    cdef npy_intp i
    cdef double rv

    if size is None:
        # The samplers need a lock on the state; take it for a single draw
        # too, exactly as the array path below does.
        with lock, nogil:
            rv = func(state)
        return rv
    else:
        array = <ndarray>np.empty(size, np.float64)
        length = PyArray_SIZE(array)
        array_data = <double *>PyArray_DATA(array)
        with lock, nogil:
            for i from 0 <= i < length:
                array_data[i] = func(state)
        return array
cdef object cont1_array_sc(rk_state *state, rk_cont1 func, object size, double a,
                           object lock):
    # Draw samples from a one-parameter continuous sampler with a scalar
    # parameter `a`.
    #
    # Returns a Python float when `size` is None, otherwise a float64
    # ndarray of shape `size` filled with ``func(state, a)`` draws.
    cdef double *array_data
    cdef ndarray array "arrayObject"
    cdef npy_intp length
    cdef npy_intp i
    cdef double rv

    if size is None:
        # The samplers need a lock on the state; take it for a single draw
        # too, exactly as the array path below does.
        with lock, nogil:
            rv = func(state, a)
        return rv
    else:
        array = <ndarray>np.empty(size, np.float64)
        length = PyArray_SIZE(array)
        array_data = <double *>PyArray_DATA(array)
        with lock, nogil:
            for i from 0 <= i < length:
                array_data[i] = func(state, a)
        return array
cdef object cont1_array(rk_state *state, rk_cont1 func, object size,
                        ndarray oa, object lock):
    # Draw samples from a one-parameter continuous sampler whose parameter
    # comes element-wise from the array `oa`.
    #
    # With size=None the result has the shape of `oa`; otherwise it has
    # shape `size` and `oa` is broadcast against it.
    # NOTE(review): the data of `oa` is read through a `double *`, so
    # callers presumably pass a float64 array -- confirm.
    #
    # Raises ValueError when broadcasting `oa` against the output would
    # change the number of elements.
    cdef ndarray out "arrayObject"
    cdef double *out_data
    cdef double *a_ptr
    cdef npy_intp n
    cdef npy_intp k
    cdef flatiter a_iter
    cdef broadcast bcast

    if size is not None:
        out = <ndarray>np.empty(size, np.float64)
        out_data = <double *>PyArray_DATA(out)
        # The output is operand 0 of the broadcast, the parameter operand 1.
        bcast = <broadcast>PyArray_MultiIterNew(2, <void *>out,
                                                <void *>oa)
        if (bcast.size != PyArray_SIZE(out)):
            raise ValueError("size is not compatible with inputs")
        with lock, nogil:
            for k from 0 <= k < bcast.size:
                a_ptr = <double *>PyArray_MultiIter_DATA(bcast, 1)
                out_data[k] = func(state, a_ptr[0])
                # Only the parameter iterator is advanced; the output is
                # written through the flat index k.
                PyArray_MultiIter_NEXTi(bcast, 1)
        return out

    # No explicit size: mirror the dimensions of `oa` and walk it with a
    # flat iterator.
    out = <ndarray>PyArray_SimpleNew(PyArray_NDIM(oa),
                                     PyArray_DIMS(oa), NPY_DOUBLE)
    n = PyArray_SIZE(out)
    out_data = <double *>PyArray_DATA(out)
    a_iter = <flatiter>PyArray_IterNew(<object>oa)
    with lock, nogil:
        for k from 0 <= k < n:
            out_data[k] = func(state, (<double *>(a_iter.dataptr))[0])
            PyArray_ITER_NEXT(a_iter)
    return out
cdef object cont2_array_sc(rk_state *state, rk_cont2 func, object size, double a,
                           double b, object lock):
    # Draw samples from a two-parameter continuous sampler with scalar
    # parameters `a` and `b`.
    #
    # Returns a Python float when `size` is None, otherwise a float64
    # ndarray of shape `size` filled with ``func(state, a, b)`` draws.
    cdef double *array_data
    cdef ndarray array "arrayObject"
    cdef npy_intp length
    cdef npy_intp i
    cdef double rv

    if size is None:
        # The samplers need a lock on the state; take it for a single draw
        # too, exactly as the array path below does.
        with lock, nogil:
            rv = func(state, a, b)
        return rv
    else:
        array = <ndarray>np.empty(size, np.float64)
        length = PyArray_SIZE(array)
        array_data = <double *>PyArray_DATA(array)
        with lock, nogil:
            for i from 0 <= i < length:
                array_data[i] = func(state, a, b)
        return array
cdef object cont2_array(rk_state *state, rk_cont2 func, object size,
                        ndarray oa, ndarray ob, object lock):
    # Draw samples from a two-parameter continuous sampler whose parameters
    # come element-wise from the arrays `oa` and `ob`.
    #
    # With size=None the result has the broadcast shape of `oa` and `ob`;
    # otherwise it has shape `size` and both inputs are broadcast against it.
    # NOTE(review): both inputs are read through `double *`, so callers
    # presumably pass float64 arrays -- confirm.
    #
    # Raises ValueError when broadcasting against the output would change
    # the number of elements.
    cdef ndarray out "arrayObject"
    cdef double *out_data
    cdef double *a_ptr
    cdef double *b_ptr
    cdef npy_intp k
    cdef broadcast bcast

    if size is not None:
        out = <ndarray>np.empty(size, np.float64)
        out_data = <double *>PyArray_DATA(out)
        # Operand 0 is the output; operands 1 and 2 are the parameters.
        bcast = <broadcast>PyArray_MultiIterNew(3, <void*>out, <void *>oa,
                                                <void *>ob)
        if (bcast.size != PyArray_SIZE(out)):
            raise ValueError("size is not compatible with inputs")
        with lock, nogil:
            for k from 0 <= k < bcast.size:
                a_ptr = <double *>PyArray_MultiIter_DATA(bcast, 1)
                b_ptr = <double *>PyArray_MultiIter_DATA(bcast, 2)
                out_data[k] = func(state, a_ptr[0], b_ptr[0])
                # Advance only the parameter iterators; the output is
                # addressed by the flat index k.
                PyArray_MultiIter_NEXTi(bcast, 1)
                PyArray_MultiIter_NEXTi(bcast, 2)
        return out

    # No explicit size: the broadcast of the two inputs fixes the shape.
    bcast = <broadcast> PyArray_MultiIterNew(2, <void *>oa, <void *>ob)
    out = <ndarray> PyArray_SimpleNew(bcast.nd, bcast.dimensions, NPY_DOUBLE)
    out_data = <double *>PyArray_DATA(out)
    with lock, nogil:
        for k from 0 <= k < bcast.size:
            a_ptr = <double *>PyArray_MultiIter_DATA(bcast, 0)
            b_ptr = <double *>PyArray_MultiIter_DATA(bcast, 1)
            out_data[k] = func(state, a_ptr[0], b_ptr[0])
            PyArray_MultiIter_NEXT(bcast)
    return out
cdef object cont3_array_sc(rk_state *state, rk_cont3 func, object size, double a,
                           double b, double c, object lock):
    # Draw samples from a three-parameter continuous sampler with scalar
    # parameters `a`, `b` and `c`.
    #
    # Returns a Python float when `size` is None, otherwise a float64
    # ndarray of shape `size` filled with ``func(state, a, b, c)`` draws.
    cdef double *array_data
    cdef ndarray array "arrayObject"
    cdef npy_intp length
    cdef npy_intp i
    cdef double rv

    if size is None:
        # The samplers need a lock on the state; take it for a single draw
        # too, exactly as the array path below does.
        with lock, nogil:
            rv = func(state, a, b, c)
        return rv
    else:
        array = <ndarray>np.empty(size, np.float64)
        length = PyArray_SIZE(array)
        array_data = <double *>PyArray_DATA(array)
        with lock, nogil:
            for i from 0 <= i < length:
                array_data[i] = func(state, a, b, c)
        return array
cdef object cont3_array(rk_state *state, rk_cont3 func, object size,
                        ndarray oa, ndarray ob, ndarray oc, object lock):
    # Draw samples from a three-parameter continuous sampler whose
    # parameters come element-wise from the arrays `oa`, `ob` and `oc`.
    #
    # With size=None the result has the broadcast shape of the three inputs;
    # otherwise it has shape `size` and the inputs are broadcast against it.
    # NOTE(review): all inputs are read through `double *`, so callers
    # presumably pass float64 arrays -- confirm.
    #
    # Raises ValueError when broadcasting against the output would change
    # the number of elements.
    cdef ndarray out "arrayObject"
    cdef double *out_data
    cdef double *a_ptr
    cdef double *b_ptr
    cdef double *c_ptr
    cdef npy_intp k
    cdef broadcast bcast

    if size is not None:
        out = <ndarray>np.empty(size, np.float64)
        out_data = <double *>PyArray_DATA(out)
        # Operand 0 is the output; operands 1..3 are the parameters.
        bcast = <broadcast>PyArray_MultiIterNew(4, <void*>out, <void *>oa,
                                                <void *>ob, <void *>oc)
        if (bcast.size != PyArray_SIZE(out)):
            raise ValueError("size is not compatible with inputs")
        with lock, nogil:
            for k from 0 <= k < bcast.size:
                a_ptr = <double *>PyArray_MultiIter_DATA(bcast, 1)
                b_ptr = <double *>PyArray_MultiIter_DATA(bcast, 2)
                c_ptr = <double *>PyArray_MultiIter_DATA(bcast, 3)
                out_data[k] = func(state, a_ptr[0], b_ptr[0], c_ptr[0])
                PyArray_MultiIter_NEXT(bcast)
        return out

    # No explicit size: the broadcast of the inputs fixes the shape.
    bcast = <broadcast> PyArray_MultiIterNew(3, <void *>oa, <void *>ob,
                                             <void *>oc)
    out = <ndarray> PyArray_SimpleNew(bcast.nd, bcast.dimensions, NPY_DOUBLE)
    out_data = <double *>PyArray_DATA(out)
    with lock, nogil:
        for k from 0 <= k < bcast.size:
            a_ptr = <double *>PyArray_MultiIter_DATA(bcast, 0)
            b_ptr = <double *>PyArray_MultiIter_DATA(bcast, 1)
            c_ptr = <double *>PyArray_MultiIter_DATA(bcast, 2)
            out_data[k] = func(state, a_ptr[0], b_ptr[0], c_ptr[0])
            PyArray_MultiIter_NEXT(bcast)
    return out
cdef object disc0_array(rk_state *state, rk_disc0 func, object size, object lock):
    # Draw samples from a parameterless discrete sampler.
    #
    # Returns a Python integer when `size` is None, otherwise an integer
    # ndarray (dtype ``int``) of shape `size` filled with ``func(state)``.
    cdef long *array_data
    cdef ndarray array "arrayObject"
    cdef npy_intp length
    cdef npy_intp i
    cdef long rv

    if size is None:
        # The samplers need a lock on the state; take it for a single draw
        # too, exactly as the array path below does.
        with lock, nogil:
            rv = func(state)
        return rv
    else:
        array = <ndarray>np.empty(size, int)
        length = PyArray_SIZE(array)
        array_data = <long *>PyArray_DATA(array)
        with lock, nogil:
            for i from 0 <= i < length:
                array_data[i] = func(state)
        return array
cdef object discnp_array_sc(rk_state *state, rk_discnp func, object size,
                            long n, double p, object lock):
    # Draw samples from a discrete sampler taking scalar (long n, double p).
    #
    # Returns a Python integer when `size` is None, otherwise an integer
    # ndarray (dtype ``int``) of shape `size` filled with
    # ``func(state, n, p)`` draws.
    cdef long *array_data
    cdef ndarray array "arrayObject"
    cdef npy_intp length
    cdef npy_intp i
    cdef long rv

    if size is None:
        # The samplers need a lock on the state; take it for a single draw
        # too, exactly as the array path below does.
        with lock, nogil:
            rv = func(state, n, p)
        return rv
    else:
        array = <ndarray>np.empty(size, int)
        length = PyArray_SIZE(array)
        array_data = <long *>PyArray_DATA(array)
        with lock, nogil:
            for i from 0 <= i < length:
                array_data[i] = func(state, n, p)
        return array
cdef object discnp_array(rk_state *state, rk_discnp func, object size,
                         ndarray on, ndarray op, object lock):
    # Draw samples from a discrete (long n, double p) sampler whose
    # parameters come element-wise from the arrays `on` and `op`.
    #
    # With size=None the result has the broadcast shape of `on` and `op`;
    # otherwise it has shape `size` and both inputs are broadcast against it.
    # NOTE(review): `on` is read through `long *` and `op` through
    # `double *`, so callers presumably pass arrays of those types -- confirm.
    #
    # Raises ValueError when broadcasting against the output would change
    # the number of elements.
    cdef ndarray out "arrayObject"
    cdef long *out_data
    cdef long *n_ptr
    cdef double *p_ptr
    cdef npy_intp k
    cdef broadcast bcast

    if size is not None:
        out = <ndarray>np.empty(size, int)
        out_data = <long *>PyArray_DATA(out)
        # Operand 0 is the output; operands 1 and 2 are the parameters.
        bcast = <broadcast>PyArray_MultiIterNew(3, <void*>out, <void *>on,
                                                <void *>op)
        if (bcast.size != PyArray_SIZE(out)):
            raise ValueError("size is not compatible with inputs")
        with lock, nogil:
            for k from 0 <= k < bcast.size:
                n_ptr = <long *>PyArray_MultiIter_DATA(bcast, 1)
                p_ptr = <double *>PyArray_MultiIter_DATA(bcast, 2)
                out_data[k] = func(state, n_ptr[0], p_ptr[0])
                # Advance only the parameter iterators; the output is
                # addressed by the flat index k.
                PyArray_MultiIter_NEXTi(bcast, 1)
                PyArray_MultiIter_NEXTi(bcast, 2)
        return out

    # No explicit size: the broadcast of the two inputs fixes the shape.
    bcast = <broadcast> PyArray_MultiIterNew(2, <void *>on, <void *>op)
    out = <ndarray> PyArray_SimpleNew(bcast.nd, bcast.dimensions, NPY_LONG)
    out_data = <long *>PyArray_DATA(out)
    with lock, nogil:
        for k from 0 <= k < bcast.size:
            n_ptr = <long *>PyArray_MultiIter_DATA(bcast, 0)
            p_ptr = <double *>PyArray_MultiIter_DATA(bcast, 1)
            out_data[k] = func(state, n_ptr[0], p_ptr[0])
            PyArray_MultiIter_NEXT(bcast)
    return out
cdef object discdd_array_sc(rk_state *state, rk_discdd func, object size,
                            double n, double p, object lock):
    # Draw samples from a discrete sampler taking scalar (double n, double p).
    #
    # Returns a Python integer when `size` is None, otherwise an integer
    # ndarray (dtype ``int``) of shape `size` filled with
    # ``func(state, n, p)`` draws.
    cdef long *array_data
    cdef ndarray array "arrayObject"
    cdef npy_intp length
    cdef npy_intp i
    cdef long rv

    if size is None:
        # The samplers need a lock on the state; take it for a single draw
        # too, exactly as the array path below does.
        with lock, nogil:
            rv = func(state, n, p)
        return rv
    else:
        array = <ndarray>np.empty(size, int)
        length = PyArray_SIZE(array)
        array_data = <long *>PyArray_DATA(array)
        with lock, nogil:
            for i from 0 <= i < length:
                array_data[i] = func(state, n, p)
        return array
cdef object discdd_array(rk_state *state, rk_discdd func, object size,
                         ndarray on, ndarray op, object lock):
    # Draw samples from a discrete (double n, double p) sampler whose
    # parameters come element-wise from the arrays `on` and `op`.
    #
    # With size=None the result has the broadcast shape of `on` and `op`;
    # otherwise it has shape `size` and both inputs are broadcast against it.
    # NOTE(review): both inputs are read through `double *`, so callers
    # presumably pass float64 arrays -- confirm.
    #
    # Raises ValueError when broadcasting against the output would change
    # the number of elements.
    cdef ndarray out "arrayObject"
    cdef long *out_data
    cdef double *n_ptr
    cdef double *p_ptr
    cdef npy_intp k
    cdef broadcast bcast

    if size is not None:
        out = <ndarray>np.empty(size, int)
        out_data = <long *>PyArray_DATA(out)
        # Operand 0 is the output; operands 1 and 2 are the parameters.
        bcast = <broadcast>PyArray_MultiIterNew(3, <void*>out, <void *>on,
                                                <void *>op)
        if (bcast.size != PyArray_SIZE(out)):
            raise ValueError("size is not compatible with inputs")
        with lock, nogil:
            for k from 0 <= k < bcast.size:
                n_ptr = <double *>PyArray_MultiIter_DATA(bcast, 1)
                p_ptr = <double *>PyArray_MultiIter_DATA(bcast, 2)
                out_data[k] = func(state, n_ptr[0], p_ptr[0])
                # Advance only the parameter iterators; the output is
                # addressed by the flat index k.
                PyArray_MultiIter_NEXTi(bcast, 1)
                PyArray_MultiIter_NEXTi(bcast, 2)
        return out

    # No explicit size: the broadcast of the two inputs fixes the shape.
    bcast = <broadcast> PyArray_MultiIterNew(2, <void *>on, <void *>op)
    out = <ndarray> PyArray_SimpleNew(bcast.nd, bcast.dimensions, NPY_LONG)
    out_data = <long *>PyArray_DATA(out)
    with lock, nogil:
        for k from 0 <= k < bcast.size:
            n_ptr = <double *>PyArray_MultiIter_DATA(bcast, 0)
            p_ptr = <double *>PyArray_MultiIter_DATA(bcast, 1)
            out_data[k] = func(state, n_ptr[0], p_ptr[0])
            PyArray_MultiIter_NEXT(bcast)
    return out
cdef object discnmN_array_sc(rk_state *state, rk_discnmN func, object size,
                             long n, long m, long N, object lock):
    # Draw samples from a discrete sampler taking three scalar longs
    # (n, m, N).
    #
    # Returns a Python integer when `size` is None, otherwise an integer
    # ndarray (dtype ``int``) of shape `size` filled with
    # ``func(state, n, m, N)`` draws.
    cdef long *array_data
    cdef ndarray array "arrayObject"
    cdef npy_intp length
    cdef npy_intp i
    cdef long rv

    if size is None:
        # The samplers need a lock on the state; take it for a single draw
        # too, exactly as the array path below does.
        with lock, nogil:
            rv = func(state, n, m, N)
        return rv
    else:
        array = <ndarray>np.empty(size, int)
        length = PyArray_SIZE(array)
        array_data = <long *>PyArray_DATA(array)
        with lock, nogil:
            for i from 0 <= i < length:
                array_data[i] = func(state, n, m, N)
        return array
cdef object discnmN_array(rk_state *state, rk_discnmN func, object size,
                          ndarray on, ndarray om, ndarray oN, object lock):
    # Draw samples from a discrete (long n, long m, long N) sampler whose
    # parameters come element-wise from the arrays `on`, `om` and `oN`.
    #
    # With size=None the result has the broadcast shape of the three inputs;
    # otherwise it has shape `size` and the inputs are broadcast against it.
    # NOTE(review): all inputs are read through `long *`, so callers
    # presumably pass arrays of C long -- confirm.
    #
    # Raises ValueError when broadcasting against the output would change
    # the number of elements.
    cdef ndarray out "arrayObject"
    cdef long *out_data
    cdef long *n_ptr
    cdef long *m_ptr
    cdef long *N_ptr
    cdef npy_intp k
    cdef broadcast bcast

    if size is not None:
        out = <ndarray>np.empty(size, int)
        out_data = <long *>PyArray_DATA(out)
        # Operand 0 is the output; operands 1..3 are the parameters.
        bcast = <broadcast>PyArray_MultiIterNew(4, <void*>out, <void *>on,
                                                <void *>om, <void *>oN)
        if (bcast.size != PyArray_SIZE(out)):
            raise ValueError("size is not compatible with inputs")
        with lock, nogil:
            for k from 0 <= k < bcast.size:
                n_ptr = <long *>PyArray_MultiIter_DATA(bcast, 1)
                m_ptr = <long *>PyArray_MultiIter_DATA(bcast, 2)
                N_ptr = <long *>PyArray_MultiIter_DATA(bcast, 3)
                out_data[k] = func(state, n_ptr[0], m_ptr[0], N_ptr[0])
                PyArray_MultiIter_NEXT(bcast)
        return out

    # No explicit size: the broadcast of the inputs fixes the shape.
    bcast = <broadcast> PyArray_MultiIterNew(3, <void *>on, <void *>om,
                                             <void *>oN)
    out = <ndarray> PyArray_SimpleNew(bcast.nd, bcast.dimensions, NPY_LONG)
    out_data = <long *>PyArray_DATA(out)
    with lock, nogil:
        for k from 0 <= k < bcast.size:
            n_ptr = <long *>PyArray_MultiIter_DATA(bcast, 0)
            m_ptr = <long *>PyArray_MultiIter_DATA(bcast, 1)
            N_ptr = <long *>PyArray_MultiIter_DATA(bcast, 2)
            out_data[k] = func(state, n_ptr[0], m_ptr[0], N_ptr[0])
            PyArray_MultiIter_NEXT(bcast)
    return out
cdef object discd_array_sc(rk_state *state, rk_discd func, object size,
                           double a, object lock):
    # Draw samples from a discrete sampler taking one scalar double `a`.
    #
    # Returns a Python integer when `size` is None, otherwise an integer
    # ndarray (dtype ``int``) of shape `size` filled with
    # ``func(state, a)`` draws.
    cdef long *array_data
    cdef ndarray array "arrayObject"
    cdef npy_intp length
    cdef npy_intp i
    cdef long rv

    if size is None:
        # The samplers need a lock on the state; take it for a single draw
        # too, exactly as the array path below does.
        with lock, nogil:
            rv = func(state, a)
        return rv
    else:
        array = <ndarray>np.empty(size, int)
        length = PyArray_SIZE(array)
        array_data = <long *>PyArray_DATA(array)
        with lock, nogil:
            for i from 0 <= i < length:
                array_data[i] = func(state, a)
        return array
cdef object discd_array(rk_state *state, rk_discd func, object size, ndarray oa,
                        object lock):
    # Draw samples from a one-parameter discrete sampler whose parameter
    # comes element-wise from the array `oa`.
    #
    # With size=None the result has the shape of `oa`; otherwise it has
    # shape `size` and `oa` is broadcast against it.
    # NOTE(review): the data of `oa` is read through a `double *`, so
    # callers presumably pass a float64 array -- confirm.
    #
    # Raises ValueError when broadcasting `oa` against the output would
    # change the number of elements.
    cdef ndarray out "arrayObject"
    cdef long *out_data
    cdef double *a_ptr
    cdef npy_intp n
    cdef npy_intp k
    cdef broadcast bcast
    cdef flatiter a_iter

    if size is not None:
        out = <ndarray>np.empty(size, int)
        out_data = <long *>PyArray_DATA(out)
        # The output is operand 0 of the broadcast, the parameter operand 1.
        bcast = <broadcast>PyArray_MultiIterNew(2, <void *>out, <void *>oa)
        if (bcast.size != PyArray_SIZE(out)):
            raise ValueError("size is not compatible with inputs")
        with lock, nogil:
            for k from 0 <= k < bcast.size:
                a_ptr = <double *>PyArray_MultiIter_DATA(bcast, 1)
                out_data[k] = func(state, a_ptr[0])
                # Only the parameter iterator is advanced; the output is
                # written through the flat index k.
                PyArray_MultiIter_NEXTi(bcast, 1)
        return out

    # No explicit size: mirror the dimensions of `oa` and walk it with a
    # flat iterator.
    out = <ndarray>PyArray_SimpleNew(PyArray_NDIM(oa),
                                     PyArray_DIMS(oa), NPY_LONG)
    n = PyArray_SIZE(out)
    out_data = <long *>PyArray_DATA(out)
    a_iter = <flatiter>PyArray_IterNew(<object>oa)
    with lock, nogil:
        for k from 0 <= k < n:
            out_data[k] = func(state, (<double *>(a_iter.dataptr))[0])
            PyArray_ITER_NEXT(a_iter)
    return out
cdef double kahan_sum(double *darr, npy_intp n):
    # Sum the first `n` doubles of `darr` using Kahan (compensated)
    # summation, which carries the rounding error of each addition forward
    # in `c` instead of discarding it.
    #
    # Returns 0.0 for n <= 0 so that an empty range never dereferences
    # `darr`.
    cdef double c, y, t, total
    cdef npy_intp i

    if n <= 0:
        return 0.0

    total = darr[0]
    c = 0.0
    for i from 1 <= i < n:
        y = darr[i] - c          # next term, corrected by the running error
        t = total + y
        c = (t - total) - y      # what was lost when adding y to total
        total = t
    return total
def _shape_from_size(size, d):
    """
    Build an output shape whose trailing dimension is `d`.

    Parameters
    ----------
    size : None, int or sequence of ints
        Leading dimensions.  ``None`` means no leading dimensions; an
        integer-like value (anything accepted by ``operator.index``) gives
        one leading dimension; any other value is converted with ``tuple``.
    d : int
        Length of the trailing dimension.

    Returns
    -------
    shape : tuple
        ``(d,)``, ``(size, d)`` or ``tuple(size) + (d,)``.
    """
    if size is None:
        return (d,)
    try:
        leading = (operator.index(size),)
    except TypeError:
        # Not integer-like: treat it as a sequence of dimensions.
        leading = tuple(size)
    return leading + (d,)
cdef class RandomState: | |
""" | |
RandomState(seed=None) | |
Container for the Mersenne Twister pseudo-random number generator. | |
`RandomState` exposes a number of methods for generating random numbers | |
drawn from a variety of probability distributions. In addition to the | |
distribution-specific arguments, each method takes a keyword argument | |
`size` that defaults to ``None``. If `size` is ``None``, then a single | |
value is generated and returned. If `size` is an integer, then a 1-D | |
array filled with generated values is returned. If `size` is a tuple, | |
then an array with that shape is filled and returned. | |
Parameters | |
---------- | |
seed : {None, int, array_like}, optional | |
Random seed initializing the pseudo-random number generator. | |
Can be an integer, an array (or other sequence) of integers of | |
any length, or ``None`` (the default). | |
If `seed` is ``None``, then `RandomState` will try to read data from | |
``/dev/urandom`` (or the Windows analogue) if available or seed from | |
the clock otherwise. | |
Notes | |
----- | |
The Python stdlib module "random" also contains a Mersenne Twister | |
pseudo-random number generator with a number of methods that are similar | |
to the ones available in `RandomState`. `RandomState`, besides being | |
NumPy-aware, has the advantage that it provides a much larger number | |
of probability distributions to choose from. | |
""" | |
cdef rk_state *internal_state | |
cdef object lock | |
poisson_lam_max = np.iinfo('l').max - np.sqrt(np.iinfo('l').max)*10 | |
def __init__(self, seed=None):
    # Allocate the C generator state, create the state lock and seed.
    #
    # The state is allocated only while the pointer is still NULL, so a
    # repeated __init__ call re-seeds the existing state instead of leaking
    # the previous allocation (__dealloc__ relies on the same NULL
    # convention).
    if self.internal_state == NULL:
        self.internal_state = <rk_state*>PyMem_Malloc(sizeof(rk_state))
        if self.internal_state == NULL:
            # Never hand a NULL state to the seeding routines.
            raise MemoryError("could not allocate the generator state")
    # Create the lock before seeding so the instance is fully usable by
    # the time seed() runs.
    self.lock = Lock()
    self.seed(seed)
def __dealloc__(self):
    # Release the C generator state, if one was allocated.
    if self.internal_state == NULL:
        return
    PyMem_Free(self.internal_state)
    # Clear the pointer so the freed memory cannot be reused by accident.
    self.internal_state = NULL
def seed(self, seed=None):
    """
    seed(seed=None)

    Seed the generator.

    This method is called when `RandomState` is initialized. It can be
    called again to re-seed the generator. For details, see `RandomState`.

    Parameters
    ----------
    seed : int or 1-d array_like, optional
        Seed for `RandomState`.
        Must be convertible to 32 bit unsigned integers.
        ``None`` (the default) lets the generator pick its own seed.

    Raises
    ------
    ValueError
        If an integer seed, or any element of an array seed, is outside
        ``[0, 4294967295]``; if an array seed is empty; or if an array
        seed is not one-dimensional.
    TypeError
        If an array seed cannot be safely cast to 64 bit integers.

    See Also
    --------
    RandomState

    """
    cdef rk_error errcode
    cdef ndarray obj "arrayObject_obj"
    try:
        if seed is None:
            # The error code is deliberately not acted upon here.
            # NOTE(review): per the class docstring the fallback is the
            # clock when no random device is available -- confirm.
            errcode = rk_randomseed(self.internal_state)
        else:
            idx = operator.index(seed)
            if idx > int(2**32 - 1) or idx < 0:
                raise ValueError("Seed must be between 0 and 4294967295")
            rk_seed(idx, self.internal_state)
    except TypeError:
        # `seed` is not integer-like: treat it as an array of key words.
        obj = np.asarray(seed)
        if obj.size == 0:
            # init_by_array must never be handed a zero-length key.
            raise ValueError("Seed must be non-empty")
        obj = obj.astype(np.int64, casting='safe')
        if obj.ndim != 1:
            # Only the length of axis 0 is passed on below, so anything
            # but a 1-d array would be misread.
            raise ValueError("Seed array must be 1-d")
        if ((obj > int(2**32 - 1)) | (obj < 0)).any():
            raise ValueError("Seed must be between 0 and 4294967295")
        # The range check above makes the unsafe cast to unsigned long
        # lossless; order='C' guarantees a contiguous buffer.
        obj = obj.astype('L', casting='unsafe', order='C')
        init_by_array(self.internal_state, <unsigned long *>PyArray_DATA(obj),
                      PyArray_DIM(obj, 0))
def get_state(self):
    """
    get_state()

    Return a tuple representing the internal state of the generator.

    For more details, see `set_state`.

    Returns
    -------
    out : tuple(str, ndarray of 624 uints, int, int, float)
        The returned tuple has the following items:

        1. the string 'MT19937'.
        2. a 1-D array of 624 unsigned integer keys.
        3. an integer ``pos``.
        4. an integer ``has_gauss``.
        5. a float ``cached_gaussian``.

    See Also
    --------
    set_state

    Notes
    -----
    `set_state` and `get_state` are not needed to work with any of the
    random distributions in NumPy. If the internal state is manually altered,
    the user should know exactly what he/she is doing.

    """
    cdef ndarray keys "arrayObject_state"
    cdef rk_state *st
    st = self.internal_state

    # Copy the 624 key words at their native C width first ...
    keys = <ndarray>np.empty(624, np.uint)
    memcpy(<void*>PyArray_DATA(keys), <void*>(st.key), 624*sizeof(long))
    # ... then hand them out as a uint32 array.
    keys = <ndarray>np.asarray(keys, np.uint32)
    return ('MT19937', keys, st.pos, st.has_gauss, st.gauss)
def set_state(self, state):
    """
    set_state(state)

    Set the internal state of the generator from a tuple.

    For use if one has reason to manually (re-)set the internal state of the
    "Mersenne Twister"[1]_ pseudo-random number generating algorithm.

    Parameters
    ----------
    state : tuple(str, ndarray of 624 uints, int, int, float)
        The `state` tuple has the following items:

        1. the string 'MT19937', specifying the Mersenne Twister algorithm.
        2. a 1-D array of 624 unsigned integers ``keys``.
        3. an integer ``pos``.
        4. an integer ``has_gauss``.
        5. a float ``cached_gaussian``.

    Returns
    -------
    out : None
        Returns 'None' on success.

    Raises
    ------
    ValueError
        If the algorithm name is not 'MT19937' or the key array does not
        hold exactly 624 entries.

    See Also
    --------
    get_state

    Notes
    -----
    `set_state` and `get_state` are not needed to work with any of the
    random distributions in NumPy. If the internal state is manually altered,
    the user should know exactly what he/she is doing.

    For backwards compatibility, the form (str, array of 624 uints, int) is
    also accepted although it is missing some information about the cached
    Gaussian value: ``state = ('MT19937', keys, pos)``.

    References
    ----------
    .. [1] M. Matsumoto and T. Nishimura, "Mersenne Twister: A
       623-dimensionally equidistributed uniform pseudorandom number
       generator," *ACM Trans. on Modeling and Computer Simulation*,
       Vol. 8, No. 1, pp. 3-30, Jan. 1998.

    """
    cdef ndarray key_arr "arrayObject_obj"
    cdef int pos

    if state[0] != 'MT19937':
        raise ValueError("algorithm must be 'MT19937'")

    key, pos = state[1:3]
    if len(state) != 3:
        has_gauss, cached_gaussian = state[3:5]
    else:
        # Legacy 3-tuple: no cached Gaussian is available.
        has_gauss = 0
        cached_gaussian = 0.0

    try:
        key_arr = <ndarray>PyArray_ContiguousFromObject(key, NPY_ULONG, 1, 1)
    except TypeError:
        # compatibility -- could be an older pickle
        key_arr = <ndarray>PyArray_ContiguousFromObject(key, NPY_LONG, 1, 1)
    if PyArray_DIM(key_arr, 0) != 624:
        raise ValueError("state must be 624 longs")

    # Everything validated: overwrite the C state in one go.
    memcpy(<void*>(self.internal_state.key), <void*>PyArray_DATA(key_arr),
           624*sizeof(long))
    self.internal_state.pos = pos
    self.internal_state.has_gauss = has_gauss
    self.internal_state.gauss = cached_gaussian
# Pickling support: | |
def __getstate__(self):
    # Pickling support: the pickled state is exactly the get_state() tuple.
    snapshot = self.get_state()
    return snapshot
def __setstate__(self, state):
    # Pickling support: restore from a tuple in the set_state() format.
    restore = self.set_state
    restore(state)
def __reduce__(self):
    # Pickling support: (constructor, constructor args, state).  The state
    # is the get_state() tuple, restored through __setstate__.
    ctor = np.random.__RandomState_ctor
    return (ctor, (), self.get_state())
# Basic distributions: | |
def random_sample(self, size=None):
    """
    random_sample(size=None)

    Return random floats in the half-open interval [0.0, 1.0).

    Results are from the "continuous uniform" distribution over the
    stated interval.  To sample :math:`Unif[a, b), b > a` multiply
    the output of `random_sample` by `(b-a)` and add `a`::

      (b - a) * random_sample() + a

    Parameters
    ----------
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    out : float or ndarray of floats
        Array of random floats of shape `size` (unless ``size=None``, in which
        case a single float is returned).

    Examples
    --------
    >>> np.random.random_sample()
    0.47108547995356098
    >>> type(np.random.random_sample())
    <type 'float'>
    >>> np.random.random_sample((5,))
    array([ 0.30220482,  0.86820401,  0.1654503 ,  0.11659149,  0.54323428])

    Three-by-two array of random numbers from [-5, 0):

    >>> 5 * np.random.random_sample((3, 2)) - 5
    array([[-3.99149989, -0.52338984],
           [-2.99091858, -0.79479508],
           [-1.23204345, -1.75224494]])

    """
    cdef rk_state *st
    st = self.internal_state
    # rk_double is the parameterless sampler; cont0_array handles `size`.
    return cont0_array(st, rk_double, size, self.lock)
def tomaxint(self, size=None):
    """
    tomaxint(size=None)

    Random integers between 0 and ``sys.maxint``, inclusive.

    Return a sample of uniformly distributed random integers in the interval
    [0, ``sys.maxint``].

    Parameters
    ----------
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    out : ndarray
        Drawn samples, with shape `size`.

    See Also
    --------
    randint : Uniform sampling over a given half-open interval of integers.
    random_integers : Uniform sampling over a given closed interval of
        integers.

    Examples
    --------
    >>> RS = np.random.mtrand.RandomState() # need a RandomState object
    >>> RS.tomaxint((2,2,2))
    array([[[1170048599, 1600360186],
            [ 739731006, 1947757578]],
           [[1871712945,  752307660],
            [1601631370, 1479324245]]])
    >>> import sys
    >>> sys.maxint
    2147483647
    >>> RS.tomaxint((2,2,2)) < sys.maxint
    array([[[ True,  True],
            [ True,  True]],
           [[ True,  True],
            [ True,  True]]], dtype=bool)

    """
    cdef rk_state *st
    st = self.internal_state
    # rk_long is the parameterless sampler; disc0_array handles `size`.
    return disc0_array(st, rk_long, size, self.lock)
def randint(self, low, high=None, size=None):
    """
    randint(low, high=None, size=None)

    Return random integers from `low` (inclusive) to `high` (exclusive).

    Return random integers from the "discrete uniform" distribution in the
    "half-open" interval [`low`, `high`). If `high` is None (the default),
    then results are from [0, `low`).

    Parameters
    ----------
    low : int
        Lowest (signed) integer to be drawn from the distribution (unless
        ``high=None``, in which case this parameter is the *highest* such
        integer).
    high : int, optional
        If provided, one above the largest (signed) integer to be drawn
        from the distribution (see above for behavior if ``high=None``).
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    out : int or ndarray of ints
        `size`-shaped array of random integers from the appropriate
        distribution, or a single such random int if `size` not provided.

    Raises
    ------
    ValueError
        If the resulting interval is empty (``low >= high``).

    See Also
    --------
    random.random_integers : similar to `randint`, only for the closed
        interval [`low`, `high`], and 1 is the lowest value if `high` is
        omitted. In particular, this other one is the one to use to generate
        uniformly distributed discrete non-integers.

    Examples
    --------
    >>> np.random.randint(2, size=10)
    array([1, 0, 0, 0, 1, 1, 0, 0, 1, 0])
    >>> np.random.randint(1, size=10)
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

    Generate a 2 x 4 array of ints between 0 and 4, inclusive:

    >>> np.random.randint(5, size=(2, 4))
    array([[4, 0, 2, 1],
           [3, 2, 2, 0]])

    """
    cdef long lo, hi, rv
    cdef unsigned long diff
    cdef long *array_data
    cdef ndarray array "arrayObject"
    cdef npy_intp length
    cdef npy_intp i

    if high is None:
        lo = 0
        hi = low
    else:
        lo = low
        hi = high

    if lo >= hi :
        raise ValueError("low >= high")

    # Width of the interval minus one, computed in unsigned arithmetic so
    # that hi - lo cannot overflow a signed long.
    diff = <unsigned long>hi - <unsigned long>lo - 1UL
    if size is None:
        # A single draw touches the shared state too, so it takes the same
        # lock as the array path below.
        with self.lock:
            rv = lo + <long>rk_interval(diff, self.internal_state)
        return rv
    else:
        array = <ndarray>np.empty(size, int)
        length = PyArray_SIZE(array)
        array_data = <long *>PyArray_DATA(array)
        with self.lock, nogil:
            for i from 0 <= i < length:
                rv = lo + <long>rk_interval(diff, self.internal_state)
                array_data[i] = rv
        return array
def bytes(self, npy_intp length):
    """
    bytes(length)

    Return random bytes.

    Parameters
    ----------
    length : int
        How many random bytes to produce.

    Returns
    -------
    out : str
        A string holding `length` random bytes.

    Examples
    --------
    >>> np.random.bytes(10)
    ' eh\\x85\\x022SZ\\xbf\\xa4' #random

    """
    cdef void *raw_storage
    # empty_py_bytes returns the Python object and hands back, through the
    # out-parameter, the pointer that rk_fill writes into below.
    result = empty_py_bytes(length, &raw_storage)
    # Hold the generator lock and drop the GIL while the C routine runs.
    with self.lock, nogil:
        rk_fill(raw_storage, length, self.internal_state)
    return result
def choice(self, a, size=None, replace=True, p=None):
    """
    choice(a, size=None, replace=True, p=None)

    Generates a random sample from a given 1-D array

            .. versionadded:: 1.7.0

    Parameters
    -----------
    a : 1-D array-like or int
        If an ndarray, a random sample is generated from its elements.
        If an int, the random sample is generated as if a was np.arange(a)
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.
    replace : boolean, optional
        Whether the sample is with or without replacement
    p : 1-D array-like, optional
        The probabilities associated with each entry in a.
        If not given the sample assumes a uniform distribution over all
        entries in a.

    Returns
    --------
    samples : single item or ndarray
        The generated random samples.  A single item is returned when
        `size` is None, otherwise an array of shape `size`.

    Raises
    -------
    ValueError
        If a is an int and not greater than zero, if a or p are not
        1-dimensional, if a is an array-like of size 0, if p is not a
        vector of probabilities (contains NaN or negative entries, or does
        not sum to 1), if a and p have different lengths, or if
        replace=False and the sample size is greater than the population
        size

    See Also
    ---------
    randint, shuffle, permutation

    Examples
    ---------
    Generate a uniform random sample from np.arange(5) of size 3:

    >>> np.random.choice(5, 3)
    array([0, 3, 4])
    >>> #This is equivalent to np.random.randint(0,5,3)

    Generate a non-uniform random sample from np.arange(5) of size 3:

    >>> np.random.choice(5, 3, p=[0.1, 0, 0.3, 0.6, 0])
    array([3, 3, 0])

    Generate a uniform random sample from np.arange(5) of size 3 without
    replacement:

    >>> np.random.choice(5, 3, replace=False)
    array([3,1,0])
    >>> #This is equivalent to np.random.permutation(np.arange(5))[:3]

    Generate a non-uniform random sample from np.arange(5) of size
    3 without replacement:

    >>> np.random.choice(5, 3, replace=False, p=[0.1, 0, 0.3, 0.6, 0])
    array([2, 3, 0])

    Any of the above can be repeated with an arbitrary array-like
    instead of just integers. For instance:

    >>> aa_milne_arr = ['pooh', 'rabbit', 'piglet', 'Christopher']
    >>> np.random.choice(aa_milne_arr, 5, p=[0.5, 0.1, 0.1, 0.3])
    array(['pooh', 'pooh', 'pooh', 'Christopher', 'piglet'],
          dtype='|S11')

    """

    # Format and Verify input
    a = np.array(a, copy=False)
    if a.ndim == 0:
        try:
            # __index__ must return an integer by python rules.
            pop_size = operator.index(a.item())
        except TypeError:
            raise ValueError("a must be 1-dimensional or an integer")
        if pop_size <= 0:
            raise ValueError("a must be greater than 0")
    elif a.ndim != 1:
        raise ValueError("a must be 1-dimensional")
    else:
        pop_size = a.shape[0]
    # Compare by value; an identity test against the literal 0 only works
    # through the interpreter's small-integer cache.
    if pop_size == 0:
        raise ValueError("a must be non-empty")

    if p is not None:
        d = len(p)
        p = <ndarray>PyArray_ContiguousFromObject(p, NPY_DOUBLE, 1, 1)
        pix = <double*>PyArray_DATA(p)

        if p.ndim != 1:
            raise ValueError("p must be 1-dimensional")
        if p.size != pop_size:
            raise ValueError("a and p must have same size")
        # NaN compares False against everything, so it would pass both the
        # sign test and the sum test below; reject it explicitly.
        if np.isnan(p).any():
            raise ValueError("probabilities contain NaN")
        if np.logical_or.reduce(p < 0):
            raise ValueError("probabilities are not non-negative")
        if abs(kahan_sum(pix, d) - 1.) > 1e-8:
            raise ValueError("probabilities do not sum to 1")

    # Keep the requested shape and reduce `size` to a plain element count.
    shape = size
    if shape is not None:
        size = np.prod(shape, dtype=np.intp)
    else:
        size = 1

    # Actual sampling
    if replace:
        if p is not None:
            # Inverse-CDF sampling on the normalised cumulative sum.
            cdf = p.cumsum()
            cdf /= cdf[-1]
            uniform_samples = self.random_sample(shape)
            idx = cdf.searchsorted(uniform_samples, side='right')
            idx = np.array(idx, copy=False)  # searchsorted returns a scalar
        else:
            idx = self.randint(0, pop_size, size=shape)
    else:
        if size > pop_size:
            raise ValueError("Cannot take a larger sample than "
                             "population when 'replace=False'")

        if p is not None:
            if np.count_nonzero(p > 0) < size:
                raise ValueError("Fewer non-zero entries in p than size")

            n_uniq = 0
            # Work on a copy: probabilities of drawn entries are zeroed.
            p = p.copy()
            found = np.zeros(shape, dtype=int)
            flat_found = found.ravel()
            while n_uniq < size:
                x = self.rand(size - n_uniq)
                if n_uniq > 0:
                    p[flat_found[0:n_uniq]] = 0
                cdf = np.cumsum(p)
                cdf /= cdf[-1]
                new = cdf.searchsorted(x, side='right')
                # Drop repeats within this batch, keeping draw order.
                _, unique_indices = np.unique(new, return_index=True)
                unique_indices.sort()
                new = new.take(unique_indices)
                flat_found[n_uniq:n_uniq + new.size] = new
                n_uniq += new.size
            idx = found
        else:
            idx = self.permutation(pop_size)[:size]
            if shape is not None:
                idx.shape = shape

    if shape is None and isinstance(idx, np.ndarray):
        # In most cases a scalar will have been made an array
        idx = idx.item(0)

    # Use samples as indices for a if a is array-like
    if a.ndim == 0:
        return idx

    if shape is not None and idx.ndim == 0:
        # If size == () then the user requested a 0-d array as opposed to
        # a scalar object when size is None. However a[idx] is always a
        # scalar and not an array. So this makes sure the result is an
        # array, taking into account that np.array(item) may not work
        # for object arrays.
        res = np.empty((), dtype=a.dtype)
        res[()] = a[idx]
        return res

    return a[idx]
def uniform(self, low=0.0, high=1.0, size=None):
    """
    uniform(low=0.0, high=1.0, size=None)

    Draw samples from a uniform distribution.

    Samples are spread evenly over the half-open interval
    ``[low, high)`` (`low` is included, `high` is excluded), i.e. every
    value inside the interval is equally likely to be returned by
    `uniform`.

    Parameters
    ----------
    low : float, optional
        Lower boundary of the output interval.  Every generated value is
        greater than or equal to low.  The default value is 0.
    high : float
        Upper boundary of the output interval.  Every generated value is
        less than high.  The default value is 1.0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    out : ndarray
        Drawn samples, with shape `size`.

    See Also
    --------
    randint : Discrete uniform distribution, yielding integers.
    random_integers : Discrete uniform distribution over the closed
                      interval ``[low, high]``.
    random_sample : Floats uniformly distributed over ``[0, 1)``.
    random : Alias for `random_sample`.
    rand : Convenience function that accepts dimensions as input, e.g.,
           ``rand(2,2)`` would generate a 2-by-2 array of floats,
           uniformly distributed over ``[0, 1)``.

    Notes
    -----
    The probability density function of the uniform distribution is

    .. math:: p(x) = \\frac{1}{b - a}

    anywhere within the interval ``[a, b)``, and zero elsewhere.

    Examples
    --------
    Draw samples from the distribution:

    >>> s = np.random.uniform(-1,0,1000)

    All values are within the given interval:

    >>> np.all(s >= -1)
    True
    >>> np.all(s < 0)
    True

    Display the histogram of the samples, along with the
    probability density function:

    >>> import matplotlib.pyplot as plt
    >>> count, bins, ignored = plt.hist(s, 15, normed=True)
    >>> plt.plot(bins, np.ones_like(bins), linewidth=2, color='r')
    >>> plt.show()

    """
    cdef ndarray low_arr, high_arr, width_arr
    cdef double low_val, high_val
    cdef object width_obj

    low_val = PyFloat_AsDouble(low)
    high_val = PyFloat_AsDouble(high)
    if PyErr_Occurred():
        # At least one bound is not a plain scalar: discard the conversion
        # error and take the array path.
        PyErr_Clear()
        low_arr = <ndarray>PyArray_FROM_OTF(low, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        high_arr = <ndarray>PyArray_FROM_OTF(high, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        width_obj = np.subtract(high_arr, low_arr)
        # needed to get around Pyrex's automatic reference-counting
        # rules because EnsureArray steals a reference
        Py_INCREF(width_obj)
        width_arr = <ndarray>PyArray_EnsureArray(width_obj)
        return cont2_array(self.internal_state, rk_uniform, size, low_arr,
                           width_arr, self.lock)

    # Scalar path: the generator receives the lower bound and the width.
    return cont2_array_sc(self.internal_state, rk_uniform, size, low_val,
                          high_val - low_val, self.lock)
def rand(self, *args):
    """
    rand(d0, d1, ..., dn)

    Random values in a given shape.

    Create an array of the given shape and populate it with
    random samples from a uniform distribution
    over ``[0, 1)``.

    Parameters
    ----------
    d0, d1, ..., dn : int, optional
        The dimensions of the returned array, should all be positive.
        If no argument is given a single Python float is returned.

    Returns
    -------
    out : ndarray, shape ``(d0, d1, ..., dn)``
        Random values.

    See Also
    --------
    random

    Notes
    -----
    This is a convenience function. If you want an interface that
    takes a shape-tuple as the first argument, refer to
    np.random.random_sample .

    Examples
    --------
    >>> np.random.rand(3,2)
    array([[ 0.14022471,  0.96360618],  #random
           [ 0.37601032,  0.25528411],  #random
           [ 0.49313049,  0.94909878]]) #random

    """
    # With dimensions, forward them as the shape tuple; without, request a
    # single sample.
    if args:
        return self.random_sample(size=args)
    return self.random_sample()
def randn(self, *args):
    """
    randn(d0, d1, ..., dn)

    Return a sample (or samples) from the "standard normal" distribution.

    If positive, int_like or int-convertible arguments are provided,
    `randn` generates an array of shape ``(d0, d1, ..., dn)``, filled
    with random floats sampled from a univariate "normal" (Gaussian)
    distribution of mean 0 and variance 1 (if any of the :math:`d_i` are
    floats, they are first converted to integers by truncation). A single
    float randomly sampled from the distribution is returned if no
    argument is provided.

    This is a convenience function.  If you want an interface that takes a
    tuple as the first argument, use `numpy.random.standard_normal` instead.

    Parameters
    ----------
    d0, d1, ..., dn : int, optional
        The dimensions of the returned array, should be all positive.
        If no argument is given a single Python float is returned.

    Returns
    -------
    Z : ndarray or float
        A ``(d0, d1, ..., dn)``-shaped array of floating-point samples from
        the standard normal distribution, or a single such float if
        no parameters were supplied.

    See Also
    --------
    random.standard_normal : Similar, but takes a tuple as its argument.

    Notes
    -----
    For random samples from :math:`N(\\mu, \\sigma^2)`, use:

    ``sigma * np.random.randn(...) + mu``

    Examples
    --------
    >>> np.random.randn()
    2.1923875335537315 #random

    Two-by-four array of samples from N(3, 6.25):

    >>> 2.5 * np.random.randn(2, 4) + 3
    array([[-4.49401501,  4.00950034, -1.81814867,  7.29718677],  #random
           [ 0.39924804,  4.68456316,  4.99394529,  4.84057254]]) #random

    """
    # The collected dimensions are handed over as the shape tuple; with no
    # dimensions a single draw is requested.
    if args:
        return self.standard_normal(args)
    return self.standard_normal()
def random_integers(self, low, high=None, size=None):
    """
    random_integers(low, high=None, size=None)

    Return random integers between `low` and `high`, inclusive.

    Return random integers from the "discrete uniform" distribution in the
    closed interval [`low`, `high`].  If `high` is None (the default),
    then results are from [1, `low`].

    Parameters
    ----------
    low : int
        Lowest (signed) integer to be drawn from the distribution (unless
        ``high=None``, in which case this parameter is the *highest* such
        integer).
    high : int, optional
        If provided, the largest (signed) integer to be drawn from the
        distribution (see above for behavior if ``high=None``).
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    out : int or ndarray of ints
        `size`-shaped array of random integers from the appropriate
        distribution, or a single such random int if `size` not provided.

    See Also
    --------
    random.randint : Similar to `random_integers`, only for the half-open
        interval [`low`, `high`), and 0 is the lowest value if `high` is
        omitted.

    Notes
    -----
    To sample from N evenly spaced floating-point numbers between a and b,
    use::

      a + (b - a) * (np.random.random_integers(N) - 1) / (N - 1.)

    Examples
    --------
    >>> np.random.random_integers(5)
    4
    >>> type(np.random.random_integers(5))
    <type 'int'>
    >>> np.random.random_integers(5, size=(3.,2.))
    array([[5, 4],
           [3, 3],
           [4, 5]])

    Choose five random numbers from the set of five evenly-spaced
    numbers between 0 and 2.5, inclusive (*i.e.*, from the set
    :math:`{0, 5/8, 10/8, 15/8, 20/8}`):

    >>> 2.5 * (np.random.random_integers(5, size=(5,)) - 1) / 4.
    array([ 0.625,  1.25 ,  0.625,  0.625,  2.5  ])

    Roll two six sided dice 1000 times and sum the results:

    >>> d1 = np.random.random_integers(1, 6, 1000)
    >>> d2 = np.random.random_integers(1, 6, 1000)
    >>> dsums = d1 + d2

    Display results as a histogram:

    >>> import matplotlib.pyplot as plt
    >>> count, bins, ignored = plt.hist(dsums, 11, normed=True)
    >>> plt.show()

    """
    # A single bound means "draw from [1, low]".
    if high is None:
        low, high = 1, low
    # randint is called with an upper bound one past `high` so that `high`
    # itself can be drawn.
    return self.randint(low, high + 1, size)
# Complicated, continuous distributions: | |
def standard_normal(self, size=None):
    """
    standard_normal(size=None)

    Returns samples from a Standard Normal distribution (mean=0, stdev=1).

    Parameters
    ----------
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    out : float or ndarray
        Drawn samples.

    Examples
    --------
    >>> s = np.random.standard_normal(8000)
    >>> s
    array([ 0.6888893 ,  0.78096262, -0.89086505, ...,  0.49876311, #random
           -0.38672696, -0.4685006 ])                               #random
    >>> s.shape
    (8000,)
    >>> s = np.random.standard_normal(size=(3, 4, 2))
    >>> s.shape
    (3, 4, 2)

    """
    # Parameter-free distribution: delegate to the zero-parameter helper
    # with the rk_gauss generator.
    return cont0_array(
        self.internal_state,
        rk_gauss,
        size,
        self.lock,
    )
def normal(self, loc=0.0, scale=1.0, size=None):
    """
    normal(loc=0.0, scale=1.0, size=None)

    Draw random samples from a normal (Gaussian) distribution.

    The probability density function of the normal distribution, first
    derived by De Moivre and 200 years later by both Gauss and Laplace
    independently [2]_, is often called the bell curve because of
    its characteristic shape (see the example below).

    The normal distribution occurs often in nature.  For example, it
    describes the commonly occurring distribution of samples influenced
    by a large number of tiny, random disturbances, each with its own
    unique distribution [2]_.

    Parameters
    ----------
    loc : float
        Mean ("centre") of the distribution.
    scale : float
        Standard deviation (spread or "width") of the distribution.
        Must be greater than zero.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Raises
    ------
    ValueError
        If `scale` (or any element of it) is not greater than zero.

    See Also
    --------
    scipy.stats.distributions.norm : probability density function,
        distribution or cumulative density function, etc.

    Notes
    -----
    The probability density for the Gaussian distribution is

    .. math:: p(x) = \\frac{1}{\\sqrt{ 2 \\pi \\sigma^2 }}
                     e^{ - \\frac{ (x - \\mu)^2 } {2 \\sigma^2} },

    where :math:`\\mu` is the mean and :math:`\\sigma` the standard deviation.
    The square of the standard deviation, :math:`\\sigma^2`, is called the
    variance.

    The function has its peak at the mean, and its "spread" increases with
    the standard deviation (the function reaches 0.607 times its maximum at
    :math:`x + \\sigma` and :math:`x - \\sigma` [2]_).  This implies that
    `numpy.random.normal` is more likely to return samples lying close to the
    mean, rather than those far away.

    References
    ----------
    .. [1] Wikipedia, "Normal distribution",
           http://en.wikipedia.org/wiki/Normal_distribution
    .. [2] P. R. Peebles Jr., "Central Limit Theorem" in "Probability, Random
           Variables and Random Signal Principles", 4th ed., 2001,
           pp. 51, 51, 125.

    Examples
    --------
    Draw samples from the distribution:

    >>> mu, sigma = 0, 0.1 # mean and standard deviation
    >>> s = np.random.normal(mu, sigma, 1000)

    Verify the mean and the variance:

    >>> abs(mu - np.mean(s)) < 0.01
    True

    >>> abs(sigma - np.std(s, ddof=1)) < 0.01
    True

    Display the histogram of the samples, along with
    the probability density function:

    >>> import matplotlib.pyplot as plt
    >>> count, bins, ignored = plt.hist(s, 30, normed=True)
    >>> plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) *
    ...                np.exp( - (bins - mu)**2 / (2 * sigma**2) ),
    ...          linewidth=2, color='r')
    >>> plt.show()

    """
    cdef ndarray loc_arr, scale_arr
    cdef double loc_val, scale_val

    loc_val = PyFloat_AsDouble(loc)
    scale_val = PyFloat_AsDouble(scale)
    if PyErr_Occurred():
        # Not both scalars: drop the conversion error and validate and
        # sample element-wise instead.
        PyErr_Clear()
        loc_arr = <ndarray>PyArray_FROM_OTF(loc, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        scale_arr = <ndarray>PyArray_FROM_OTF(scale, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(scale_arr, 0)):
            raise ValueError("scale <= 0")
        return cont2_array(self.internal_state, rk_normal, size, loc_arr,
                           scale_arr, self.lock)

    # Scalar fast path.
    if scale_val <= 0:
        raise ValueError("scale <= 0")
    return cont2_array_sc(self.internal_state, rk_normal, size, loc_val,
                          scale_val, self.lock)
def beta(self, a, b, size=None):
    """
    beta(a, b, size=None)

    The Beta distribution over ``[0, 1]``.

    The Beta distribution is a special case of the Dirichlet distribution,
    and is related to the Gamma distribution.  It has the probability
    distribution function

    .. math:: f(x; a,b) = \\frac{1}{B(\\alpha, \\beta)} x^{\\alpha - 1}
                                                     (1 - x)^{\\beta - 1},

    where the normalisation, B, is the beta function,

    .. math:: B(\\alpha, \\beta) = \\int_0^1 t^{\\alpha - 1}
                                 (1 - t)^{\\beta - 1} dt.

    It is often seen in Bayesian inference and order statistics.

    Parameters
    ----------
    a : float
        Alpha, must be greater than zero.
    b : float
        Beta, must be greater than zero.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    out : ndarray
        Array of the given shape, containing values drawn from a
        Beta distribution.

    Raises
    ------
    ValueError
        If `a` or `b` (or any element of them) is not greater than zero.

    """
    cdef ndarray alpha_arr, beta_arr
    cdef double alpha_val, beta_val

    alpha_val = PyFloat_AsDouble(a)
    beta_val = PyFloat_AsDouble(b)
    if PyErr_Occurred():
        # Array-valued parameters: clear the scalar-conversion error and
        # check every element.
        PyErr_Clear()
        alpha_arr = <ndarray>PyArray_FROM_OTF(a, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        beta_arr = <ndarray>PyArray_FROM_OTF(b, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(alpha_arr, 0)):
            raise ValueError("a <= 0")
        if np.any(np.less_equal(beta_arr, 0)):
            raise ValueError("b <= 0")
        return cont2_array(self.internal_state, rk_beta, size, alpha_arr,
                           beta_arr, self.lock)

    # Scalar fast path.
    if alpha_val <= 0:
        raise ValueError("a <= 0")
    if beta_val <= 0:
        raise ValueError("b <= 0")
    return cont2_array_sc(self.internal_state, rk_beta, size, alpha_val,
                          beta_val, self.lock)
def exponential(self, scale=1.0, size=None):
    """
    exponential(scale=1.0, size=None)

    Exponential distribution.

    Its probability density function is

    .. math:: f(x; \\frac{1}{\\beta}) = \\frac{1}{\\beta} \\exp(-\\frac{x}{\\beta}),

    for ``x > 0`` and 0 elsewhere. :math:`\\beta` is the scale parameter,
    which is the inverse of the rate parameter :math:`\\lambda = 1/\\beta`.
    The rate parameter is an alternative, widely used parameterization
    of the exponential distribution [3]_.

    The exponential distribution is a continuous analogue of the
    geometric distribution.  It describes many common situations, such as
    the size of raindrops measured over many rainstorms [1]_, or the time
    between page requests to Wikipedia [2]_.

    Parameters
    ----------
    scale : float
        The scale parameter, :math:`\\beta = 1/\\lambda`.  Must be greater
        than zero.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Raises
    ------
    ValueError
        If `scale` (or any element of it) is not greater than zero.

    References
    ----------
    .. [1] Peyton Z. Peebles Jr., "Probability, Random Variables and
           Random Signal Principles", 4th ed, 2001, p. 57.
    .. [2] "Poisson Process", Wikipedia,
           http://en.wikipedia.org/wiki/Poisson_process
    .. [3] "Exponential Distribution", Wikipedia,
           http://en.wikipedia.org/wiki/Exponential_distribution

    """
    cdef ndarray scale_arr
    cdef double scale_val

    scale_val = PyFloat_AsDouble(scale)
    if PyErr_Occurred():
        # `scale` is not a plain scalar: clear the error and handle it as
        # an array of doubles.
        PyErr_Clear()
        scale_arr = <ndarray> PyArray_FROM_OTF(scale, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(scale_arr, 0.0)):
            raise ValueError("scale <= 0")
        return cont1_array(self.internal_state, rk_exponential, size,
                           scale_arr, self.lock)

    # Scalar fast path.
    if scale_val <= 0:
        raise ValueError("scale <= 0")
    return cont1_array_sc(self.internal_state, rk_exponential, size,
                          scale_val, self.lock)
def standard_exponential(self, size=None):
    """
    standard_exponential(size=None)

    Draw samples from the standard exponential distribution.

    `standard_exponential` is identical to the exponential distribution
    with a scale parameter of 1.

    Parameters
    ----------
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    out : float or ndarray
        Drawn samples.

    Examples
    --------
    Output a 3x8000 array:

    >>> n = np.random.standard_exponential((3, 8000))

    """
    # Parameter-free distribution: delegate to the zero-parameter helper
    # with the rk_standard_exponential generator.
    return cont0_array(
        self.internal_state,
        rk_standard_exponential,
        size,
        self.lock,
    )
def standard_gamma(self, shape, size=None):
    """
    standard_gamma(shape, size=None)

    Draw samples from a Standard Gamma distribution.

    Samples are drawn from a Gamma distribution with specified parameters,
    shape (sometimes designated "k") and scale=1.

    Parameters
    ----------
    shape : float
        Parameter, should be > 0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : ndarray or scalar
        The drawn samples.

    Raises
    ------
    ValueError
        If `shape` (or any element of it) is not greater than zero.

    See Also
    --------
    scipy.stats.distributions.gamma : probability density function,
        distribution or cumulative density function, etc.

    Notes
    -----
    The probability density for the Gamma distribution is

    .. math:: p(x) = x^{k-1}\\frac{e^{-x/\\theta}}{\\theta^k\\Gamma(k)},

    where :math:`k` is the shape and :math:`\\theta` the scale,
    and :math:`\\Gamma` is the Gamma function.

    The Gamma distribution is often used to model the times to failure of
    electronic components, and arises naturally in processes for which the
    waiting times between Poisson distributed events are relevant.

    References
    ----------
    .. [1] Weisstein, Eric W. "Gamma Distribution." From MathWorld--A
           Wolfram Web Resource.
           http://mathworld.wolfram.com/GammaDistribution.html
    .. [2] Wikipedia, "Gamma-distribution",
           http://en.wikipedia.org/wiki/Gamma-distribution

    Examples
    --------
    Draw samples from the distribution:

    >>> shape, scale = 2., 1. # mean and width
    >>> s = np.random.standard_gamma(shape, 1000000)

    Display the histogram of the samples, along with
    the probability density function:

    >>> import matplotlib.pyplot as plt
    >>> import scipy.special as sps
    >>> count, bins, ignored = plt.hist(s, 50, normed=True)
    >>> y = bins**(shape-1) * ((np.exp(-bins/scale))/ \\
    ...                       (sps.gamma(shape) * scale**shape))
    >>> plt.plot(bins, y, linewidth=2, color='r')
    >>> plt.show()

    """
    cdef ndarray shape_arr
    cdef double shape_val

    shape_val = PyFloat_AsDouble(shape)
    if PyErr_Occurred():
        # `shape` is not a plain scalar: clear the error and handle it as
        # an array of doubles.
        PyErr_Clear()
        shape_arr = <ndarray> PyArray_FROM_OTF(shape, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(shape_arr, 0.0)):
            raise ValueError("shape <= 0")
        return cont1_array(self.internal_state, rk_standard_gamma, size,
                           shape_arr, self.lock)

    # Scalar fast path.
    if shape_val <= 0:
        raise ValueError("shape <= 0")
    return cont1_array_sc(self.internal_state, rk_standard_gamma, size,
                          shape_val, self.lock)
def gamma(self, shape, scale=1.0, size=None):
    """
    gamma(shape, scale=1.0, size=None)

    Draw samples from a Gamma distribution.

    Samples are drawn from a Gamma distribution with specified parameters,
    `shape` (sometimes designated "k") and `scale` (sometimes designated
    "theta"), where both parameters are > 0.

    Parameters
    ----------
    shape : scalar > 0
        The shape of the gamma distribution.
    scale : scalar > 0, optional
        The scale of the gamma distribution.  Default is equal to 1.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    out : ndarray, float
        Returns one sample unless `size` parameter is specified.

    Raises
    ------
    ValueError
        If `shape` or `scale` (or any element of them) is not greater
        than zero.

    See Also
    --------
    scipy.stats.distributions.gamma : probability density function,
        distribution or cumulative density function, etc.

    Notes
    -----
    The probability density for the Gamma distribution is

    .. math:: p(x) = x^{k-1}\\frac{e^{-x/\\theta}}{\\theta^k\\Gamma(k)},

    where :math:`k` is the shape and :math:`\\theta` the scale,
    and :math:`\\Gamma` is the Gamma function.

    The Gamma distribution is often used to model the times to failure of
    electronic components, and arises naturally in processes for which the
    waiting times between Poisson distributed events are relevant.

    References
    ----------
    .. [1] Weisstein, Eric W. "Gamma Distribution." From MathWorld--A
           Wolfram Web Resource.
           http://mathworld.wolfram.com/GammaDistribution.html
    .. [2] Wikipedia, "Gamma-distribution",
           http://en.wikipedia.org/wiki/Gamma-distribution

    Examples
    --------
    Draw samples from the distribution:

    >>> shape, scale = 2., 2. # mean and dispersion
    >>> s = np.random.gamma(shape, scale, 1000)

    Display the histogram of the samples, along with
    the probability density function:

    >>> import matplotlib.pyplot as plt
    >>> import scipy.special as sps
    >>> count, bins, ignored = plt.hist(s, 50, normed=True)
    >>> y = bins**(shape-1)*(np.exp(-bins/scale) /
    ...                      (sps.gamma(shape)*scale**shape))
    >>> plt.plot(bins, y, linewidth=2, color='r')
    >>> plt.show()

    """
    cdef ndarray shape_arr, scale_arr
    cdef double shape_val, scale_val

    shape_val = PyFloat_AsDouble(shape)
    scale_val = PyFloat_AsDouble(scale)
    if PyErr_Occurred():
        # Array-valued parameters: clear the scalar-conversion error and
        # check every element.
        PyErr_Clear()
        shape_arr = <ndarray>PyArray_FROM_OTF(shape, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        scale_arr = <ndarray>PyArray_FROM_OTF(scale, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(shape_arr, 0.0)):
            raise ValueError("shape <= 0")
        if np.any(np.less_equal(scale_arr, 0.0)):
            raise ValueError("scale <= 0")
        return cont2_array(self.internal_state, rk_gamma, size, shape_arr,
                           scale_arr, self.lock)

    # Scalar fast path.
    if shape_val <= 0:
        raise ValueError("shape <= 0")
    if scale_val <= 0:
        raise ValueError("scale <= 0")
    return cont2_array_sc(self.internal_state, rk_gamma, size, shape_val,
                          scale_val, self.lock)
def f(self, dfnum, dfden, size=None):
    """
    f(dfnum, dfden, size=None)

    Draw samples from a F distribution.

    Samples are drawn from an F distribution with specified parameters,
    `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of freedom
    in denominator), where both parameters should be greater than zero.

    The random variate of the F distribution (also known as the
    Fisher distribution) is a continuous probability distribution
    that arises in ANOVA tests, and is the ratio of two chi-square
    variates.

    Parameters
    ----------
    dfnum : float
        Degrees of freedom in numerator. Should be greater than zero.
    dfden : float
        Degrees of freedom in denominator. Should be greater than zero.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : {ndarray, scalar}
        Samples from the Fisher distribution.

    Raises
    ------
    ValueError
        If `dfnum` or `dfden` (or any element of them) is not greater
        than zero.

    See Also
    --------
    scipy.stats.distributions.f : probability density function,
        distribution or cumulative density function, etc.

    Notes
    -----
    The F statistic is used to compare in-group variances to between-group
    variances. Calculating the distribution depends on the sampling, and
    so it is a function of the respective degrees of freedom in the
    problem.  The variable `dfnum` is the number of samples minus one, the
    between-groups degrees of freedom, while `dfden` is the within-groups
    degrees of freedom, the sum of the number of samples in each group
    minus the number of groups.

    References
    ----------
    .. [1] Glantz, Stanton A. "Primer of Biostatistics.", McGraw-Hill,
           Fifth Edition, 2002.
    .. [2] Wikipedia, "F-distribution",
           http://en.wikipedia.org/wiki/F-distribution

    Examples
    --------
    An example from Glantz[1], pp 47-40 (page range as printed in the
    original text; likely a typo -- TODO confirm).
    Two groups, children of diabetics (25 people) and children from people
    without diabetes (25 controls). Fasting blood glucose was measured,
    case group had a mean value of 86.1, controls had a mean value of
    82.2. Standard deviations were 2.09 and 2.49 respectively. Are these
    data consistent with the null hypothesis that the parents diabetic
    status does not affect their children's blood glucose levels?
    Calculating the F statistic from the data gives a value of 36.01.

    Draw samples from the distribution:

    >>> dfnum = 1. # between group degrees of freedom
    >>> dfden = 48. # within groups degrees of freedom
    >>> s = np.random.f(dfnum, dfden, 1000)

    The lower bound for the top 1% of the samples is :

    >>> np.sort(s)[-10]
    7.61988120985

    So there is about a 1% chance that the F statistic will exceed 7.62,
    the measured value is 36, so the null hypothesis is rejected at the 1%
    level.

    """
    cdef ndarray odfnum, odfden
    cdef double fdfnum, fdfden

    fdfnum = PyFloat_AsDouble(dfnum)
    fdfden = PyFloat_AsDouble(dfden)
    if not PyErr_Occurred():
        # Scalar fast path.  The messages name the offending parameter,
        # matching the array path below.
        if fdfnum <= 0:
            raise ValueError("dfnum <= 0")
        if fdfden <= 0:
            raise ValueError("dfden <= 0")
        return cont2_array_sc(self.internal_state, rk_f, size, fdfnum,
                              fdfden, self.lock)

    # At least one parameter is not a plain scalar: discard the conversion
    # error and validate element-wise.
    PyErr_Clear()

    odfnum = <ndarray>PyArray_FROM_OTF(dfnum, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
    odfden = <ndarray>PyArray_FROM_OTF(dfden, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
    if np.any(np.less_equal(odfnum, 0.0)):
        raise ValueError("dfnum <= 0")
    if np.any(np.less_equal(odfden, 0.0)):
        raise ValueError("dfden <= 0")
    return cont2_array(self.internal_state, rk_f, size, odfnum, odfden,
                       self.lock)
def noncentral_f(self, dfnum, dfden, nonc, size=None):
    """
    noncentral_f(dfnum, dfden, nonc, size=None)

    Draw samples from the noncentral F distribution.

    Samples are drawn from an F distribution with specified parameters,
    `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of
    freedom in denominator), together with the non-centrality parameter
    `nonc`.

    Parameters
    ----------
    dfnum : float or array_like of floats
        Numerator degrees of freedom, must be > 1.
    dfden : float or array_like of floats
        Denominator degrees of freedom, must be > 0.
    nonc : float or array_like of floats
        Non-centrality parameter, must be >= 0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : scalar or ndarray
        Drawn samples.

    Raises
    ------
    ValueError
        If `dfnum` <= 1, `dfden` <= 0 or `nonc` < 0 (for array input, if
        any element violates its bound).

    Notes
    -----
    When calculating the power of an experiment (power = probability of
    rejecting the null hypothesis when a specific alternative is true) the
    non-central F statistic becomes important.  When the null hypothesis is
    true, the F statistic follows a central F distribution.  When the null
    hypothesis is not true, then it follows a non-central F statistic.

    References
    ----------
    Weisstein, Eric W. "Noncentral F-Distribution." From MathWorld--A Wolfram
    Web Resource.  http://mathworld.wolfram.com/NoncentralF-Distribution.html

    Wikipedia, "Noncentral F distribution",
    http://en.wikipedia.org/wiki/Noncentral_F-distribution

    Examples
    --------
    In a study, testing for a specific alternative to the null hypothesis
    requires use of the Noncentral F distribution.  We need to calculate the
    area in the tail of the distribution that exceeds the value of the F
    distribution for the null hypothesis.  We'll plot the two probability
    distributions for comparison.

    >>> import matplotlib.pyplot as plt
    >>> dfnum = 3 # between group deg of freedom
    >>> dfden = 20 # within groups degrees of freedom
    >>> nonc = 3.0
    >>> nc_vals = np.random.noncentral_f(dfnum, dfden, nonc, 1000000)
    >>> NF = np.histogram(nc_vals, bins=50, normed=True)
    >>> c_vals = np.random.f(dfnum, dfden, 1000000)
    >>> F = np.histogram(c_vals, bins=50, normed=True)
    >>> plt.plot(F[1][1:], F[0])
    >>> plt.plot(NF[1][1:], NF[0])
    >>> plt.show()

    """
    cdef double num_df, den_df, noncentrality
    cdef ndarray num_df_arr, den_df_arr, nonc_arr

    # Try to read all three parameters as C doubles; the error indicator
    # is inspected only once, after every conversion has been attempted.
    num_df = PyFloat_AsDouble(dfnum)
    den_df = PyFloat_AsDouble(dfden)
    noncentrality = PyFloat_AsDouble(nonc)

    if PyErr_Occurred():
        # At least one argument is not a plain scalar: drop the pending
        # error and redo the conversion through double-precision arrays.
        PyErr_Clear()

        num_df_arr = <ndarray>PyArray_FROM_OTF(dfnum, NPY_DOUBLE,
                                               NPY_ARRAY_ALIGNED)
        den_df_arr = <ndarray>PyArray_FROM_OTF(dfden, NPY_DOUBLE,
                                               NPY_ARRAY_ALIGNED)
        nonc_arr = <ndarray>PyArray_FROM_OTF(nonc, NPY_DOUBLE,
                                             NPY_ARRAY_ALIGNED)

        if np.any(np.less_equal(num_df_arr, 1.0)):
            raise ValueError("dfnum <= 1")
        if np.any(np.less_equal(den_df_arr, 0.0)):
            raise ValueError("dfden <= 0")
        if np.any(np.less(nonc_arr, 0.0)):
            raise ValueError("nonc < 0")
        return cont3_array(self.internal_state, rk_noncentral_f, size,
                           num_df_arr, den_df_arr, nonc_arr, self.lock)

    # Scalar parameters: same bounds as the array branch above.
    if num_df <= 1:
        raise ValueError("dfnum <= 1")
    if den_df <= 0:
        raise ValueError("dfden <= 0")
    if noncentrality < 0:
        raise ValueError("nonc < 0")
    return cont3_array_sc(self.internal_state, rk_noncentral_f, size,
                          num_df, den_df, noncentrality, self.lock)
def chisquare(self, df, size=None):
    """
    chisquare(df, size=None)

    Draw samples from a chi-square distribution.

    When `df` independent random variables, each with standard normal
    distributions (mean 0, variance 1), are squared and summed, the
    resulting distribution is chi-square (see Notes).  This distribution
    is often used in hypothesis testing.

    Parameters
    ----------
    df : float or array_like of floats
        Number of degrees of freedom, must be > 0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    output : ndarray or scalar
        Samples drawn from the distribution, packed in a `size`-shaped
        array.

    Raises
    ------
    ValueError
        When `df` <= 0 or when an inappropriate `size` (e.g. ``size=-1``)
        is given.

    Notes
    -----
    The variable obtained by summing the squares of `df` independent,
    standard normally distributed random variables:

    .. math:: Q = \\sum_{i=1}^{\\mathtt{df}} X^2_i

    is chi-square distributed, denoted

    .. math:: Q \\sim \\chi^2_k.

    The probability density function of the chi-squared distribution is

    .. math:: p(x) = \\frac{(1/2)^{k/2}}{\\Gamma(k/2)}
                     x^{k/2 - 1} e^{-x/2},

    where :math:`\\Gamma` is the gamma function,

    .. math:: \\Gamma(x) = \\int_0^{\\infty} t^{x - 1} e^{-t} dt.

    References
    ----------
    `NIST/SEMATECH e-Handbook of Statistical Methods
    <http://www.itl.nist.gov/div898/handbook/eda/section3/eda3666.htm>`_

    Examples
    --------
    >>> np.random.chisquare(2,4)
    array([ 1.89920014,  9.00867716,  3.13710533,  5.62318272])

    """
    cdef double df_value
    cdef ndarray df_arr

    # First try to read `df` as a single C double.
    df_value = PyFloat_AsDouble(df)

    if PyErr_Occurred():
        # `df` is not a plain scalar: discard the conversion error and
        # handle it as an array of doubles instead.
        PyErr_Clear()

        df_arr = <ndarray>PyArray_FROM_OTF(df, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(df_arr, 0.0)):
            raise ValueError("df <= 0")
        return cont1_array(self.internal_state, rk_chisquare, size, df_arr,
                           self.lock)

    if df_value <= 0:
        raise ValueError("df <= 0")
    return cont1_array_sc(self.internal_state, rk_chisquare, size, df_value,
                          self.lock)
def noncentral_chisquare(self, df, nonc, size=None):
    """
    noncentral_chisquare(df, nonc, size=None)

    Draw samples from a noncentral chi-square distribution.

    The noncentral :math:`\\chi^2` distribution is a generalisation of
    the :math:`\\chi^2` distribution.

    Parameters
    ----------
    df : float or array_like of floats
        Degrees of freedom, must be > 1.
    nonc : float or array_like of floats
        Non-centrality, must be > 0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : scalar or ndarray
        Drawn samples.

    Raises
    ------
    ValueError
        If `df` <= 1 or `nonc` <= 0 (for array input, if any element
        violates its bound).

    Notes
    -----
    The probability density function for the noncentral Chi-square
    distribution is

    .. math:: P(x;df,nonc) = \\sum^{\\infty}_{i=0}
                           \\frac{e^{-nonc/2}(nonc/2)^{i}}{i!}
                           P_{Y_{df+2i}}(x),

    where :math:`Y_{q}` is the Chi-square with q degrees of freedom.

    In Delhi (2007), it is noted that the noncentral chi-square is useful in
    bombing and coverage problems, the probability of killing the point target
    given by the noncentral chi-squared distribution.

    References
    ----------
    .. [1] Delhi, M.S. Holla, "On a noncentral chi-square distribution in the
           analysis of weapon systems effectiveness", Metrika, Volume 15,
           Number 1 / December, 1970.
    .. [2] Wikipedia, "Noncentral chi-square distribution"
           http://en.wikipedia.org/wiki/Noncentral_chi-square_distribution

    Examples
    --------
    Draw values from the distribution and plot the histogram

    >>> import matplotlib.pyplot as plt
    >>> values = plt.hist(np.random.noncentral_chisquare(3, 20, 100000),
    ...                   bins=200, normed=True)
    >>> plt.show()

    Draw values from a noncentral chisquare with very small noncentrality,
    and compare to a chisquare.

    >>> plt.figure()
    >>> values = plt.hist(np.random.noncentral_chisquare(3, .0000001, 100000),
    ...                   bins=np.arange(0., 25, .1), normed=True)
    >>> values2 = plt.hist(np.random.chisquare(3, 100000),
    ...                    bins=np.arange(0., 25, .1), normed=True)
    >>> plt.plot(values[1][0:-1], values[0]-values2[0], 'ob')
    >>> plt.show()

    Demonstrate how large values of non-centrality lead to a more symmetric
    distribution.

    >>> plt.figure()
    >>> values = plt.hist(np.random.noncentral_chisquare(3, 20, 100000),
    ...                   bins=200, normed=True)
    >>> plt.show()

    """
    cdef ndarray odf, ononc
    cdef double fdf, fnonc

    # Try to read both parameters as C doubles; the error indicator is
    # checked once, after both conversions have been attempted.
    fdf = PyFloat_AsDouble(df)
    fnonc = PyFloat_AsDouble(nonc)
    if not PyErr_Occurred():
        # The message now names the bound that is actually tested
        # (it previously read "df <= 0" while testing df <= 1).
        if fdf <= 1:
            raise ValueError("df <= 1")
        if fnonc <= 0:
            raise ValueError("nonc <= 0")
        return cont2_array_sc(self.internal_state, rk_noncentral_chisquare,
                              size, fdf, fnonc, self.lock)

    # At least one argument is not a plain scalar: drop the pending error
    # and redo the conversion through double-precision arrays.
    PyErr_Clear()

    odf = <ndarray>PyArray_FROM_OTF(df, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
    ononc = <ndarray>PyArray_FROM_OTF(nonc, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
    # Enforce the same bounds as the scalar branch.  The array branch used
    # to compare df against 0.0, letting 0 < df <= 1 through, and reported
    # "nonc < 0" although the test is nonc <= 0.
    if np.any(np.less_equal(odf, 1.0)):
        raise ValueError("df <= 1")
    if np.any(np.less_equal(ononc, 0.0)):
        raise ValueError("nonc <= 0")
    return cont2_array(self.internal_state, rk_noncentral_chisquare, size,
                       odf, ononc, self.lock)
def standard_cauchy(self, size=None):
    """
    standard_cauchy(size=None)

    Draw samples from a standard Cauchy distribution with mode = 0.

    Also known as the Lorentz distribution.

    Parameters
    ----------
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : ndarray or scalar
        The drawn samples.

    Notes
    -----
    The probability density function for the full Cauchy distribution is

    .. math:: P(x; x_0, \\gamma) = \\frac{1}{\\pi \\gamma \\bigl[ 1+
              (\\frac{x-x_0}{\\gamma})^2 \\bigr] }

    and the Standard Cauchy distribution just sets :math:`x_0=0` and
    :math:`\\gamma=1`

    The Cauchy distribution arises in the solution to the driven harmonic
    oscillator problem, and also describes spectral line broadening.  It
    also describes the distribution of values at which a line tilted at
    a random angle will cut the x axis.

    When studying hypothesis tests that assume normality, seeing how the
    tests perform on data from a Cauchy distribution is a good indicator of
    their sensitivity to a heavy-tailed distribution, since the Cauchy looks
    very much like a Gaussian distribution, but with heavier tails.

    References
    ----------
    .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "Cauchy
           Distribution",
           http://www.itl.nist.gov/div898/handbook/eda/section3/eda3663.htm
    .. [2] Weisstein, Eric W. "Cauchy Distribution." From MathWorld--A
           Wolfram Web Resource.
           http://mathworld.wolfram.com/CauchyDistribution.html
    .. [3] Wikipedia, "Cauchy distribution"
           http://en.wikipedia.org/wiki/Cauchy_distribution

    Examples
    --------
    Draw samples and plot the distribution:

    >>> import matplotlib.pyplot as plt
    >>> s = np.random.standard_cauchy(1000000)
    >>> s = s[(s>-25) & (s<25)]  # truncate distribution so it plots well
    >>> plt.hist(s, bins=100)
    >>> plt.show()

    """
    # No distribution parameters to validate: pass the generator state,
    # the requested output shape and the lock straight to cont0_array.
    samples = cont0_array(self.internal_state, rk_standard_cauchy, size,
                          self.lock)
    return samples
def standard_t(self, df, size=None):
    """
    standard_t(df, size=None)

    Draw samples from a standard Student's t distribution with `df`
    degrees of freedom.

    A special case of the hyperbolic distribution.
    As `df` gets large, the result resembles that of the standard normal
    distribution (`standard_normal`).

    Parameters
    ----------
    df : float or array_like of floats
        Degrees of freedom, must be > 0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : ndarray or scalar
        Drawn samples.

    Raises
    ------
    ValueError
        If `df` <= 0 (for array input, if any element is <= 0).

    Notes
    -----
    The probability density function for the t distribution is

    .. math:: P(x, df) = \\frac{\\Gamma(\\frac{df+1}{2})}{\\sqrt{\\pi df}
              \\Gamma(\\frac{df}{2})}\\Bigl( 1+\\frac{x^2}{df} \\Bigr)^{-(df+1)/2}

    The t test is based on an assumption that the data come from a Normal
    distribution.  The t test provides a way to test whether the sample mean
    (that is the mean calculated from the data) is a good estimate of the true
    mean.

    The derivation of the t-distribution was first published in 1908 by
    William Gosset while working for the Guinness Brewery in Dublin.  Due to
    proprietary issues, he had to publish under a pseudonym, and so he used
    the name Student.

    References
    ----------
    .. [1] Dalgaard, Peter, "Introductory Statistics With R",
           Springer, 2002.
    .. [2] Wikipedia, "Student's t-distribution"
           http://en.wikipedia.org/wiki/Student's_t-distribution

    Examples
    --------
    From Dalgaard page 83 [1]_, suppose the daily energy intake for 11
    women in kJ is:

    >>> intake = np.array([5260., 5470, 5640, 6180, 6390, 6515, 6805, 7515, \\
    ...                    7515, 8230, 8770])

    Does their energy intake deviate systematically from the recommended
    value of 7725 kJ?

    We have 10 degrees of freedom, so is the sample mean within 95% of the
    recommended value?

    >>> s = np.random.standard_t(10, size=100000)
    >>> np.mean(intake)
    6753.636363636364
    >>> intake.std(ddof=1)
    1142.1232221373727

    Calculate the t statistic, setting the ddof parameter to the unbiased
    value so the divisor in the standard deviation will be degrees of
    freedom, N-1.

    >>> t = (np.mean(intake)-7725)/(intake.std(ddof=1)/np.sqrt(len(intake)))
    >>> import matplotlib.pyplot as plt
    >>> h = plt.hist(s, bins=100, normed=True)

    For a one-sided t-test, how far out in the distribution does the t
    statistic appear?

    >>> np.sum(s<t) / float(len(s))
    0.0090699999999999999  #random

    So the p-value is about 0.009: if the null hypothesis were true, a t
    statistic at least this extreme would be observed in only about 0.9% of
    samples, so the null hypothesis is rejected at the 1% level.

    """
    cdef double df_value
    cdef ndarray df_arr

    # First try to read `df` as a single C double.
    df_value = PyFloat_AsDouble(df)

    if PyErr_Occurred():
        # `df` is not a plain scalar: discard the conversion error and
        # handle it as an array of doubles instead.
        PyErr_Clear()

        df_arr = <ndarray> PyArray_FROM_OTF(df, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(df_arr, 0.0)):
            raise ValueError("df <= 0")
        return cont1_array(self.internal_state, rk_standard_t, size, df_arr,
                           self.lock)

    if df_value <= 0:
        raise ValueError("df <= 0")
    return cont1_array_sc(self.internal_state, rk_standard_t, size,
                          df_value, self.lock)
def vonmises(self, mu, kappa, size=None):
    """
    vonmises(mu, kappa, size=None)

    Draw samples from a von Mises distribution.

    Samples are drawn from a von Mises distribution with specified mode
    (mu) and dispersion (kappa), on the interval [-pi, pi].

    The von Mises distribution (also known as the circular normal
    distribution) is a continuous probability distribution on the unit
    circle.  It may be thought of as the circular analogue of the normal
    distribution.

    Parameters
    ----------
    mu : float or array_like of floats
        Mode ("center") of the distribution.
    kappa : float or array_like of floats
        Dispersion of the distribution, has to be >=0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : scalar or ndarray
        The returned samples, which are in the interval [-pi, pi].

    Raises
    ------
    ValueError
        If `kappa` < 0 (for array input, if any element is < 0).

    See Also
    --------
    scipy.stats.distributions.vonmises : probability density function,
        distribution, or cumulative density function, etc.

    Notes
    -----
    The probability density for the von Mises distribution is

    .. math:: p(x) = \\frac{e^{\\kappa cos(x-\\mu)}}{2\\pi I_0(\\kappa)},

    where :math:`\\mu` is the mode and :math:`\\kappa` the dispersion,
    and :math:`I_0(\\kappa)` is the modified Bessel function of order 0.

    The von Mises is named for Richard Edler von Mises, who was born in
    Austria-Hungary, in what is now the Ukraine.  He fled to the United
    States in 1939 and became a professor at Harvard.  He worked in
    probability theory, aerodynamics, fluid mechanics, and philosophy of
    science.

    References
    ----------
    Abramowitz, M. and Stegun, I. A. (ed.), *Handbook of Mathematical
    Functions*, New York: Dover, 1965.

    von Mises, R., *Mathematical Theory of Probability and Statistics*,
    New York: Academic Press, 1964.

    Examples
    --------
    Draw samples from the distribution:

    >>> mu, kappa = 0.0, 4.0 # mean and dispersion
    >>> s = np.random.vonmises(mu, kappa, 1000)

    Display the histogram of the samples, along with
    the probability density function:

    >>> import matplotlib.pyplot as plt
    >>> import scipy.special as sps
    >>> count, bins, ignored = plt.hist(s, 50, normed=True)
    >>> x = np.arange(-np.pi, np.pi, 2*np.pi/50.)
    >>> y = np.exp(kappa*np.cos(x-mu))/(2*np.pi*sps.i0(kappa))
    >>> plt.plot(x, y, linewidth=2, color='r')
    >>> plt.show()

    """
    cdef double mu_value, kappa_value
    cdef ndarray mu_arr, kappa_arr

    # Try to read both parameters as C doubles; the error indicator is
    # checked once, after both conversions have been attempted.
    mu_value = PyFloat_AsDouble(mu)
    kappa_value = PyFloat_AsDouble(kappa)

    if PyErr_Occurred():
        # At least one argument is not a plain scalar: drop the pending
        # error and redo the conversion through double-precision arrays.
        PyErr_Clear()

        mu_arr = <ndarray> PyArray_FROM_OTF(mu, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        kappa_arr = <ndarray> PyArray_FROM_OTF(kappa, NPY_DOUBLE,
                                               NPY_ARRAY_ALIGNED)
        if np.any(np.less(kappa_arr, 0.0)):
            raise ValueError("kappa < 0")
        return cont2_array(self.internal_state, rk_vonmises, size, mu_arr,
                           kappa_arr, self.lock)

    # Only kappa is range-checked; mu is passed through unchecked.
    if kappa_value < 0:
        raise ValueError("kappa < 0")
    return cont2_array_sc(self.internal_state, rk_vonmises, size, mu_value,
                          kappa_value, self.lock)
def pareto(self, a, size=None):
    """
    pareto(a, size=None)

    Draw samples from a Pareto II or Lomax distribution with specified shape.

    The Lomax or Pareto II distribution is a shifted Pareto distribution.  The
    classical Pareto distribution can be obtained from the Lomax distribution
    by adding 1 and multiplying by the scale parameter m, see below.  The
    smallest value of the Lomax distribution is zero while for the classical
    Pareto distribution it is m, where the standard Pareto distribution has
    location m=1.
    Lomax can also be considered as a simplified version of the Generalized
    Pareto distribution (available in SciPy), with the scale set to one and
    the location set to zero.

    The Pareto distribution must be greater than zero, and is unbounded above.
    It is also known as the "80-20 rule".  In this distribution, 80 percent of
    the weights are in the lowest 20 percent of the range, while the other 20
    percent fill the remaining 80 percent of the range.

    Parameters
    ----------
    a : float or array_like of floats
        Shape of the distribution, must be > 0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : ndarray or scalar
        Drawn samples.

    Raises
    ------
    ValueError
        If `a` <= 0 (for array input, if any element is <= 0).

    See Also
    --------
    scipy.stats.distributions.lomax.pdf : probability density function,
        distribution or cumulative density function, etc.
    scipy.stats.distributions.genpareto.pdf : probability density function,
        distribution or cumulative density function, etc.

    Notes
    -----
    The probability density for the Pareto distribution is

    .. math:: p(x) = \\frac{am^a}{x^{a+1}}

    where :math:`a` is the shape and :math:`m` the location

    The Pareto distribution, named after the Italian economist Vilfredo Pareto,
    is a power law probability distribution useful in many real world problems.
    Outside the field of economics it is generally referred to as the Bradford
    distribution.  Pareto developed the distribution to describe the
    distribution of wealth in an economy.  It has also found use in insurance,
    web page access statistics, oil field sizes, and many other problems,
    including the download frequency for projects in Sourceforge [1]_.  It is
    one of the so-called "fat-tailed" distributions.

    References
    ----------
    .. [1] Francis Hunt and Paul Johnson, On the Pareto Distribution of
           Sourceforge projects.
    .. [2] Pareto, V. (1896). Course of Political Economy. Lausanne.
    .. [3] Reiss, R.D., Thomas, M.(2001), Statistical Analysis of Extreme
           Values, Birkhauser Verlag, Basel, pp 23-30.
    .. [4] Wikipedia, "Pareto distribution",
           http://en.wikipedia.org/wiki/Pareto_distribution

    Examples
    --------
    Draw samples from the distribution:

    >>> a, m = 3., 1. # shape and mode
    >>> s = (np.random.pareto(a, 1000) + 1) * m

    Display the histogram of the samples, along with
    the probability density function:

    >>> import matplotlib.pyplot as plt
    >>> count, bins, ignored = plt.hist(s, 100, normed=True, align='center')
    >>> fit = a*m**a/bins**(a+1)
    >>> plt.plot(bins, max(count)*fit/max(fit),linewidth=2, color='r')
    >>> plt.show()

    """
    cdef double a_value
    cdef ndarray a_arr

    # First try to read `a` as a single C double.
    a_value = PyFloat_AsDouble(a)

    if PyErr_Occurred():
        # `a` is not a plain scalar: discard the conversion error and
        # handle it as an array of doubles instead.
        PyErr_Clear()

        a_arr = <ndarray>PyArray_FROM_OTF(a, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(a_arr, 0.0)):
            raise ValueError("a <= 0")
        return cont1_array(self.internal_state, rk_pareto, size, a_arr,
                           self.lock)

    if a_value <= 0:
        raise ValueError("a <= 0")
    return cont1_array_sc(self.internal_state, rk_pareto, size, a_value,
                          self.lock)
def weibull(self, a, size=None):
    """
    weibull(a, size=None)

    Draw samples from a Weibull distribution.

    Draw samples from a 1-parameter Weibull distribution with the given
    shape parameter `a`.

    .. math:: X = (-ln(U))^{1/a}

    Here, U is drawn from the uniform distribution over (0,1].

    The more common 2-parameter Weibull, including a scale parameter
    :math:`\\lambda` is just :math:`X = \\lambda(-ln(U))^{1/a}`.

    Parameters
    ----------
    a : float or array_like of floats
        Shape of the distribution, must be > 0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : ndarray or scalar
        Drawn samples.

    Raises
    ------
    ValueError
        If `a` <= 0 (for array input, if any element is <= 0).

    See Also
    --------
    scipy.stats.distributions.weibull_max
    scipy.stats.distributions.weibull_min
    scipy.stats.distributions.genextreme
    gumbel

    Notes
    -----
    The Weibull (or Type III asymptotic extreme value distribution for smallest
    values, SEV Type III, or Rosin-Rammler distribution) is one of a class of
    Generalized Extreme Value (GEV) distributions used in modeling extreme
    value problems.  This class includes the Gumbel and Frechet distributions.

    The probability density for the Weibull distribution is

    .. math:: p(x) = \\frac{a}
                     {\\lambda}(\\frac{x}{\\lambda})^{a-1}e^{-(x/\\lambda)^a},

    where :math:`a` is the shape and :math:`\\lambda` the scale.

    The function has its peak (the mode) at
    :math:`\\lambda(\\frac{a-1}{a})^{1/a}`.

    When ``a = 1``, the Weibull distribution reduces to the exponential
    distribution.

    References
    ----------
    .. [1] Waloddi Weibull, Professor, Royal Technical University, Stockholm,
           1939 "A Statistical Theory Of The Strength Of Materials",
           Ingeniorsvetenskapsakademiens Handlingar Nr 151, 1939,
           Generalstabens Litografiska Anstalts Forlag, Stockholm.
    .. [2] Waloddi Weibull, 1951 "A Statistical Distribution Function of Wide
           Applicability",  Journal Of Applied Mechanics ASME Paper.
    .. [3] Wikipedia, "Weibull distribution",
           http://en.wikipedia.org/wiki/Weibull_distribution

    Examples
    --------
    Draw samples from the distribution:

    >>> a = 5. # shape
    >>> s = np.random.weibull(a, 1000)

    Display the histogram of the samples, along with
    the probability density function:

    >>> import matplotlib.pyplot as plt
    >>> x = np.arange(1,100.)/50.
    >>> def weib(x,n,a):
    ...     return (a / n) * (x / n)**(a - 1) * np.exp(-(x / n)**a)

    >>> count, bins, ignored = plt.hist(np.random.weibull(5.,1000))
    >>> scale = count.max()/weib(x, 1., 5.).max()
    >>> plt.plot(x, weib(x, 1., 5.)*scale)
    >>> plt.show()

    """
    cdef double a_value
    cdef ndarray a_arr

    # First try to read `a` as a single C double.
    a_value = PyFloat_AsDouble(a)

    if PyErr_Occurred():
        # `a` is not a plain scalar: discard the conversion error and
        # handle it as an array of doubles instead.
        PyErr_Clear()

        a_arr = <ndarray>PyArray_FROM_OTF(a, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(a_arr, 0.0)):
            raise ValueError("a <= 0")
        return cont1_array(self.internal_state, rk_weibull, size, a_arr,
                           self.lock)

    if a_value <= 0:
        raise ValueError("a <= 0")
    return cont1_array_sc(self.internal_state, rk_weibull, size, a_value,
                          self.lock)
def power(self, a, size=None):
    """
    power(a, size=None)

    Draws samples in [0, 1] from a power distribution with positive
    exponent a - 1.

    Also known as the power function distribution.

    Parameters
    ----------
    a : float or array_like of floats
        parameter, > 0
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : {ndarray, scalar}
        The returned samples lie in [0, 1].

    Raises
    ------
    ValueError
        If `a` <= 0 (for array input, if any element is <= 0).

    Notes
    -----
    The probability density function is

    .. math:: P(x; a) = ax^{a-1}, 0 \\le x \\le 1, a>0.

    The power function distribution is just the inverse of the Pareto
    distribution.  It may also be seen as a special case of the Beta
    distribution.

    It is used, for example, in modeling the over-reporting of insurance
    claims.

    References
    ----------
    .. [1] Christian Kleiber, Samuel Kotz, "Statistical size distributions
           in economics and actuarial sciences", Wiley, 2003.
    .. [2] Heckert, N. A. and Filliben, James J. (2003). NIST Handbook 148:
           Dataplot Reference Manual, Volume 2: Let Subcommands and Library
           Functions", National Institute of Standards and Technology Handbook
           Series, June 2003.
           http://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/powpdf.pdf

    Examples
    --------
    Draw samples from the distribution:

    >>> a = 5. # shape
    >>> samples = 1000
    >>> s = np.random.power(a, samples)

    Display the histogram of the samples, along with
    the probability density function:

    >>> import matplotlib.pyplot as plt
    >>> count, bins, ignored = plt.hist(s, bins=30)
    >>> x = np.linspace(0, 1, 100)
    >>> y = a*x**(a-1.)
    >>> normed_y = samples*np.diff(bins)[0]*y
    >>> plt.plot(x, normed_y)
    >>> plt.show()

    Compare the power function distribution to the inverse of the Pareto.

    >>> from scipy import stats
    >>> rvs = np.random.power(5, 1000000)
    >>> rvsp = np.random.pareto(5, 1000000)
    >>> xx = np.linspace(0,1,100)
    >>> powpdf = stats.powerlaw.pdf(xx,5)

    >>> plt.figure()
    >>> plt.hist(rvs, bins=50, normed=True)
    >>> plt.plot(xx,powpdf,'r-')
    >>> plt.title('np.random.power(5)')

    >>> plt.figure()
    >>> plt.hist(1./(1.+rvsp), bins=50, normed=True)
    >>> plt.plot(xx,powpdf,'r-')
    >>> plt.title('inverse of 1 + np.random.pareto(5)')

    >>> plt.figure()
    >>> plt.hist(1./(1.+rvsp), bins=50, normed=True)
    >>> plt.plot(xx,powpdf,'r-')
    >>> plt.title('inverse of stats.pareto(5)')

    """
    cdef double a_value
    cdef ndarray a_arr

    # First try to read `a` as a single C double.
    a_value = PyFloat_AsDouble(a)

    if PyErr_Occurred():
        # `a` is not a plain scalar: discard the conversion error and
        # handle it as an array of doubles instead.
        PyErr_Clear()

        a_arr = <ndarray>PyArray_FROM_OTF(a, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(a_arr, 0.0)):
            raise ValueError("a <= 0")
        return cont1_array(self.internal_state, rk_power, size, a_arr,
                           self.lock)

    if a_value <= 0:
        raise ValueError("a <= 0")
    return cont1_array_sc(self.internal_state, rk_power, size, a_value,
                          self.lock)
def laplace(self, loc=0.0, scale=1.0, size=None):
    """
    laplace(loc=0.0, scale=1.0, size=None)

    Draw samples from the Laplace or double exponential distribution with
    specified location (or mean) and scale (decay).

    The Laplace distribution is similar to the Gaussian/normal distribution,
    but is sharper at the peak and has fatter tails.  It represents the
    difference between two independent, identically distributed exponential
    random variables.

    Parameters
    ----------
    loc : float or array_like of floats
        The position, :math:`\\mu`, of the distribution peak.
    scale : float or array_like of floats
        :math:`\\lambda`, the exponential decay, must be > 0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : ndarray or scalar
        Drawn samples.

    Raises
    ------
    ValueError
        If `scale` <= 0 (for array input, if any element is <= 0).

    Notes
    -----
    It has the probability density function

    .. math:: f(x; \\mu, \\lambda) = \\frac{1}{2\\lambda}
                                   \\exp\\left(-\\frac{|x - \\mu|}{\\lambda}\\right).

    The first law of Laplace, from 1774, states that the frequency of an error
    can be expressed as an exponential function of the absolute magnitude of
    the error, which leads to the Laplace distribution.  For many problems in
    Economics and Health sciences, this distribution seems to model the data
    better than the standard Gaussian distribution.

    References
    ----------
    .. [1] Abramowitz, M. and Stegun, I. A. (Eds.). Handbook of Mathematical
           Functions with Formulas, Graphs, and Mathematical Tables, 9th
           printing.  New York: Dover, 1972.
    .. [2] The Laplace distribution and generalizations
           By Samuel Kotz, Tomasz J. Kozubowski, Krzysztof Podgorski,
           Birkhauser, 2001.
    .. [3] Weisstein, Eric W. "Laplace Distribution."
           From MathWorld--A Wolfram Web Resource.
           http://mathworld.wolfram.com/LaplaceDistribution.html
    .. [4] Wikipedia, "Laplace distribution",
           http://en.wikipedia.org/wiki/Laplace_distribution

    Examples
    --------
    Draw samples from the distribution

    >>> loc, scale = 0., 1.
    >>> s = np.random.laplace(loc, scale, 1000)

    Display the histogram of the samples, along with
    the probability density function:

    >>> import matplotlib.pyplot as plt
    >>> count, bins, ignored = plt.hist(s, 30, normed=True)
    >>> x = np.arange(-8., 8., .01)
    >>> pdf = np.exp(-abs(x-loc)/scale)/(2.*scale)
    >>> plt.plot(x, pdf)

    Plot Gaussian for comparison:

    >>> g = (1/(scale * np.sqrt(2 * np.pi)) *
    ...      np.exp(-(x - loc)**2 / (2 * scale**2)))
    >>> plt.plot(x,g)

    """
    cdef double loc_value, scale_value
    cdef ndarray loc_arr, scale_arr

    # Try to read both parameters as C doubles; the error indicator is
    # checked once, after both conversions have been attempted.
    loc_value = PyFloat_AsDouble(loc)
    scale_value = PyFloat_AsDouble(scale)

    if PyErr_Occurred():
        # At least one argument is not a plain scalar: drop the pending
        # error and redo the conversion through double-precision arrays.
        PyErr_Clear()

        loc_arr = PyArray_FROM_OTF(loc, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        scale_arr = PyArray_FROM_OTF(scale, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(scale_arr, 0.0)):
            raise ValueError("scale <= 0")
        return cont2_array(self.internal_state, rk_laplace, size, loc_arr,
                           scale_arr, self.lock)

    # Only scale is range-checked; loc is passed through unchecked.
    if scale_value <= 0:
        raise ValueError("scale <= 0")
    return cont2_array_sc(self.internal_state, rk_laplace, size, loc_value,
                          scale_value, self.lock)
def gumbel(self, loc=0.0, scale=1.0, size=None):
    """
    gumbel(loc=0.0, scale=1.0, size=None)

    Draw samples from a Gumbel distribution.

    Samples are drawn from a Gumbel distribution with the specified
    location and scale.  For more information on the Gumbel distribution,
    see Notes and References below.

    Parameters
    ----------
    loc : float
        The location of the mode of the distribution.
    scale : float
        The scale parameter of the distribution.  Must be > 0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    out : ndarray or scalar
        The samples.

    Raises
    ------
    ValueError
        If `scale`, or any element of it, is <= 0.

    See Also
    --------
    scipy.stats.gumbel_l
    scipy.stats.gumbel_r
    scipy.stats.genextreme
        probability density function, distribution, or cumulative density
        function, etc. for each of the above
    weibull

    Notes
    -----
    The Gumbel (or Smallest Extreme Value (SEV) or the Smallest Extreme Value
    Type I) distribution is one of a class of Generalized Extreme Value (GEV)
    distributions used in modeling extreme value problems.  The Gumbel is a
    special case of the Extreme Value Type I distribution for maximums from
    distributions with "exponential-like" tails.

    The probability density for the Gumbel distribution is

    .. math:: p(x) = \\frac{e^{-(x - \\mu)/ \\beta}}{\\beta} e^{ -e^{-(x - \\mu)/
              \\beta}},

    where :math:`\\mu` is the mode, a location parameter, and :math:`\\beta` is
    the scale parameter.

    The Gumbel (named for German mathematician Emil Julius Gumbel) was used
    very early in the hydrology literature, for modeling the occurrence of
    flood events.  It is also used for modeling maximum wind speed and rainfall
    rates.  It is a "fat-tailed" distribution - the probability of an event in
    the tail of the distribution is larger than if one used a Gaussian, hence
    the surprisingly frequent occurrence of 100-year floods.  Floods were
    initially modeled as a Gaussian process, which underestimated the frequency
    of extreme events.

    It is one of a class of extreme value distributions, the Generalized
    Extreme Value (GEV) distributions, which also includes the Weibull and
    Frechet.

    The function has a mean of :math:`\\mu + 0.57721\\beta` and a variance of
    :math:`\\frac{\\pi^2}{6}\\beta^2`.

    References
    ----------
    Gumbel, E. J., *Statistics of Extremes*, New York: Columbia University
    Press, 1958.

    Reiss, R.-D. and Thomas, M., *Statistical Analysis of Extreme Values from
    Insurance, Finance, Hydrology and Other Fields*, Basel: Birkhauser Verlag,
    2001.

    Examples
    --------
    Draw samples from the distribution:

    >>> mu, beta = 0, 0.1 # location and scale
    >>> s = np.random.gumbel(mu, beta, 1000)

    Display the histogram of the samples, along with
    the probability density function:

    >>> import matplotlib.pyplot as plt
    >>> count, bins, ignored = plt.hist(s, 30, normed=True)
    >>> plt.plot(bins, (1/beta)*np.exp(-(bins - mu)/beta)
    ...          * np.exp( -np.exp( -(bins - mu) /beta) ),
    ...          linewidth=2, color='r')
    >>> plt.show()

    Show how an extreme value distribution can arise from a Gaussian process
    and compare to a Gaussian:

    >>> means = []
    >>> maxima = []
    >>> for i in range(0,1000) :
    ...    a = np.random.normal(mu, beta, 1000)
    ...    means.append(a.mean())
    ...    maxima.append(a.max())
    >>> count, bins, ignored = plt.hist(maxima, 30, normed=True)
    >>> beta = np.std(maxima)*np.pi/np.sqrt(6)
    >>> mu = np.mean(maxima) - 0.57721*beta
    >>> plt.plot(bins, (1/beta)*np.exp(-(bins - mu)/beta)
    ...          * np.exp(-np.exp(-(bins - mu)/beta)),
    ...          linewidth=2, color='r')
    >>> plt.plot(bins, 1/(beta * np.sqrt(2 * np.pi))
    ...          * np.exp(-(bins - mu)**2 / (2 * beta**2)),
    ...          linewidth=2, color='g')
    >>> plt.show()

    """
    cdef double loc_val, scale_val
    cdef ndarray loc_arr, scale_arr

    # Try the scalar route first: read both parameters as C doubles.
    loc_val = PyFloat_AsDouble(loc)
    scale_val = PyFloat_AsDouble(scale)

    if PyErr_Occurred():
        # A conversion failed; drop the pending error and handle the
        # parameters as double arrays instead.
        PyErr_Clear()
        loc_arr = PyArray_FROM_OTF(loc, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        scale_arr = PyArray_FROM_OTF(scale, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(scale_arr, 0.0)):
            raise ValueError("scale <= 0")
        return cont2_array(self.internal_state, rk_gumbel, size, loc_arr,
                           scale_arr, self.lock)

    if scale_val <= 0:
        raise ValueError("scale <= 0")
    return cont2_array_sc(self.internal_state, rk_gumbel, size, loc_val,
                          scale_val, self.lock)
def logistic(self, loc=0.0, scale=1.0, size=None):
    """
    logistic(loc=0.0, scale=1.0, size=None)

    Draw samples from a Logistic distribution.

    Samples are drawn from a Logistic distribution with specified
    parameters, loc (location or mean, also median), and scale (>0).

    Parameters
    ----------
    loc : float
        Location of the distribution (its mean and median).
    scale : float > 0.
        Scale of the distribution.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : {ndarray, scalar}
        The drawn samples.

    Raises
    ------
    ValueError
        If `scale`, or any element of it, is <= 0.

    See Also
    --------
    scipy.stats.distributions.logistic : probability density function,
        distribution or cumulative density function, etc.

    Notes
    -----
    The probability density for the Logistic distribution is

    .. math:: P(x) = \\frac{e^{-(x-\\mu)/s}}{s(1+e^{-(x-\\mu)/s})^2},

    where :math:`\\mu` = location and :math:`s` = scale.

    The Logistic distribution is used in Extreme Value problems where it
    can act as a mixture of Gumbel distributions, in Epidemiology, and by
    the World Chess Federation (FIDE) where it is used in the Elo ranking
    system, assuming the performance of each player is a logistically
    distributed random variable.

    References
    ----------
    .. [1] Reiss, R.-D. and Thomas M. (2001), Statistical Analysis of Extreme
           Values, from Insurance, Finance, Hydrology and Other Fields,
           Birkhauser Verlag, Basel, pp 132-133.
    .. [2] Weisstein, Eric W. "Logistic Distribution." From
           MathWorld--A Wolfram Web Resource.
           http://mathworld.wolfram.com/LogisticDistribution.html
    .. [3] Wikipedia, "Logistic distribution",
           http://en.wikipedia.org/wiki/Logistic_distribution

    Examples
    --------
    Draw samples from the distribution:

    >>> loc, scale = 10, 1
    >>> s = np.random.logistic(loc, scale, 10000)
    >>> import matplotlib.pyplot as plt
    >>> count, bins, ignored = plt.hist(s, bins=50)

    #   plot against distribution

    >>> def logist(x, loc, scale):
    ...     return np.exp((loc-x)/scale)/(scale*(1+np.exp((loc-x)/scale))**2)
    >>> plt.plot(bins, logist(bins, loc, scale)*count.max()/\\
    ... logist(bins, loc, scale).max())
    >>> plt.show()

    """
    cdef double loc_val, scale_val
    cdef ndarray loc_arr, scale_arr

    # Scalar attempt: both parameters converted to C doubles.
    loc_val = PyFloat_AsDouble(loc)
    scale_val = PyFloat_AsDouble(scale)

    if PyErr_Occurred():
        # Conversion to double failed; clear the error and fall back to
        # the array-based code path.
        PyErr_Clear()
        loc_arr = PyArray_FROM_OTF(loc, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        scale_arr = PyArray_FROM_OTF(scale, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(scale_arr, 0.0)):
            raise ValueError("scale <= 0")
        return cont2_array(self.internal_state, rk_logistic, size, loc_arr,
                           scale_arr, self.lock)

    if scale_val <= 0:
        raise ValueError("scale <= 0")
    return cont2_array_sc(self.internal_state, rk_logistic, size, loc_val,
                          scale_val, self.lock)
def lognormal(self, mean=0.0, sigma=1.0, size=None):
    """
    lognormal(mean=0.0, sigma=1.0, size=None)

    Return samples drawn from a log-normal distribution.

    Draw samples from a log-normal distribution with specified mean,
    standard deviation, and array shape.  Note that the mean and standard
    deviation are not the values for the distribution itself, but of the
    underlying normal distribution it is derived from.

    Parameters
    ----------
    mean : float
        Mean value of the underlying normal distribution
    sigma : float, > 0.
        Standard deviation of the underlying normal distribution
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : ndarray or float
        The desired samples. An array of the same shape as `size` if given,
        if `size` is None a float is returned.

    Raises
    ------
    ValueError
        If `sigma`, or any element of it, is <= 0.

    See Also
    --------
    scipy.stats.lognorm : probability density function, distribution,
        cumulative density function, etc.

    Notes
    -----
    A variable `x` has a log-normal distribution if `log(x)` is normally
    distributed.  The probability density function for the log-normal
    distribution is:

    .. math:: p(x) = \\frac{1}{\\sigma x \\sqrt{2\\pi}}
                     e^{(-\\frac{(ln(x)-\\mu)^2}{2\\sigma^2})}

    where :math:`\\mu` is the mean and :math:`\\sigma` is the standard
    deviation of the normally distributed logarithm of the variable.
    A log-normal distribution results if a random variable is the *product*
    of a large number of independent, identically-distributed variables in
    the same way that a normal distribution results if the variable is the
    *sum* of a large number of independent, identically-distributed
    variables.

    References
    ----------
    Limpert, E., Stahel, W. A., and Abbt, M., "Log-normal Distributions
    across the Sciences: Keys and Clues," *BioScience*, Vol. 51, No. 5,
    May, 2001.  http://stat.ethz.ch/~stahel/lognormal/bioscience.pdf

    Reiss, R.D. and Thomas, M., *Statistical Analysis of Extreme Values*,
    Basel: Birkhauser Verlag, 2001, pp. 31-32.

    Examples
    --------
    Draw samples from the distribution:

    >>> mu, sigma = 3., 1. # mean and standard deviation
    >>> s = np.random.lognormal(mu, sigma, 1000)

    Display the histogram of the samples, along with
    the probability density function:

    >>> import matplotlib.pyplot as plt
    >>> count, bins, ignored = plt.hist(s, 100, normed=True, align='mid')

    >>> x = np.linspace(min(bins), max(bins), 10000)
    >>> pdf = (np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2))
    ...        / (x * sigma * np.sqrt(2 * np.pi)))

    >>> plt.plot(x, pdf, linewidth=2, color='r')
    >>> plt.axis('tight')
    >>> plt.show()

    Demonstrate that taking the products of random samples from a uniform
    distribution can be fit well by a log-normal probability density function.

    >>> # Generate a thousand samples: each is the product of 100 random
    >>> # values, drawn from a uniform distribution.
    >>> b = []
    >>> for i in range(1000):
    ...    a = 10. + np.random.random(100)
    ...    b.append(np.product(a))

    >>> b = np.array(b) / np.min(b) # scale values to be positive
    >>> count, bins, ignored = plt.hist(b, 100, normed=True, align='mid')
    >>> sigma = np.std(np.log(b))
    >>> mu = np.mean(np.log(b))

    >>> x = np.linspace(min(bins), max(bins), 10000)
    >>> pdf = (np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2))
    ...        / (x * sigma * np.sqrt(2 * np.pi)))

    >>> plt.plot(x, pdf, color='r', linewidth=2)
    >>> plt.show()

    """
    cdef double mean_val, sigma_val
    cdef ndarray mean_arr, sigma_arr

    # Scalar attempt: both parameters converted to C doubles.
    mean_val = PyFloat_AsDouble(mean)
    sigma_val = PyFloat_AsDouble(sigma)

    if PyErr_Occurred():
        # Not convertible to doubles; clear the error and use the
        # array-based code path.
        PyErr_Clear()
        mean_arr = PyArray_FROM_OTF(mean, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        sigma_arr = PyArray_FROM_OTF(sigma, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(sigma_arr, 0.0)):
            raise ValueError("sigma <= 0.0")
        return cont2_array(self.internal_state, rk_lognormal, size,
                           mean_arr, sigma_arr, self.lock)

    if sigma_val <= 0:
        raise ValueError("sigma <= 0")
    return cont2_array_sc(self.internal_state, rk_lognormal, size,
                          mean_val, sigma_val, self.lock)
def rayleigh(self, scale=1.0, size=None):
    """
    rayleigh(scale=1.0, size=None)

    Draw samples from a Rayleigh distribution.

    The :math:`\\chi` and Weibull distributions are generalizations of the
    Rayleigh.

    Parameters
    ----------
    scale : scalar
        Scale, also equals the mode. Must be > 0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Raises
    ------
    ValueError
        If `scale`, or any element of it, is <= 0.

    Notes
    -----
    The probability density function for the Rayleigh distribution is

    .. math:: P(x;scale) = \\frac{x}{scale^2}e^{\\frac{-x^2}{2 \\cdotp scale^2}}

    The Rayleigh distribution would arise, for example, if the East and
    North components of the wind velocity had identical zero-mean Gaussian
    distributions.  Then the wind speed would have a Rayleigh distribution.
    The Rayleigh distribution is used to model the expected output from
    wind turbines.

    References
    ----------
    .. [1] Brighton Webs Ltd., Rayleigh Distribution,
          http://www.brighton-webs.co.uk/distributions/rayleigh.asp
    .. [2] Wikipedia, "Rayleigh distribution"
          http://en.wikipedia.org/wiki/Rayleigh_distribution

    Examples
    --------
    Draw values from the distribution and plot the histogram

    >>> from matplotlib.pyplot import hist
    >>> values = hist(np.random.rayleigh(3, 100000), bins=200, normed=True)

    Wave heights tend to follow a Rayleigh distribution. If the mean wave
    height is 1 meter, what fraction of waves are likely to be larger than 3
    meters?

    >>> meanvalue = 1
    >>> modevalue = np.sqrt(2 / np.pi) * meanvalue
    >>> s = np.random.rayleigh(modevalue, 1000000)

    The percentage of waves larger than 3 meters is:

    >>> 100.*sum(s>3)/1000000.
    0.087300000000000003 #random

    """
    cdef double scale_val
    cdef ndarray scale_arr

    # Scalar attempt: read the parameter as a C double.
    scale_val = PyFloat_AsDouble(scale)

    if PyErr_Occurred():
        # Not convertible to a double; clear the error and use the
        # array-based code path.
        PyErr_Clear()
        scale_arr = <ndarray>PyArray_FROM_OTF(scale, NPY_DOUBLE,
                                              NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(scale_arr, 0.0)):
            raise ValueError("scale <= 0.0")
        return cont1_array(self.internal_state, rk_rayleigh, size,
                           scale_arr, self.lock)

    if scale_val <= 0:
        raise ValueError("scale <= 0")
    return cont1_array_sc(self.internal_state, rk_rayleigh, size,
                          scale_val, self.lock)
def wald(self, mean, scale, size=None):
    """
    wald(mean, scale, size=None)

    Draw samples from a Wald, or Inverse Gaussian, distribution.

    As the scale approaches infinity, the distribution becomes more like a
    Gaussian.

    Some references claim that the Wald is an Inverse Gaussian with mean=1, but
    this is by no means universal.

    The Inverse Gaussian distribution was first studied in relationship to
    Brownian motion. In 1956 M.C.K. Tweedie used the name Inverse Gaussian
    because there is an inverse relationship between the time to cover a unit
    distance and distance covered in unit time.

    Parameters
    ----------
    mean : scalar
        Distribution mean, must be > 0.
    scale : scalar
        Scale parameter, must be > 0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : ndarray or scalar
        Drawn sample, all greater than zero.

    Raises
    ------
    ValueError
        If `mean` or `scale`, or any element of either, is <= 0.

    Notes
    -----
    The probability density function for the Wald distribution is

    .. math:: P(x;mean,scale) = \\sqrt{\\frac{scale}{2\\pi x^3}}e^
                                \\frac{-scale(x-mean)^2}{2\\cdotp mean^2x}

    As noted above the Inverse Gaussian distribution first arose from attempts
    to model Brownian Motion. It is also a competitor to the Weibull for use in
    reliability modeling and modeling stock returns and interest rate
    processes.

    References
    ----------
    .. [1] Brighton Webs Ltd., Wald Distribution,
          http://www.brighton-webs.co.uk/distributions/wald.asp
    .. [2] Chhikara, Raj S., and Folks, J. Leroy, "The Inverse Gaussian
          Distribution: Theory : Methodology, and Applications", CRC Press,
          1988.
    .. [3] Wikipedia, "Wald distribution"
          http://en.wikipedia.org/wiki/Wald_distribution

    Examples
    --------
    Draw values from the distribution and plot the histogram:

    >>> import matplotlib.pyplot as plt
    >>> h = plt.hist(np.random.wald(3, 2, 100000), bins=200, normed=True)
    >>> plt.show()

    """
    cdef double mean_val, scale_val
    cdef ndarray mean_arr, scale_arr

    # Scalar attempt: both parameters converted to C doubles.
    mean_val = PyFloat_AsDouble(mean)
    scale_val = PyFloat_AsDouble(scale)

    if PyErr_Occurred():
        # Not convertible to doubles; clear the error and use the
        # array-based code path.
        PyErr_Clear()
        mean_arr = PyArray_FROM_OTF(mean, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        scale_arr = PyArray_FROM_OTF(scale, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        # `mean` is validated before `scale`, as in the scalar path.
        if np.any(np.less_equal(mean_arr,0.0)):
            raise ValueError("mean <= 0.0")
        if np.any(np.less_equal(scale_arr,0.0)):
            raise ValueError("scale <= 0.0")
        return cont2_array(self.internal_state, rk_wald, size, mean_arr,
                           scale_arr, self.lock)

    if mean_val <= 0:
        raise ValueError("mean <= 0")
    if scale_val <= 0:
        raise ValueError("scale <= 0")
    return cont2_array_sc(self.internal_state, rk_wald, size, mean_val,
                          scale_val, self.lock)
def triangular(self, left, mode, right, size=None):
    """
    triangular(left, mode, right, size=None)

    Draw samples from the triangular distribution.

    The triangular distribution is a continuous probability distribution with
    lower limit left, peak at mode, and upper limit right. Unlike the other
    distributions, these parameters directly define the shape of the pdf.

    Parameters
    ----------
    left : scalar
        Lower limit.
    mode : scalar
        The value where the peak of the distribution occurs.
        The value should fulfill the condition ``left <= mode <= right``.
    right : scalar
        Upper limit, should be larger than `left`.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : ndarray or scalar
        The returned samples all lie in the interval [left, right].

    Raises
    ------
    ValueError
        If ``left > mode``, ``mode > right`` or ``left == right`` holds
        for the parameters or for any of their elements.

    Notes
    -----
    The probability density function for the Triangular distribution is

    .. math:: P(x;l, m, r) = \\begin{cases}
              \\frac{2(x-l)}{(r-l)(m-l)}& \\text{for $l \\leq x \\leq m$},\\\\
              \\frac{2(m-x)}{(r-l)(r-m)}& \\text{for $m \\leq x \\leq r$},\\\\
              0& \\text{otherwise}.
              \\end{cases}

    The triangular distribution is often used in ill-defined problems where the
    underlying distribution is not known, but some knowledge of the limits and
    mode exists. Often it is used in simulations.

    References
    ----------
    .. [1] Wikipedia, "Triangular distribution"
          http://en.wikipedia.org/wiki/Triangular_distribution

    Examples
    --------
    Draw values from the distribution and plot the histogram:

    >>> import matplotlib.pyplot as plt
    >>> h = plt.hist(np.random.triangular(-3, 0, 8, 100000), bins=200,
    ...              normed=True)
    >>> plt.show()

    """
    cdef double left_val, mode_val, right_val
    cdef ndarray left_arr, mode_arr, right_arr

    # Scalar attempt: all three parameters converted to C doubles.
    left_val = PyFloat_AsDouble(left)
    right_val = PyFloat_AsDouble(right)
    mode_val = PyFloat_AsDouble(mode)

    if PyErr_Occurred():
        # Not convertible to doubles; clear the error and use the
        # array-based code path.
        PyErr_Clear()
        left_arr = <ndarray>PyArray_FROM_OTF(left, NPY_DOUBLE,
                                             NPY_ARRAY_ALIGNED)
        mode_arr = <ndarray>PyArray_FROM_OTF(mode, NPY_DOUBLE,
                                             NPY_ARRAY_ALIGNED)
        right_arr = <ndarray>PyArray_FROM_OTF(right, NPY_DOUBLE,
                                              NPY_ARRAY_ALIGNED)
        if np.any(np.greater(left_arr, mode_arr)):
            raise ValueError("left > mode")
        if np.any(np.greater(mode_arr, right_arr)):
            raise ValueError("mode > right")
        if np.any(np.equal(left_arr, right_arr)):
            raise ValueError("left == right")
        return cont3_array(self.internal_state, rk_triangular, size,
                           left_arr, mode_arr, right_arr, self.lock)

    if left_val > mode_val:
        raise ValueError("left > mode")
    if mode_val > right_val:
        raise ValueError("mode > right")
    if left_val == right_val:
        raise ValueError("left == right")
    return cont3_array_sc(self.internal_state, rk_triangular, size,
                          left_val, mode_val, right_val, self.lock)
# Complicated, discrete distributions: | |
def binomial(self, n, p, size=None):
    """
    binomial(n, p, size=None)

    Draw samples from a binomial distribution.

    Samples are drawn from a Binomial distribution with specified
    parameters, n trials and p probability of success where
    n an integer >= 0 and p is in the interval [0,1]. (n may be
    input as a float, but it is truncated to an integer in use)

    Parameters
    ----------
    n : float (but truncated to an integer)
        parameter, >= 0.
    p : float
        parameter, >= 0 and <=1.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : {ndarray, scalar}
        where the values are all integers in  [0, n].

    Raises
    ------
    ValueError
        If `n` is negative, or if `p` is negative, greater than 1 or NaN.
        For array arguments the check applies to every element.

    See Also
    --------
    scipy.stats.distributions.binom : probability density function,
        distribution or cumulative density function, etc.

    Notes
    -----
    The probability density for the Binomial distribution is

    .. math:: P(N) = \\binom{n}{N}p^N(1-p)^{n-N},

    where :math:`n` is the number of trials, :math:`p` is the probability
    of success, and :math:`N` is the number of successes.

    When estimating the standard error of a proportion in a population by
    using a random sample, the normal distribution works well unless the
    product p*n <=5, where p = population proportion estimate, and n =
    number of samples, in which case the binomial distribution is used
    instead. For example, a sample of 15 people shows 4 who are left
    handed, and 11 who are right handed. Then p = 4/15 = 27%. 0.27*15 = 4,
    so the binomial distribution should be used in this case.

    References
    ----------
    .. [1] Dalgaard, Peter, "Introductory Statistics with R",
           Springer-Verlag, 2002.
    .. [2] Glantz, Stanton A. "Primer of Biostatistics.", McGraw-Hill,
           Fifth Edition, 2002.
    .. [3] Lentner, Marvin, "Elementary Applied Statistics", Bogden
           and Quigley, 1972.
    .. [4] Weisstein, Eric W. "Binomial Distribution." From MathWorld--A
           Wolfram Web Resource.
           http://mathworld.wolfram.com/BinomialDistribution.html
    .. [5] Wikipedia, "Binomial-distribution",
           http://en.wikipedia.org/wiki/Binomial_distribution

    Examples
    --------
    Draw samples from the distribution:

    >>> n, p = 10, .5 # number of trials, probability of each trial
    >>> s = np.random.binomial(n, p, 1000)
    # result of flipping a coin 10 times, tested 1000 times.

    A real world example. A company drills 9 wild-cat oil exploration
    wells, each with an estimated probability of success of 0.1. All nine
    wells fail. What is the probability of that happening?

    Let's do 20,000 trials of the model, and count the number that
    generate zero positive results.

    >>> sum(np.random.binomial(9,0.1,20000)==0)/20000.
    answer = 0.38885, or 38%.

    """
    cdef ndarray on, op
    cdef long ln
    cdef double fp

    # Scalar attempt: `p` as a C double and `n` as a C long.
    fp = PyFloat_AsDouble(p)
    ln = PyInt_AsLong(n)
    if not PyErr_Occurred():
        if ln < 0:
            raise ValueError("n < 0")
        if fp < 0:
            raise ValueError("p < 0")
        elif fp > 1:
            raise ValueError("p > 1")
        elif np.isnan(fp):
            raise ValueError("p is nan")
        return discnp_array_sc(self.internal_state, rk_binomial, size, ln,
                               fp, self.lock)

    # A conversion failed; clear the error and handle the parameters as
    # arrays instead.
    PyErr_Clear()

    on = <ndarray>PyArray_FROM_OTF(n, NPY_LONG, NPY_ARRAY_ALIGNED)
    op = <ndarray>PyArray_FROM_OTF(p, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
    # Validate the converted arrays, i.e. the values that are actually
    # handed to the sampler, and apply the same checks as the scalar path.
    if np.any(np.less(on, 0)):
        raise ValueError("n < 0")
    if np.any(np.less(op, 0)):
        raise ValueError("p < 0")
    if np.any(np.greater(op, 1)):
        raise ValueError("p > 1")
    # NaN compares false against both bounds, so it needs its own test.
    if np.any(np.isnan(op)):
        raise ValueError("p is nan")
    return discnp_array(self.internal_state, rk_binomial, size, on, op,
                        self.lock)
def negative_binomial(self, n, p, size=None):
    """
    negative_binomial(n, p, size=None)

    Draw samples from a negative_binomial distribution.

    Samples are drawn from a negative_Binomial distribution with specified
    parameters, `n` trials and `p` probability of success where `n` is
    > 0 and `p` is in the interval [0, 1].

    Parameters
    ----------
    n : int or float
        Parameter, > 0.  The value is converted to a double.
    p : float
        Parameter, >= 0 and <=1.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : int or ndarray of ints
        Drawn samples.

    Raises
    ------
    ValueError
        If `n` is <= 0, or if `p` is negative or greater than 1.  For
        array arguments the check applies to every element.

    Notes
    -----
    The probability density for the Negative Binomial distribution is

    .. math:: P(N;n,p) = \\binom{N+n-1}{n-1}p^{n}(1-p)^{N},

    where :math:`n-1` is the number of successes, :math:`p` is the probability
    of success, and :math:`N+n-1` is the number of trials.

    The negative binomial distribution gives the probability of n-1 successes
    and N failures in N+n-1 trials, and success on the (N+n)th trial.

    If one throws a die repeatedly until the third time a "1" appears, then the
    probability distribution of the number of non-"1"s that appear before the
    third "1" is a negative binomial distribution.

    References
    ----------
    .. [1] Weisstein, Eric W. "Negative Binomial Distribution." From
           MathWorld--A Wolfram Web Resource.
           http://mathworld.wolfram.com/NegativeBinomialDistribution.html
    .. [2] Wikipedia, "Negative binomial distribution",
           http://en.wikipedia.org/wiki/Negative_binomial_distribution

    Examples
    --------
    Draw samples from the distribution:

    A real world example. A company drills wild-cat oil exploration wells, each
    with an estimated probability of success of 0.1.  What is the probability
    of having one success for each successive well, that is what is the
    probability of a single success after drilling 5 wells, after 6 wells,
    etc.?

    >>> s = np.random.negative_binomial(1, 0.1, 100000)
    >>> for i in range(1, 11):
    ...    probability = sum(s<i) / 100000.
    ...    print i, "wells drilled, probability of one success =", probability

    """
    cdef ndarray on
    cdef ndarray op
    cdef double fn
    cdef double fp

    # Scalar attempt: both parameters converted to C doubles.
    fp = PyFloat_AsDouble(p)
    fn = PyFloat_AsDouble(n)
    if not PyErr_Occurred():
        if fn <= 0:
            raise ValueError("n <= 0")
        if fp < 0:
            raise ValueError("p < 0")
        elif fp > 1:
            raise ValueError("p > 1")
        return discdd_array_sc(self.internal_state, rk_negative_binomial,
                               size, fn, fp, self.lock)

    # A conversion failed; clear the error and handle the parameters as
    # arrays instead.
    PyErr_Clear()

    on = <ndarray>PyArray_FROM_OTF(n, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
    op = <ndarray>PyArray_FROM_OTF(p, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
    # Validate the converted arrays (the values actually passed on) rather
    # than the raw arguments, which would be converted a second time.
    if np.any(np.less_equal(on, 0)):
        raise ValueError("n <= 0")
    if np.any(np.less(op, 0)):
        raise ValueError("p < 0")
    if np.any(np.greater(op, 1)):
        raise ValueError("p > 1")
    return discdd_array(self.internal_state, rk_negative_binomial, size,
                        on, op, self.lock)
def poisson(self, lam=1.0, size=None):
    """
    poisson(lam=1.0, size=None)

    Draw samples from a Poisson distribution.

    The Poisson distribution is the limit of the Binomial
    distribution for large N.

    Parameters
    ----------
    lam : float or sequence of float
        Expectation of interval, should be >= 0. A sequence of expectation
        intervals must be broadcastable over the requested size.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : ndarray or scalar
        The drawn samples.

    Raises
    ------
    ValueError
        If `lam`, or any element of it, is negative or exceeds the
        generator's ``poisson_lam_max`` limit.

    Notes
    -----
    The Poisson distribution

    .. math:: f(k; \\lambda)=\\frac{\\lambda^k e^{-\\lambda}}{k!}

    For events with an expected separation :math:`\\lambda` the Poisson
    distribution :math:`f(k; \\lambda)` describes the probability of
    :math:`k` events occurring within the observed interval :math:`\\lambda`.

    Because the output is limited to the range of the C long type, a
    ValueError is raised when `lam` is within 10 sigma of the maximum
    representable value.

    References
    ----------
    .. [1] Weisstein, Eric W. "Poisson Distribution." From MathWorld--A Wolfram
           Web Resource. http://mathworld.wolfram.com/PoissonDistribution.html
    .. [2] Wikipedia, "Poisson distribution",
       http://en.wikipedia.org/wiki/Poisson_distribution

    Examples
    --------
    Draw samples from the distribution:

    >>> import numpy as np
    >>> s = np.random.poisson(5, 10000)

    Display histogram of the sample:

    >>> import matplotlib.pyplot as plt
    >>> count, bins, ignored = plt.hist(s, 14, normed=True)
    >>> plt.show()

    Draw each 100 values for lambda 100 and 500:

    >>> s = np.random.poisson(lam=(100., 500.), size=(100, 2))

    """
    cdef ndarray olam
    cdef double flam

    # Scalar attempt: read the parameter as a C double.
    flam = PyFloat_AsDouble(lam)
    if not PyErr_Occurred():
        # Check the converted double, i.e. the value actually handed to
        # the sampler, instead of comparing the original Python object.
        if flam < 0:
            raise ValueError("lam < 0")
        if flam > self.poisson_lam_max:
            raise ValueError("lam value too large")
        return discd_array_sc(self.internal_state, rk_poisson, size, flam,
                              self.lock)

    # The conversion failed; clear the error and handle `lam` as an array.
    PyErr_Clear()

    olam = <ndarray>PyArray_FROM_OTF(lam, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
    if np.any(np.less(olam, 0)):
        raise ValueError("lam < 0")
    if np.any(np.greater(olam, self.poisson_lam_max)):
        raise ValueError("lam value too large.")
    return discd_array(self.internal_state, rk_poisson, size, olam, self.lock)
def zipf(self, a, size=None):
    """
    zipf(a, size=None)

    Draw samples from a Zipf distribution.

    Samples are drawn from a Zipf distribution with specified parameter
    `a` > 1.

    The Zipf distribution (also known as the zeta distribution) is a
    discrete probability distribution that satisfies Zipf's law: the
    frequency of an item is inversely proportional to its rank in a
    frequency table.

    Parameters
    ----------
    a : float > 1
        Distribution parameter.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : scalar or ndarray
        The returned samples are greater than or equal to one.

    Raises
    ------
    ValueError
        If `a`, or any element of it, is <= 1.

    See Also
    --------
    scipy.stats.distributions.zipf : probability density function,
        distribution, or cumulative density function, etc.

    Notes
    -----
    The probability density for the Zipf distribution is

    .. math:: p(x) = \\frac{x^{-a}}{\\zeta(a)},

    where :math:`\\zeta` is the Riemann Zeta function.

    It is named for the American linguist George Kingsley Zipf, who noted
    that the frequency of any word in a sample of a language is inversely
    proportional to its rank in the frequency table.

    References
    ----------
    Zipf, G. K., *Selected Studies of the Principle of Relative Frequency
    in Language*, Cambridge, MA: Harvard Univ. Press, 1932.

    Examples
    --------
    Draw samples from the distribution:

    >>> a = 2. # parameter
    >>> s = np.random.zipf(a, 1000)

    Display the histogram of the samples, along with
    the probability density function:

    >>> import matplotlib.pyplot as plt
    >>> import scipy.special as sps
    Truncate s values at 50 so plot is interesting
    >>> count, bins, ignored = plt.hist(s[s<50], 50, normed=True)
    >>> x = np.arange(1., 50.)
    >>> y = x**(-a)/sps.zetac(a)
    >>> plt.plot(x, y/max(y), linewidth=2, color='r')
    >>> plt.show()

    """
    cdef double a_val
    cdef ndarray a_arr

    # Scalar attempt: read the parameter as a C double.
    a_val = PyFloat_AsDouble(a)

    if PyErr_Occurred():
        # Not convertible to a double; clear the error and use the
        # array-based code path.
        PyErr_Clear()
        a_arr = <ndarray>PyArray_FROM_OTF(a, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less_equal(a_arr, 1.0)):
            raise ValueError("a <= 1.0")
        return discd_array(self.internal_state, rk_zipf, size, a_arr,
                           self.lock)

    if a_val <= 1.0:
        raise ValueError("a <= 1.0")
    return discd_array_sc(self.internal_state, rk_zipf, size, a_val,
                          self.lock)
def geometric(self, p, size=None):
    """
    geometric(p, size=None)

    Draw samples from the geometric distribution.

    A Bernoulli trial is an experiment with exactly two outcomes, success
    or failure (flipping a coin is the classic example).  The geometric
    distribution models how many such trials have to be run until the
    first success occurs, so it is supported on the positive integers,
    ``k = 1, 2, ...``.

    The probability mass function of the geometric distribution is

    .. math:: f(k) = (1 - p)^{k - 1} p

    where `p` is the probability of success of an individual trial.

    Parameters
    ----------
    p : float or array_like of floats
        The probability of success of an individual trial.  Values below
        0 or above 1 are rejected.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    out : ndarray or scalar
        Samples from the geometric distribution, shaped according to
        `size`.

    Raises
    ------
    ValueError
        If any value of `p` is less than 0 or greater than 1.

    Examples
    --------
    Draw ten thousand values from the geometric distribution,
    with the probability of an individual success equal to 0.35:

    >>> z = np.random.geometric(p=0.35, size=10000)

    How many trials succeeded after a single run?

    >>> (z == 1).sum() / 10000.
    0.34889999999999999 #random

    """
    cdef ndarray p_arr
    cdef double p_val

    p_val = PyFloat_AsDouble(p)
    if PyErr_Occurred():
        # `p` could not be converted to a C double: discard the pending
        # error and handle it as an array of probabilities instead.
        PyErr_Clear()
        p_arr = <ndarray>PyArray_FROM_OTF(p, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
        if np.any(np.less(p_arr, 0.0)):
            raise ValueError("p < 0.0")
        if np.any(np.greater(p_arr, 1.0)):
            raise ValueError("p > 1.0")
        return discd_array(self.internal_state, rk_geometric, size, p_arr,
                           self.lock)

    # Scalar fast path.
    if p_val < 0.0:
        raise ValueError("p < 0.0")
    if p_val > 1.0:
        raise ValueError("p > 1.0")
    return discd_array_sc(self.internal_state, rk_geometric, size, p_val,
                          self.lock)
def hypergeometric(self, ngood, nbad, nsample, size=None):
    """
    hypergeometric(ngood, nbad, nsample, size=None)

    Draw samples from a Hypergeometric distribution.

    Samples are drawn from a Hypergeometric distribution with specified
    parameters, ngood (ways to make a good selection), nbad (ways to make
    a bad selection), and nsample = number of items sampled, which is less
    than or equal to the sum ngood + nbad.

    Parameters
    ----------
    ngood : int or array_like
        Number of ways to make a good selection.  Must be nonnegative.
    nbad : int or array_like
        Number of ways to make a bad selection.  Must be nonnegative.
    nsample : int or array_like
        Number of items sampled.  Must be at least 1 and at most
        ``ngood + nbad``.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : ndarray or scalar
        Each value is the number of good items found in one draw of
        `nsample` items.

    Raises
    ------
    ValueError
        If ``ngood < 0``, ``nbad < 0``, ``nsample < 1`` or
        ``ngood + nbad < nsample``.

    See Also
    --------
    scipy.stats.distributions.hypergeom : probability density function,
        distribution or cumulative density function, etc.

    Notes
    -----
    The probability mass function of the Hypergeometric distribution is

    .. math:: P(x) = \\frac{\\binom{g}{x}\\binom{b}{n-x}}{\\binom{g+b}{n}},

    where :math:`0 \\le x \\le n` and :math:`n-b \\le x \\le g`

    for P(x) the probability of x good results in the drawn sample,
    g = ngood, b = nbad, and n = nsample.

    Consider an urn with black and white marbles in it, ngood of them
    black and nbad are white.  If you draw nsample balls without
    replacement, then the Hypergeometric distribution describes the
    distribution of black balls in the drawn sample.

    Note that this distribution is very similar to the Binomial
    distribution, except that in this case, samples are drawn without
    replacement, whereas in the Binomial case samples are drawn with
    replacement (or the sample space is infinite).  As the sample space
    becomes large, this distribution approaches the Binomial.

    References
    ----------
    .. [1] Lentner, Marvin, "Elementary Applied Statistics", Bogden
           and Quigley, 1972.
    .. [2] Weisstein, Eric W. "Hypergeometric Distribution." From
           MathWorld--A Wolfram Web Resource.
           http://mathworld.wolfram.com/HypergeometricDistribution.html
    .. [3] Wikipedia, "Hypergeometric distribution",
           https://en.wikipedia.org/wiki/Hypergeometric_distribution

    Examples
    --------
    Draw samples from the distribution:

    >>> import matplotlib.pyplot as plt
    >>> ngood, nbad, nsamp = 100, 2, 10
    # number of good, number of bad, and number of samples
    >>> s = np.random.hypergeometric(ngood, nbad, nsamp, 1000)
    >>> plt.hist(s)
    #   note that it is very unlikely to grab both bad items

    Suppose you have an urn with 15 white and 15 black marbles.
    If you pull 15 marbles at random, how likely is it that
    12 or more of them are one color?

    >>> s = np.random.hypergeometric(15, 15, 15, 100000)
    >>> sum(s>=12)/100000. + sum(s<=3)/100000.
    #   answer = 0.003 ... pretty unlikely!

    """
    cdef ndarray ongood, onbad, onsample
    cdef long lngood, lnbad, lnsample

    # Try the scalar fast path first: if all three arguments convert to
    # C longs no Python error is pending afterwards.
    lngood = PyInt_AsLong(ngood)
    lnbad = PyInt_AsLong(nbad)
    lnsample = PyInt_AsLong(nsample)
    if not PyErr_Occurred():
        if lngood < 0:
            raise ValueError("ngood < 0")
        if lnbad < 0:
            raise ValueError("nbad < 0")
        if lnsample < 1:
            raise ValueError("nsample < 1")
        if lngood + lnbad < lnsample:
            raise ValueError("ngood + nbad < nsample")
        return discnmN_array_sc(self.internal_state, rk_hypergeometric,
                                size, lngood, lnbad, lnsample, self.lock)

    # At least one argument is not a plain integer: drop the pending
    # conversion error and fall back to the array implementation.
    PyErr_Clear()

    ongood = <ndarray>PyArray_FROM_OTF(ngood, NPY_LONG, NPY_ARRAY_ALIGNED)
    onbad = <ndarray>PyArray_FROM_OTF(nbad, NPY_LONG, NPY_ARRAY_ALIGNED)
    onsample = <ndarray>PyArray_FROM_OTF(nsample, NPY_LONG, NPY_ARRAY_ALIGNED)
    if np.any(np.less(ongood, 0)):
        raise ValueError("ngood < 0")
    if np.any(np.less(onbad, 0)):
        raise ValueError("nbad < 0")
    if np.any(np.less(onsample, 1)):
        raise ValueError("nsample < 1")
    if np.any(np.less(np.add(ongood, onbad), onsample)):
        raise ValueError("ngood + nbad < nsample")
    return discnmN_array(self.internal_state, rk_hypergeometric, size,
                         ongood, onbad, onsample, self.lock)
def logseries(self, p, size=None):
    """
    logseries(p, size=None)

    Draw samples from a Logarithmic Series distribution.

    Samples are drawn from a Log Series distribution with specified
    parameter, p (probability, 0 < p < 1).

    Parameters
    ----------
    p : float or array_like of floats
        Shape parameter of the distribution.  Every value must satisfy
        ``0 < p < 1``.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

    Returns
    -------
    samples : {ndarray, scalar}
        Drawn samples from the Log Series distribution.

    Raises
    ------
    ValueError
        If any value of `p` is ``<= 0`` or ``>= 1``.

    See Also
    --------
    scipy.stats.distributions.logser : probability density function,
        distribution or cumulative density function, etc.

    Notes
    -----
    The probability mass function of the Log Series distribution is

    .. math:: P(k) = \\frac{-p^k}{k \\ln(1-p)},

    where p = probability.

    The Log Series distribution is frequently used to represent species
    richness and occurrence, first proposed by Fisher, Corbet, and
    Williams in 1943 [2].  It may also be used to model the numbers of
    occupants seen in cars [3].

    References
    ----------
    .. [1] Buzas, Martin A.; Culver, Stephen J.,  Understanding regional
           species diversity through the log series distribution of
           occurrences: BIODIVERSITY RESEARCH Diversity & Distributions,
           Volume 5, Number 5, September 1999 , pp. 187-195(9).
    .. [2] Fisher, R.A,, A.S. Corbet, and C.B. Williams. 1943. The
           relation between the number of species and the number of
           individuals in a random sample of an animal population.
           Journal of Animal Ecology, 12:42-58.
    .. [3] D. J. Hand, F. Daly, D. Lunn, E. Ostrowski, A Handbook of Small
           Data Sets, CRC Press, 1994.
    .. [4] Wikipedia, "Logarithmic distribution",
           https://en.wikipedia.org/wiki/Logarithmic_distribution

    Examples
    --------
    Draw samples from the distribution:

    >>> import matplotlib.pyplot as plt
    >>> a = .6
    >>> s = np.random.logseries(a, 10000)
    >>> count, bins, ignored = plt.hist(s)

    #   plot against distribution

    >>> def logseries(k, p):
    ...     return -p**k/(k*np.log(1-p))
    >>> plt.plot(bins, logseries(bins, a)*count.max()/
    ...          logseries(bins, a).max(), 'r')
    >>> plt.show()

    """
    cdef ndarray op
    cdef double fp

    # Scalar fast path: taken when `p` converts cleanly to a C double.
    fp = PyFloat_AsDouble(p)
    if not PyErr_Occurred():
        if fp <= 0.0:
            raise ValueError("p <= 0.0")
        if fp >= 1.0:
            raise ValueError("p >= 1.0")
        return discd_array_sc(self.internal_state, rk_logseries, size, fp,
                              self.lock)

    # Otherwise clear the conversion error and treat `p` as array_like.
    PyErr_Clear()

    op = <ndarray>PyArray_FROM_OTF(p, NPY_DOUBLE, NPY_ARRAY_ALIGNED)
    if np.any(np.less_equal(op, 0.0)):
        raise ValueError("p <= 0.0")
    if np.any(np.greater_equal(op, 1.0)):
        raise ValueError("p >= 1.0")
    return discd_array(self.internal_state, rk_logseries, size, op, self.lock)
# Multivariate distributions: | |
def multivariate_normal(self, mean, cov, size=None):
    """
    multivariate_normal(mean, cov[, size])

    Draw random samples from a multivariate normal distribution.

    The multivariate normal, multinormal or Gaussian distribution is a
    generalization of the one-dimensional normal distribution to higher
    dimensions.  Such a distribution is specified by its mean and
    covariance matrix.  These parameters are analogous to the mean
    (average or "center") and variance (standard deviation, or "width,"
    squared) of the one-dimensional normal distribution.

    Parameters
    ----------
    mean : 1-D array_like, of length N
        Mean of the N-dimensional distribution.
    cov : 2-D array_like, of shape (N, N)
        Covariance matrix of the distribution.  Must be symmetric and
        positive-semidefinite for "physically meaningful" results.
    size : int or tuple of ints, optional
        Given a shape of, for example, ``(m,n,k)``, ``m*n*k`` samples are
        generated, and packed in an `m`-by-`n`-by-`k` arrangement.  Because
        each sample is `N`-dimensional, the output shape is ``(m,n,k,N)``.
        If no shape is specified, a single (`N`-D) sample is returned.

    Returns
    -------
    out : ndarray
        The drawn samples.  The shape is `size` followed by one trailing
        axis of length ``N``; if `size` was not provided the shape is
        ``(N,)``.

        In other words, each entry ``out[i,j,...,:]`` is an N-dimensional
        value drawn from the distribution.

    Raises
    ------
    ValueError
        If `mean` is not 1-dimensional, `cov` is not a square 2-dimensional
        array, or their lengths differ.

    Warns
    -----
    RuntimeWarning
        If `cov` is detected not to be positive-semidefinite.  The
        offending singular values are zeroed before sampling.

    Notes
    -----
    The mean is a coordinate in N-dimensional space, which represents the
    location where samples are most likely to be generated.  This is
    analogous to the peak of the bell curve for the one-dimensional or
    univariate normal distribution.

    Covariance indicates the level to which two variables vary together.
    From the multivariate normal distribution, we draw N-dimensional
    samples, :math:`X = [x_1, x_2, ... x_N]`.  The covariance matrix
    element :math:`C_{ij}` is the covariance of :math:`x_i` and :math:`x_j`.
    The element :math:`C_{ii}` is the variance of :math:`x_i` (i.e. its
    "spread").

    Instead of specifying the full covariance matrix, popular
    approximations include:

      - Spherical covariance (*cov* is a multiple of the identity matrix)
      - Diagonal covariance (*cov* has non-negative elements, and only on
        the diagonal)

    This geometrical property can be seen in two dimensions by plotting
    generated data-points:

    >>> mean = [0,0]
    >>> cov = [[1,0],[0,100]] # diagonal covariance, points lie on x or y-axis

    >>> import matplotlib.pyplot as plt
    >>> x,y = np.random.multivariate_normal(mean,cov,5000).T
    >>> plt.plot(x,y,'x'); plt.axis('equal'); plt.show()

    Note that the covariance matrix must be non-negative definite.

    References
    ----------
    Papoulis, A., *Probability, Random Variables, and Stochastic Processes*,
    3rd ed., New York: McGraw-Hill, 1991.

    Duda, R. O., Hart, P. E., and Stork, D. G., *Pattern Classification*,
    2nd ed., New York: Wiley, 2001.

    Examples
    --------
    >>> mean = (1,2)
    >>> cov = [[1,0],[0,1]]
    >>> x = np.random.multivariate_normal(mean,cov,(3,3))
    >>> x.shape
    (3, 3, 2)

    The following is probably true, given that 2 is twice the
    standard deviation:

    >>> list( (x[0,0,:] - mean) < 2 )
    [True, True] #random

    """
    from numpy.dual import svd

    # Check preconditions on arguments
    mean = np.array(mean)
    cov = np.array(cov)
    if size is None:
        shape = []
    elif isinstance(size, (int, long, np.integer)):
        shape = [size]
    else:
        shape = size

    if len(mean.shape) != 1:
        raise ValueError("mean must be 1 dimensional")
    if (len(cov.shape) != 2) or (cov.shape[0] != cov.shape[1]):
        raise ValueError("cov must be 2 dimensional and square")
    if mean.shape[0] != cov.shape[0]:
        raise ValueError("mean and cov must have same length")

    # Compute shape of output and create a matrix of independent
    # standard normally distributed random numbers. The matrix has rows
    # with the same length as mean and as many rows are necessary to
    # form a matrix of shape final_shape.
    final_shape = list(shape[:])
    final_shape.append(mean.shape[0])
    x = self.standard_normal(final_shape).reshape(-1, mean.shape[0])

    # Transform matrix of standard normals into matrix where each row
    # contains multivariate normals with the desired covariance.
    # Compute A such that dot(transpose(A),A) == cov.
    # Then the matrix products of the rows of x and A has the desired
    # covariance. Note that sqrt(s)*v where (u,s,v) is the singular value
    # decomposition of cov is such an A.
    #
    # Also check that cov is positive-semidefinite. If so, the u.T and v
    # matrices should be equal up to roundoff error if cov is
    # symmetrical and the singular value of the corresponding row is
    # not zero. We continue to use the SVD rather than Cholesky in
    # order to preserve current outputs. Note that symmetry has not
    # been checked.
    (u, s, v) = svd(cov)
    neg = (np.sum(u.T * v, axis=1) < 0) & (s > 0)
    if np.any(neg):
        # Zero the offending singular values so that sqrt(s) stays real.
        s[neg] = 0.
        warnings.warn("covariance is not positive-semidefinite.",
                      RuntimeWarning)

    x = np.dot(x, np.sqrt(s)[:, None] * v)
    x += mean
    x.shape = tuple(final_shape)
    return x
def multinomial(self, npy_intp n, object pvals, size=None):
    """
    multinomial(n, pvals, size=None)

    Draw samples from a multinomial distribution.

    The multinomial distribution is a multivariate generalisation of the
    binomial distribution.  Take an experiment with one of ``p``
    possible outcomes.  An example of such an experiment is throwing a dice,
    where the outcome can be 1 through 6.  Each sample drawn from the
    distribution represents `n` such experiments.  Its values,
    ``X = [X_0, X_1, ..., X_{p-1}]``, represent the number of times the
    outcome was ``i``.

    Parameters
    ----------
    n : int
        Number of experiments.
    pvals : sequence of floats, length p
        Probabilities of each of the ``p`` different outcomes.  These
        should sum to 1 (however, the last element is always assumed to
        account for the remaining probability, as long as
        ``sum(pvals[:-1]) <= 1)``.
    size : int or tuple of ints, optional
        Given a `size` of ``(M, N, K)``, then ``M*N*K`` samples are drawn,
        and the output shape becomes ``(M, N, K, p)``, since each sample
        has shape ``(p,)``.  Default is None, in which case a single
        sample of shape ``(p,)`` is returned.

    Returns
    -------
    out : ndarray
        The drawn samples, as an integer array.  Each group of ``p``
        consecutive entries along the last axis holds the outcome counts
        of one sample.

    Raises
    ------
    ValueError
        If ``sum(pvals[:-1])`` exceeds 1.0 (beyond a 1e-12 tolerance).

    Examples
    --------
    Throw a dice 20 times:

    >>> np.random.multinomial(20, [1/6.]*6, size=1)
    array([[4, 1, 7, 5, 2, 1]])

    It landed 4 times on 1, once on 2, etc.

    Now, throw the dice 20 times, and 20 times again:

    >>> np.random.multinomial(20, [1/6.]*6, size=2)
    array([[3, 4, 3, 3, 4, 3],
           [2, 4, 3, 4, 0, 7]])

    For the first run, we threw 3 times 1, 4 times 2, etc.  For the second,
    we threw 2 times 1, 4 times 2, etc.

    A loaded dice is more likely to land on number 6:

    >>> np.random.multinomial(100, [1/7.]*5 + [2/7.])
    array([11, 16, 14, 17, 16, 26]) #random

    """
    cdef npy_intp d
    cdef ndarray parr "arrayObject_parr", mnarr "arrayObject_mnarr"
    cdef double *pix
    cdef long *mnix
    cdef npy_intp i, j, dn, sz
    cdef double Sum

    d = len(pvals)
    parr = <ndarray>PyArray_ContiguousFromObject(pvals, NPY_DOUBLE, 1, 1)
    pix = <double*>PyArray_DATA(parr)

    # Only the first d-1 probabilities are checked; the last category
    # implicitly receives whatever probability mass is left over.
    if kahan_sum(pix, d-1) > (1.0 + 1e-12):
        raise ValueError("sum(pvals[:-1]) > 1.0")

    shape = _shape_from_size(size, d)

    multin = np.zeros(shape, int)
    mnarr = <ndarray>multin
    mnix = <long*>PyArray_DATA(mnarr)
    sz = PyArray_SIZE(mnarr)
    with self.lock, nogil:
        # Walk the flat output buffer one sample (d consecutive counts)
        # at a time.
        i = 0
        while i < sz:
            Sum = 1.0
            dn = n
            for j from 0 <= j < d-1:
                # Draw the count for category j from the trials that are
                # still unassigned, using its probability conditional on
                # the categories not yet processed.
                mnix[i+j] = rk_binomial(self.internal_state, dn, pix[j]/Sum)
                dn = dn - mnix[i+j]
                if dn <= 0:
                    # All trials assigned; remaining counts stay zero.
                    break
                Sum = Sum - pix[j]
            if dn > 0:
                # Leftover trials all go to the last category.
                mnix[i+d-1] = dn

            i = i + d

    return multin
def dirichlet(self, object alpha, size=None):
    """
    dirichlet(alpha, size=None)

    Draw samples from the Dirichlet distribution.

    Draw `size` samples of dimension k from a Dirichlet distribution.  A
    Dirichlet-distributed random variable can be seen as a multivariate
    generalization of a Beta distribution.  Dirichlet pdf is the conjugate
    prior of a multinomial in Bayesian inference.

    Parameters
    ----------
    alpha : sequence of floats, length k
        Parameter of the distribution (k dimension for sample of
        dimension k).
    size : int or tuple of ints, optional
        Number of samples to draw.  Default is None, in which case a
        single sample of length k is returned.

    Returns
    -------
    samples : ndarray
        The drawn samples, of shape ``(size, k)``.  The k entries of each
        sample sum to 1.

    Notes
    -----
    .. math:: X \\propto \\prod_{i=1}^{k}{x^{\\alpha_i-1}_i}

    Uses the following property for computation: for each dimension,
    draw a random sample y_i from a standard gamma generator of shape
    `alpha_i`, then
    :math:`X = \\frac{1}{\\sum_{i=1}^k{y_i}} (y_1, \\ldots, y_k)` is
    Dirichlet distributed.

    References
    ----------
    .. [1] David MacKay, "Information Theory, Inference and Learning
           Algorithms," chapter 23,
           http://www.inference.phy.cam.ac.uk/mackay/
    .. [2] Wikipedia, "Dirichlet distribution",
           http://en.wikipedia.org/wiki/Dirichlet_distribution

    Examples
    --------
    Taking an example cited in Wikipedia, this distribution can be used if
    one wanted to cut strings (each of initial length 1.0) into K pieces
    with different lengths, where each piece had, on average, a designated
    average length, but allowing some variation in the relative sizes of the
    pieces.

    >>> s = np.random.dirichlet((10, 5, 3), 20).transpose()

    >>> plt.barh(range(20), s[0])
    >>> plt.barh(range(20), s[1], left=s[0], color='g')
    >>> plt.barh(range(20), s[2], left=s[0]+s[1], color='r')
    >>> plt.title("Lengths of Strings")

    """
    cdef npy_intp k
    cdef npy_intp totsize
    cdef ndarray alpha_arr, val_arr
    cdef double *alpha_data
    cdef double *val_data
    cdef npy_intp i, j
    cdef double acc, invacc

    k = len(alpha)
    alpha_arr = <ndarray>PyArray_ContiguousFromObject(alpha, NPY_DOUBLE, 1, 1)
    alpha_data = <double*>PyArray_DATA(alpha_arr)

    # NOTE(review): the leading output dimensions come from
    # _shape_from_size, which is defined elsewhere in this file.
    shape = _shape_from_size(size, k)

    diric = np.zeros(shape, np.float64)
    val_arr = <ndarray>diric
    val_data = <double*>PyArray_DATA(val_arr)

    i = 0
    totsize = PyArray_SIZE(val_arr)
    with self.lock, nogil:
        # Fill the flat buffer one sample (k consecutive values) at a
        # time: draw k standard-gamma variates, then rescale them so
        # that they sum to one.
        while i < totsize:
            acc = 0.0
            for j from 0 <= j < k:
                val_data[i+j] = rk_standard_gamma(self.internal_state,
                                                  alpha_data[j])
                acc = acc + val_data[i+j]
            invacc = 1/acc
            for j from 0 <= j < k:
                val_data[i+j] = val_data[i+j] * invacc
            i = i + k

    return diric
# Shuffling and permutations: | |
def shuffle(self, object x):
    """
    shuffle(x)

    Modify a sequence in-place by shuffling its contents.

    Parameters
    ----------
    x : array_like
        The array or list to be shuffled.  Sequences with fewer than two
        items (including arrays whose first axis is empty) are left
        untouched.

    Returns
    -------
    None

    Examples
    --------
    >>> arr = np.arange(10)
    >>> np.random.shuffle(arr)
    >>> arr
    [1 7 5 2 9 4 3 6 0 8]

    This function only shuffles the array along the first index of a
    multi-dimensional array:

    >>> arr = np.arange(9).reshape((3, 3))
    >>> np.random.shuffle(arr)
    >>> arr
    array([[3, 4, 5],
           [6, 7, 8],
           [0, 1, 2]])

    """
    cdef npy_intp i, j

    i = len(x) - 1
    if i < 1:
        # Zero or one item: there is nothing to permute.  Returning here
        # also keeps the bounce-buffer setup below from indexing x[0] on
        # an array whose first axis is empty, which raised IndexError.
        return

    # Logic adapted from random.shuffle()
    if isinstance(x, np.ndarray) and \
            (x.ndim > 1 or x.dtype.fields is not None):
        # For a multi-dimensional ndarray, indexing returns a view onto
        # each row. So we can't just use ordinary assignment to swap the
        # rows; we need a bounce buffer.
        buf = np.empty_like(x[0])
        while i > 0:
            j = rk_interval(i, self.internal_state)
            buf[...] = x[j]
            x[j] = x[i]
            x[i] = buf
            i = i - 1
    else:
        # For single-dimensional arrays, lists, and any other Python
        # sequence types, indexing returns a real object that's
        # independent of the array contents, so we can just swap directly.
        while i > 0:
            j = rk_interval(i, self.internal_state)
            x[i], x[j] = x[j], x[i]
            i = i - 1
def permutation(self, object x):
    """
    permutation(x)

    Randomly permute a sequence, or return a permuted range.

    The input itself is never modified: the shuffle is applied to a
    freshly created array.  If `x` is a multi-dimensional array, it is
    only shuffled along its first index.

    Parameters
    ----------
    x : int or array_like
        If `x` is an integer, randomly permute ``np.arange(x)``.
        If `x` is an array, make a copy and shuffle the elements
        randomly.

    Returns
    -------
    out : ndarray
        Permuted sequence or array range.

    Examples
    --------
    >>> np.random.permutation(10)
    array([1, 7, 4, 3, 0, 9, 2, 5, 8, 6])

    >>> np.random.permutation([1, 4, 9, 12, 15])
    array([15,  1,  9,  4, 12])

    >>> arr = np.arange(9).reshape((3, 3))
    >>> np.random.permutation(arr)
    array([[6, 7, 8],
           [0, 1, 2],
           [3, 4, 5]])

    """
    if not isinstance(x, (int, long, np.integer)):
        # Sequence input: work on a copy so the caller's data is kept.
        shuffled = np.array(x)
    else:
        # Integer input: permute the range 0 .. x-1.
        shuffled = np.arange(x)
    self.shuffle(shuffled)
    return shuffled
# Module-level convenience API: a single shared RandomState instance whose
# bound methods are re-exported below under the same names.
_rand = RandomState()

# Generator state management
get_state = _rand.get_state
seed = _rand.seed
set_state = _rand.set_state

# Simple random data
bytes = _rand.bytes
choice = _rand.choice
rand = _rand.rand
randint = _rand.randint
randn = _rand.randn
random_integers = _rand.random_integers
random_sample = _rand.random_sample
uniform = _rand.uniform

# Univariate continuous distributions
beta = _rand.beta
chisquare = _rand.chisquare
exponential = _rand.exponential
f = _rand.f
gamma = _rand.gamma
gumbel = _rand.gumbel
laplace = _rand.laplace
logistic = _rand.logistic
lognormal = _rand.lognormal
noncentral_chisquare = _rand.noncentral_chisquare
noncentral_f = _rand.noncentral_f
normal = _rand.normal
pareto = _rand.pareto
power = _rand.power
rayleigh = _rand.rayleigh
standard_cauchy = _rand.standard_cauchy
standard_exponential = _rand.standard_exponential
standard_gamma = _rand.standard_gamma
standard_normal = _rand.standard_normal
standard_t = _rand.standard_t
triangular = _rand.triangular
vonmises = _rand.vonmises
wald = _rand.wald
weibull = _rand.weibull

# Univariate discrete distributions
binomial = _rand.binomial
geometric = _rand.geometric
hypergeometric = _rand.hypergeometric
logseries = _rand.logseries
negative_binomial = _rand.negative_binomial
poisson = _rand.poisson
zipf = _rand.zipf

# Multivariate distributions
dirichlet = _rand.dirichlet
multinomial = _rand.multinomial
multivariate_normal = _rand.multivariate_normal

# Shuffling and permutations
permutation = _rand.permutation
shuffle = _rand.shuffle