/* Source path: tmp/pip-install-ghxuqwgs/numpy_78e94bf2b6094bf9a1f3d92042f9bf46/numpy/linalg/umath_linalg.c.src */
/* -*- c -*- */ | |
/* | |
***************************************************************************** | |
** INCLUDES ** | |
***************************************************************************** | |
*/ | |
#define NPY_NO_DEPRECATED_API NPY_API_VERSION | |
#include "Python.h" | |
#include "numpy/arrayobject.h" | |
#include "numpy/ufuncobject.h" | |
#include "npy_pycompat.h" | |
#include "npy_config.h" | |
#include <stddef.h> | |
#include <stdio.h> | |
#include <assert.h> | |
#include <math.h> | |
/* Version tag of this umath_linalg implementation. */
static const char *umath_linalg_version_string = "0.1.4";
/* | |
**************************************************************************** | |
* Debugging support * | |
**************************************************************************** | |
*/ | |
/*
 * Debug tracing helpers. All output goes to stderr.
 *
 *   TRACE_TXT(fmt, ...)  printf-style message.
 *   STACK_TRACE          empty statement unless redefined further down.
 *   TRACE                prints "file:line:function" and then runs STACK_TRACE.
 */
#define TRACE_TXT(...) \
    do { \
        fprintf (stderr, __VA_ARGS__); \
    } while (0)

#define STACK_TRACE \
    do { \
    } while (0)

#define TRACE \
    do { \
        fprintf (stderr, "%s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__); \
        STACK_TRACE; \
    } while (0)
#if 0
/*
 * Optional stack-trace support (compiled out by default).
 *
 * When enabled, STACK_TRACE is redefined so that TRACE also dumps the current
 * call stack. The dump is written to file descriptor 2 (stderr) so that it
 * ends up in the same stream as the fprintf(stderr, ...) output of TRACE.
 */
#include <execinfo.h>
void
dbg_stack_trace(void)
{
    void *trace[32];
    int depth;

    /* backtrace() takes and returns an int frame count */
    depth = backtrace(trace, (int)(sizeof(trace)/sizeof(trace[0])));
    backtrace_symbols_fd(trace, depth, 2);
}
#undef STACK_TRACE
#define STACK_TRACE do { dbg_stack_trace(); } while (0)
#endif
/* | |
***************************************************************************** | |
* BLAS/LAPACK calling macros * | |
***************************************************************************** | |
*/ | |
/*
 * FNAME(x) maps a routine name to its external symbol: the bare name when
 * NO_APPEND_FORTRAN is defined, otherwise the name with a trailing underscore.
 */
#if defined(NO_APPEND_FORTRAN)
#define FNAME(x) x
#else
#define FNAME(x) x##_
#endif
/* Single-precision complex value: real part `r`, imaginary part `i`. */
typedef struct {
    float r;
    float i;
} f2c_complex;

/* Double-precision complex value: real part `r`, imaginary part `i`. */
typedef struct {
    double r;
    double i;
} f2c_doublecomplex;
/* typedef long int (*L_fp)(); */
extern int | |
FNAME(sgeev)(char *jobvl, char *jobvr, int *n, | |
float a[], int *lda, float wr[], float wi[], | |
float vl[], int *ldvl, float vr[], int *ldvr, | |
float work[], int lwork[], | |
int *info); | |
extern int | |
FNAME(dgeev)(char *jobvl, char *jobvr, int *n, | |
double a[], int *lda, double wr[], double wi[], | |
double vl[], int *ldvl, double vr[], int *ldvr, | |
double work[], int lwork[], | |
int *info); | |
extern int | |
FNAME(cgeev)(char *jobvl, char *jobvr, int *n, | |
f2c_doublecomplex a[], int *lda, | |
f2c_doublecomplex w[], | |
f2c_doublecomplex vl[], int *ldvl, | |
f2c_doublecomplex vr[], int *ldvr, | |
f2c_doublecomplex work[], int *lwork, | |
double rwork[], | |
int *info); | |
extern int | |
FNAME(zgeev)(char *jobvl, char *jobvr, int *n, | |
f2c_doublecomplex a[], int *lda, | |
f2c_doublecomplex w[], | |
f2c_doublecomplex vl[], int *ldvl, | |
f2c_doublecomplex vr[], int *ldvr, | |
f2c_doublecomplex work[], int *lwork, | |
double rwork[], | |
int *info); | |
extern int | |
FNAME(ssyevd)(char *jobz, char *uplo, int *n, | |
float a[], int *lda, float w[], float work[], | |
int *lwork, int iwork[], int *liwork, | |
int *info); | |
extern int | |
FNAME(dsyevd)(char *jobz, char *uplo, int *n, | |
double a[], int *lda, double w[], double work[], | |
int *lwork, int iwork[], int *liwork, | |
int *info); | |
extern int | |
FNAME(cheevd)(char *jobz, char *uplo, int *n, | |
f2c_complex a[], int *lda, | |
float w[], f2c_complex work[], | |
int *lwork, float rwork[], int *lrwork, int iwork[], | |
int *liwork, | |
int *info); | |
extern int | |
FNAME(zheevd)(char *jobz, char *uplo, int *n, | |
f2c_doublecomplex a[], int *lda, | |
double w[], f2c_doublecomplex work[], | |
int *lwork, double rwork[], int *lrwork, int iwork[], | |
int *liwork, | |
int *info); | |
extern int | |
FNAME(dgelsd)(int *m, int *n, int *nrhs, | |
double a[], int *lda, double b[], int *ldb, | |
double s[], double *rcond, int *rank, | |
double work[], int *lwork, int iwork[], | |
int *info); | |
extern int | |
FNAME(zgelsd)(int *m, int *n, int *nrhs, | |
f2c_doublecomplex a[], int *lda, | |
f2c_doublecomplex b[], int *ldb, | |
double s[], double *rcond, int *rank, | |
f2c_doublecomplex work[], int *lwork, | |
double rwork[], int iwork[], | |
int *info); | |
extern int | |
FNAME(sgesv)(int *n, int *nrhs, | |
float a[], int *lda, | |
int ipiv[], | |
float b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(dgesv)(int *n, int *nrhs, | |
double a[], int *lda, | |
int ipiv[], | |
double b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(cgesv)(int *n, int *nrhs, | |
f2c_complex a[], int *lda, | |
int ipiv[], | |
f2c_complex b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(zgesv)(int *n, int *nrhs, | |
f2c_doublecomplex a[], int *lda, | |
int ipiv[], | |
f2c_doublecomplex b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(sgetrf)(int *m, int *n, | |
float a[], int *lda, | |
int ipiv[], | |
int *info); | |
extern int | |
FNAME(dgetrf)(int *m, int *n, | |
double a[], int *lda, | |
int ipiv[], | |
int *info); | |
extern int | |
FNAME(cgetrf)(int *m, int *n, | |
f2c_complex a[], int *lda, | |
int ipiv[], | |
int *info); | |
extern int | |
FNAME(zgetrf)(int *m, int *n, | |
f2c_doublecomplex a[], int *lda, | |
int ipiv[], | |
int *info); | |
extern int | |
FNAME(spotrf)(char *uplo, int *n, | |
float a[], int *lda, | |
int *info); | |
extern int | |
FNAME(dpotrf)(char *uplo, int *n, | |
double a[], int *lda, | |
int *info); | |
extern int | |
FNAME(cpotrf)(char *uplo, int *n, | |
f2c_complex a[], int *lda, | |
int *info); | |
extern int | |
FNAME(zpotrf)(char *uplo, int *n, | |
f2c_doublecomplex a[], int *lda, | |
int *info); | |
extern int | |
FNAME(sgesdd)(char *jobz, int *m, int *n, | |
float a[], int *lda, float s[], float u[], | |
int *ldu, float vt[], int *ldvt, float work[], | |
int *lwork, int iwork[], int *info); | |
extern int | |
FNAME(dgesdd)(char *jobz, int *m, int *n, | |
double a[], int *lda, double s[], double u[], | |
int *ldu, double vt[], int *ldvt, double work[], | |
int *lwork, int iwork[], int *info); | |
extern int | |
FNAME(cgesdd)(char *jobz, int *m, int *n, | |
f2c_complex a[], int *lda, | |
float s[], f2c_complex u[], int *ldu, | |
f2c_complex vt[], int *ldvt, | |
f2c_complex work[], int *lwork, | |
float rwork[], int iwork[], int *info); | |
extern int | |
FNAME(zgesdd)(char *jobz, int *m, int *n, | |
f2c_doublecomplex a[], int *lda, | |
double s[], f2c_doublecomplex u[], int *ldu, | |
f2c_doublecomplex vt[], int *ldvt, | |
f2c_doublecomplex work[], int *lwork, | |
double rwork[], int iwork[], int *info); | |
extern int | |
FNAME(spotrs)(char *uplo, int *n, int *nrhs, | |
float a[], int *lda, | |
float b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(dpotrs)(char *uplo, int *n, int *nrhs, | |
double a[], int *lda, | |
double b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(cpotrs)(char *uplo, int *n, int *nrhs, | |
f2c_complex a[], int *lda, | |
f2c_complex b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(zpotrs)(char *uplo, int *n, int *nrhs, | |
f2c_doublecomplex a[], int *lda, | |
f2c_doublecomplex b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(spotri)(char *uplo, int *n, | |
float a[], int *lda, | |
int *info); | |
extern int | |
FNAME(dpotri)(char *uplo, int *n, | |
double a[], int *lda, | |
int *info); | |
extern int | |
FNAME(cpotri)(char *uplo, int *n, | |
f2c_complex a[], int *lda, | |
int *info); | |
extern int | |
FNAME(zpotri)(char *uplo, int *n, | |
f2c_doublecomplex a[], int *lda, | |
int *info); | |
extern int | |
FNAME(scopy)(int *n, | |
float *sx, int *incx, | |
float *sy, int *incy); | |
extern int | |
FNAME(dcopy)(int *n, | |
double *sx, int *incx, | |
double *sy, int *incy); | |
extern int | |
FNAME(ccopy)(int *n, | |
f2c_complex *sx, int *incx, | |
f2c_complex *sy, int *incy); | |
extern int | |
FNAME(zcopy)(int *n, | |
f2c_doublecomplex *sx, int *incx, | |
f2c_doublecomplex *sy, int *incy); | |
extern float | |
FNAME(sdot)(int *n, | |
float *sx, int *incx, | |
float *sy, int *incy); | |
extern double | |
FNAME(ddot)(int *n, | |
double *sx, int *incx, | |
double *sy, int *incy); | |
extern f2c_complex | |
FNAME(cdotu)(int *n, | |
f2c_complex *sx, int *incx, | |
f2c_complex *sy, int *incy); | |
extern f2c_doublecomplex | |
FNAME(zdotu)(int *n, | |
f2c_doublecomplex *sx, int *incx, | |
f2c_doublecomplex *sy, int *incy); | |
extern f2c_complex | |
FNAME(cdotc)(int *n, | |
f2c_complex *sx, int *incx, | |
f2c_complex *sy, int *incy); | |
extern f2c_doublecomplex | |
FNAME(zdotc)(int *n, | |
f2c_doublecomplex *sx, int *incx, | |
f2c_doublecomplex *sy, int *incy); | |
extern int | |
FNAME(sgemm)(char *transa, char *transb, | |
int *m, int *n, int *k, | |
float *alpha, | |
float *a, int *lda, | |
float *b, int *ldb, | |
float *beta, | |
float *c, int *ldc); | |
extern int | |
FNAME(dgemm)(char *transa, char *transb, | |
int *m, int *n, int *k, | |
double *alpha, | |
double *a, int *lda, | |
double *b, int *ldb, | |
double *beta, | |
double *c, int *ldc); | |
extern int | |
FNAME(cgemm)(char *transa, char *transb, | |
int *m, int *n, int *k, | |
f2c_complex *alpha, | |
f2c_complex *a, int *lda, | |
f2c_complex *b, int *ldb, | |
f2c_complex *beta, | |
f2c_complex *c, int *ldc); | |
extern int | |
FNAME(zgemm)(char *transa, char *transb, | |
int *m, int *n, int *k, | |
f2c_doublecomplex *alpha, | |
f2c_doublecomplex *a, int *lda, | |
f2c_doublecomplex *b, int *ldb, | |
f2c_doublecomplex *beta, | |
f2c_doublecomplex *c, int *ldc); | |
/*
 * Call-site wrappers: BLAS(f) and LAPACK(f) expand to the external symbol
 * for routine f (see FNAME). LAPACK_T(f) first emits a trace line naming the
 * routine; it expands to a statement followed by the symbol, so it is only
 * usable where a full statement may precede the call.
 */
#define LAPACK_T(FUNC) \
    TRACE_TXT("Calling LAPACK ( " # FUNC " )\n"); \
    FNAME(FUNC)

#define BLAS(FUNC) FNAME(FUNC)

#define LAPACK(FUNC) FNAME(FUNC)
typedef int fortran_int; | |
typedef float fortran_real; | |
typedef double fortran_doublereal; | |
typedef f2c_complex fortran_complex; | |
typedef f2c_doublecomplex fortran_doublecomplex; | |
/* | |
***************************************************************************** | |
** Some handy functions ** | |
***************************************************************************** | |
*/ | |
static inline void * | |
offset_ptr(void* ptr, ptrdiff_t offset) | |
{ | |
return (void*)((npy_uint8*)ptr + offset); | |
} | |
static inline int | |
get_fp_invalid_and_clear(void) | |
{ | |
int status; | |
status = npy_clear_floatstatus(); | |
return !!(status & NPY_FPE_INVALID); | |
} | |
/*
 * Raise the floating point "invalid" flag when `error_occurred` is non-zero;
 * otherwise clear the floating point status flags.
 */
static inline void
set_fp_invalid_or_clear(int error_occurred)
{
    if (!error_occurred) {
        npy_clear_floatstatus();
        return;
    }
    npy_set_floatstatus_invalid();
}
/* | |
***************************************************************************** | |
** Some handy constants ** | |
***************************************************************************** | |
*/ | |
/* Name of this extension module. */
#define UMATH_LINALG_MODULE_NAME "_umath_linalg"
typedef union { | |
fortran_complex f; | |
npy_cfloat npy; | |
float array[2]; | |
} COMPLEX_t; | |
typedef union { | |
fortran_doublecomplex f; | |
npy_cdouble npy; | |
double array[2]; | |
} DOUBLECOMPLEX_t; | |
static float s_one; | |
static float s_zero; | |
static float s_minus_one; | |
static float s_ninf; | |
static float s_nan; | |
static double d_one; | |
static double d_zero; | |
static double d_minus_one; | |
static double d_ninf; | |
static double d_nan; | |
static COMPLEX_t c_one; | |
static COMPLEX_t c_zero; | |
static COMPLEX_t c_minus_one; | |
static COMPLEX_t c_ninf; | |
static COMPLEX_t c_nan; | |
static DOUBLECOMPLEX_t z_one; | |
static DOUBLECOMPLEX_t z_zero; | |
static DOUBLECOMPLEX_t z_minus_one; | |
static DOUBLECOMPLEX_t z_ninf; | |
static DOUBLECOMPLEX_t z_nan; | |
static void init_constants(void) | |
{ | |
/* | |
this is needed as NPY_INFINITY and NPY_NAN macros | |
can't be used as initializers. I prefer to just set | |
all the constants the same way. | |
*/ | |
s_one = 1.0f; | |
s_zero = 0.0f; | |
s_minus_one = -1.0f; | |
s_ninf = -NPY_INFINITYF; | |
s_nan = NPY_NANF; | |
d_one = 1.0; | |
d_zero = 0.0; | |
d_minus_one = -1.0; | |
d_ninf = -NPY_INFINITY; | |
d_nan = NPY_NAN; | |
c_one.array[0] = 1.0f; | |
c_one.array[1] = 0.0f; | |
c_zero.array[0] = 0.0f; | |
c_zero.array[1] = 0.0f; | |
c_minus_one.array[0] = -1.0f; | |
c_minus_one.array[1] = 0.0f; | |
c_ninf.array[0] = -NPY_INFINITYF; | |
c_ninf.array[1] = 0.0f; | |
c_nan.array[0] = NPY_NANF; | |
c_nan.array[1] = NPY_NANF; | |
z_one.array[0] = 1.0; | |
z_one.array[1] = 0.0; | |
z_zero.array[0] = 0.0; | |
z_zero.array[1] = 0.0; | |
z_minus_one.array[0] = -1.0; | |
z_minus_one.array[1] = 0.0; | |
z_ninf.array[0] = -NPY_INFINITY; | |
z_ninf.array[1] = 0.0; | |
z_nan.array[0] = NPY_NAN; | |
z_nan.array[1] = NPY_NAN; | |
} | |
/* | |
***************************************************************************** | |
** Structs used for data rearrangement ** | |
***************************************************************************** | |
*/ | |
/*
 * Describes how a strided 2D matrix maps onto a contiguous local buffer, so
 * that it can be copied in ("linearized") for use by BLAS/LAPACK and copied
 * back out ("delinearized").
 *
 * rows:           number of rows of the matrix
 * columns:        number of columns of the matrix
 * row_strides:    distance, in bytes, between consecutive rows in the
 *                 strided matrix
 * column_strides: distance, in bytes, between consecutive columns in the
 *                 strided matrix
 *
 * The strides are byte counts; the copy helpers divide them by the element
 * size to obtain the element strides that BLAS expects. In the contiguous
 * buffer each row occupies `columns` consecutive elements.
 */
typedef struct linearize_data_struct
{
    size_t rows;
    size_t columns;
    ptrdiff_t row_strides;
    ptrdiff_t column_strides;
} LINEARIZE_DATA_t;
static inline void | |
init_linearize_data(LINEARIZE_DATA_t *lin_data, | |
int rows, | |
int columns, | |
ptrdiff_t row_strides, | |
ptrdiff_t column_strides) | |
{ | |
lin_data->rows = rows; | |
lin_data->columns = columns; | |
lin_data->row_strides = row_strides; | |
lin_data->column_strides = column_strides; | |
} | |
static inline void | |
dump_ufunc_object(PyUFuncObject* ufunc) | |
{ | |
TRACE_TXT("\n\n%s '%s' (%d input(s), %d output(s), %d specialization(s).\n", | |
ufunc->core_enabled? "generalized ufunc" : "scalar ufunc", | |
ufunc->name, ufunc->nin, ufunc->nout, ufunc->ntypes); | |
if (ufunc->core_enabled) { | |
int arg; | |
int dim; | |
TRACE_TXT("\t%s (%d dimension(s) detected).\n", | |
ufunc->core_signature, ufunc->core_num_dim_ix); | |
for (arg = 0; arg < ufunc->nargs; arg++){ | |
int * arg_dim_ix = ufunc->core_dim_ixs + ufunc->core_offsets[arg]; | |
TRACE_TXT("\t\targ %d (%s) has %d dimension(s): (", | |
arg, arg < ufunc->nin? "INPUT" : "OUTPUT", | |
ufunc->core_num_dims[arg]); | |
for (dim = 0; dim < ufunc->core_num_dims[arg]; dim ++) { | |
TRACE_TXT(" %d", arg_dim_ix[dim]); | |
} | |
TRACE_TXT(" )\n"); | |
} | |
} | |
} | |
static inline void | |
dump_linearize_data(const char* name, const LINEARIZE_DATA_t* params) | |
{ | |
TRACE_TXT("\n\t%s rows: %zd columns: %zd"\ | |
"\n\t\trow_strides: %td column_strides: %td"\ | |
"\n", name, params->rows, params->columns, | |
params->row_strides, params->column_strides); | |
} | |
/* Sum of two single-precision values. */
static inline float
FLOAT_add(float op1, float op2)
{
    const float sum = op1 + op2;

    return sum;
}
/* Sum of two double-precision values. */
static inline double
DOUBLE_add(double op1, double op2)
{
    const double sum = op1 + op2;

    return sum;
}
static inline COMPLEX_t | |
CFLOAT_add(COMPLEX_t op1, COMPLEX_t op2) | |
{ | |
COMPLEX_t result; | |
result.array[0] = op1.array[0] + op2.array[0]; | |
result.array[1] = op1.array[1] + op2.array[1]; | |
return result; | |
} | |
static inline DOUBLECOMPLEX_t | |
CDOUBLE_add(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2) | |
{ | |
DOUBLECOMPLEX_t result; | |
result.array[0] = op1.array[0] + op2.array[0]; | |
result.array[1] = op1.array[1] + op2.array[1]; | |
return result; | |
} | |
/* Product of two single-precision values. */
static inline float
FLOAT_mul(float op1, float op2)
{
    const float product = op1*op2;

    return product;
}
/* Product of two double-precision values. */
static inline double
DOUBLE_mul(double op1, double op2)
{
    const double product = op1*op2;

    return product;
}
static inline COMPLEX_t | |
CFLOAT_mul(COMPLEX_t op1, COMPLEX_t op2) | |
{ | |
COMPLEX_t result; | |
result.array[0] = op1.array[0]*op2.array[0] - op1.array[1]*op2.array[1]; | |
result.array[1] = op1.array[1]*op2.array[0] + op1.array[0]*op2.array[1]; | |
return result; | |
} | |
static inline DOUBLECOMPLEX_t | |
CDOUBLE_mul(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2) | |
{ | |
DOUBLECOMPLEX_t result; | |
result.array[0] = op1.array[0]*op2.array[0] - op1.array[1]*op2.array[1]; | |
result.array[1] = op1.array[1]*op2.array[0] + op1.array[0]*op2.array[1]; | |
return result; | |
} | |
/*
 * Real counterpart of the complex "mulc" helpers; for real operands it is
 * a plain product.
 */
static inline float
FLOAT_mulc(float op1, float op2)
{
    const float product = op1*op2;

    return product;
}
/*
 * Real counterpart of the complex "mulc" helpers; for real operands it is
 * a plain product.
 *
 * The operands are taken as double so that no precision is lost on entry.
 * Existing callers that pass doubles are unaffected apart from getting the
 * full-precision result.
 */
static inline double
DOUBLE_mulc(double op1, double op2)
{
    return op1*op2;
}
static inline COMPLEX_t | |
CFLOAT_mulc(COMPLEX_t op1, COMPLEX_t op2) | |
{ | |
COMPLEX_t result; | |
result.array[0] = op1.array[0]*op2.array[0] + op1.array[1]*op2.array[1]; | |
result.array[1] = op1.array[0]*op2.array[1] - op1.array[1]*op2.array[0]; | |
return result; | |
} | |
static inline DOUBLECOMPLEX_t | |
CDOUBLE_mulc(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2) | |
{ | |
DOUBLECOMPLEX_t result; | |
result.array[0] = op1.array[0]*op2.array[0] + op1.array[1]*op2.array[1]; | |
result.array[1] = op1.array[0]*op2.array[1] - op1.array[1]*op2.array[0]; | |
return result; | |
} | |
static inline void | |
print_FLOAT(npy_float s) | |
{ | |
TRACE_TXT(" %8.4f", s); | |
} | |
static inline void | |
print_DOUBLE(npy_double d) | |
{ | |
TRACE_TXT(" %10.6f", d); | |
} | |
static inline void | |
print_CFLOAT(npy_cfloat c) | |
{ | |
float* c_parts = (float*)&c; | |
TRACE_TXT("(%8.4f, %8.4fj)", c_parts[0], c_parts[1]); | |
} | |
static inline void | |
print_CDOUBLE(npy_cdouble z) | |
{ | |
double* z_parts = (double*)&z; | |
TRACE_TXT("(%8.4f, %8.4fj)", z_parts[0], z_parts[1]); | |
} | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
#typ=npy_float,npy_double,npy_cfloat,npy_cdouble# | |
*/ | |
static inline void | |
dump_@TYPE@_matrix(const char* name, | |
size_t rows, size_t columns, | |
const @typ@* ptr) | |
{ | |
size_t i,j; | |
TRACE_TXT("\n%s %p (%zd, %zd)\n", name, ptr, rows, columns); | |
for (i=0; i<rows; i++) | |
{ | |
TRACE_TXT("| "); | |
for (j=0; j<columns; j++) | |
{ | |
print_@TYPE@(ptr[j*rows + i]); | |
TRACE_TXT(", "); | |
} | |
TRACE_TXT(" |\n"); | |
} | |
} | |
/**end repeat**/ | |
/* | |
***************************************************************************** | |
** Basics ** | |
***************************************************************************** | |
*/ | |
/*
 * INIT_OUTER_LOOP_<k>: declarations for the outer loop of an inner loop
 * function with k arguments. Expects `dimensions` and `steps` in scope and
 * advances both: the first entry of `dimensions` is read into dN, and the
 * first k entries of `steps` into s0..s<k-1>. N_ is the loop counter used by
 * the matching BEGIN_OUTER_LOOP_<k>.
 */
#define INIT_OUTER_LOOP_1 \
    npy_intp dN = *dimensions++; \
    npy_intp N_; \
    npy_intp s0 = *steps++;

#define INIT_OUTER_LOOP_2 \
    INIT_OUTER_LOOP_1 \
    npy_intp s1 = *steps++;

#define INIT_OUTER_LOOP_3 \
    INIT_OUTER_LOOP_2 \
    npy_intp s2 = *steps++;

#define INIT_OUTER_LOOP_4 \
    INIT_OUTER_LOOP_3 \
    npy_intp s3 = *steps++;

#define INIT_OUTER_LOOP_5 \
    INIT_OUTER_LOOP_4 \
    npy_intp s4 = *steps++;

#define INIT_OUTER_LOOP_6 \
    INIT_OUTER_LOOP_5 \
    npy_intp s5 = *steps++;
/*
 * BEGIN_OUTER_LOOP_<k>: opens a loop that runs dN times and, after every
 * iteration, advances args[0..k-1] by s0..s<k-1>. Uses the names declared by
 * INIT_OUTER_LOOP_<k>. The loop body must be closed with END_OUTER_LOOP.
 */
#define BEGIN_OUTER_LOOP_2 \
    for (N_ = 0; \
         N_ < dN; \
         N_++, args[0] += s0, args[1] += s1) {

#define BEGIN_OUTER_LOOP_3 \
    for (N_ = 0; \
         N_ < dN; \
         N_++, args[0] += s0, args[1] += s1, args[2] += s2) {

#define BEGIN_OUTER_LOOP_4 \
    for (N_ = 0; \
         N_ < dN; \
         N_++, args[0] += s0, args[1] += s1, args[2] += s2, \
         args[3] += s3) {

#define BEGIN_OUTER_LOOP_5 \
    for (N_ = 0; \
         N_ < dN; \
         N_++, args[0] += s0, args[1] += s1, args[2] += s2, \
         args[3] += s3, args[4] += s4) {

#define BEGIN_OUTER_LOOP_6 \
    for (N_ = 0; \
         N_ < dN; \
         N_++, args[0] += s0, args[1] += s1, args[2] += s2, \
         args[3] += s3, args[4] += s4, args[5] += s5) {

#define END_OUTER_LOOP }
static inline void | |
update_pointers(npy_uint8** bases, ptrdiff_t* offsets, size_t count) | |
{ | |
size_t i; | |
for (i=0; i < count; ++i) { | |
bases[i] += offsets[i]; | |
} | |
} | |
/* disable -Wmaybe-uninitialized as there is some code that generate false | |
positives with this warning | |
*/ | |
#pragma GCC diagnostic push | |
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" | |
/* | |
***************************************************************************** | |
** HELPER FUNCS ** | |
***************************************************************************** | |
*/ | |
/* rearranging of 2D matrices using blas */ | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
#typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t# | |
#copy=scopy,dcopy,ccopy,zcopy# | |
#nan=s_nan, d_nan, c_nan, z_nan# | |
*/ | |
static inline void * | |
linearize_@TYPE@_matrix(void *dst_in, | |
void *src_in, | |
const LINEARIZE_DATA_t* data) | |
{ | |
@typ@ *src = (@typ@ *) src_in; | |
@typ@ *dst = (@typ@ *) dst_in; | |
if (dst) { | |
int i, j; | |
@typ@* rv = dst; | |
fortran_int columns = (fortran_int)data->columns; | |
fortran_int column_strides = | |
(fortran_int)(data->column_strides/sizeof(@typ@)); | |
fortran_int one = 1; | |
for (i=0; i< data->rows; i++) { | |
if (column_strides > 0) { | |
FNAME(@copy@)(&columns, | |
(void*)src, &column_strides, | |
(void*)dst, &one); | |
} | |
else if (column_strides < 0) { | |
FNAME(@copy@)(&columns, | |
(void*)((@typ@*)src + (columns-1)*column_strides), | |
&column_strides, | |
(void*)dst, &one); | |
} | |
else { | |
/* | |
* Zero stride has undefined behavior in some BLAS | |
* implementations (e.g. OSX Accelerate), so do it | |
* manually | |
*/ | |
for (j = 0; j < columns; ++j) { | |
memcpy((@typ@*)dst + j, (@typ@*)src, sizeof(@typ@)); | |
} | |
} | |
src += data->row_strides/sizeof(@typ@); | |
dst += data->columns; | |
} | |
return rv; | |
} else { | |
return src; | |
} | |
} | |
static inline void * | |
delinearize_@TYPE@_matrix(void *dst_in, | |
void *src_in, | |
const LINEARIZE_DATA_t* data) | |
{ | |
@typ@ *src = (@typ@ *) src_in; | |
@typ@ *dst = (@typ@ *) dst_in; | |
if (src) { | |
int i; | |
@typ@ *rv = src; | |
fortran_int columns = (fortran_int)data->columns; | |
fortran_int column_strides = | |
(fortran_int)(data->column_strides/sizeof(@typ@)); | |
fortran_int one = 1; | |
for (i=0; i < data->rows; i++) { | |
if (column_strides > 0) { | |
FNAME(@copy@)(&columns, | |
(void*)src, &one, | |
(void*)dst, &column_strides); | |
} | |
else if (column_strides < 0) { | |
FNAME(@copy@)(&columns, | |
(void*)src, &one, | |
(void*)((@typ@*)dst + (columns-1)*column_strides), | |
&column_strides); | |
} | |
else { | |
/* | |
* Zero stride has undefined behavior in some BLAS | |
* implementations (e.g. OSX Accelerate), so do it | |
* manually | |
*/ | |
if (columns > 0) { | |
memcpy((@typ@*)dst, (@typ@*)src + (columns-1), sizeof(@typ@)); | |
} | |
} | |
src += data->columns; | |
dst += data->row_strides/sizeof(@typ@); | |
} | |
return rv; | |
} else { | |
return src; | |
} | |
} | |
static inline void | |
nan_@TYPE@_matrix(void *dst_in, const LINEARIZE_DATA_t* data) | |
{ | |
@typ@ *dst = (@typ@ *) dst_in; | |
int i,j; | |
for (i=0; i < data->rows; i++) { | |
@typ@ *cp = dst; | |
ptrdiff_t cs = data->column_strides/sizeof(@typ@); | |
for (j=0; j< data->columns; ++j) { | |
*cp = @nan@; | |
cp += cs; | |
} | |
dst += data->row_strides/sizeof(@typ@); | |
} | |
} | |
/**end repeat**/ | |
/* identity square matrix generation */ | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
#typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t# | |
#cblas_type=s,d,c,z# | |
*/ | |
static inline void | |
identity_@TYPE@_matrix(void *ptr, size_t n) | |
{ | |
size_t i; | |
@typ@ *matrix = (@typ@*) ptr; | |
/* in IEEE floating point, zeroes are represented as bitwise 0 */ | |
memset(matrix, 0, n*n*sizeof(@typ@)); | |
for (i = 0; i < n; ++i) | |
{ | |
*matrix = @cblas_type@_one; | |
matrix += n+1; | |
} | |
} | |
/**end repeat**/ | |
/* lower/upper triangular matrix using blas (in place) */ | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
#typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t# | |
#cblas_type=s,d,c,z# | |
*/ | |
static inline void | |
triu_@TYPE@_matrix(void *ptr, size_t n) | |
{ | |
size_t i,j; | |
@typ@ *matrix = (@typ@*)ptr; | |
matrix += n; | |
for (i=1; i < n; ++i) { | |
for (j=0; j<i; ++j) { | |
matrix[j] = @cblas_type@_zero; | |
} | |
matrix += n; | |
} | |
} | |
/**end repeat**/ | |
/* -------------------------------------------------------------------------- */ | |
/* Determinants */ | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE# | |
#typ=npy_float, npy_double# | |
#log_func=npy_logf,npy_log# | |
#exp_func=npy_expf,npy_exp# | |
#zero=0.0f,0.0# | |
*/ | |
static inline void | |
@TYPE@_slogdet_from_factored_diagonal(@typ@* src, | |
fortran_int m, | |
@typ@ *sign, | |
@typ@ *logdet) | |
{ | |
@typ@ acc_sign = *sign; | |
@typ@ acc_logdet = @zero@; | |
int i; | |
for (i = 0; i < m; i++) { | |
@typ@ abs_element = *src; | |
if (abs_element < @zero@) { | |
acc_sign = -acc_sign; | |
abs_element = -abs_element; | |
} | |
acc_logdet += @log_func@(abs_element); | |
src += m+1; | |
} | |
*sign = acc_sign; | |
*logdet = acc_logdet; | |
} | |
static inline @typ@ | |
@TYPE@_det_from_slogdet(@typ@ sign, @typ@ logdet) | |
{ | |
@typ@ result = sign * @exp_func@(logdet); | |
return result; | |
} | |
/**end repeat**/ | |
/**begin repeat | |
#TYPE=CFLOAT,CDOUBLE# | |
#typ=npy_cfloat, npy_cdouble# | |
#basetyp=npy_float, npy_double# | |
#abs_func=npy_cabsf, npy_cabs# | |
#log_func=npy_logf, npy_log# | |
#exp_func=npy_expf, npy_exp# | |
#zero=0.0f,0.0# | |
*/ | |
#define RE(COMPLEX) (((@basetyp@*)(&COMPLEX))[0]) | |
#define IM(COMPLEX) (((@basetyp@*)(&COMPLEX))[1]) | |
static inline @typ@ | |
@TYPE@_mult(@typ@ op1, @typ@ op2) | |
{ | |
@typ@ rv; | |
RE(rv) = RE(op1)*RE(op2) - IM(op1)*IM(op2); | |
IM(rv) = RE(op1)*IM(op2) + IM(op1)*RE(op2); | |
return rv; | |
} | |
static inline void | |
@TYPE@_slogdet_from_factored_diagonal(@typ@* src, | |
fortran_int m, | |
@typ@ *sign, | |
@basetyp@ *logdet) | |
{ | |
int i; | |
@typ@ sign_acc = *sign; | |
@basetyp@ logdet_acc = @zero@; | |
for (i = 0; i < m; i++) | |
{ | |
@basetyp@ abs_element = @abs_func@(*src); | |
@typ@ sign_element; | |
RE(sign_element) = RE(*src) / abs_element; | |
IM(sign_element) = IM(*src) / abs_element; | |
sign_acc = @TYPE@_mult(sign_acc, sign_element); | |
logdet_acc += @log_func@(abs_element); | |
src += m + 1; | |
} | |
*sign = sign_acc; | |
*logdet = logdet_acc; | |
} | |
static inline @typ@ | |
@TYPE@_det_from_slogdet(@typ@ sign, @basetyp@ logdet) | |
{ | |
@typ@ tmp; | |
RE(tmp) = @exp_func@(logdet); | |
IM(tmp) = @zero@; | |
return @TYPE@_mult(sign, tmp); | |
} | |
#undef RE | |
#undef IM | |
/**end repeat**/ | |
/* As in the linalg package, the determinant is computed via LU factorization | |
* using LAPACK. | |
* slogdet computes sign + log(determinant). | |
* det computes sign * exp(slogdet). | |
*/ | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
#typ=npy_float,npy_double,npy_cfloat,npy_cdouble# | |
#basetyp=npy_float,npy_double,npy_float,npy_double# | |
#cblas_type=s,d,c,z# | |
*/ | |
static inline void | |
@TYPE@_slogdet_single_element(fortran_int m, | |
void* src, | |
fortran_int* pivots, | |
@typ@ *sign, | |
@basetyp@ *logdet) | |
{ | |
fortran_int info = 0; | |
int i; | |
/* note: done in place */ | |
LAPACK(@cblas_type@getrf)(&m, &m, (void *)src, &m, pivots, &info); | |
if (info == 0) | |
{ | |
int change_sign = 0; | |
/* note: fortran uses 1 based indexing */ | |
for (i=0; i < m; i++) | |
{ | |
change_sign += (pivots[i] != (i+1)); | |
} | |
memcpy(sign, | |
(change_sign % 2)? | |
&@cblas_type@_minus_one : | |
&@cblas_type@_one | |
, sizeof(*sign)); | |
@TYPE@_slogdet_from_factored_diagonal(src, m, sign, logdet); | |
} else { | |
/* | |
if getrf fails, use 0 as sign and -inf as logdet | |
*/ | |
memcpy(sign, &@cblas_type@_zero, sizeof(*sign)); | |
memcpy(logdet, &@cblas_type@_ninf, sizeof(*logdet)); | |
} | |
} | |
static void | |
@TYPE@_slogdet(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
fortran_int m; | |
npy_uint8 *tmp_buff = NULL; | |
size_t matrix_size; | |
size_t pivot_size; | |
/* notes: | |
* matrix will need to be copied always, as factorization in lapack is | |
* made inplace | |
* matrix will need to be in column-major order, as expected by lapack | |
* code (fortran) | |
* always a square matrix | |
* need to allocate memory for both, matrix_buffer and pivot buffer | |
*/ | |
INIT_OUTER_LOOP_3 | |
m = (fortran_int) dimensions[0]; | |
matrix_size = m*m*sizeof(@typ@); | |
pivot_size = m*sizeof(fortran_int); | |
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); | |
if (tmp_buff) | |
{ | |
LINEARIZE_DATA_t lin_data; | |
/* swapped steps to get matrix in FORTRAN order */ | |
init_linearize_data(&lin_data, m, m, | |
(ptrdiff_t)steps[1], | |
(ptrdiff_t)steps[0]); | |
BEGIN_OUTER_LOOP_3 | |
linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data); | |
@TYPE@_slogdet_single_element(m, | |
(void*)tmp_buff, | |
(fortran_int*)(tmp_buff+matrix_size), | |
(@typ@*)args[1], | |
(@basetyp@*)args[2]); | |
END_OUTER_LOOP | |
free(tmp_buff); | |
} | |
} | |
static void | |
@TYPE@_det(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
fortran_int m; | |
npy_uint8 *tmp_buff; | |
size_t matrix_size; | |
size_t pivot_size; | |
/* notes: | |
* matrix will need to be copied always, as factorization in lapack is | |
* made inplace | |
* matrix will need to be in column-major order, as expected by lapack | |
* code (fortran) | |
* always a square matrix | |
* need to allocate memory for both, matrix_buffer and pivot buffer | |
*/ | |
INIT_OUTER_LOOP_2 | |
m = (fortran_int) dimensions[0]; | |
matrix_size = m*m*sizeof(@typ@); | |
pivot_size = m*sizeof(fortran_int); | |
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); | |
if (tmp_buff) | |
{ | |
LINEARIZE_DATA_t lin_data; | |
@typ@ sign; | |
@basetyp@ logdet; | |
/* swapped steps to get matrix in FORTRAN order */ | |
init_linearize_data(&lin_data, m, m, | |
(ptrdiff_t)steps[1], | |
(ptrdiff_t)steps[0]); | |
BEGIN_OUTER_LOOP_2 | |
linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data); | |
@TYPE@_slogdet_single_element(m, | |
(void*)tmp_buff, | |
(fortran_int*)(tmp_buff+matrix_size), | |
&sign, | |
&logdet); | |
*(@typ@ *)args[1] = @TYPE@_det_from_slogdet(sign, logdet); | |
END_OUTER_LOOP | |
free(tmp_buff); | |
} | |
} | |
/**end repeat**/ | |
/* -------------------------------------------------------------------------- */ | |
/* Eigh family */ | |
typedef struct eigh_params_struct { | |
void *A; /* matrix */ | |
void *W; /* eigenvalue vector */ | |
void *WORK; /* main work buffer */ | |
void *RWORK; /* secondary work buffer (for complex versions) */ | |
void *IWORK; | |
fortran_int N; | |
fortran_int LWORK; | |
fortran_int LRWORK; | |
fortran_int LIWORK; | |
char JOBZ; | |
char UPLO; | |
} EIGH_PARAMS_t; | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE# | |
#typ=npy_float,npy_double# | |
#ftyp=fortran_real,fortran_doublereal# | |
#lapack_func=ssyevd,dsyevd# | |
*/ | |
/* | |
* Initialize the parameters to use in for the lapack function _syevd | |
* Handles buffer allocation | |
*/ | |
static inline int | |
init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO, | |
fortran_int N) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *mem_buff2 = NULL; | |
@typ@ query_work_size; | |
fortran_int query_iwork_size; | |
fortran_int lwork = -1; | |
fortran_int liwork = -1; | |
fortran_int info; | |
npy_uint8 *a, *w, *work, *iwork; | |
size_t alloc_size = N*(N+1)*sizeof(@typ@); | |
mem_buff = malloc(alloc_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
w = mem_buff + N*N*sizeof(@typ@); | |
LAPACK(@lapack_func@)(&JOBZ, &UPLO, &N, | |
(@ftyp@*)a, &N, (@ftyp@*)w, | |
&query_work_size, &lwork, | |
&query_iwork_size, &liwork, | |
&info); | |
if (info != 0) | |
goto error; | |
work = mem_buff; | |
lwork = (fortran_int)query_work_size; | |
liwork = query_iwork_size; | |
mem_buff2 = malloc(lwork*sizeof(@typ@) + liwork*sizeof(fortran_int)); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
iwork = mem_buff2 + lwork*sizeof(@typ@); | |
params->A = a; | |
params->W = w; | |
params->WORK = work; | |
params->RWORK = NULL; /* unused */ | |
params->IWORK = iwork; | |
params->N = N; | |
params->LWORK = lwork; | |
params->LRWORK = 0; /* unused */ | |
params->LIWORK = liwork; | |
params->JOBZ = JOBZ; | |
params->UPLO = UPLO; | |
return 1; | |
error: | |
/* something failed */ | |
memset(params, 0, sizeof(*params)); | |
free(mem_buff2); | |
free(mem_buff); | |
return 0; | |
} | |
static inline fortran_int | |
call_@lapack_func@(EIGH_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, | |
params->A, ¶ms->N, params->W, | |
params->WORK, ¶ms->LWORK, | |
params->IWORK, ¶ms->LIWORK, | |
&rv); | |
return rv; | |
} | |
/**end repeat**/ | |
/**begin repeat | |
#TYPE=CFLOAT,CDOUBLE# | |
#typ=npy_cfloat,npy_cdouble# | |
#basetyp=npy_float,npy_double# | |
#ftyp=fortran_complex,fortran_doublecomplex# | |
#fbasetyp=fortran_real,fortran_doublereal# | |
#lapack_func=cheevd,zheevd# | |
*/ | |
/* | |
* Initialize the parameters to use for the lapack function _heevd
* Handles buffer allocation | |
*/ | |
static inline int | |
init_@lapack_func@(EIGH_PARAMS_t *params, | |
char JOBZ, | |
char UPLO, | |
fortran_int N) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *mem_buff2 = NULL; | |
@ftyp@ query_work_size; | |
@fbasetyp@ query_rwork_size; | |
fortran_int query_iwork_size; | |
fortran_int lwork = -1; | |
fortran_int lrwork = -1; | |
fortran_int liwork = -1; | |
npy_uint8 *a, *w, *work, *rwork, *iwork; | |
fortran_int info; | |
mem_buff = malloc(N*N*sizeof(@typ@)+N*sizeof(@basetyp@)); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
w = mem_buff+N*N*sizeof(@typ@); | |
LAPACK(@lapack_func@)(&JOBZ, &UPLO, &N, | |
(@ftyp@*)a, &N, (@fbasetyp@*)w, | |
&query_work_size, &lwork, | |
&query_rwork_size, &lrwork, | |
&query_iwork_size, &liwork, | |
&info); | |
if (info != 0) | |
goto error; | |
lwork = (fortran_int)*(@fbasetyp@*)&query_work_size; | |
lrwork = (fortran_int)query_rwork_size; | |
liwork = query_iwork_size; | |
mem_buff2 = malloc(lwork*sizeof(@typ@) + | |
lrwork*sizeof(@basetyp@) + | |
liwork*sizeof(fortran_int)); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
rwork = work + lwork*sizeof(@typ@); | |
iwork = rwork + lrwork*sizeof(@basetyp@); | |
params->A = a; | |
params->W = w; | |
params->WORK = work; | |
params->RWORK = rwork; | |
params->IWORK = iwork; | |
params->N = N; | |
params->LWORK = lwork; | |
params->LRWORK = lrwork; | |
params->LIWORK = liwork; | |
params->JOBZ = JOBZ; | |
params->UPLO = UPLO; | |
return 1; | |
/* something failed */ | |
error: | |
memset(params, 0, sizeof(*params)); | |
free(mem_buff2); | |
free(mem_buff); | |
return 0; | |
} | |
static inline fortran_int | |
call_@lapack_func@(EIGH_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, | |
params->A, ¶ms->N, params->W, | |
params->WORK, ¶ms->LWORK, | |
params->RWORK, ¶ms->LRWORK, | |
params->IWORK, ¶ms->LIWORK, | |
&rv); | |
return rv; | |
} | |
/**end repeat**/ | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
#BASETYPE=FLOAT,DOUBLE,FLOAT,DOUBLE# | |
#typ=npy_float,npy_double,npy_cfloat,npy_cdouble# | |
#basetyp=npy_float,npy_double,npy_float,npy_double# | |
#lapack_func=ssyevd,dsyevd,cheevd,zheevd# | |
**/ | |
/* | |
* (M,M)->(M,)(M,M) | |
* dimensions[1] -> M | |
* args[0] -> A[in] | |
* args[1] -> W | |
* args[2] -> A[out] | |
*/ | |
static inline void | |
release_@lapack_func@(EIGH_PARAMS_t *params) | |
{ | |
/* allocated memory in A and WORK */ | |
free(params->A); | |
free(params->WORK); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
@TYPE@_eigh_wrapper(char JOBZ, | |
char UPLO, | |
char**args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[3]; | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = (JOBZ=='N')?2:3; | |
EIGH_PARAMS_t eigh_params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_@lapack_func@(&eigh_params, | |
JOBZ, | |
UPLO, | |
(fortran_int)dimensions[0])) { | |
LINEARIZE_DATA_t matrix_in_ld; | |
LINEARIZE_DATA_t eigenvectors_out_ld; | |
LINEARIZE_DATA_t eigenvalues_out_ld; | |
init_linearize_data(&matrix_in_ld, | |
eigh_params.N, eigh_params.N, | |
steps[1], steps[0]); | |
init_linearize_data(&eigenvalues_out_ld, | |
1, eigh_params.N, | |
0, steps[2]); | |
if ('V' == eigh_params.JOBZ) { | |
init_linearize_data(&eigenvectors_out_ld, | |
eigh_params.N, eigh_params.N, | |
steps[4], steps[3]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
/* copy the matrix in */ | |
linearize_@TYPE@_matrix(eigh_params.A, args[0], &matrix_in_ld); | |
not_ok = call_@lapack_func@(&eigh_params); | |
if (!not_ok) { | |
/* lapack ok, copy result out */ | |
delinearize_@BASETYPE@_matrix(args[1], | |
eigh_params.W, | |
&eigenvalues_out_ld); | |
if ('V' == eigh_params.JOBZ) { | |
delinearize_@TYPE@_matrix(args[2], | |
eigh_params.A, | |
&eigenvectors_out_ld); | |
} | |
} else { | |
/* lapack fail, set result to nan */ | |
error_occurred = 1; | |
nan_@BASETYPE@_matrix(args[1], &eigenvalues_out_ld); | |
if ('V' == eigh_params.JOBZ) { | |
nan_@TYPE@_matrix(args[2], &eigenvectors_out_ld); | |
} | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_@lapack_func@(&eigh_params); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/**end repeat**/ | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
*/ | |
static void | |
@TYPE@_eighlo(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
@TYPE@_eigh_wrapper('V', 'L', args, dimensions, steps); | |
} | |
static void | |
@TYPE@_eighup(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
@TYPE@_eigh_wrapper('V', 'U', args, dimensions, steps); | |
} | |
static void | |
@TYPE@_eigvalshlo(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
@TYPE@_eigh_wrapper('N', 'L', args, dimensions, steps); | |
} | |
static void | |
@TYPE@_eigvalshup(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
@TYPE@_eigh_wrapper('N', 'U', args, dimensions, steps); | |
} | |
/**end repeat**/ | |
/* -------------------------------------------------------------------------- */ | |
/* Solve family (includes inv) */ | |
typedef struct gesv_params_struct | |
{ | |
void *A; /* A is (N,N) of base type */ | |
void *B; /* B is (N,NRHS) of base type */ | |
fortran_int * IPIV; /* IPIV is (N) */ | |
fortran_int N; | |
fortran_int NRHS; | |
fortran_int LDA; | |
fortran_int LDB; | |
} GESV_PARAMS_t; | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
#typ=npy_float,npy_double,npy_cfloat,npy_cdouble# | |
#ftyp=fortran_real,fortran_doublereal,fortran_complex,fortran_doublecomplex# | |
#lapack_func=sgesv,dgesv,cgesv,zgesv# | |
*/ | |
/* | |
* Initialize the parameters to use for the lapack function _gesv
* Handles buffer allocation | |
*/ | |
static inline int | |
init_@lapack_func@(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *a, *b, *ipiv; | |
mem_buff = malloc(N*N*sizeof(@ftyp@) + | |
N*NRHS*sizeof(@ftyp@) + | |
N*sizeof(fortran_int)); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
b = a + N*N*sizeof(@ftyp@); | |
ipiv = b + N*NRHS*sizeof(@ftyp@); | |
params->A = a; | |
params->B = b; | |
params->IPIV = (fortran_int*)ipiv; | |
params->N = N; | |
params->NRHS = NRHS; | |
params->LDA = N; | |
params->LDB = N; | |
return 1; | |
error: | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline void | |
release_@lapack_func@(GESV_PARAMS_t *params) | |
{ | |
/* memory block base is in A */ | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline fortran_int | |
call_@lapack_func@(GESV_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(@lapack_func@)(¶ms->N, ¶ms->NRHS, | |
params->A, ¶ms->LDA, | |
params->IPIV, | |
params->B, ¶ms->LDB, | |
&rv); | |
return rv; | |
} | |
static void | |
@TYPE@_solve(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
fortran_int n, nrhs; | |
int error_occurred = get_fp_invalid_and_clear(); | |
INIT_OUTER_LOOP_3 | |
n = (fortran_int)dimensions[0]; | |
nrhs = (fortran_int)dimensions[1]; | |
if (init_@lapack_func@(¶ms, n, nrhs)) { | |
LINEARIZE_DATA_t a_in, b_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]); | |
init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]); | |
BEGIN_OUTER_LOOP_3 | |
int not_ok; | |
linearize_@TYPE@_matrix(params.A, args[0], &a_in); | |
linearize_@TYPE@_matrix(params.B, args[1], &b_in); | |
not_ok =call_@lapack_func@(¶ms); | |
if (!not_ok) { | |
delinearize_@TYPE@_matrix(args[2], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_@TYPE@_matrix(args[2], &r_out); | |
} | |
END_OUTER_LOOP | |
release_@lapack_func@(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
@TYPE@_solve1(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
fortran_int n; | |
INIT_OUTER_LOOP_3 | |
n = (fortran_int)dimensions[0]; | |
if (init_@lapack_func@(¶ms, n, 1)) { | |
LINEARIZE_DATA_t a_in, b_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&b_in, 1, n, 1, steps[2]); | |
init_linearize_data(&r_out, 1, n, 1, steps[3]); | |
BEGIN_OUTER_LOOP_3 | |
int not_ok; | |
linearize_@TYPE@_matrix(params.A, args[0], &a_in); | |
linearize_@TYPE@_matrix(params.B, args[1], &b_in); | |
not_ok = call_@lapack_func@(¶ms); | |
if (!not_ok) { | |
delinearize_@TYPE@_matrix(args[2], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_@TYPE@_matrix(args[2], &r_out); | |
} | |
END_OUTER_LOOP | |
release_@lapack_func@(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
@TYPE@_inv(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
fortran_int n; | |
int error_occurred = get_fp_invalid_and_clear(); | |
INIT_OUTER_LOOP_2 | |
n = (fortran_int)dimensions[0]; | |
if (init_@lapack_func@(¶ms, n, n)) { | |
LINEARIZE_DATA_t a_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&r_out, n, n, steps[3], steps[2]); | |
BEGIN_OUTER_LOOP_2 | |
int not_ok; | |
linearize_@TYPE@_matrix(params.A, args[0], &a_in); | |
identity_@TYPE@_matrix(params.B, n); | |
not_ok = call_@lapack_func@(¶ms); | |
if (!not_ok) { | |
delinearize_@TYPE@_matrix(args[1], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_@TYPE@_matrix(args[1], &r_out); | |
} | |
END_OUTER_LOOP | |
release_@lapack_func@(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/**end repeat**/ | |
/* -------------------------------------------------------------------------- */ | |
/* Cholesky decomposition */ | |
typedef struct potr_params_struct | |
{ | |
void *A; | |
fortran_int N; | |
fortran_int LDA; | |
char UPLO; | |
} POTR_PARAMS_t; | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
#ftyp=fortran_real, fortran_doublereal, | |
fortran_complex, fortran_doublecomplex# | |
#lapack_func=spotrf,dpotrf,cpotrf,zpotrf# | |
*/ | |
static inline int | |
init_@lapack_func@(POTR_PARAMS_t *params, char UPLO, fortran_int N) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *a; | |
mem_buff = malloc(N*N*sizeof(@ftyp@)); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
params->A = a; | |
params->N = N; | |
params->LDA = N; | |
params->UPLO = UPLO; | |
return 1; | |
error: | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline void | |
release_@lapack_func@(POTR_PARAMS_t *params) | |
{ | |
/* memory block base in A */ | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline fortran_int | |
call_@lapack_func@(POTR_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(@lapack_func@)(¶ms->UPLO, | |
¶ms->N, params->A, ¶ms->LDA, | |
&rv); | |
return rv; | |
} | |
static void | |
@TYPE@_cholesky(char uplo, char **args, npy_intp *dimensions, npy_intp *steps) | |
{ | |
POTR_PARAMS_t params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
fortran_int n; | |
INIT_OUTER_LOOP_2 | |
assert(uplo == 'L'); | |
n = (fortran_int)dimensions[0]; | |
if (init_@lapack_func@(¶ms, uplo, n)) | |
{ | |
LINEARIZE_DATA_t a_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&r_out, n, n, steps[3], steps[2]); | |
BEGIN_OUTER_LOOP_2 | |
int not_ok; | |
linearize_@TYPE@_matrix(params.A, args[0], &a_in); | |
not_ok = call_@lapack_func@(¶ms); | |
if (!not_ok) { | |
triu_@TYPE@_matrix(params.A, params.N); | |
delinearize_@TYPE@_matrix(args[1], params.A, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_@TYPE@_matrix(args[1], &r_out); | |
} | |
END_OUTER_LOOP | |
release_@lapack_func@(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
@TYPE@_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
@TYPE@_cholesky('L', args, dimensions, steps); | |
} | |
/**end repeat**/ | |
/* -------------------------------------------------------------------------- */ | |
/* eig family */ | |
typedef struct geev_params_struct { | |
void *A; | |
void *WR; /* RWORK in complex versions, REAL W buffer for (sd)geev*/ | |
void *WI; | |
void *VLR; /* REAL VL buffers for _geev where _ is s, d */ | |
void *VRR; /* REAL VR buffers for _geev hwere _ is s, d */ | |
void *WORK; | |
void *W; /* final w */ | |
void *VL; /* final vl */ | |
void *VR; /* final vr */ | |
fortran_int N; | |
fortran_int LDA; | |
fortran_int LDVL; | |
fortran_int LDVR; | |
fortran_int LWORK; | |
char JOBVL; | |
char JOBVR; | |
} GEEV_PARAMS_t; | |
static inline void | |
dump_geev_params(const char *name, GEEV_PARAMS_t* params) | |
{ | |
TRACE_TXT("\n%s\n" | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %d\n"\ | |
"\t%10s: %d\n"\ | |
"\t%10s: %d\n"\ | |
"\t%10s: %d\n"\ | |
"\t%10s: %d\n"\ | |
"\t%10s: %c\n"\ | |
"\t%10s: %c\n", | |
name, | |
"A", params->A, | |
"WR", params->WR, | |
"WI", params->WI, | |
"VLR", params->VLR, | |
"VRR", params->VRR, | |
"WORK", params->WORK, | |
"W", params->W, | |
"VL", params->VL, | |
"VR", params->VR, | |
"N", (int)params->N, | |
"LDA", (int)params->LDA, | |
"LDVL", (int)params->LDVL, | |
"LDVR", (int)params->LDVR, | |
"LWORK", (int)params->LWORK, | |
"JOBVL", params->JOBVL, | |
"JOBVR", params->JOBVR); | |
} | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE# | |
#CTYPE=CFLOAT,CDOUBLE# | |
#typ=float,double# | |
#complextyp=COMPLEX_t,DOUBLECOMPLEX_t# | |
#lapack_func=sgeev,dgeev# | |
#zero=0.0f,0.0# | |
*/ | |
static inline int | |
init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n) | |
{ | |
npy_uint8 *mem_buff=NULL; | |
npy_uint8 *mem_buff2=NULL; | |
npy_uint8 *a, *wr, *wi, *vlr, *vrr, *work, *w, *vl, *vr; | |
size_t a_size = n*n*sizeof(@typ@); | |
size_t wr_size = n*sizeof(@typ@); | |
size_t wi_size = n*sizeof(@typ@); | |
size_t vlr_size = jobvl=='V' ? n*n*sizeof(@typ@) : 0; | |
size_t vrr_size = jobvr=='V' ? n*n*sizeof(@typ@) : 0; | |
size_t w_size = wr_size*2; | |
size_t vl_size = vlr_size*2; | |
size_t vr_size = vrr_size*2; | |
size_t work_count = 0; | |
@typ@ work_size_query; | |
fortran_int do_size_query = -1; | |
fortran_int rv; | |
/* allocate data for known sizes (all but work) */ | |
mem_buff = malloc(a_size + wr_size + wi_size + | |
vlr_size + vrr_size + | |
w_size + vl_size + vr_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
wr = a + a_size; | |
wi = wr + wr_size; | |
vlr = wi + wi_size; | |
vrr = vlr + vlr_size; | |
w = vrr + vrr_size; | |
vl = w + w_size; | |
vr = vl + vl_size; | |
LAPACK(@lapack_func@)(&jobvl, &jobvr, &n, | |
(void *)a, &n, (void *)wr, (void *)wi, | |
(void *)vl, &n, (void *)vr, &n, | |
&work_size_query, &do_size_query, | |
&rv); | |
if (0 != rv) | |
goto error; | |
work_count = (size_t)work_size_query; | |
mem_buff2 = malloc(work_count*sizeof(@typ@)); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
params->A = a; | |
params->WR = wr; | |
params->WI = wi; | |
params->VLR = vlr; | |
params->VRR = vrr; | |
params->WORK = work; | |
params->W = w; | |
params->VL = vl; | |
params->VR = vr; | |
params->N = n; | |
params->LDA = n; | |
params->LDVL = n; | |
params->LDVR = n; | |
params->LWORK = (fortran_int)work_count; | |
params->JOBVL = jobvl; | |
params->JOBVR = jobvr; | |
return 1; | |
error: | |
free(mem_buff2); | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline fortran_int | |
call_@lapack_func@(GEEV_PARAMS_t* params) | |
{ | |
fortran_int rv; | |
LAPACK(@lapack_func@)(¶ms->JOBVL, ¶ms->JOBVR, | |
¶ms->N, params->A, ¶ms->LDA, | |
params->WR, params->WI, | |
params->VLR, ¶ms->LDVL, | |
params->VRR, ¶ms->LDVR, | |
params->WORK, ¶ms->LWORK, | |
&rv); | |
return rv; | |
} | |
static inline void | |
mk_@TYPE@_complex_array_from_real(@complextyp@ *c, const @typ@ *re, size_t n) | |
{ | |
size_t iter; | |
for (iter = 0; iter < n; ++iter) { | |
c[iter].array[0] = re[iter]; | |
c[iter].array[1] = @zero@; | |
} | |
} | |
static inline void | |
mk_@TYPE@_complex_array(@complextyp@ *c, | |
const @typ@ *re, | |
const @typ@ *im, | |
size_t n) | |
{ | |
size_t iter; | |
for (iter = 0; iter < n; ++iter) { | |
c[iter].array[0] = re[iter]; | |
c[iter].array[1] = im[iter]; | |
} | |
} | |
static inline void | |
mk_@TYPE@_complex_array_conjugate_pair(@complextyp@ *c, | |
const @typ@ *r, | |
size_t n) | |
{ | |
size_t iter; | |
for (iter = 0; iter < n; ++iter) { | |
@typ@ re = r[iter]; | |
@typ@ im = r[iter+n]; | |
c[iter].array[0] = re; | |
c[iter].array[1] = im; | |
c[iter+n].array[0] = re; | |
c[iter+n].array[1] = -im; | |
} | |
} | |
/* | |
* make the complex eigenvectors from the real array produced by sgeev/dgeev.
* c is the array where the results will be left.
* r is the source array of reals produced by sgeev/dgeev
* i is the eigenvalue imaginary part produced by sgeev/dgeev
* n is so that the order of the matrix is n by n | |
*/ | |
static inline void | |
mk_@lapack_func@_complex_eigenvectors(@complextyp@ *c, | |
const @typ@ *r, | |
const @typ@ *i, | |
size_t n) | |
{ | |
size_t iter = 0; | |
while (iter < n) | |
{ | |
if (i[iter] == @zero@) { | |
/* eigenvalue was real, eigenvectors as well... */ | |
mk_@TYPE@_complex_array_from_real(c, r, n); | |
c += n; | |
r += n; | |
iter ++; | |
} else { | |
/* eigenvalue was complex, generate a pair of eigenvectors */ | |
mk_@TYPE@_complex_array_conjugate_pair(c, r, n); | |
c += 2*n; | |
r += 2*n; | |
iter += 2; | |
} | |
} | |
} | |
static inline void | |
process_@lapack_func@_results(GEEV_PARAMS_t *params) | |
{ | |
/* REAL versions of geev need the results to be translated | |
* into complex versions. This is the way to deal with imaginary | |
* results. In our gufuncs we will always return complex arrays! | |
*/ | |
mk_@TYPE@_complex_array(params->W, params->WR, params->WI, params->N); | |
/* handle the eigenvectors */ | |
if ('V' == params->JOBVL) { | |
mk_@lapack_func@_complex_eigenvectors(params->VL, params->VLR, | |
params->WI, params->N); | |
} | |
if ('V' == params->JOBVR) { | |
mk_@lapack_func@_complex_eigenvectors(params->VR, params->VRR, | |
params->WI, params->N); | |
} | |
} | |
/**end repeat**/ | |
/**begin repeat | |
#TYPE=CFLOAT,CDOUBLE# | |
#typ=COMPLEX_t,DOUBLECOMPLEX_t# | |
#ftyp=fortran_complex,fortran_doublecomplex# | |
#realtyp=float,double# | |
#lapack_func=cgeev,zgeev# | |
*/ | |
static inline int | |
init_@lapack_func@(GEEV_PARAMS_t* params, | |
char jobvl, | |
char jobvr, | |
fortran_int n) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *mem_buff2 = NULL; | |
npy_uint8 *a, *w, *vl, *vr, *work, *rwork; | |
size_t a_size = n*n*sizeof(@ftyp@); | |
size_t w_size = n*sizeof(@ftyp@); | |
size_t vl_size = jobvl=='V'? n*n*sizeof(@ftyp@) : 0; | |
size_t vr_size = jobvr=='V'? n*n*sizeof(@ftyp@) : 0; | |
size_t rwork_size = 2*n*sizeof(@realtyp@); | |
size_t work_count = 0; | |
@typ@ work_size_query; | |
fortran_int do_size_query = -1; | |
fortran_int rv; | |
size_t total_size = a_size + w_size + vl_size + vr_size + rwork_size; | |
mem_buff = malloc(total_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
w = a + a_size; | |
vl = w + w_size; | |
vr = vl + vl_size; | |
rwork = vr + vr_size; | |
LAPACK(@lapack_func@)(&jobvl, &jobvr, &n, | |
(void *)a, &n, (void *)w, | |
(void *)vl, &n, (void *)vr, &n, | |
(void *)&work_size_query, &do_size_query, | |
(void *)rwork, | |
&rv); | |
if (0 != rv) | |
goto error; | |
work_count = (size_t) work_size_query.array[0]; | |
mem_buff2 = malloc(work_count*sizeof(@ftyp@)); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
params->A = a; | |
params->WR = rwork; | |
params->WI = NULL; | |
params->VLR = NULL; | |
params->VRR = NULL; | |
params->VL = vl; | |
params->VR = vr; | |
params->WORK = work; | |
params->W = w; | |
params->N = n; | |
params->LDA = n; | |
params->LDVL = n; | |
params->LDVR = n; | |
params->LWORK = (fortran_int)work_count; | |
params->JOBVL = jobvl; | |
params->JOBVR = jobvr; | |
return 1; | |
error: | |
free(mem_buff2); | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline fortran_int | |
call_@lapack_func@(GEEV_PARAMS_t* params) | |
{ | |
fortran_int rv; | |
LAPACK(@lapack_func@)(¶ms->JOBVL, ¶ms->JOBVR, | |
¶ms->N, params->A, ¶ms->LDA, | |
params->W, | |
params->VL, ¶ms->LDVL, | |
params->VR, ¶ms->LDVR, | |
params->WORK, ¶ms->LWORK, | |
params->WR, /* actually RWORK */ | |
&rv); | |
return rv; | |
} | |
static inline void | |
process_@lapack_func@_results(GEEV_PARAMS_t *NPY_UNUSED(params)) | |
{ | |
/* nothing to do here, complex versions are ready to copy out */ | |
} | |
/**end repeat**/ | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE,CDOUBLE# | |
#COMPLEXTYPE=CFLOAT,CDOUBLE,CDOUBLE# | |
#ftype=fortran_real,fortran_doublereal,fortran_doublecomplex# | |
#lapack_func=sgeev,dgeev,zgeev# | |
*/ | |
static inline void | |
release_@lapack_func@(GEEV_PARAMS_t *params) | |
{ | |
free(params->WORK); | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
@TYPE@_eig_wrapper(char JOBVL, | |
char JOBVR, | |
char**args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[4]; | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = 2; | |
int error_occurred = get_fp_invalid_and_clear(); | |
GEEV_PARAMS_t geev_params; | |
assert(JOBVL == 'N'); | |
STACK_TRACE; | |
op_count += 'V'==JOBVL?1:0; | |
op_count += 'V'==JOBVR?1:0; | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_@lapack_func@(&geev_params, | |
JOBVL, JOBVR, | |
(fortran_int)dimensions[0])) { | |
LINEARIZE_DATA_t a_in; | |
LINEARIZE_DATA_t w_out; | |
LINEARIZE_DATA_t vl_out; | |
LINEARIZE_DATA_t vr_out; | |
init_linearize_data(&a_in, | |
geev_params.N, geev_params.N, | |
steps[1], steps[0]); | |
steps += 2; | |
init_linearize_data(&w_out, | |
1, geev_params.N, | |
0, steps[0]); | |
steps += 1; | |
if ('V' == geev_params.JOBVL) { | |
init_linearize_data(&vl_out, | |
geev_params.N, geev_params.N, | |
steps[1], steps[0]); | |
steps += 2; | |
} | |
if ('V' == geev_params.JOBVR) { | |
init_linearize_data(&vr_out, | |
geev_params.N, geev_params.N, | |
steps[1], steps[0]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
char **arg_iter = args; | |
/* copy the matrix in */ | |
linearize_@TYPE@_matrix(geev_params.A, *arg_iter++, &a_in); | |
not_ok = call_@lapack_func@(&geev_params); | |
if (!not_ok) { | |
process_@lapack_func@_results(&geev_params); | |
delinearize_@COMPLEXTYPE@_matrix(*arg_iter++, | |
geev_params.W, | |
&w_out); | |
if ('V' == geev_params.JOBVL) | |
delinearize_@COMPLEXTYPE@_matrix(*arg_iter++, | |
geev_params.VL, | |
&vl_out); | |
if ('V' == geev_params.JOBVR) | |
delinearize_@COMPLEXTYPE@_matrix(*arg_iter++, | |
geev_params.VR, | |
&vr_out); | |
} else { | |
/* geev failed */ | |
error_occurred = 1; | |
nan_@COMPLEXTYPE@_matrix(*arg_iter++, &w_out); | |
if ('V' == geev_params.JOBVL) | |
nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vl_out); | |
if ('V' == geev_params.JOBVR) | |
nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vr_out); | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_@lapack_func@(&geev_params); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
@TYPE@_eig(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
@TYPE@_eig_wrapper('N', 'V', args, dimensions, steps); | |
} | |
static void | |
@TYPE@_eigvals(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
@TYPE@_eig_wrapper('N', 'N', args, dimensions, steps); | |
} | |
/**end repeat**/ | |
/* -------------------------------------------------------------------------- */ | |
/* singular value decomposition */ | |
typedef struct gessd_params_struct | |
{ | |
void *A; | |
void *S; | |
void *U; | |
void *VT; | |
void *WORK; | |
void *RWORK; | |
void *IWORK; | |
fortran_int M; | |
fortran_int N; | |
fortran_int LDA; | |
fortran_int LDU; | |
fortran_int LDVT; | |
fortran_int LWORK; | |
char JOBZ; | |
} GESDD_PARAMS_t; | |
static inline void | |
dump_gesdd_params(const char *name, | |
GESDD_PARAMS_t *params) | |
{ | |
TRACE_TXT("\n%s:\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18d\n"\ | |
"%14s: %18d\n"\ | |
"%14s: %18d\n"\ | |
"%14s: %18d\n"\ | |
"%14s: %18d\n"\ | |
"%14s: %18d\n"\ | |
"%14s: %15c'%c'\n", | |
name, | |
"A", params->A, | |
"S", params->S, | |
"U", params->U, | |
"VT", params->VT, | |
"WORK", params->WORK, | |
"RWORK", params->RWORK, | |
"IWORK", params->IWORK, | |
"M", (int)params->M, | |
"N", (int)params->N, | |
"LDA", (int)params->LDA, | |
"LDU", (int)params->LDU, | |
"LDVT", (int)params->LDVT, | |
"LWORK", (int)params->LWORK, | |
"JOBZ", ' ',params->JOBZ); | |
} | |
static inline int | |
compute_urows_vtcolumns(char jobz, | |
fortran_int m, fortran_int n, | |
fortran_int *urows, fortran_int *vtcolumns) | |
{ | |
fortran_int min_m_n = m<n?m:n; | |
switch(jobz) | |
{ | |
case 'N': | |
*urows = 0; | |
*vtcolumns = 0; | |
break; | |
case 'A': | |
*urows = m; | |
*vtcolumns = n; | |
break; | |
case 'S': | |
{ | |
*urows = min_m_n; | |
*vtcolumns = min_m_n; | |
} | |
break; | |
default: | |
return 0; | |
} | |
return 1; | |
} | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE# | |
#lapack_func=sgesdd,dgesdd# | |
#ftyp=fortran_real,fortran_doublereal# | |
*/ | |
static inline int | |
init_@lapack_func@(GESDD_PARAMS_t *params, | |
char jobz, | |
fortran_int m, | |
fortran_int n) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *mem_buff2 = NULL; | |
npy_uint8 *a, *s, *u, *vt, *work, *iwork; | |
size_t a_size = (size_t)m*(size_t)n*sizeof(@ftyp@); | |
fortran_int min_m_n = m<n?m:n; | |
size_t s_size = ((size_t)min_m_n)*sizeof(@ftyp@); | |
fortran_int u_row_count, vt_column_count; | |
size_t u_size, vt_size; | |
fortran_int work_count; | |
size_t work_size; | |
size_t iwork_size = 8*((size_t)min_m_n)*sizeof(fortran_int); | |
if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count)) | |
goto error; | |
u_size = ((size_t)u_row_count)*m*sizeof(@ftyp@); | |
vt_size = n*((size_t)vt_column_count)*sizeof(@ftyp@); | |
mem_buff = malloc(a_size + s_size + u_size + vt_size + iwork_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
s = a + a_size; | |
u = s + s_size; | |
vt = u + u_size; | |
iwork = vt + vt_size; | |
/* fix vt_column_count so that it is a valid lapack parameter (0 is not) */ | |
vt_column_count = vt_column_count < 1? 1 : vt_column_count; | |
{ | |
/* compute optimal work size */ | |
@ftyp@ work_size_query; | |
fortran_int do_query = -1; | |
fortran_int rv; | |
LAPACK(@lapack_func@)(&jobz, &m, &n, | |
(void*)a, &m, (void*)s, (void*)u, &m, | |
(void*)vt, &vt_column_count, | |
&work_size_query, &do_query, | |
(void*)iwork, &rv); | |
if (0!=rv) | |
goto error; | |
work_count = (fortran_int)work_size_query; | |
work_size = (size_t)work_count * sizeof(@ftyp@); | |
} | |
mem_buff2 = malloc(work_size); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
params->M = m; | |
params->N = n; | |
params->A = a; | |
params->S = s; | |
params->U = u; | |
params->VT = vt; | |
params->WORK = work; | |
params->RWORK = NULL; | |
params->IWORK = iwork; | |
params->M = m; | |
params->N = n; | |
params->LDA = m; | |
params->LDU = m; | |
params->LDVT = vt_column_count; | |
params->LWORK = work_count; | |
params->JOBZ = jobz; | |
return 1; | |
error: | |
TRACE_TXT("%s failed init\n", __FUNCTION__); | |
free(mem_buff); | |
free(mem_buff2); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline fortran_int | |
call_@lapack_func@(GESDD_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->M, ¶ms->N, | |
params->A, ¶ms->LDA, | |
params->S, | |
params->U, ¶ms->LDU, | |
params->VT, ¶ms->LDVT, | |
params->WORK, ¶ms->LWORK, | |
params->IWORK, | |
&rv); | |
return rv; | |
} | |
/**end repeat**/ | |
/**begin repeat | |
#TYPE=CFLOAT,CDOUBLE# | |
#ftyp=fortran_complex,fortran_doublecomplex# | |
#frealtyp=fortran_real,fortran_doublereal# | |
#typ=COMPLEX_t,DOUBLECOMPLEX_t# | |
#lapack_func=cgesdd,zgesdd# | |
*/ | |
static inline int | |
init_@lapack_func@(GESDD_PARAMS_t *params, | |
char jobz, | |
fortran_int m, | |
fortran_int n) | |
{ | |
npy_uint8 *mem_buff = NULL, *mem_buff2 = NULL; | |
npy_uint8 *a,*s, *u, *vt, *work, *rwork, *iwork; | |
size_t a_size, s_size, u_size, vt_size, work_size, rwork_size, iwork_size; | |
fortran_int u_row_count, vt_column_count, work_count; | |
fortran_int min_m_n = m<n?m:n; | |
if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count)) | |
goto error; | |
a_size = ((size_t)m)*((size_t)n)*sizeof(@ftyp@); | |
s_size = ((size_t)min_m_n)*sizeof(@frealtyp@); | |
u_size = ((size_t)u_row_count)*m*sizeof(@ftyp@); | |
vt_size = n*((size_t)vt_column_count)*sizeof(@ftyp@); | |
rwork_size = 'N'==jobz? | |
7*((size_t)min_m_n) : | |
(5*(size_t)min_m_n*(size_t)min_m_n + 5*(size_t)min_m_n); | |
rwork_size *= sizeof(@ftyp@); | |
iwork_size = 8*((size_t)min_m_n)*sizeof(fortran_int); | |
mem_buff = malloc(a_size + | |
s_size + | |
u_size + | |
vt_size + | |
rwork_size + | |
iwork_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
s = a + a_size; | |
u = s + s_size; | |
vt = u + u_size; | |
rwork = vt + vt_size; | |
iwork = rwork + rwork_size; | |
/* fix vt_column_count so that it is a valid lapack parameter (0 is not) */ | |
vt_column_count = vt_column_count < 1? 1 : vt_column_count; | |
{ | |
/* compute optimal work size */ | |
@ftyp@ work_size_query; | |
fortran_int do_query = -1; | |
fortran_int rv; | |
LAPACK(@lapack_func@)(&jobz, &m, &n, | |
(void*)a, &m, (void*)s, (void*)u, &m, | |
(void*)vt, &vt_column_count, | |
&work_size_query, &do_query, | |
(void*)rwork, | |
(void*)iwork, &rv); | |
if (0!=rv) | |
goto error; | |
work_count = (fortran_int)((@typ@*)&work_size_query)->array[0]; | |
work_size = (size_t)work_count * sizeof(@ftyp@); | |
} | |
mem_buff2 = malloc(work_size); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
params->A = a; | |
params->S = s; | |
params->U = u; | |
params->VT = vt; | |
params->WORK = work; | |
params->RWORK = rwork; | |
params->IWORK = iwork; | |
params->M = m; | |
params->N = n; | |
params->LDA = m; | |
params->LDU = m; | |
params->LDVT = vt_column_count; | |
params->LWORK = work_count; | |
params->JOBZ = jobz; | |
return 1; | |
error: | |
TRACE_TXT("%s failed init\n", __FUNCTION__); | |
free(mem_buff2); | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline fortran_int | |
call_@lapack_func@(GESDD_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->M, ¶ms->N, | |
params->A, ¶ms->LDA, | |
params->S, | |
params->U, ¶ms->LDU, | |
params->VT, ¶ms->LDVT, | |
params->WORK, ¶ms->LWORK, | |
params->RWORK, | |
params->IWORK, | |
&rv); | |
return rv; | |
} | |
/**end repeat**/ | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
#REALTYPE=FLOAT,DOUBLE,FLOAT,DOUBLE# | |
#lapack_func=sgesdd,dgesdd,cgesdd,zgesdd# | |
*/ | |
static inline void | |
release_@lapack_func@(GESDD_PARAMS_t* params) | |
{ | |
/* A and WORK contain allocated blocks */ | |
free(params->A); | |
free(params->WORK); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
@TYPE@_svd_wrapper(char JOBZ, | |
char **args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[4]; | |
int error_occurred = get_fp_invalid_and_clear(); | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = (JOBZ=='N')?2:4; | |
GESDD_PARAMS_t params; | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_@lapack_func@(¶ms, | |
JOBZ, | |
(fortran_int)dimensions[0], | |
(fortran_int)dimensions[1])) { | |
LINEARIZE_DATA_t a_in, u_out, s_out, v_out; | |
init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]); | |
if ('N' == params.JOBZ) { | |
/* only the singular values are wanted */ | |
fortran_int min_m_n = params.M < params.N? params.M : params.N; | |
init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]); | |
} else { | |
fortran_int u_columns, v_rows; | |
fortran_int min_m_n = params.M < params.N? params.M : params.N; | |
if ('S' == params.JOBZ) { | |
u_columns = min_m_n; | |
v_rows = min_m_n; | |
} else { | |
u_columns = params.M; | |
v_rows = params.N; | |
} | |
init_linearize_data(&u_out, | |
u_columns, params.M, | |
steps[3], steps[2]); | |
init_linearize_data(&s_out, | |
1, min_m_n, | |
0, steps[4]); | |
init_linearize_data(&v_out, | |
params.N, v_rows, | |
steps[6], steps[5]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
/* copy the matrix in */ | |
linearize_@TYPE@_matrix(params.A, args[0], &a_in); | |
not_ok = call_@lapack_func@(¶ms); | |
if (!not_ok) { | |
if ('N' == params.JOBZ) { | |
delinearize_@REALTYPE@_matrix(args[1], params.S, &s_out); | |
} else { | |
delinearize_@TYPE@_matrix(args[1], params.U, &u_out); | |
delinearize_@REALTYPE@_matrix(args[2], params.S, &s_out); | |
delinearize_@TYPE@_matrix(args[3], params.VT, &v_out); | |
} | |
} else { | |
error_occurred = 1; | |
if ('N' == params.JOBZ) { | |
nan_@REALTYPE@_matrix(args[1], &s_out); | |
} else { | |
nan_@TYPE@_matrix(args[1], &u_out); | |
nan_@REALTYPE@_matrix(args[2], &s_out); | |
nan_@TYPE@_matrix(args[3], &v_out); | |
} | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_@lapack_func@(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/**end repeat*/ | |
/* svd gufunc entry points */ | |
/**begin repeat | |
#TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# | |
*/ | |
static void | |
@TYPE@_svd_N(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
@TYPE@_svd_wrapper('N', args, dimensions, steps); | |
} | |
static void | |
@TYPE@_svd_S(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
@TYPE@_svd_wrapper('S', args, dimensions, steps); | |
} | |
static void | |
@TYPE@_svd_A(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
@TYPE@_svd_wrapper('A', args, dimensions, steps); | |
} | |
/**end repeat**/ | |
#pragma GCC diagnostic pop | |
/* -------------------------------------------------------------------------- */ | |
/* gufunc registration */ | |
/*
 * Table of per-loop data pointers passed to
 * PyUFunc_FromFuncAndDataAndSignature() for every gufunc registered by
 * addUfuncs(). All 16 slots are NULL; no descriptor in this file registers
 * more than 4 loops.
 */
static void *array_of_nulls[16] = {
    NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL
};
/* Identifier of the loop-function table belonging to gufunc NAME. */
#define FUNC_ARRAY_NAME(NAME) NAME##_funcs
/*
 * Define the static loop table for gufunc NAME with two entries, in the
 * order FLOAT, DOUBLE.
 */
#define GUFUNC_FUNC_ARRAY_REAL(NAME) \
    static PyUFuncGenericFunction FUNC_ARRAY_NAME(NAME)[] = { \
        FLOAT_##NAME, \
        DOUBLE_##NAME \
    }
/*
 * Define the static loop table for gufunc NAME with four entries, in the
 * order FLOAT, DOUBLE, CFLOAT, CDOUBLE.
 */
#define GUFUNC_FUNC_ARRAY_REAL_COMPLEX(NAME) \
    static PyUFuncGenericFunction FUNC_ARRAY_NAME(NAME)[] = { \
        FLOAT_##NAME, DOUBLE_##NAME, \
        CFLOAT_##NAME, CDOUBLE_##NAME \
    }
/*
 * Loop table for the eig family: FLOAT, DOUBLE and CDOUBLE only.
 *
 * There are problems with eig in complex single precision, so the CFLOAT
 * kernel is disabled (left out of the table).
 */
#define GUFUNC_FUNC_ARRAY_EIG(NAME) \
    static PyUFuncGenericFunction FUNC_ARRAY_NAME(NAME)[] = { \
        FLOAT_##NAME, \
        DOUBLE_##NAME, \
        CDOUBLE_##NAME \
    }
/* Loop tables (<name>_funcs) referenced by gufunc_descriptors. */

/* determinants */
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(slogdet);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(det);

/* hermitian eigenproblems, lower/upper triangle variants */
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighlo);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighup);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshlo);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshup);

/* linear systems, inverse and Cholesky factorisation */
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve1);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(inv);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(cholesky_lo);

/* singular value decomposition, one table per gesdd mode */
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_N);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_S);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_A);

/* general eigenproblems (three loops only, see GUFUNC_FUNC_ARRAY_EIG) */
GUFUNC_FUNC_ARRAY_EIG(eig);
GUFUNC_FUNC_ARRAY_EIG(eigvals);
static char equal_2_types[] = { | |
NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_CFLOAT, | |
NPY_CDOUBLE, NPY_CDOUBLE | |
}; | |
static char equal_3_types[] = { | |
NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT, | |
NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE | |
}; | |
/* second result is logdet, that will always be a REAL */ | |
static char slogdet_types[] = { | |
NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, | |
NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE | |
}; | |
static char eigh_types[] = { | |
NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, | |
NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE | |
}; | |
static char eighvals_types[] = { | |
NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_FLOAT, | |
NPY_CDOUBLE, NPY_DOUBLE | |
}; | |
static char eig_types[] = { | |
NPY_FLOAT, NPY_CFLOAT, NPY_CFLOAT, | |
NPY_DOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, | |
NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE | |
}; | |
static char eigvals_types[] = { | |
NPY_FLOAT, NPY_CFLOAT, | |
NPY_DOUBLE, NPY_CDOUBLE, | |
NPY_CDOUBLE, NPY_CDOUBLE | |
}; | |
static char svd_1_1_types[] = { | |
NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_FLOAT, | |
NPY_CDOUBLE, NPY_DOUBLE | |
}; | |
static char svd_1_3_types[] = { | |
NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, | |
NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE | |
}; | |
typedef struct gufunc_descriptor_struct { | |
char *name; | |
char *signature; | |
char *doc; | |
int ntypes; | |
int nin; | |
int nout; | |
PyUFuncGenericFunction *funcs; | |
char *types; | |
} GUFUNC_DESCRIPTOR_t; | |
GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = { | |
{ | |
"slogdet", | |
"(m,m)->(),()", | |
"slogdet on the last two dimensions and broadcast on the rest. \n"\ | |
"Results in two arrays, one with sign and the other with log of the"\ | |
" determinants. \n"\ | |
" \"(m,m)->(),()\" \n", | |
4, 1, 2, | |
FUNC_ARRAY_NAME(slogdet), | |
slogdet_types | |
}, | |
{ | |
"det", | |
"(m,m)->()", | |
"det of the last two dimensions and broadcast on the rest. \n"\ | |
" \"(m,m)->()\" \n", | |
4, 1, 1, | |
FUNC_ARRAY_NAME(det), | |
equal_2_types | |
}, | |
{ | |
"eigh_lo", | |
"(m,m)->(m),(m,m)", | |
"eigh on the last two dimension and broadcast to the rest, using"\ | |
" lower triangle \n"\ | |
"Results in a vector of eigenvalues and a matrix with the"\ | |
"eigenvectors. \n"\ | |
" \"(m,m)->(m),(m,m)\" \n", | |
4, 1, 2, | |
FUNC_ARRAY_NAME(eighlo), | |
eigh_types | |
}, | |
{ | |
"eigh_up", | |
"(m,m)->(m),(m,m)", | |
"eigh on the last two dimension and broadcast to the rest, using"\ | |
" upper triangle. \n"\ | |
"Results in a vector of eigenvalues and a matrix with the"\ | |
" eigenvectors. \n"\ | |
" \"(m,m)->(m),(m,m)\" \n", | |
4, 1, 2, | |
FUNC_ARRAY_NAME(eighup), | |
eigh_types | |
}, | |
{ | |
"eigvalsh_lo", | |
"(m,m)->(m)", | |
"eigh on the last two dimension and broadcast to the rest, using"\ | |
" lower triangle. \n"\ | |
"Results in a vector of eigenvalues and a matrix with the"\ | |
"eigenvectors. \n"\ | |
" \"(m,m)->(m)\" \n", | |
4, 1, 1, | |
FUNC_ARRAY_NAME(eigvalshlo), | |
eighvals_types | |
}, | |
{ | |
"eigvalsh_up", | |
"(m,m)->(m)", | |
"eigvalsh on the last two dimension and broadcast to the rest,"\ | |
" using upper triangle. \n"\ | |
"Results in a vector of eigenvalues and a matrix with the"\ | |
"eigenvectors.\n"\ | |
" \"(m,m)->(m)\" \n", | |
4, 1, 1, | |
FUNC_ARRAY_NAME(eigvalshup), | |
eighvals_types | |
}, | |
{ | |
"solve", | |
"(m,m),(m,n)->(m,n)", | |
"solve the system a x = b, on the last two dimensions, broadcast"\ | |
" to the rest. \n"\ | |
"Results in a matrices with the solutions. \n"\ | |
" \"(m,m),(m,n)->(m,n)\" \n", | |
4, 2, 1, | |
FUNC_ARRAY_NAME(solve), | |
equal_3_types | |
}, | |
{ | |
"solve1", | |
"(m,m),(m)->(m)", | |
"solve the system a x = b, for b being a vector, broadcast in"\ | |
" the outer dimensions. \n"\ | |
"Results in vectors with the solutions. \n"\ | |
" \"(m,m),(m)->(m)\" \n", | |
4,2,1, | |
FUNC_ARRAY_NAME(solve1), | |
equal_3_types | |
}, | |
{ | |
"inv", | |
"(m,m)->(m,m)", | |
"compute the inverse of the last two dimensions and broadcast"\ | |
" to the rest. \n"\ | |
"Results in the inverse matrices. \n"\ | |
" \"(m,m)->(m,m)\" \n", | |
4,1,1, | |
FUNC_ARRAY_NAME(inv), | |
equal_2_types | |
}, | |
{ | |
"cholesky_lo", | |
"(m,m)->(m,m)", | |
"cholesky decomposition of hermitian positive-definite matrices. \n"\ | |
"Broadcast to all outer dimensions. \n"\ | |
" \"(m,m)->(m,m)\" \n", | |
4, 1, 1, | |
FUNC_ARRAY_NAME(cholesky_lo), | |
equal_2_types | |
}, | |
{ | |
"svd_m", | |
"(m,n)->(m)", | |
"svd when n>=m. ", | |
4, 1, 1, | |
FUNC_ARRAY_NAME(svd_N), | |
svd_1_1_types | |
}, | |
{ | |
"svd_n", | |
"(m,n)->(n)", | |
"svd when n<=m", | |
4, 1, 1, | |
FUNC_ARRAY_NAME(svd_N), | |
svd_1_1_types | |
}, | |
{ | |
"svd_m_s", | |
"(m,n)->(m,m),(m),(m,n)", | |
"svd when m>=n", | |
4, 1, 3, | |
FUNC_ARRAY_NAME(svd_S), | |
svd_1_3_types | |
}, | |
{ | |
"svd_n_s", | |
"(m,n)->(m,n),(n),(n,n)", | |
"svd when m>=n", | |
4, 1, 3, | |
FUNC_ARRAY_NAME(svd_S), | |
svd_1_3_types | |
}, | |
{ | |
"svd_m_f", | |
"(m,n)->(m,m),(m),(n,n)", | |
"svd when m>=n", | |
4, 1, 3, | |
FUNC_ARRAY_NAME(svd_A), | |
svd_1_3_types | |
}, | |
{ | |
"svd_n_f", | |
"(m,n)->(m,m),(n),(n,n)", | |
"svd when m>=n", | |
4, 1, 3, | |
FUNC_ARRAY_NAME(svd_A), | |
svd_1_3_types | |
}, | |
{ | |
"eig", | |
"(m,m)->(m),(m,m)", | |
"eig on the last two dimension and broadcast to the rest. \n"\ | |
"Results in a vector with the eigenvalues and a matrix with the"\ | |
" eigenvectors. \n"\ | |
" \"(m,m)->(m),(m,m)\" \n", | |
3, 1, 2, | |
FUNC_ARRAY_NAME(eig), | |
eig_types | |
}, | |
{ | |
"eigvals", | |
"(m,m)->(m)", | |
"eigvals on the last two dimension and broadcast to the rest. \n"\ | |
"Results in a vector of eigenvalues. \n"\ | |
" \"(m,m)->(m),(m,m)\" \n", | |
3, 1, 1, | |
FUNC_ARRAY_NAME(eigvals), | |
eigvals_types | |
}, | |
}; | |
static void | |
addUfuncs(PyObject *dictionary) { | |
PyObject *f; | |
int i; | |
const int gufunc_count = sizeof(gufunc_descriptors)/ | |
sizeof(gufunc_descriptors[0]); | |
for (i=0; i < gufunc_count; i++) { | |
GUFUNC_DESCRIPTOR_t* d = &gufunc_descriptors[i]; | |
f = PyUFunc_FromFuncAndDataAndSignature(d->funcs, | |
array_of_nulls, | |
d->types, | |
d->ntypes, | |
d->nin, | |
d->nout, | |
PyUFunc_None, | |
d->name, | |
d->doc, | |
0, | |
d->signature); | |
PyDict_SetItemString(dictionary, d->name, f); | |
#if 0 | |
dump_ufunc_object((PyUFuncObject*) f); | |
#endif | |
Py_DECREF(f); | |
} | |
} | |
/* -------------------------------------------------------------------------- */ | |
/* Module initialization stuff */ | |
/*
 * Method table of the module. It contains nothing but the terminating
 * entry; the gufuncs are added to the module dictionary by addUfuncs().
 */
static PyMethodDef UMath_LinAlgMethods[] = {
    { NULL, NULL, 0, NULL }     /* Sentinel */
};
#ifdef NPY_PY3K
/* Module definition handed to PyModule_Create() by the Python 3 entry point. */
static struct PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT,
    UMATH_LINALG_MODULE_NAME,   /* module name */
    NULL,                       /* no module docstring */
    -1,                         /* size of per-module state */
    UMath_LinAlgMethods,        /* method table */
    NULL,
    NULL,
    NULL,
    NULL
};
#endif
#if defined(NPY_PY3K) | |
#define RETVAL m | |
PyObject *PyInit__umath_linalg(void) | |
#else | |
#define RETVAL | |
PyMODINIT_FUNC | |
init_umath_linalg(void) | |
#endif | |
{ | |
PyObject *m; | |
PyObject *d; | |
PyObject *version; | |
init_constants(); | |
#if defined(NPY_PY3K) | |
m = PyModule_Create(&moduledef); | |
#else | |
m = Py_InitModule(UMATH_LINALG_MODULE_NAME, UMath_LinAlgMethods); | |
#endif | |
if (m == NULL) | |
return RETVAL; | |
import_array(); | |
import_ufunc(); | |
d = PyModule_GetDict(m); | |
version = PyString_FromString(umath_linalg_version_string); | |
PyDict_SetItemString(d, "__version__", version); | |
Py_DECREF(version); | |
/* Load the ufunc operators into the module's namespace */ | |
addUfuncs(d); | |
if (PyErr_Occurred()) { | |
PyErr_SetString(PyExc_RuntimeError, | |
"cannot load _umath_linalg module."); | |
} | |
return RETVAL; | |
} | |