Spaces:

Guy24
/

inner_lexicon

Running on Zero

App Files Files Community

inner_lexicon / utils /procrustes /utils.py

Guy24

adding application

9e67a8e 7 days ago

raw

history blame contribute delete

18.7 kB

	# -- coding: utf-8 --
	# The Procrustes library provides a set of functions for transforming
	# a matrix to make it as similar as possible to a target matrix.
	#
	# Copyright (C) 2017-2022 The QC-Devs Community
	#
	# This file is part of Procrustes.
	#
	# Procrustes is free software; you can redistribute it and/or
	# modify it under the terms of the GNU General Public License
	# as published by the Free Software Foundation; either version 3
	# of the License, or (at your option) any later version.
	#
	# Procrustes is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program; if not, see <http://www.gnu.org/licenses/>
	#
	# --
	"""Utility Module."""
	from typing import List, Optional, Tuple

	import numpy as np

	__all__ = [
	"compute_error",
	"setup_input_arrays",
	"ProcrustesResult",
	]


	def _zero_padding(
	array_a: np.ndarray, array_b: np.ndarray, pad_mode: str = "row-col"
	) -> Tuple[np.ndarray, np.ndarray]:
	r"""
	Return arrays padded with rows and/or columns of zero.

	Parameters
	----------
	array_a : ndarray
	The 2D-array :math:`\mathbf{A}_{n_a \times m_a}`.
	array_b : ndarray
	The 2D-array :math:`\mathbf{B}_{n_b \times m_b}`.
	pad_mode : str
	Specifying how to pad the arrays. Should be one of
	- "row"
	The array with fewer rows is padded with zero rows so that both have the same
	number of rows.
	- "col"
	The array with fewer columns is padded with zero columns so that both have the
	same number of columns.
	- "row-col"
	The array with fewer rows is padded with zero rows, and the array with fewer
	columns is padded with zero columns, so that both have the same dimensions.
	This does not necessarily result in square arrays.
	- "square"
	The arrays are padded with zero rows and zero columns so that they are both
	squared arrays. The dimension of square array is specified based on the highest
	dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.

	Returns
	-------
	padded_a : ndarray
	Padded array_a.
	padded_b : ndarray
	Padded array_b.

	"""
	# sanity checks
	if not isinstance(array_a, np.ndarray) or not isinstance(array_b, np.ndarray):
	raise ValueError("Arguments array_a & array_b should be numpy arrays.")
	if array_a.ndim != 2 or array_b.ndim != 2:
	raise ValueError("Arguments array_a & array_b should be 2D arrays.")

	if array_a.shape == array_b.shape and array_a.shape[0] == array_a.shape[1]:
	# special case of square arrays, mode is set to None so that array_a & array_b are returned.
	pad_mode = None

	if pad_mode == "square":
	# calculate desired dimension of square array
	(a_n1, a_m1), (a_n2, a_m2) = array_a.shape, array_b.shape
	dim = max(a_n1, a_n2, a_m1, a_m2)
	# padding rows to have both arrays have dim rows
	if a_n1 < dim:
	array_a = np.pad(array_a, [[0, dim - a_n1], [0, 0]], "constant", constant_values=0)
	if a_n2 < dim:
	array_b = np.pad(array_b, [[0, dim - a_n2], [0, 0]], "constant", constant_values=0)
	# padding columns to have both arrays have dim columns
	if a_m1 < dim:
	array_a = np.pad(array_a, [[0, 0], [0, dim - a_m1]], "constant", constant_values=0)
	if a_m2 < dim:
	array_b = np.pad(array_b, [[0, 0], [0, dim - a_m2]], "constant", constant_values=0)

	if pad_mode in ["row", "row-col"]:
	# padding rows to have both arrays have the same number of rows
	diff = array_a.shape[0] - array_b.shape[0]
	if diff < 0:
	array_a = np.pad(array_a, [[0, -diff], [0, 0]], "constant", constant_values=0)
	else:
	array_b = np.pad(array_b, [[0, diff], [0, 0]], "constant", constant_values=0)

	if pad_mode in ["col", "row-col"]:
	# padding columns to have both arrays have the same number of columns
	diff = array_a.shape[1] - array_b.shape[1]
	if diff < 0:
	array_a = np.pad(array_a, [[0, 0], [0, -diff]], "constant", constant_values=0)
	else:
	array_b = np.pad(array_b, [[0, 0], [0, diff]], "constant", constant_values=0)

	return array_a, array_b


	def _translate_array(
	array_a: np.ndarray, array_b: Optional[np.ndarray] = None, weight: Optional[np.ndarray] = None
	) -> Tuple[np.ndarray, float]:
	"""
	Return translated array_a and translation vector.

	Columns of both arrays will have mean zero.

	Parameters
	----------
	array_a : ndarray
	The 2D-array to translate.
	array_b : ndarray, optional
	The 2D-array to translate array_a based on.
	weight : ndarray, optional
	The weight vector.

	Returns
	-------
	array_a : ndarray
	If array_b is None, array_a is translated to origin using its centroid.
	If array_b is given, array_a is translated to centroid of array_b (the centroid of
	translated array_a will centroid with the centroid array_b).
	centroid : float
	If array_b is given, the centroid is returned.

	"""
	# The mean is strongly affected by outliers and is not a robust estimator for central location
	# see https://docs.python.org/3.6/library/statistics.html?highlight=mean#statistics.mean
	if weight is not None:
	if weight.ndim != 1:
	raise ValueError("The weight should be a 1d row vector.")
	if not (weight >= 0).all():
	raise ValueError("The elements of the weight should be non-negative.")

	centroid_a = np.average(array_a, axis=0, weights=weight)
	if array_b is not None:
	# translation vector to b centroid
	centroid_a -= np.average(array_b, axis=0, weights=weight)
	return array_a - centroid_a, -1 * centroid_a


	def _scale_array(array_a, array_b=None) -> Tuple[np.ndarray, float]:
	"""
	Return scaled/normalized array_a and scaling vector.

	Parameters
	----------
	array_a : ndarray
	The 2D-array to scale
	array_b : ndarray, default=None
	The 2D-array to scale array_a based on.

	Returns
	-------
	scaled_a, ndarray
	If array_b is None, array_a is normalized using the Frobenius norm.
	If array_b is given, array_a is scaled to match array_b"s norm (the norm of array_a
	will be equal norm of array_b).
	scale : float
	The scaling factor to match array_b norm.

	"""
	# scaling factor to match unit sphere
	scale = 1.0 / np.linalg.norm(array_a)
	if array_b is not None:
	# scaling factor to match array_b norm
	scale *= np.linalg.norm(array_b)
	return array_a * scale, scale


	def _hide_zero_padding(
	array_a: np.ndarray,
	remove_zero_col: bool = True,
	remove_zero_row: bool = True,
	tol: float = 1.0e-8,
	) -> np.ndarray:
	r"""
	Return array with zero-padded rows (bottom) and columns (right) removed.

	Parameters
	----------
	array_a : ndarray
	The initial array.
	remove_zero_col : bool, optional
	If True, zero columns (values less than 1e-8) on the right side will be removed.
	remove_zero_row : bool, optional
	If True, zero rows (values less than 1e-8) on the bottom will be removed.
	tol : float, optional
	Tolerance value.

	Returns
	-------
	new_A : ndarray
	Array, with either near zero columns and/or zero rows are removed.

	"""
	# Input checking
	if array_a.ndim > 2:
	raise TypeError("Matrix inputs must be 1- or 2- dimensional arrays")
	# Check zero rows from bottom to top
	if remove_zero_row:
	num_row = array_a.shape[0]
	tmp_a = array_a[..., np.newaxis] if array_a.ndim == 1 else array_a
	for array_v in tmp_a[::-1]:
	if any(abs(i) > tol for i in array_v):
	break
	num_row -= 1
	array_a = array_a[:num_row]
	# Cut off zero rows
	if remove_zero_col:
	if array_a.ndim == 2:
	# Check zero columns from right to left
	col_m = array_a.shape[1]
	for array_v in array_a.T[::-1]:
	if any(abs(i) > tol for i in array_v):
	break
	col_m -= 1
	# Cut off zero columns
	array_a = array_a[:, :col_m]
	return array_a


	def compute_error(
	a: np.ndarray, b: np.ndarray, t: np.ndarray, s: Optional[np.ndarray] = None
	) -> float:
	r"""Return the one- or two-sided Procrustes (squared Frobenius norm) error.

	The double-sided Procrustes error is defined as

	.. math::
	\\|\mathbf{S}\mathbf{A}\mathbf{T} - \mathbf{B}\\|_{F}^2 =
	\text{Tr}\left[
	\left(\mathbf{S}\mathbf{A}\mathbf{T} - \mathbf{B}\right)^\dagger
	\left(\mathbf{S}\mathbf{A}\mathbf{T} - \mathbf{B}\right)\right]

	when :math:`\mathbf{S}` is the identity matrix :math:`\mathbf{I}`, this is called the one-sided
	Procrustes error.

	Parameters
	----------
	a : ndarray
	The 2D-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
	b : ndarray
	The 2D-array :math:`\mathbf{B}_{m \times n}` representing the reference matrix.
	t : ndarray
	The 2D-array :math:`\mathbf{T}_{n \times n}` representing the right-hand-side transformation
	matrix.
	s : ndarray, optional
	The 2D-array :math:`\mathbf{S}_{m \times m}` representing the left-hand-side transformation
	matrix. If set to `None`, the one-sided Procrustes error is computed.

	Returns
	-------
	error : float
	The squared Frobenius norm of difference between the transformed array, :math:`\mathbf{S}
	\mathbf{A}\mathbf{T}`, and the reference array, :math:`\mathbf{B}`.

	"""
	# transform matrix A to either AT or SAT
	a_trans = np.dot(a, t) if s is None else np.dot(np.dot(s, a), t)
	# subtract matrix B and compute Frobenius norm squared
	return np.linalg.norm(a_trans - b, ord=None) ** 2


	def setup_input_arrays(
	array_a: np.ndarray,
	array_b: np.ndarray,
	remove_zero_col: bool,
	remove_zero_row: bool,
	pad: bool,
	translate: bool,
	scale: bool,
	check_finite: bool,
	weight: Optional[np.ndarray] = None,
	) -> Tuple[np.ndarray, np.ndarray]:
	r"""
	Check and process array inputs for the Procrustes transformation routines.

	Usually, the precursor step before all Procrustes methods.

	Parameters
	----------
	array_a : npdarray
	The 2D array :math:`A` being transformed.
	array_b : npdarray
	The 2D reference array :math:`B`.
	remove_zero_col : bool
	If True, zero columns (values less than 1e-8) on the right side will be removed.
	remove_zero_row : bool
	If True, zero rows (values less than 1e-8) on the bottom will be removed.
	pad : bool
	Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
	:math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
	translate : bool
	If true, then translate both arrays :math:`A, B` to the origin, ie columns of the arrays
	will have mean zero.
	scale :
	If True, both arrays are normalized to one with respect to the Frobenius norm, ie
	:math:`Tr(A^T A) = 1`.
	check_finite : bool
	If true, then checks if both arrays :math:`A, B` are numpy arrays and two-dimensional.
	weight : A list of ndarray or ndarray
	A list of the weight arrays or one numpy array. When only on numpy array provided,
	it is assumed that the two arrays :math:`A` and :math:`B` share the same weight matrix.

	Returns
	-------
	(ndarray, ndarray) :
	Returns the padded arrays, in that they have the same matrix dimensions.

	"""
	array_a = _setup_input_array_lower(
	array_a, None, remove_zero_col, remove_zero_row, translate, scale, check_finite, weight
	)
	array_b = _setup_input_array_lower(
	array_b, None, remove_zero_col, remove_zero_row, translate, scale, check_finite, weight
	)
	if pad:
	array_a, array_b = _zero_padding(array_a, array_b, pad_mode="row-col")
	return array_a, array_b


	def setup_input_arrays_multi(
	array_list: List[np.ndarray],
	array_ref: np.ndarray,
	remove_zero_col: bool,
	remove_zero_row: bool,
	pad_mode: str,
	translate: bool,
	scale: bool,
	check_finite: bool,
	weight: Optional[np.ndarray] = None,
	) -> List[np.ndarray]:
	r"""
	Check and process array inputs for the Procrustes transformation routines.

	Parameters
	----------
	array_list : List
	A list of 2D arrays that being transformed.
	array_ref : ndarray
	The 2D reference array :math:`B`.
	remove_zero_col : bool
	If True, zero columns (values less than 1e-8) on the right side will be removed.
	remove_zero_row : bool
	If True, zero rows (values less than 1e-8) on the bottom will be removed.
	pad_mode : str
	Specifying how to pad the arrays. Should be one of
	- "row"
	The array with fewer rows is padded with zero rows so that both have the same
	number of rows.
	- "col"
	The array with fewer columns is padded with zero columns so that both have the
	same number of columns.
	- "row-col"
	The array with fewer rows is padded with zero rows, and the array with fewer
	columns is padded with zero columns, so that both have the same dimensions.
	This does not necessarily result in square arrays.
	- "square"
	The arrays are padded with zero rows and zero columns so that they are both
	squared arrays. The dimension of square array is specified based on the highest
	dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
	translate : bool
	If true, then translate both arrays :math:`A, B` to the origin, ie columns of the arrays
	will have mean zero.
	scale :
	If True, both arrays are normalized to one with respect to the Frobenius norm, ie
	:math:`Tr(A^T A) = 1`.
	check_finite : bool
	If true, then checks if both arrays :math:`A, B` are numpy arrays and two-dimensional.
	weight : A list of ndarray or ndarray, optional
	A list of the weight arrays or one numpy array. When only on numpy array provided,
	it is assumed that the two arrays :math:`A` and :math:`B` share the same weight matrix.

	Returns
	-------
	List of arrays :
	Returns the padded arrays, in that they have the same matrix dimensions.
	"""
	array_list_new = [
	_setup_input_array_lower(
	array_a=arr,
	array_ref=array_ref,
	remove_zero_col=remove_zero_col,
	remove_zero_row=remove_zero_row,
	translate=translate,
	scale=scale,
	check_finite=check_finite,
	weight=weight,
	)
	for arr in array_list
	]
	arr_shape = np.array([arr.shape for arr in array_list_new])
	array_b = np.ones(np.max(arr_shape, axis=0), dtype=int)
	array_list_new = [_zero_padding(arr, array_b, pad_mode=pad_mode) for arr in array_list_new]
	return array_list_new


	def _setup_input_array_lower(
	array_a: np.ndarray,
	array_ref: np.ndarray,
	remove_zero_col: np.ndarray,
	remove_zero_row: np.ndarray,
	translate: bool,
	scale: bool,
	check_finite: bool,
	weight: Optional[np.ndarray] = None,
	) -> np.ndarray:
	"""Pre-processing the matrices with translation, scaling."""
	_check_arraytypes(array_a)
	if check_finite:
	array_a = np.asarray_chkfinite(array_a)
	# Sometimes arrays already have zero padding that messes up zero padding below.
	array_a = _hide_zero_padding(array_a, remove_zero_col, remove_zero_row)
	if translate:
	array_a, _ = _translate_array(array_a, array_ref, weight)
	# scale the matrix when translate is False, but weight is True
	else:
	if weight is not None:
	array_a = np.dot(np.diag(weight), array_a)

	if scale:
	array_a, _ = _scale_array(array_a, array_ref)
	return array_a


	def _check_arraytypes(*args) -> None:
	r"""Check array input types to Procrustes transformation routines."""
	if any(not isinstance(arr_x, np.ndarray) for arr_x in args):
	raise TypeError("Matrix inputs must be NumPy arrays")
	if any(x.ndim != 2 for x in args):
	raise TypeError("Matrix inputs must be 2-dimensional arrays")


	class ProcrustesResult(dict):
	r"""Represents the Procrustes analysis result.

	Attributes
	----------
	error : float
	The Procrustes (squared Frobenius norm) error.
	new_a : ndarray
	The translated/scaled numpy ndarray :math:`\mathbf{A}`.
	new_b : ndarray
	The translated/scaled numpy ndarray :math:`\mathbf{B}`.
	t : ndarray
	The 2D-array :math:`\mathbf{T}` representing the right-hand-side transformation matrix.
	s : ndarray
	The 2D-array :math:`\mathbf{S}` representing the left-hand-side transformation
	matrix. If set to `None`, the one-sided Procrustes was performed.

	"""

	# modification on https://github.com/scipy/scipy/blob/v1.4.1/scipy/optimize/optimize.py#L77-L132
	def __getattr__(self, name: str):
	"""Deal with attributes which it doesn't explicitly manage."""
	try:
	return self[name]
	# Not using raise from makes the traceback inaccurate, because the message implies there
	# is a bug in the exception-handling code itself, which is a separate situation than
	# wrapping an exception
	# W0707 from http://pylint.pycqa.org/en/latest/technical_reference/features.html
	except KeyError as ke_info:
	raise AttributeError(name) from ke_info

	__setattr__ = dict.__setitem__
	__delattr__ = dict.__delitem__

	def __repr__(self):
	"""Return a human friendly representation."""
	if self.keys():
	max_len = max(map(len, list(self.keys()))) + 1
	return "\n".join([k.rjust(max_len) + ": " + repr(v) for k, v in sorted(self.items())])
	else:
	return self.__class__.__name__ + "()"

	def __dir__(self):
	"""Provide basic customization of module attribute access with a list."""
	return list(self.keys())