File size: 18,673 Bytes
0108542
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
# -*- coding: utf-8 -*-
# The Procrustes library provides a set of functions for transforming
# a matrix to make it as similar as possible to a target matrix.
#
# Copyright (C) 2017-2022 The QC-Devs Community
#
# This file is part of Procrustes.
#
# Procrustes is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# Procrustes is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
#
# --
"""Utility Module."""
from typing import List, Optional, Tuple

import numpy as np

__all__ = [
    "compute_error",
    "setup_input_arrays",
    "ProcrustesResult",
]


def _zero_padding(
    array_a: np.ndarray, array_b: np.ndarray, pad_mode: str = "row-col"
) -> Tuple[np.ndarray, np.ndarray]:
    r"""
    Return arrays padded with rows and/or columns of zero.

    Parameters
    ----------
    array_a : ndarray
        The 2D-array :math:`\mathbf{A}_{n_a \times m_a}`.
    array_b : ndarray
        The 2D-array :math:`\mathbf{B}_{n_b \times m_b}`.
    pad_mode : str
        Specifying how to pad the arrays. Should be one of
        - "row"
            The array with fewer rows is padded with zero rows so that both have the same
            number of rows.
        - "col"
            The array with fewer columns is padded with zero columns so that both have the
            same number of columns.
        - "row-col"
            The array with fewer rows is padded with zero rows, and the array with fewer
            columns is padded with zero columns, so that both have the same dimensions.
            This does not necessarily result in square arrays.
        - "square"
            The arrays are padded with zero rows and zero columns so that they are both
            squared arrays. The dimension of square array is specified based on the highest
            dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.

    Returns
    -------
    padded_a : ndarray
        Padded array_a.
    padded_b : ndarray
        Padded array_b.

    """
    # sanity checks
    if not isinstance(array_a, np.ndarray) or not isinstance(array_b, np.ndarray):
        raise ValueError("Arguments array_a & array_b should be numpy arrays.")
    if array_a.ndim != 2 or array_b.ndim != 2:
        raise ValueError("Arguments array_a & array_b should be 2D arrays.")

    if array_a.shape == array_b.shape and array_a.shape[0] == array_a.shape[1]:
        # special case of square arrays, mode is set to None so that array_a & array_b are returned.
        pad_mode = None

    if pad_mode == "square":
        # calculate desired dimension of square array
        (a_n1, a_m1), (a_n2, a_m2) = array_a.shape, array_b.shape
        dim = max(a_n1, a_n2, a_m1, a_m2)
        # padding rows to have both arrays have dim rows
        if a_n1 < dim:
            array_a = np.pad(array_a, [[0, dim - a_n1], [0, 0]], "constant", constant_values=0)
        if a_n2 < dim:
            array_b = np.pad(array_b, [[0, dim - a_n2], [0, 0]], "constant", constant_values=0)
        # padding columns to have both arrays have dim columns
        if a_m1 < dim:
            array_a = np.pad(array_a, [[0, 0], [0, dim - a_m1]], "constant", constant_values=0)
        if a_m2 < dim:
            array_b = np.pad(array_b, [[0, 0], [0, dim - a_m2]], "constant", constant_values=0)

    if pad_mode in ["row", "row-col"]:
        # padding rows to have both arrays have the same number of rows
        diff = array_a.shape[0] - array_b.shape[0]
        if diff < 0:
            array_a = np.pad(array_a, [[0, -diff], [0, 0]], "constant", constant_values=0)
        else:
            array_b = np.pad(array_b, [[0, diff], [0, 0]], "constant", constant_values=0)

    if pad_mode in ["col", "row-col"]:
        # padding columns to have both arrays have the same number of columns
        diff = array_a.shape[1] - array_b.shape[1]
        if diff < 0:
            array_a = np.pad(array_a, [[0, 0], [0, -diff]], "constant", constant_values=0)
        else:
            array_b = np.pad(array_b, [[0, 0], [0, diff]], "constant", constant_values=0)

    return array_a, array_b


def _translate_array(
    array_a: np.ndarray, array_b: Optional[np.ndarray] = None, weight: Optional[np.ndarray] = None
) -> Tuple[np.ndarray, float]:
    """
    Return translated array_a and translation vector.

    Columns of both arrays will have mean zero.

    Parameters
    ----------
    array_a : ndarray
        The 2D-array to translate.
    array_b : ndarray, optional
        The 2D-array to translate array_a based on.
    weight : ndarray, optional
        The weight vector.

    Returns
    -------
    array_a : ndarray
        If array_b is None, array_a is translated to origin using its centroid.
        If array_b is given, array_a is translated to centroid of array_b (the centroid of
        translated array_a will centroid with the centroid array_b).
    centroid : float
        If array_b is given, the centroid is returned.

    """
    # The mean is strongly affected by outliers and is not a robust estimator for central location
    # see https://docs.python.org/3.6/library/statistics.html?highlight=mean#statistics.mean
    if weight is not None:
        if weight.ndim != 1:
            raise ValueError("The weight should be a 1d row vector.")
        if not (weight >= 0).all():
            raise ValueError("The elements of the weight should be non-negative.")

    centroid_a = np.average(array_a, axis=0, weights=weight)
    if array_b is not None:
        # translation vector to b centroid
        centroid_a -= np.average(array_b, axis=0, weights=weight)
    return array_a - centroid_a, -1 * centroid_a


def _scale_array(array_a, array_b=None) -> Tuple[np.ndarray, float]:
    """
    Return scaled/normalized array_a and scaling vector.

    Parameters
    ----------
    array_a : ndarray
        The 2D-array to scale
    array_b : ndarray, default=None
        The 2D-array to scale array_a based on.

    Returns
    -------
    scaled_a, ndarray
        If array_b is None, array_a is normalized using the Frobenius norm.
        If array_b is given, array_a is scaled to match array_b"s norm (the norm of array_a
        will be equal norm of array_b).
    scale : float
        The scaling factor to match array_b norm.

    """
    # scaling factor to match unit sphere
    scale = 1.0 / np.linalg.norm(array_a)
    if array_b is not None:
        # scaling factor to match array_b norm
        scale *= np.linalg.norm(array_b)
    return array_a * scale, scale


def _hide_zero_padding(
    array_a: np.ndarray,
    remove_zero_col: bool = True,
    remove_zero_row: bool = True,
    tol: float = 1.0e-8,
) -> np.ndarray:
    r"""
    Return array with zero-padded rows (bottom) and columns (right) removed.

    Parameters
    ----------
    array_a : ndarray
        The initial array.
    remove_zero_col : bool, optional
        If True, zero columns (values less than 1e-8) on the right side will be removed.
    remove_zero_row : bool, optional
        If True, zero rows (values less than 1e-8) on the bottom will be removed.
    tol : float, optional
        Tolerance value.

    Returns
    -------
    new_A : ndarray
        Array, with either near zero columns and/or zero rows are removed.

    """
    # Input checking
    if array_a.ndim > 2:
        raise TypeError("Matrix inputs must be 1- or 2- dimensional arrays")
    # Check zero rows from bottom to top
    if remove_zero_row:
        num_row = array_a.shape[0]
        tmp_a = array_a[..., np.newaxis] if array_a.ndim == 1 else array_a
        for array_v in tmp_a[::-1]:
            if any(abs(i) > tol for i in array_v):
                break
            num_row -= 1
        array_a = array_a[:num_row]
    # Cut off zero rows
    if remove_zero_col:
        if array_a.ndim == 2:
            # Check zero columns from right to left
            col_m = array_a.shape[1]
            for array_v in array_a.T[::-1]:
                if any(abs(i) > tol for i in array_v):
                    break
                col_m -= 1
            # Cut off zero columns
            array_a = array_a[:, :col_m]
    return array_a


def compute_error(
    a: np.ndarray, b: np.ndarray, t: np.ndarray, s: Optional[np.ndarray] = None
) -> float:
    r"""Return the one- or two-sided Procrustes (squared Frobenius norm) error.

    The double-sided Procrustes error is defined as

    .. math::
       \|\mathbf{S}\mathbf{A}\mathbf{T} - \mathbf{B}\|_{F}^2 =
       \text{Tr}\left[
            \left(\mathbf{S}\mathbf{A}\mathbf{T} - \mathbf{B}\right)^\dagger
            \left(\mathbf{S}\mathbf{A}\mathbf{T} - \mathbf{B}\right)\right]

    when :math:`\mathbf{S}` is the identity matrix :math:`\mathbf{I}`, this is called the one-sided
    Procrustes error.

    Parameters
    ----------
    a : ndarray
        The 2D-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    b : ndarray
        The 2D-array :math:`\mathbf{B}_{m \times n}` representing the reference matrix.
    t : ndarray
        The 2D-array :math:`\mathbf{T}_{n \times n}` representing the right-hand-side transformation
        matrix.
    s : ndarray, optional
        The 2D-array :math:`\mathbf{S}_{m \times m}` representing the left-hand-side transformation
        matrix. If set to `None`, the one-sided Procrustes error is computed.

    Returns
    -------
    error : float
        The squared Frobenius norm of difference between the transformed array, :math:`\mathbf{S}
        \mathbf{A}\mathbf{T}`, and the reference array, :math:`\mathbf{B}`.

    """
    # transform matrix A to either AT or SAT
    a_trans = np.dot(a, t) if s is None else np.dot(np.dot(s, a), t)
    # subtract matrix B and compute Frobenius norm squared
    return np.linalg.norm(a_trans - b, ord=None) ** 2


def setup_input_arrays(
    array_a: np.ndarray,
    array_b: np.ndarray,
    remove_zero_col: bool,
    remove_zero_row: bool,
    pad: bool,
    translate: bool,
    scale: bool,
    check_finite: bool,
    weight: Optional[np.ndarray] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    r"""
    Check and process array inputs for the Procrustes transformation routines.

    Usually, the precursor step before all Procrustes methods.

    Parameters
    ----------
    array_a : npdarray
        The 2D array :math:`A` being transformed.
    array_b : npdarray
        The 2D reference array :math:`B`.
    remove_zero_col : bool
        If True, zero columns (values less than 1e-8) on the right side will be removed.
    remove_zero_row : bool
        If True, zero rows (values less than 1e-8) on the bottom will be removed.
    pad : bool
        Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
        :math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
    translate : bool
        If true, then translate both arrays :math:`A, B` to the origin, ie columns of the arrays
        will have mean zero.
    scale :
        If True, both arrays are normalized to one with respect to the Frobenius norm, ie
        :math:`Tr(A^T A) = 1`.
    check_finite : bool
        If true, then checks if both arrays :math:`A, B` are numpy arrays and two-dimensional.
    weight : A list of ndarray or ndarray
        A list of the weight arrays or one numpy array. When only on numpy array provided,
        it is assumed that the two arrays :math:`A` and :math:`B` share the same weight matrix.

    Returns
    -------
    (ndarray, ndarray) :
        Returns the padded arrays, in that they have the same matrix dimensions.

    """
    array_a = _setup_input_array_lower(
        array_a, None, remove_zero_col, remove_zero_row, translate, scale, check_finite, weight
    )
    array_b = _setup_input_array_lower(
        array_b, None, remove_zero_col, remove_zero_row, translate, scale, check_finite, weight
    )
    if pad:
        array_a, array_b = _zero_padding(array_a, array_b, pad_mode="row-col")
    return array_a, array_b


def setup_input_arrays_multi(
    array_list: List[np.ndarray],
    array_ref: np.ndarray,
    remove_zero_col: bool,
    remove_zero_row: bool,
    pad_mode: str,
    translate: bool,
    scale: bool,
    check_finite: bool,
    weight: Optional[np.ndarray] = None,
) -> List[np.ndarray]:
    r"""
    Check and process array inputs for the Procrustes transformation routines.

    Parameters
    ----------
    array_list : List
        A list of 2D arrays that being transformed.
    array_ref : ndarray
        The 2D reference array :math:`B`.
    remove_zero_col : bool
        If True, zero columns (values less than 1e-8) on the right side will be removed.
    remove_zero_row : bool
        If True, zero rows (values less than 1e-8) on the bottom will be removed.
    pad_mode : str
        Specifying how to pad the arrays. Should be one of
            - "row"
                The array with fewer rows is padded with zero rows so that both have the same
                number of rows.
            - "col"
                The array with fewer columns is padded with zero columns so that both have the
                same number of columns.
            - "row-col"
                The array with fewer rows is padded with zero rows, and the array with fewer
                columns is padded with zero columns, so that both have the same dimensions.
                This does not necessarily result in square arrays.
            - "square"
                The arrays are padded with zero rows and zero columns so that they are both
                squared arrays. The dimension of square array is specified based on the highest
                dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
    translate : bool
        If true, then translate both arrays :math:`A, B` to the origin, ie columns of the arrays
        will have mean zero.
    scale :
        If True, both arrays are normalized to one with respect to the Frobenius norm, ie
        :math:`Tr(A^T A) = 1`.
    check_finite : bool
        If true, then checks if both arrays :math:`A, B` are numpy arrays and two-dimensional.
    weight : A list of ndarray or ndarray, optional
        A list of the weight arrays or one numpy array. When only on numpy array provided,
        it is assumed that the two arrays :math:`A` and :math:`B` share the same weight matrix.

    Returns
    -------
    List of arrays :
        Returns the padded arrays, in that they have the same matrix dimensions.
    """
    array_list_new = [
        _setup_input_array_lower(
            array_a=arr,
            array_ref=array_ref,
            remove_zero_col=remove_zero_col,
            remove_zero_row=remove_zero_row,
            translate=translate,
            scale=scale,
            check_finite=check_finite,
            weight=weight,
        )
        for arr in array_list
    ]
    arr_shape = np.array([arr.shape for arr in array_list_new])
    array_b = np.ones(np.max(arr_shape, axis=0), dtype=int)
    array_list_new = [_zero_padding(arr, array_b, pad_mode=pad_mode) for arr in array_list_new]
    return array_list_new


def _setup_input_array_lower(
    array_a: np.ndarray,
    array_ref: np.ndarray,
    remove_zero_col: np.ndarray,
    remove_zero_row: np.ndarray,
    translate: bool,
    scale: bool,
    check_finite: bool,
    weight: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Pre-processing the matrices with translation, scaling."""
    _check_arraytypes(array_a)
    if check_finite:
        array_a = np.asarray_chkfinite(array_a)
        # Sometimes arrays already have zero padding that messes up zero padding below.
    array_a = _hide_zero_padding(array_a, remove_zero_col, remove_zero_row)
    if translate:
        array_a, _ = _translate_array(array_a, array_ref, weight)
    # scale the matrix when translate is False, but weight is True
    else:
        if weight is not None:
            array_a = np.dot(np.diag(weight), array_a)

    if scale:
        array_a, _ = _scale_array(array_a, array_ref)
    return array_a


def _check_arraytypes(*args) -> None:
    r"""Check array input types to Procrustes transformation routines."""
    if any(not isinstance(arr_x, np.ndarray) for arr_x in args):
        raise TypeError("Matrix inputs must be NumPy arrays")
    if any(x.ndim != 2 for x in args):
        raise TypeError("Matrix inputs must be 2-dimensional arrays")


class ProcrustesResult(dict):
    r"""Represents the Procrustes analysis result.

    Attributes
    ----------
    error : float
        The Procrustes (squared Frobenius norm) error.
    new_a : ndarray
        The translated/scaled numpy ndarray :math:`\mathbf{A}`.
    new_b : ndarray
        The translated/scaled numpy ndarray :math:`\mathbf{B}`.
    t : ndarray
        The 2D-array :math:`\mathbf{T}` representing the right-hand-side transformation matrix.
    s : ndarray
        The 2D-array :math:`\mathbf{S}` representing the left-hand-side transformation
        matrix. If set to `None`, the one-sided Procrustes was performed.

    """

    # modification on https://github.com/scipy/scipy/blob/v1.4.1/scipy/optimize/optimize.py#L77-L132
    def __getattr__(self, name: str):
        """Deal with attributes which it doesn't explicitly manage."""
        try:
            return self[name]
        # Not using raise from makes the traceback inaccurate, because the message implies there
        # is a bug in the exception-handling code itself, which is a separate situation than
        # wrapping an exception
        # W0707 from http://pylint.pycqa.org/en/latest/technical_reference/features.html
        except KeyError as ke_info:
            raise AttributeError(name) from ke_info

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __repr__(self):
        """Return a human friendly representation."""
        if self.keys():
            max_len = max(map(len, list(self.keys()))) + 1
            return "\n".join([k.rjust(max_len) + ": " + repr(v) for k, v in sorted(self.items())])
        else:
            return self.__class__.__name__ + "()"

    def __dir__(self):
        """Provide basic customization of module attribute access with a list."""
        return list(self.keys())