# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, check out LICENSE.md
# flake8: noqa
from scipy.sparse import lil_matrix, diags, eye
import math
import numpy as np
import torch
EPSILON = 1e-6  # guards against division by zero in the 'er' normalization
NORMALIZATION = 1e6  # fixed scale applied to the normalized descriptor
def _get_msid(x, y, ts=np.logspace(-1, 1, 256), k=5, m=10, niters=100,
rademacher=False, graph_builder='full',
msid_mode='max', normalized_laplacian=True, normalize='empty'):
"""
Compute the msid score between two samples, x and y
Arguments:
x: x samples
y: y samples
ts: temperature values
k: number of neighbours for graph construction
m: Lanczos steps in SLQ
niters: number of starting random vectors for SLQ
        rademacher: if True, sample random vectors from the Rademacher
            distribution, else from the standard normal distribution
        graph_builder: graph construction method; 'kgraph' is faster
            (options: 'full', 'sparse', 'kgraph')
        msid_mode: 'l2' to compute the l2 norm of the distance between `msid1`
            and `msid2`; 'max' to find the maximum absolute difference between
            two descriptors over temperature
        normalized_laplacian: if True, use normalized Laplacian
        normalize: 'empty' for average heat kernel (corresponds to the empty
            graph normalization of NetLSD), 'complete' for the complete graph
            normalization, 'er' for erdos-renyi normalization, 'none' for no
            normalization
    Returns:
        msid_score: a dictionary with the scalar distance between the
            descriptors under the key 'IMD'
"""
normed_msidx = msid_descriptor(x, ts, k, m, niters, rademacher,
graph_builder, normalized_laplacian,
normalize)
normed_msidy = msid_descriptor(y, ts, k, m, niters, rademacher,
graph_builder, normalized_laplacian,
normalize)
    c = np.exp(-2 * (ts + 1 / ts))  # temperature weighting used in 'max' mode
if msid_mode == 'l2':
score = np.linalg.norm(normed_msidx - normed_msidy)
elif msid_mode == 'max':
score = np.amax(c * np.abs(normed_msidx - normed_msidy))
else:
        raise ValueError("Use either 'l2' or 'max' mode.")
return {'IMD': score}
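# Example usage of _get_msid (a minimal sketch; x and y are assumed to be 2D
# torch tensors of features, e.g. activations of shape (n_samples, n_dims)):
#
#     x = torch.randn(1000, 64)
#     y = torch.randn(1000, 64)
#     score = _get_msid(x, y)  # {'IMD': <scalar distance>}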
def msid_descriptor(x, ts=np.logspace(-1, 1, 256), k=5, m=10, niters=100,
rademacher=False, graph_builder='full',
normalized_laplacian=True, normalize='empty'):
"""
Compute the msid descriptor for a single sample x
Arguments:
x: x samples
ts: temperature values
k: number of neighbours for graph construction
m: Lanczos steps in SLQ
niters: number of starting random vectors for SLQ
        rademacher: if True, sample random vectors from the Rademacher
            distribution, else from the standard normal distribution
        graph_builder: graph construction method; 'kgraph' is faster
            (options: 'full', 'sparse', 'kgraph')
        normalized_laplacian: if True, use normalized Laplacian
        normalize: 'empty' for average heat kernel (corresponds to the empty
            graph normalization of NetLSD), 'complete' for the complete graph
            normalization, 'er' for erdos-renyi normalization, 'none' for no
            normalization
Returns:
normed_msidx: normalized msid descriptor
"""
Lx = _build_graph(x, k, graph_builder, normalized_laplacian)
nx = Lx.shape[0]
msidx = slq_red_var(Lx, m, niters, ts, rademacher)
normed_msidx = _normalize_msid(msidx, normalize, nx, k, ts) * NORMALIZATION
return normed_msidx
def _build_graph(data, k=5, graph_builder='full', normalized=True):
"""
    Return the Laplacian of a kNN graph constructed from data
Arguments:
data: samples
k: number of neighbours for graph construction
graph_builder: if 'kgraph', use faster graph construction
        normalized: if True, use normalized Laplacian
Returns:
L: Laplacian of the graph constructed with data
"""
if graph_builder == 'sparse':
A = construct_graph_sparse(data.cpu().numpy(), k)
elif graph_builder == 'kgraph':
A = construct_graph_kgraph(data.cpu().numpy(), k)
elif graph_builder == 'full':
A = lil_matrix(construct_graph(data, k).cpu().numpy()).tocsr()
    else:
        raise ValueError(
            "Please specify graph_builder: 'full', 'sparse' or 'kgraph'.")
    A = (A + A.T) / 2  # symmetrize the kNN adjacency
    A.data = np.ones(A.data.shape)  # binarize edge weights
L = _laplacian_sparse(A, normalized)
return L
def _normalize_msid(msid, normalization, n, k, ts):
    """Normalize a raw msid descriptor (heat trace) of a graph with n nodes;
    see `_get_msid` for the meaning of the `normalization` options."""
    normed_msid = msid.copy()
if normalization == 'empty':
normed_msid /= n
elif normalization == 'complete':
normed_msid /= (1 + (n - 1) * np.exp(-(1 + 1 / (n - 1)) * ts))
elif normalization == 'er':
xs = np.linspace(0, 1, n)
er_spectrum = 4 / np.sqrt(k) * xs + 1 - 2 / np.sqrt(k)
er_msid = np.exp(-np.outer(ts, er_spectrum)).sum(-1)
normed_msid = normed_msid / (er_msid + EPSILON)
elif normalization == 'none' or normalization is None:
pass
else:
raise ValueError('Unknown normalization parameter!')
return normed_msid
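# Note on the 'empty' normalization above: the Laplacian of the empty graph
# on n nodes is the zero matrix, so its heat trace tr(exp(-t * 0)) equals n
# at every temperature t; dividing the raw descriptor by n therefore yields
# the average heat kernel, matching the empty-graph normalization of NetLSD.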
def _lanczos_m(A, m, nv, rademacher, SV=None):
    """
    Lanczos algorithm computes symmetric m x m tridiagonal matrix T and
    matrix V with orthogonal rows constituting the basis of the Krylov
    subspace K_m(A, x), where x is an arbitrary starting unit vector. This
    implementation parallelizes `nv` starting vectors.
    Arguments:
        A: n x n sparse symmetric matrix
        m: number of Lanczos steps
        nv: number of random vectors
        rademacher: True to use Rademacher distribution,
            False - standard normal for random vectors
        SV: specified starting vectors
    Returns:
        T: a nv x m x m tensor, T[i, :, :] is the ith symmetric tridiagonal
            matrix
        V: a n x m x nv tensor, V[:, :, i] is the ith matrix with orthogonal
            rows
    """
orthtol = 1e-5
    if not isinstance(SV, np.ndarray):
if rademacher:
SV = np.sign(np.random.randn(A.shape[0], nv))
else:
SV = np.random.randn(A.shape[0],
nv) # init random vectors in columns: n x nv
V = np.zeros((SV.shape[0], m, nv))
T = np.zeros((nv, m, m))
np.divide(SV, np.linalg.norm(SV, axis=0), out=SV) # normalize each column
V[:, 0, :] = SV
w = A.dot(SV)
alpha = np.einsum('ij,ij->j', w, SV)
w -= alpha[None, :] * SV
beta = np.einsum('ij,ij->j', w, w)
np.sqrt(beta, beta)
T[:, 0, 0] = alpha
T[:, 0, 1] = beta
T[:, 1, 0] = beta
np.divide(w, beta[None, :], out=w)
V[:, 1, :] = w
t = np.zeros((m, nv))
for i in range(1, m):
SVold = V[:, i - 1, :]
SV = V[:, i, :]
w = A.dot(SV) # sparse @ dense
w -= beta[None, :] * SVold # n x nv
np.einsum('ij,ij->j', w, SV, out=alpha)
T[:, i, i] = alpha
if i < m - 1:
w -= alpha[None, :] * SV # n x nv
# reortho
np.einsum('ijk,ik->jk', V, w, out=t)
w -= np.einsum('ijk,jk->ik', V, t)
np.einsum('ij,ij->j', w, w, out=beta)
np.sqrt(beta, beta)
np.divide(w, beta[None, :], out=w)
T[:, i, i + 1] = beta
T[:, i + 1, i] = beta
            # more reortho
innerprod = np.einsum('ijk,ik->jk', V, w)
reortho = False
for _ in range(100):
                if not (innerprod > orthtol).any():
reortho = True
break
np.einsum('ijk,ik->jk', V, w, out=t)
w -= np.einsum('ijk,jk->ik', V, t)
np.divide(w, np.linalg.norm(w, axis=0)[None, :], out=w)
innerprod = np.einsum('ijk,ik->jk', V, w)
V[:, i + 1, :] = w
if (np.abs(beta) > 1e-6).sum() == 0 or not reortho:
break
return T, V
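# _lanczos_m realizes the three-term recurrence
#     w = A v_i - beta_{i-1} v_{i-1} - alpha_i v_i,    beta_i = ||w||,
# with full reorthogonalization against the previous Krylov vectors to
# counter the loss of orthogonality in floating-point arithmetic.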
def _slq(A, m, niters, rademacher):
"""
Compute the trace of matrix exponential
Arguments:
A: square matrix in trace(exp(A))
m: number of Lanczos steps
        niters: number of quadratures (also, the number of random vectors in
            the Hutchinson trace estimator)
        rademacher: True to use Rademacher distribution, False - standard
            normal for random vectors in Hutchinson
    Returns:
        trace: estimate of trace of matrix exponential
"""
T, _ = _lanczos_m(A, m, niters, rademacher)
eigvals, eigvecs = np.linalg.eigh(T)
expeig = np.exp(eigvals)
sqeigv1 = np.power(eigvecs[:, 0, :], 2)
trace = A.shape[-1] * (expeig * sqeigv1).sum() / niters
return trace
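# Sanity-check sketch (an illustrative example, not part of the original
# module): for a small sparse symmetric A, the SLQ estimate should match the
# exact trace of the matrix exponential within Monte-Carlo error:
#
#     import scipy.sparse as sps
#     from scipy.linalg import expm
#     A = sps.random(100, 100, density=0.1, random_state=0)
#     A = ((A + A.T) / 2).tocsr()  # symmetrize
#     exact = np.trace(expm(A.toarray()))
#     approx = _slq(A, m=10, niters=100, rademacher=False)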
def _slq_ts(A, m, niters, ts, rademacher):
"""
Compute the trace of matrix exponential
Arguments:
A: square matrix in trace(exp(-t*A)), where t is temperature
m: number of Lanczos steps
        niters: number of quadratures (also, the number of random vectors in
            the Hutchinson trace estimator)
ts: an array with temperatures
rademacher: True to use Rademacher distribution, False - standard normal
for random vectors in Hutchinson
Returns:
trace: estimate of trace of matrix exponential across temperatures `ts`
"""
T, _ = _lanczos_m(A, m, niters, rademacher)
eigvals, eigvecs = np.linalg.eigh(T)
expeig = np.exp(-np.outer(ts, eigvals)).reshape(ts.shape[0], niters, m)
sqeigv1 = np.power(eigvecs[:, 0, :], 2)
traces = A.shape[-1] * (expeig * sqeigv1).sum(-1).mean(-1)
return traces
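# The estimate above is standard stochastic Lanczos quadrature: for each
# starting vector, tr(f(A)) is approximated by n * sum_k tau_k^2 * f(theta_k),
# where theta_k are the eigenvalues of the tridiagonal matrix T and tau_k are
# the first components of its eigenvectors; the estimates are then averaged
# over the `niters` starting vectors (the Hutchinson estimator).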
def _slq_ts_fs(A, m, niters, ts, rademacher, fs):
"""
Compute the trace of matrix functions
Arguments:
A: square matrix in trace(exp(-t*A)), where t is temperature
m: number of Lanczos steps
        niters: number of quadratures (also, the number of random vectors in
            the Hutchinson trace estimator)
        ts: an array with temperatures
        rademacher: True to use Rademacher distribution, False - standard
            normal for random vectors in Hutchinson
fs: a list of functions
Returns:
traces: estimate of traces for each of the functions in fs
"""
T, _ = _lanczos_m(A, m, niters, rademacher)
eigvals, eigvecs = np.linalg.eigh(T)
traces = np.zeros((len(fs), len(ts)))
for i, f in enumerate(fs):
expeig = f(-np.outer(ts, eigvals)).reshape(ts.shape[0], niters, m)
sqeigv1 = np.power(eigvecs[:, 0, :], 2)
traces[i, :] = A.shape[-1] * (expeig * sqeigv1).sum(-1).mean(-1)
return traces
def slq_red_var(A, m, niters, ts, rademacher):
    """
    Compute the trace of matrix exponential with reduced variance
    Arguments:
        A: square matrix in trace(exp(-t*A)), where t is temperature
        m: number of Lanczos steps
        niters: number of quadratures (also, the number of random vectors in
            the Hutchinson trace estimator)
        ts: an array with temperatures
        rademacher: True to use Rademacher distribution, False - standard
            normal for random vectors in Hutchinson
    Returns:
        traces: estimate of trace for each temperature value in `ts`
    """
fs = [np.exp, lambda x: x]
traces = _slq_ts_fs(A, m, niters, ts, rademacher, fs)
subee = traces[0, :] - traces[1, :] / np.exp(ts)
sub = - ts * A.shape[0] / np.exp(ts)
return subee + sub
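# Variance reduction in slq_red_var uses the identity function as a control
# variate: the same Lanczos runs estimate both tr(exp(-t*L)) and tr(-t*L),
# and since tr(-t*L) is known exactly (tr(L) = n for the normalized
# Laplacian, assumed here), the correlated error of the linear estimate,
# scaled by exp(-t), is subtracted from the exponential estimate.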
def np_euc_cdist(data):
    """Compute the pairwise Euclidean distance matrix of `data` (n x d)."""
    dd = np.sum(data * data, axis=1)
    dist = -2 * np.dot(data, data.T)
    dist += dd + dd[:, np.newaxis]  # ||a - b||^2 = ||a||^2 + ||b||^2 - 2a.b
    np.fill_diagonal(dist, 0)
    np.sqrt(dist, dist)  # in-place square root
    return dist
def construct_graph_sparse(data, k):
    """Build a sparse kNN adjacency matrix from `data` (n x d)."""
    n = len(data)
    spmat = lil_matrix((n, n))
    dd = np.sum(data * data, axis=1)
    for i in range(n):
        # squared distances to point i, up to a constant shift of ||x_i||^2
        dists = dd - 2 * data[i, :].dot(data.T)
        inds = np.argpartition(dists, k + 1)[:k + 1]
        inds = inds[inds != i]  # drop the self-neighbour
        spmat[i, inds] = 1
    return spmat.tocsr()
def construct_graph_kgraph(data, k):
raise NotImplementedError
#
# n = len(data)
# spmat = lil_matrix((n, n))
# index = pykgraph.KGraph(data, 'euclidean')
# index.build(reverse=0, K=2 * k + 1, L=2 * k + 50)
# result = index.search(data, K=k + 1)[:, 1:]
# spmat[np.repeat(np.arange(n), k, 0), result.ravel()] = 1
# return spmat.tocsr()
def construct_graph(input_features, k, num_splits=10):
    """Build a dense kNN adjacency matrix with torch, computing distances in
    `num_splits` row chunks to bound peak memory usage."""
    num_samples = input_features.shape[0]
    indices = []
    batch_size = math.ceil(num_samples / num_splits)
    for i in range(num_splits):
        start_idx = i * batch_size
        end_idx = min((i + 1) * batch_size, num_samples)
        dist = torch.cdist(input_features[start_idx:end_idx],
                           input_features)
        indices.append(torch.topk(dist, k + 1, dim=-1, largest=False)[1].cpu())
    indices = torch.cat(indices, dim=0)
    graph = torch.zeros(num_samples, num_samples, device=indices.device)
    graph.scatter_(1, indices, 1)
    # every point is its own nearest neighbour; remove the self-loops
    graph -= torch.eye(num_samples, device=graph.device)
    return graph
def _laplacian_sparse(A, normalized=True):
    """Return the (optionally normalized) graph Laplacian of adjacency A."""
    D = A.sum(1).A1  # degree vector
    if normalized:
        Dsqrt = diags(1 / np.sqrt(D))
        L = eye(A.shape[0]) - Dsqrt.dot(A).dot(Dsqrt)  # I - D^-1/2 A D^-1/2
    else:
        L = diags(D) - A
    return L
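if __name__ == '__main__':
    # Smoke test (an illustrative sketch, not part of the original module):
    # compare two random point clouds with reduced Lanczos settings.
    torch.manual_seed(0)
    np.random.seed(0)
    x = torch.randn(200, 16)  # hypothetical feature batches
    y = torch.randn(200, 16)
    print(_get_msid(x, y, k=5, m=10, niters=10))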