################# This Python code is designed by Yancheng Yuan at National University of Singapore to solve ##
################# min 0.5*<X-Sigma, X-Sigma>
################# s.t. X_ii =b_i, i=1,2,...,n
################# X>=tau*I (symmetric and positive semi-definite) ########
#################
################# based on the algorithm in #################
################# ``A Quadratically Convergent Newton Method for              #################
################# Computing the Nearest Correlation Matrix #################
################# By Houduo Qi and Defeng Sun #################
################# SIAM J. Matrix Anal. Appl. 28 (2006) 360--385.
#################
################# Last modified date: March 19, 2016 #################
################# #################
################# The input arguments are Sigma, b>0, tau>=0, and tol (error tolerance) #################
################# #################
################# #################
################# For the correlation matrix problem, set b = np.ones((n,1)) #################
################# #################
################# For a positive definite solution X, #################
################# set tau = 1.0e-5, for example #################
################# set tol = 1.0e-6, or loosen it if very high accuracy is not required #################
################# If the algorithm terminates with the given tol, it means that either ##################
################# the relative gap between the objective function value of the obtained solution ###
################# and the objective function value of the global solution is no more than tol, or ###
################# the violation of feasibility is smaller than tol*(1+np.linalg.norm(b)) ############
#################
################# The outputs include the optimal primal solution, the dual solution and others ########
################# Diagonal Preconditioner is used #################
#################
################# Send your comments to [email protected] or [email protected] #################
#################
################# Warning: Though the code works extremely well, it is your call to use it or not. #################
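#################
################# Method sketch (per Qi & Sun, 2006): the code minimizes the dual #################
################# theta(y) = 0.5*||(G + diag(y))_+||_F^2 - b'*y over y in R^n, #################
################# where (.)_+ denotes projection onto the PSD cone, by a semismooth #################
################# Newton method whose linear systems are solved by diagonally #################
################# preconditioned CG; the primal solution is X = (G + diag(y*))_+. #################
#################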
import numpy as np
import time
# Evaluate the dual objective f(y) and F(y) = diag((G + diag(y))_+);
# the gradient of the dual objective is F(y) - b_0.
def my_gradient(y_input, lamb, p_input, b_0, n):
    # lamb, p_input: eigenvalues (descending) and eigenvectors of G + diag(y).
    # Scaling each eigenvector column by sqrt(max(lambda_j, 0)) gives
    # Fy = diag(P * Lambda_+ * P'), the diagonal of the PSD projection of G + diag(y).
    lamb_plus = np.maximum(lamb.reshape(-1), 0).astype(float)
    scaled = p_input * np.sqrt(lamb_plus)  # broadcasts sqrt(lambda_j+) over column j
    Fy = np.sum(scaled * scaled, axis=1).reshape((n, 1))
    f = 0.5 * np.sum(lamb_plus ** 2) - np.dot(b_0.transpose(), y_input)
    return f, Fy
# Use PCA (a low-rank reconstruction from the eigendecomposition) to generate a primal feasible solution
def my_pca(x_input, lamb, p_input, b_0, n):
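    # Given the eigendecomposition (lamb, p_input) of x_input (eigenvalues descending),
    # form the PSD projection from whichever eigenvalue block (positive or negative)
    # is smaller, then rescale so that the diagonal equals b_0 while staying PSD.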
x_pca = x_input
lamb = np.asarray(lamb)
lp = lamb > 0
r = lamb[lp].size
if r == 0:
x_pca = np.zeros((n, n))
elif r == n:
x_pca = x_input
elif r < (n / 2.0):
lamb1 = lamb[lp].copy()
lamb1 = lamb1.transpose()
lamb1 = np.sqrt(lamb1.astype(float))
P1 = p_input[:, 0:r].copy()
if r > 1:
P1 = np.dot(P1, np.diagflat(lamb1))
x_pca = np.dot(P1, P1.transpose())
        else:
            # r == 1: rank-one case; np.square(lamb1) is a length-1 array, so take the
            # scalar eigenvalue and use a plain scalar * (P1 P1') product.
            x_pca = float(lamb1[0] ** 2) * np.dot(P1, P1.transpose())
else:
lamb2 = -lamb[r:n].copy()
lamb2 = np.sqrt(lamb2.astype(float))
p_2 = p_input[:, r:n]
p_2 = np.dot(p_2, np.diagflat(lamb2))
x_pca = x_pca + np.dot(p_2, p_2.transpose())
# To make x_pca positive semidefinite with diagonal elements exactly b0
d = x_pca.diagonal()
d = d.reshape((d.size, 1))
d = np.maximum(d, b_0.reshape(d.shape))
x_pca = x_pca - np.diagflat(x_pca.diagonal()) + np.diagflat(d)
d = d.astype(float) ** (-0.5)
d = d * ((np.sqrt(b_0.astype(float))).reshape(d.shape))
x_pca = x_pca * (np.dot(d, d.reshape(1, d.size)))
return x_pca
# end of PCA
# Generate Omega_12, the first-order coefficient block of the generalized Jacobian
# (the essential part of the directional derivative of the PSD projection)
def my_omega_mat(p_input, lamb, n):
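    # The (r x s) block Omega_12 pairs positive eigenvalues lambda_i with nonpositive
    # ones lambda_j:  omega_12[i, j] = lambda_i / (lambda_i + |lambda_j|).
    # Eigenvalues are assumed sorted in descending order, so the first r are positive.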
    idx_idp = np.where(lamb > 0)[0]
    n = lamb.size
    r = idx_idp.size
if r > 0:
if r == n:
omega_12 = np.ones((n, n))
else:
s = n - r
dp = lamb[0:r].copy()
dp = dp.reshape(dp.size, 1)
dn = lamb[r:n].copy()
dn = dn.reshape((dn.size, 1))
omega_12 = np.dot(dp, np.ones((1, s)))
            omega_12 = omega_12 / (np.dot(np.abs(dp), np.ones((1, s))) + np.dot(np.ones((r, 1)), np.abs(dn.transpose())))
omega_12 = omega_12.reshape((r, s))
else:
omega_12 = np.array([])
return omega_12
# End of my_omega_mat
# To generate Jacobian
def my_jacobian_matrix(x, omega_12, p_input, n):
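    # Matrix-free application of the generalized Jacobian V to a vector x:
    # x_result = diag(P * (Omega o (P' * diag(x) * P)) * P') + 1.0e-10 * x,
    # where "o" is the Hadamard product; the small 1.0e-10 * x term is a Tikhonov
    # perturbation that keeps the operator positive definite.  The two branches
    # work with whichever eigenvalue block (positive or nonpositive) is smaller.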
x_result = np.zeros((n, 1))
[r, s] = omega_12.shape
if r > 0:
hmat_1 = p_input[:, 0:r].copy()
if r < n / 2.0:
i = 0
while i < n:
hmat_1[i, :] = x[i] * hmat_1[i, :]
i = i + 1
omega_12 = omega_12 * (np.dot(hmat_1.transpose(), p_input[:, r:n]))
hmat = np.dot(hmat_1.transpose(), np.dot(p_input[:, 0:r], p_input[:, 0:r].transpose()))
hmat = hmat + np.dot(omega_12, p_input[:, r:n].transpose())
hmat = np.vstack((hmat, np.dot(omega_12.transpose(), p_input[:, 0:r].transpose())))
i = 0
while i < n:
x_result[i] = np.dot(p_input[i, :], hmat[:, i])
x_result[i] = x_result[i] + 1.0e-10 * x[i]
i = i + 1
else:
if r == n:
x_result = 1.0e-10 * x
else:
hmat_2 = p_input[:, r:n].copy()
i = 0
while i < n:
hmat_2[i, :] = x[i] * hmat_2[i, :]
i = i + 1
omega_12 = np.ones((r, s)) - omega_12
omega_12 = omega_12 * (np.dot(p_input[:, 0:r].transpose(), hmat_2))
hmat = np.dot(p_input[:, r:n].transpose(), hmat_2)
hmat = np.dot(hmat, p_input[:, r:n].transpose())
hmat = hmat + np.dot(omega_12.transpose(), p_input[:, 0:r].transpose())
hmat = np.vstack((np.dot(omega_12, p_input[:, r:n].transpose()), hmat))
i = 0
while i < n:
x_result[i] = np.dot(-p_input[i, :], hmat[:, i])
x_result[i] = x[i] + x_result[i] + 1.0e-10 * x[i]
i = i + 1
return x_result
# end of Jacobian
# PCG Method
def my_pre_cg(b, tol, maxit, c, omega_12, p_input, n):
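    # Preconditioned conjugate gradient for the Newton system V(d) = b, where V is
    # applied matrix-free via my_jacobian_matrix and c is a diagonal (Jacobi)
    # preconditioner.  Returns flag = 0 on convergence to tol; on nonpositive
    # curvature it exits early with the normalized current direction.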
# Initializations
r = b.copy()
r = r.reshape(r.size, 1)
c = c.reshape(c.size, 1)
n2b = np.linalg.norm(b)
tolb = tol * n2b
p = np.zeros((n, 1))
flag = 1
iterk = 0
relres = 1000
# Precondition
z = r / c
rz_1 = np.dot(r.transpose(), z)
rz_2 = 1
d = z.copy()
# CG Iteration
for k in range(0, maxit):
if k > 0:
beta = rz_1 / rz_2
d = z + beta * d
w = my_jacobian_matrix(d, omega_12, p_input, n)
denom = np.dot(d.transpose(), w)
iterk = k + 1
relres = np.linalg.norm(r) / n2b
        if denom <= 0:
            # Nonpositive curvature detected: return the normalized current direction.
            p = d / np.linalg.norm(d)
            break
else:
alpha = rz_1 / denom
p = p + alpha * d
r = r - alpha * w
z = r / c
if np.linalg.norm(r) <= tolb: # exit if hmat p = b solved in relative error tolerance
iterk = k + 1
relres = np.linalg.norm(r) / n2b
flag = 0
break
rz_2 = rz_1
rz_1 = np.dot(r.transpose(), z)
return p, flag, relres, iterk
# end of pre_cg
# to generate the diagonal preconditioner
def my_precond_matrix(omega_12, p_input, n):
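    # Estimate the diagonal of the generalized Jacobian to use as a Jacobi
    # preconditioner in CG; entries are floored at 1.0e-8 to keep them positive.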
[r, s] = omega_12.shape
c = np.ones((n, 1))
if r > 0:
if r < n / 2.0:
hmat = (p_input.copy()).transpose()
hmat = hmat * hmat
hmat_12 = np.dot(hmat[0:r, :].transpose(), omega_12)
d = np.ones((r, 1))
for i in range(0, n):
c_temp = np.dot(d.transpose(), hmat[0:r, i])
c_temp = c_temp * hmat[0:r, i]
c[i] = np.sum(c_temp)
c[i] = c[i] + 2.0 * np.dot(hmat_12[i, :], hmat[r:n, i])
if c[i] < 1.0e-8:
c[i] = 1.0e-8
else:
if r < n:
hmat = (p_input.copy()).transpose()
hmat = hmat * hmat
omega_12 = np.ones((r, s)) - omega_12
hmat_12 = np.dot(omega_12, hmat[r:n, :])
d = np.ones((s, 1))
dd = np.ones((n, 1))
for i in range(0, n):
c_temp = np.dot(d.transpose(), hmat[r:n, i])
c[i] = np.sum(c_temp * hmat[r:n, i])
c[i] = c[i] + 2.0 * np.dot(hmat[0:r, i].transpose(), hmat_12[:, i])
alpha = np.sum(hmat[:, i])
c[i] = alpha * np.dot(hmat[:, i].transpose(), dd) - c[i]
if c[i] < 1.0e-8:
c[i] = 1.0e-8
return c
# end of precond_matrix
# my_issorted()
def my_issorted(x_input, flag):
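    # flag == 1: test whether x_input is sorted in nondecreasing order;
    # flag == -1: test whether it is sorted in nonincreasing order.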
n = x_input.size
tf_value = False
if n < 2:
tf_value = True
else:
if flag == 1:
i = 0
while i < n - 1:
if x_input[i] <= x_input[i + 1]:
i = i + 1
else:
break
if i == n - 1:
tf_value = True
elif i < n - 1:
tf_value = False
elif flag == -1:
i = n - 1
while i > 0:
if x_input[i] <= x_input[i - 1]:
i = i - 1
else:
break
if i == 0:
tf_value = True
elif i > 0:
tf_value = False
return tf_value
# end of my_issorted()
def my_mexeig(x_input):
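    # Eigendecomposition of a symmetric matrix with eigenvalues returned in
    # descending order; p_x holds the eigenvectors column-wise.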
[n, m] = x_input.shape
[lamb, p_x] = np.linalg.eigh(x_input)
    p_x = p_x.real
    lamb = lamb.real
    if my_issorted(lamb, 1):
        # np.linalg.eigh returns eigenvalues in ascending order; flip to descending.
        lamb = lamb[::-1]
        p_x = np.fliplr(p_x)
    elif my_issorted(lamb, -1):
        pass  # already in descending order
    else:
        idx = np.argsort(-lamb)
        lamb = lamb[idx]
        p_x = p_x[:, idx]
    # Always return the eigenvalues as an (n, 1) column vector.
    lamb = lamb.reshape((n, 1))
    p_x = p_x.reshape((n, n))
    return p_x, lamb
# end of my_mexeig()
# begin of the main function
def my_correlationmatrix(g_input, b_input=None, tau=None, tol=None):
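    # Semismooth Newton-CG loop: at each iterate y, eigendecompose G + diag(y), solve
    # the generalized-Newton system by preconditioned CG, and run an Armijo line
    # search on the dual objective.  Iteration stops once either the relative duality
    # gap or the relative gradient norm drops below error_tol, or after iter_whole
    # Newton steps.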
print('-- Semismooth Newton-CG method starts -- \n')
[n, m] = g_input.shape
g_input = g_input.copy()
t0 = time.time() # time start
g_input = (g_input + g_input.transpose()) / 2.0
b_g = np.ones((n, 1))
error_tol = 1.0e-6
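    # MATLAB nargin-style defaults: b = ones(n, 1), tau = 0, tol = 1.0e-6.  A positive
    # tau is handled by shifting the problem (G - tau*I, b - tau), which turns the
    # constraint X >= tau*I into a plain PSD constraint.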
if b_input is None:
tau = 0
elif tau is None:
b_g = b_input.copy()
tau = 0
elif tol is None:
b_g = b_input.copy() - tau * np.ones((n, 1))
g_input = g_input - tau * np.eye(n, n)
else:
b_g = b_input.copy() - tau * np.ones((n, 1))
g_input = g_input - tau * np.eye(n, n)
error_tol = np.maximum(1.0e-12, tol)
res_b = np.zeros((300, 1))
norm_b0 = np.linalg.norm(b_g)
y = np.zeros((n, 1))
f_y = np.zeros((n, 1))
k = 0
f_eval = 0
iter_whole = 200
iter_inner = 20 # maximum number of line search in Newton method
maxit = 200 # maximum number of iterations in PCG
iterk = 0
inner = 0
tol_cg = 1.0e-2 # relative accuracy for CGs
sigma_1 = 1.0e-4
x0 = y.copy()
prec_time = 0
pcg_time = 0
eig_time = 0
c = np.ones((n, 1))
d = np.zeros((n, 1))
val_g = np.sum((g_input.astype(float)) * (g_input.astype(float)))
val_g = val_g * 0.5
x_result = g_input + np.diagflat(y)
x_result = (x_result + x_result.transpose()) / 2.0
eig_time0 = time.time()
[p_x, lamb] = my_mexeig(x_result)
eig_time = eig_time + (time.time() - eig_time0)
[f_0, f_y] = my_gradient(y, lamb, p_x, b_g, n)
initial_f = val_g - f_0
x_result = my_pca(x_result, lamb, p_x, b_g, n)
val_obj = np.sum(((x_result - g_input) * (x_result - g_input))) / 2.0
gap = (val_obj - initial_f) / (1.0 + np.abs(initial_f) + np.abs(val_obj))
f = f_0.copy()
f_eval = f_eval + 1
b_input = b_g - f_y
norm_b = np.linalg.norm(b_input)
time_used = time.time() - t0
print('Newton-CG: Initial Dual objective function value: %s \n' % initial_f)
print('Newton-CG: Initial Primal objective function value: %s \n' % val_obj)
print('Newton-CG: Norm of Gradient: %s \n' % norm_b)
print('Newton-CG: computing time used so far: %s \n' % time_used)
omega_12 = my_omega_mat(p_x, lamb, n)
x0 = y.copy()
while np.abs(gap) > error_tol and norm_b / (1 + norm_b0) > error_tol and k < iter_whole:
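        # One Newton step: build the diagonal preconditioner, solve the Newton system
        # V d = b_input by CG, then backtrack until the dual objective decreases.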
prec_time0 = time.time()
c = my_precond_matrix(omega_12, p_x, n)
prec_time = prec_time + (time.time() - prec_time0)
pcg_time0 = time.time()
[d, flag, relres, iterk] = my_pre_cg(b_input, tol_cg, maxit, c, omega_12, p_x, n)
pcg_time = pcg_time + (time.time() - pcg_time0)
print('Newton-CG: Number of CG Iterations=== %s \n' % iterk)
if flag == 1:
            print('=== CG did not converge: not a complete Newton-CG step === \n')
slope = np.dot((f_y - b_g).transpose(), d)
y = (x0 + d).copy()
x_result = g_input + np.diagflat(y)
x_result = (x_result + x_result.transpose()) / 2.0
eig_time0 = time.time()
[p_x, lamb] = my_mexeig(x_result)
eig_time = eig_time + (time.time() - eig_time0)
[f, f_y] = my_gradient(y, lamb, p_x, b_g, n)
k_inner = 0
while k_inner <= iter_inner and f > f_0 + sigma_1 * (np.power(0.5, k_inner)) * slope + 1.0e-6:
k_inner = k_inner + 1
y = x0 + (np.power(0.5, k_inner)) * d
x_result = g_input + np.diagflat(y)
x_result = (x_result + x_result.transpose()) / 2.0
eig_time0 = time.time()
[p_x, lamb] = my_mexeig(x_result)
eig_time = eig_time + (time.time() - eig_time0)
[f, f_y] = my_gradient(y, lamb, p_x, b_g, n)
f_eval = f_eval + k_inner + 1
x0 = y.copy()
f_0 = f.copy()
val_dual = val_g - f_0
x_result = my_pca(x_result, lamb, p_x, b_g, n)
val_obj = np.sum((x_result - g_input) * (x_result - g_input)) / 2.0
gap = (val_obj - val_dual) / (1 + np.abs(val_dual) + np.abs(val_obj))
print('Newton-CG: The relative duality gap: %s \n' % gap)
print('Newton-CG: The Dual objective function value: %s \n' % val_dual)
print('Newton-CG: The Primal objective function value: %s \n' % val_obj)
b_input = b_g - f_y
norm_b = np.linalg.norm(b_input)
time_used = time.time() - t0
rel_norm_b = norm_b / (1 + norm_b0)
print('Newton-CG: Norm of Gradient: %s \n' % norm_b)
print('Newton-CG: Norm of Relative Gradient: %s \n' % rel_norm_b)
        print('Newton-CG: Computing time used so far: %s \n' % time_used)
res_b[k] = norm_b
k = k + 1
omega_12 = my_omega_mat(p_x, lamb, n)
position_rank = np.maximum(lamb, 0) > 0
rank_x = (np.maximum(lamb, 0)[position_rank]).size
final_f = val_g - f
x_result = x_result + tau * (np.eye(n))
time_used = time.time() - t0
print('\n')
print('Newton-CG: Number of iterations: %s \n' % k)
    print('Newton-CG: Number of Function Evaluations: =========== %s\n' % f_eval)
print('Newton-CG: Final Dual Objective Function value: ========= %s\n' % final_f)
print('Newton-CG: Final Primal Objective Function value: ======= %s \n' % val_obj)
print('Newton-CG: The final relative duality gap: %s \n' % gap)
print('Newton-CG: The rank of the Optimal Solution - tau*I: %s \n' % rank_x)
print('Newton-CG: computing time for computing preconditioners: %s \n' % prec_time)
print('Newton-CG: computing time for linear system solving (cgs time): %s \n' % pcg_time)
print('Newton-CG: computing time for eigenvalue decompositions: =============== %s \n' % eig_time)
    print('Newton-CG: total computing time used: ============ %s \n' % time_used)
return x_result, y
# end of the main function
# Self-test: project a random symmetric matrix with unit diagonal onto the set of
# correlation matrices.  Note that n = 3000 makes this a fairly heavy test.
if __name__ == '__main__':
    n = 3000
    data_g_test = np.random.randn(n, n)
    data_g_test = (data_g_test + data_g_test.transpose()) / 2.0
    data_g_test = data_g_test - np.diag(np.diag(data_g_test)) + np.eye(n)
    b = np.ones((n, 1))
    tau = 0
    tol = 1.0e-6
    [x_test_result, y_test_result] = my_correlationmatrix(data_g_test, b, tau, tol)
    print(x_test_result)
    print(y_test_result)