Pytorch: Weighted Covariance - numpy

I am trying to implement a PyTorch covariance matrix operator. However, I notice the results are not the same between the Numpy implementation and my attempt, yet I do not understand why.
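I define the Bessel-corrected weighted covariance matrix as:
$$\Sigma_w = \frac{1}{\sum_{i} w_i - 1} \sum_{i} w_i \,(x_i - \mu_w)(x_i - \mu_w)^{T}$$
I define the weighted mean as:
$$\mu_w = \frac{\sum_{i} w_i \, x_i}{\sum_{i} w_i}$$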
I compare the NumPy method and my method as follows:
import numpy as np
import torch
# Compare a hand-written PyTorch weighted mean/covariance with NumPy's.
torch.set_printoptions(precision=8)
# 1000 samples with 3 features each, and one non-negative weight per sample.
x = np.random.randn(1000, 3)*1000
w = np.abs(np.random.randn(1000))*1000
x_torch = torch.DoubleTensor(x)
w_torch = torch.DoubleTensor(w)
# Weighted means: x_torch.T is (3, 1000), so the weights broadcast along the
# sample axis; summing over axis 1 leaves one mean per feature.
m_w = torch.sum(x_torch.T*w_torch, axis=1)/torch.sum(w_torch)
m_w_np = np.average(x, axis=0, weights=w)
# Weighted covariance matrix: Q holds the centred data as (3, 1000), and the
# weighted scatter matrix (w*Q) @ Q.T is divided by sum(w) - 1.
Q = (x_torch-m_w).T
cov_w = (1.0 / (torch.sum(w_torch) - 1))*(w_torch*Q).mm(Q.T)
# NOTE(review): np.cov with aweights and the default ddof=1 appears to
# normalise by sum(w) - sum(w**2)/sum(w) rather than sum(w) - 1, which would
# explain the mismatch printed below -- confirm against the numpy.cov docs.
cov_w_np = np.cov(x.T, aweights=w.T)
print("WEIGHTED MEAN")
print("NUMPY = {0}\n\nTORCH = {1}\n\nDIFFERENCE={2}".format(m_w_np, m_w.numpy(), m_w_np-m_w.numpy()))
print("")
print("")
print("WEIGHTED COVARIANCE")
print("NUMPY = {0}\n\nTORCH = {1}\n\nDIFFERENCE={2}".format(cov_w_np, cov_w.numpy(),cov_w_np-cov_w.numpy()))
This yields the following output:
WEIGHTED MEAN
NUMPY = [-21.10537208 -7.70801723 64.4034329 ]
TORCH = [-21.10537208 -7.70801723 64.4034329 ]
DIFFERENCE=[-7.10542736e-15 -1.77635684e-15 1.42108547e-14]
WEIGHTED COVARIANCE
NUMPY = [[ 989468.17457696 13620.54885133 10723.87790683]
[ 13620.54885133 953966.92486133 21407.69378841]
[ 10723.87790683 21407.69378841 1019646.81044077]]
TORCH = [[ 987952.51042915 13599.68493868 10707.45110536]
[ 13599.68493868 952505.64141296 21374.90155234]
[ 10707.45110536 21374.90155234 1018084.91875621]]
DIFFERENCE=[[1515.6641478 20.86391265 16.42680147]
[ 20.86391265 1461.28344838 32.79223607]
[ 16.42680147 32.79223607 1561.89168456]]

Related

I'm creating a linear regression model and i am receiving an error

I was creating a linear regression model with TensorFlow's linear estimator, but when I run the estimator's train function I receive an InvalidArgumentError that says "Labels must be <= n_classes - 1". I don't know in which part of the code I have gone wrong.
this is the code i was running
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
data = pd.read_csv(r"C:\Users\XPRESS\Downloads\CarPrice_Assignment.csv") # load the data
data.head()
# split data into training and testing sets (80% / 20%, fixed seed)
from sklearn.model_selection import train_test_split
train , test = train_test_split(data,random_state=42,test_size=0.2)
# train_x / eval_x are aliases of train / test, not copies: the pop() calls
# below remove the 'price' column from those same frames, leaving only the
# features in train_x / eval_x and the labels in train_y / eval_y.
train_x = train
train_y = train.pop('price')
eval_x = test
eval_y = test.pop('price')
lst = list(train_x.columns)
# Partition the feature columns by dtype: columns stored with dtype 'object'
# are treated as categorical, everything else as numerical.
# Fixed: the original appended `_` instead of the loop variable, so the
# categorical list never received the column names.
categorical_columns = [
    column for column in lst if train_x[column].dtypes == 'object'
]
numerical_columns = [
    column for column in lst if column not in categorical_columns
]
train_x.info()
# Describe every input column to the estimator: categorical features become
# vocabulary-list columns (vocabulary = the distinct values seen in train_x),
# followed by the numerical features as float32 numeric columns.
feature_columns = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        column, train_x[column].unique()
    )
    for column in categorical_columns
]
feature_columns.extend(
    tf.feature_column.numeric_column(column, dtype=tf.float32)
    for column in numerical_columns
)
def make_input_fn(data, label, num_epochs=10, shuffle=True, batch_size=32):
    """Return a zero-argument function that builds the input dataset.

    The returned ``input_fn`` creates a ``tf.data.Dataset`` from
    ``(dict(data), label)``, optionally shuffles it with a buffer of 1000
    elements, then batches it by ``batch_size`` and repeats it
    ``num_epochs`` times. Nothing is built until ``input_fn`` is called.
    """
    def input_fn():
        dataset = tf.data.Dataset.from_tensor_slices((dict(data), label))
        if shuffle:
            dataset = dataset.shuffle(1000)
        return dataset.batch(batch_size).repeat(num_epochs)

    return input_fn
# Training input uses make_input_fn's defaults (shuffled, 10 epochs); the
# evaluation input is a single unshuffled pass.
train_input_funtion = make_input_fn(train_x,train_y)
eval_input_function = make_input_fn(eval_x,eval_y,shuffle=False,num_epochs=1)
# NOTE(review): train_y is the continuous 'price' column, but this estimator
# is a classifier -- the "Labels must be <= n_classes - 1" assertion in the
# traceback is raised on those price values. Presumably a regressor
# (tf.estimator.LinearRegressor) is what is wanted here; confirm.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_est.train(train_input_funtion)
this is the error i received
InvalidArgumentError: 2 root error(s) found.
(0) INVALID_ARGUMENT: assertion failed: [Labels must be <= n_classes - 1] [Condition x <= y did not hold element-wise:] [x (head/losses/Cast:0) = ] [[7895][10795][17710]...] [y (head/losses/check_label_range/Const:0) = ] [1]
[[{{function_node head_losses_check_label_range_assert_less_equal_Assert_AssertGuard_false_22323}}{{node Assert}}]]
[[training/Ftrl/gradients/gradients/linear/linear_model/linear/linear_model/linear/linear_model/enginelocation/weighted_sum_grad/Select_1/_1047]]
(1) INVALID_ARGUMENT: assertion failed: [Labels must be <= n_classes - 1] [Condition x <= y did not hold element-wise:] [x (head/losses/Cast:0) = ] [[7895][10795][17710]...] [y (head/losses/check_label_range/Const:0) = ] [1]
[[{{function_node head_losses_check_label_range_assert_less_equal_Assert_AssertGuard_false_22323}}{{node Assert}}]]
0 successful operations.
0 derived errors ignored.
...
[[training/Ftrl/gradients/gradients/linear/linear_model/linear/linear_model/linear/linear_model/enginelocation/weighted_sum_grad/Select_1/_1047]]
(1) INVALID_ARGUMENT: assertion failed: [Labels must be <= n_classes - 1] [Condition x <= y did not hold element-wise:] [x (head/losses/Cast:0) = ] [[7895][10795][17710]...] [y (head/losses/check_label_range/Const:0) = ] [1]
[[{{node Assert}}]]
0 successful operations.
0 derived errors ignored.
You mentioned that you are creating a regression model, but the code uses tf.estimator.LinearClassifier. Maybe you meant to use tf.estimator.LinearRegressor instead?

Align the Truncated SVD from sklearn.decomposition and np.linalg.svd

=========update==========
I read the following information in this book:
The matrix that is actually returned by TruncatedSVD is the dot product of the U and S matrices.
Then i try to just multiply U and Sigma:
# Scale the left singular vectors by the singular values only (no VT factor).
# Presumably U and Sigma are the truncated versions from the NumPy script.
US = U.dot(Sigma)
print("==>> US: ", US)
This time it produces the same result, just with the signs flipped. So why doesn't TruncatedSVD need the multiplication by VT?
==========previous question===========
I am learning SVD. I found that numpy and sklearn both provide related APIs, so I tried to use them for dimensionality reduction. Below is the code:
import numpy as np
# Full SVD of a 7x5 matrix with NumPy, then truncation to 2 components.
np.set_printoptions(precision=2, suppress=True)
A = np.array([
[1,1,1,0,0],
[3,3,3,0,0],
[4,4,4,0,0],
[5,5,5,0,0],
[0,2,0,4,4],
[0,0,0,5,5],
[0,1,0,2,2]])
# s is the 1-D array of singular values; it is expanded into Sigma below.
U, s, VT = np.linalg.svd(A)
print("==>> U: ", U)
print("==>> VT: ", VT)
# create m x n Sigma matrix
Sigma = np.zeros((A.shape[0], A.shape[1]))
# populate Sigma with n x n diagonal matrix
square_len = min((A.shape[0], A.shape[1]))
Sigma[:square_len, :square_len] = np.diag(s)
print("==>> Sigma: ", Sigma)
# keep only the first n_elements singular triplets
n_elements = 2
U = U[:, :n_elements]
Sigma = Sigma[:n_elements, :n_elements]
# NOTE(review): this slice drops columns of VT as well as rows, so B below is
# 7x2 (as in the printed output) rather than a 7x5 rank-2 approximation of A;
# VT[:n_elements, :] would keep every column.
VT = VT[:n_elements, :n_elements]
# reconstruct
B = U.dot(Sigma.dot(VT))
print("==>> B: ", B)
The output B is :
==>> B: [[ 0.99 1.01]
[ 2.98 3.04]
[ 3.98 4.05]
[ 4.97 5.06]
[ 0.36 1.29]
[-0.37 0.73]
[ 0.18 0.65]]
then this is sklearn code:
import numpy as np
from sklearn.decomposition import TruncatedSVD
# Same 7x5 matrix as in the NumPy example, cast to float.
A = np.array([
[1,1,1,0,0],
[3,3,3,0,0],
[4,4,4,0,0],
[5,5,5,0,0],
[0,2,0,4,4],
[0,0,0,5,5],
[0,1,0,2,2]]).astype(float)
# Keep the 2 leading components.
svd = TruncatedSVD(n_components=2)
svd.fit(A) # Fit model on training data A
print("==>> right singular vectors: ", svd.components_)
print("==>> svd.singular_values_: ", svd.singular_values_)
# B has one row per row of A and n_components columns (7x2 in the output).
B = svd.transform(A) # Perform dimensionality reduction on A.
print("==>> B: ", B)
its last output result is:
==>> B: [[ 1.72 -0.22]
[ 5.15 -0.67]
[ 6.87 -0.9 ]
[ 8.59 -1.12]
[ 1.91 5.62]
[ 0.9 6.95]
[ 0.95 2.81]]
As we can see, they produce different results (although I notice their singular values are the same, both 12.48 and 9.51). How can I make them the same? Did I misunderstand something?
I think the correct way to perform a dimensionality reduction of the array A with np.linalg.svd is:
# np.linalg.svd returns the right singular vectors as the rows of its third
# output (named V here), so V.T holds them as columns.
U, s, V = np.linalg.svd(A)
VT = V.T
# Project A onto the first n_elements right singular vectors.
# Fixed: the matrix-multiplication operator `@` had been garbled into `#`,
# which turned the projection into a comment and left B equal to A.
B = A @ VT[:, :n_elements]
Now B is:
array([[-1.72, 0.22],
[-5.15, 0.67],
[-6.87, 0.9 ],
[-8.59, 1.12],
[-1.91, -5.62],
[-0.9 , -6.95],
[-0.95, -2.81]])
That is exactly what you get from the TruncatedSVD, but with negative sign.

How to Plot in 3D Principal Component Analysis Visualizations, using the fast PCA script from this answer

I found this fast script here on Stack Overflow for performing PCA on a given numpy array.
I don't know how to plot the result in 3D, nor how to plot the cumulative explained variance against the number of components in 3D. This fast script uses the covariance method rather than singular value decomposition; maybe that's the reason why I can't get my cumulative variances?
I tried plotting with this, but it doesn't work.
This is the code and my output:
from numpy import array, dot, mean, std, empty, argsort
from numpy.linalg import eigh, solve
from numpy.random import randn
from matplotlib.pyplot import subplots, show
def cov(X):
    """
    Return the covariance matrix of X.

    note: X must already be mean-centered; no centering is done here
    note: normalizes by the number of rows N of X, whereas numpy's `cov`
          uses N-1
    """
    # Element-wise equivalent of the matrix product below, kept for reference:
    #   N = X.shape[1]
    #   C = empty((N, N))
    #   for j in range(N):
    #       C[j, j] = mean(X[:, j] * X[:, j])
    #       for k in range(j + 1, N):
    #           C[j, k] = C[k, j] = mean(X[:, j] * X[:, k])
    n_rows = X.shape[0]
    scatter = dot(X.T, X)
    return scatter / n_rows
def pca(data, pc_count = None):
    """
    Principal component analysis via eigen-decomposition of the covariance.

    note: `data` is mean-centered and scaled to unit standard deviation
          column-wise IN PLACE, so the caller's array is modified
    note: prints the eigenvalues, eigenvectors, selected indices and the
          projected data before returning

    Returns the tuple (U, E, V): the data projected onto the kept
    eigenvectors, the kept eigenvalues (largest first) and the kept
    eigenvectors as columns. With pc_count=None every component is kept.
    """
    # Standardize each column in place.
    data -= mean(data, 0)
    data /= std(data, 0)

    eigenvalues, eigenvectors = eigh(cov(data))

    # Indices of the eigenpairs ordered by decreasing eigenvalue, cut to the
    # requested number of components.
    order = argsort(eigenvalues)[::-1][:pc_count]
    eigenvalues = eigenvalues[order]
    eigenvectors = eigenvectors[:, order]

    # Coordinates of every row of `data` in the principal-component basis.
    scores = dot(data, eigenvectors)

    print(f'Eigen Values: {eigenvalues}')
    print(f'Eigen Vectors: {eigenvectors}')
    print(f'Key: {order}')
    print(f'U: {scores}')
    print(f'shape: {scores.shape}')
    return scores, eigenvalues, eigenvectors
# NOTE(review): the original comment claims this also converts to numpy, but
# only .transpose() is called here -- confirm the type of dftransformed.
data = dftransformed.transpose() # df transpose and convert to numpy
# pca returns (U, E, V); [0] keeps U, the projected data. pca standardizes
# `data` in place, so the ax1 plots below show the standardized values.
trans = pca(data, 3)[0]
fig, (ax1, ax2) = subplots(1, 2)
# Left panel: first two columns of the data; right panel: first two principal
# components. Rows 0-49 are red, rows from 50 onward blue.
# NOTE(review): the printed U has shape (40, 3), so the [50:] slices select
# nothing for this data set.
ax1.scatter(data[:50, 0], data[:50, 1], c = 'r')
ax1.scatter(data[50:, 0], data[50:, 1], c = 'b')
ax2.scatter(trans[:50, 0], trans[:50, 1], c = 'r')
ax2.scatter(trans[50:, 0], trans[50:, 1], c = 'b')
show()
I understand the eigen values & eigen vectors, but I can't understand this key value, the user didn't comment this section of code in the answer, anyone knows what means each variable printed?
output:
Eigen Values: [126.30390621 68.48966957 26.03124927]
Eigen Vectors: [[-0.05998409 0.05852607 -0.03437937]
[ 0.00807487 0.00157143 -0.12352761]
[-0.00341751 0.03819162 0.08697668]
...
[-0.0210582 0.06601974 -0.04013712]
[-0.03558994 0.02953385 0.01885872]
[-0.06728424 -0.04162485 -0.01508154]]
Key: [439 438 437]
U: [[-12.70954048 8.97405411 -2.79812235]
[ -4.90853527 4.36517107 0.54129243]
[ -2.49370123 0.48341147 7.26682759]
[-16.07860635 6.16100749 5.81777637]
[ -1.81893291 6.48443689 -5.8655646 ]
[ 9.03939039 2.64196391 4.22056618]
[-14.71731064 9.19532016 -2.79275543]
[ 1.60998654 8.37866823 0.86207034]
[ -4.4503797 10.12688097 -5.12453656]
[ 12.16293556 2.2594413 -2.11730311]
[-15.76505125 9.48537581 -2.73906772]
[ -2.54289959 9.86768111 -4.84802992]
[ -5.78214902 9.21901651 -8.13594627]
[ -1.35428398 5.85550586 6.30553987]
[ 12.87261987 0.96283606 -3.26982121]
[ 24.57767477 -4.28214631 6.29510659]
[ 4.13941679 3.3688288 3.01194055]
[ -2.98318764 1.32775227 7.62610929]
[ -4.44461549 -1.49258339 1.39080386]
[ -0.10590795 -0.3313904 8.46363066]
[ 6.05960739 1.03091753 5.10875657]
[-21.27737352 -3.44453629 3.25115921]
[ -1.1183025 0.55238687 10.75611405]
[-10.6359291 7.58630341 -0.55088259]
[ 4.52557492 -8.05670864 2.23113833]
[-11.07822559 1.50970501 4.66555889]
[ -6.89542628 -19.24672805 -3.71322812]
[ -0.57831362 -17.84956249 -5.52002876]
[-12.70262277 -14.05542691 -2.72417438]
[ -7.50263129 -15.83723295 -3.2635125 ]
[ -7.52780216 -17.60790567 -2.00134852]
[ -5.34422731 -17.29394266 -2.69261597]
[ 9.40597893 0.21140292 2.05522806]
[ 12.12423431 -2.80281266 7.81182024]
[ 19.51224195 4.7624575 -11.20523383]
[ 22.38102384 0.82486072 -1.64716468]
[ -8.60947699 4.12597477 -6.01885407]
[ 9.56268414 1.18190655 -5.44074124]
[ 14.97675455 3.31666971 -3.30012109]
[ 20.47530869 -1.95896058 -1.91238615]]
shape: (40, 3)
trans = pca(data, 3)[0] is the U data, since [0] selects the first index of the returned data, and pca returns U, E, V
ax2.scatter(trans[:50, 0], trans[:50, 1], c = 'r') plots the first 50 rows of column 0 against the first 50 rows of column 1, and ax2.scatter(trans[50:, 0], trans[50:, 1], c = 'b') does the same for rows from 50 to the end. This from the sample data given in this fast script, but your data only has shape: (40, 3) (e.g. only 40 rows of data).
In order to plot trans as a 3d scatter plot, extract each of the 3 columns into a separate variable and plot as a scatter plot.
# imports as shown in the linked answer
from numpy import array, dot, mean, std, empty, argsort
from numpy.linalg import eigh, solve
from numpy.random import randn
from matplotlib.pyplot import subplots, show
# other imports
import numpy as np
import matplotlib.pyplot as plt  # fixed: plt.figure() below needs this import
# test data from linked answer (e.g. this fast script)
np.random.seed(365) # makes data repeatable
data = array([randn(8) for k in range(150)]) # creates array with shape (150, 8)
data[:50, 2:4] += 5 # adds 5 to first 50 rows of columns 2:4
data[50:, 2:5] += 5 # adds 5 to rows from 50 onward of columns 2:5
# function call (pca is the function defined in the question's script)
trans = pca(data, 3)[0] # [0] gets U returned by pca(...)
# extract each column to a separate variable
x = trans[:, 0] # all rows of column 0
y = trans[:, 1] # all rows of column 1
z = trans[:, 2] # all rows of column 2
# plot 3d scatter plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x, y, z)
# display the figure (needed when run as a plain script)
show()

Understanding keras.backend.max usage with tf.random_normal

import numpy as np
import tensorflow as tf
from keras import backend as K
# An interactive session lets the tensors below be evaluated with .eval().
sess = tf.InteractiveSession()
# Fixed 2x2x2 tensor of constants.
box_scores1 = tf.constant([[[ 9.188682, 11.484599 ],
[10.06533, 7.557296 ]],
[[10.099248, 10.591225 ],
[10.592823 , 7.8770704]]])
# Symbolic random tensor of the same shape (mean 10, stddev 1, op seed 1).
box_scores2 = tf.random_normal([2,2,2], mean=10, stddev=1, dtype=tf.float32, seed = 1)
# Maximum over the last axis, giving a 2x2 result for each input.
box_class_scores1 = K.max(box_scores1, axis=-1)
box_class_scores2 = K.max(box_scores2, axis=-1)
# NOTE(review): each .eval() is a separate evaluation. The maxima printed for
# box_class_scores2 do not match the printed box_scores2, so the random op
# appears to be re-sampled on every evaluation.
print(box_scores1.eval())
print(box_scores2.eval())
print(box_class_scores1.eval())
print(box_class_scores2.eval())
Output:
[[[ 9.188682 11.484599 ]
[10.06533 7.557296 ]]
[[10.099248 10.591225 ]
[10.592823 7.8770704]]]
[[[ 9.188682 11.484599 ]
[10.06533 7.557296 ]]
[[10.099248 10.591225 ]
[10.592823 7.8770704]]]
[[11.484599 10.06533 ]
[10.591225 10.592823]]
[[10.242094 10.515779]
[12.083789 11.397354]]
As, we can see values in box_scores1 and box_scores2 are same but the result obtained after applying max operation differs. How can the values of box_class_scores1 and box_class_scores2 be different?
Your problem has nothing to do with the max function; it stems from a misunderstanding of TensorFlow. Most of its operations are symbolic, so tf.random_normal does not produce random numbers immediately, but a symbolic operation representing a normal distribution with the given mean and standard deviation.
Then, each time you evaluate this distribution, it generates different outputs, so your first eval looks ok, but the second produces a different output that is given to max, so it produces a different output than just giving a constant vector.

PyMC - variance-covariance matrix estimation

I read the following paper(http://www3.stat.sinica.edu.tw/statistica/oldpdf/A10n416.pdf) where they model the variance-covariance matrix Σ as:
Σ = diag(S)*R*diag(S) (Equation 1 in the paper)
S is the k×1 vector of standard deviations, diag(S) is the diagonal matrix with diagonal elements S, and R is the k×k correlation matrix.
How can I implement this using PyMC ?
Here is some initial code I wrote:
import numpy as np
import pandas as pd
import pymc as pm
# First attempt (work in progress): priors for the mean, the variances and the
# inverse correlation matrix of a k-dimensional normal model.
k=3
prior_mu=np.ones(k)
prior_var=np.eye(k)
prior_corr=np.eye(k)
prior_cov=prior_var*prior_corr*prior_var
post_mu = pm.Normal("returns",prior_mu,1,size=k)
post_var=pm.Lognormal("variance",np.diag(prior_var),1,size=k)
# NOTE(review): n_obs is used here but only assigned further down.
post_corr_inv=pm.Wishart("inv_corr",n_obs,np.linalg.inv(prior_corr))
# Placeholder: how to combine post_var and post_corr_inv is the open question.
post_cov_matrix_inv = ???
# Synthetic observations drawn from a known mean and covariance.
muVector=[10,5,-2]
varMatrix=np.diag([10,20,10])
corrMatrix=np.matrix([[1,.2,0],[.2,1,0],[0,0,1]])
cov_matrix=varMatrix*corrMatrix*varMatrix
n_obs=10000
x=np.random.multivariate_normal(muVector,cov_matrix,n_obs)
obs = pm.MvNormal( "observed returns", post_mu, post_cov_matrix_inv, observed = True, value = x )
model = pm.Model( [obs, post_mu, post_cov_matrix_inv] )
# NOTE(review): MCMC() is constructed without `model` here -- confirm intent.
mcmc = pm.MCMC()
mcmc.sample( 5000, 2000, 3 )
Thanks
[edit]
I think that can be done using the following:
# Fixed: the decorator's leading `@` had been garbled into `#`. Without it
# this is a plain function rather than a node that pm.MvNormal can take as a
# parent.
@pm.deterministic
def post_cov_matrix_inv(post_sdev=post_sdev,post_corr_inv=post_corr_inv):
    """Combine the standard deviations and the inverse correlation matrix.

    Returns diag(post_sdev) * post_corr_inv * diag(post_sdev), following the
    Sigma = diag(S) * R * diag(S) decomposition quoted in the question.
    """
    return np.diag(post_sdev)*post_corr_inv*np.diag(post_sdev)
Here is the solution for the benefit of someone who stumbles onto this post:
# Dimension of the model and the prior hyper-parameters.
p=3
prior_mu=np.ones(p)
prior_sdev=np.ones(p)
prior_corr_inv=np.eye(p)
# Synthetic data: n_obs draws from a normal with known mean, standard
# deviations and correlation matrix (Sigma = diag(S) * R * diag(S)).
muVector=[10,5,1]
sdevVector=[3,5,10]
corrMatrix=np.matrix([[1,0,-.1],[0,1,.5],[-.1,.5,1]])
cov_matrix=np.diag(sdevVector)*corrMatrix*np.diag(sdevVector)
n_obs=2000
x=np.random.multivariate_normal(muVector,cov_matrix,n_obs)
prior_cov=np.diag(prior_sdev)*np.linalg.inv(prior_corr_inv)*np.diag(prior_sdev)
# Stochastic nodes: mean vector, standard deviations, inverse correlation.
post_mu = pm.Normal("returns",prior_mu,1,size=p)
post_sdev=pm.Lognormal("sdev",prior_sdev,1,size=p)
post_corr_inv=pm.Wishart("inv_corr",n_obs,prior_corr_inv)
# Alternative left disabled: a Wishart prior directly on the inverse covariance.
#post_cov_matrix_inv = pm.Wishart("inv_cov_matrix",n_obs,np.linalg.inv(prior_cov))
# Fixed: the decorator's leading `@` had been garbled into `#`. Without it
# this is a plain function rather than a node that pm.MvNormal can take as a
# parent.
@pm.deterministic
def post_cov_matrix_inv(post_sdev=post_sdev,post_corr_inv=post_corr_inv,nobs=n_obs):
    """Build the matrix passed to MvNormal from the sdev and Wishart nodes.

    The Wishart draw is divided by nobs, converted with cov2corr, and then
    multiplied on both sides by the diagonal matrix of inverse standard
    deviations.
    """
    # NOTE(review): cov2corr is not defined or imported in the visible code;
    # presumably a helper that rescales a covariance matrix to unit diagonal
    # -- confirm where it comes from.
    post_sdev_inv=(post_sdev)**-1
    return np.diag(post_sdev_inv)*cov2corr(post_corr_inv/nobs)*np.diag(post_sdev_inv)
# Likelihood: the simulated draws x are attached as observed data.
obs = pm.MvNormal( "observed returns", post_mu, post_cov_matrix_inv, observed = True, value = x )
model = pm.Model( [obs, post_mu, post_sdev ,post_corr_inv])
mcmc = pm.MCMC(model)
# NOTE(review): the positional arguments are presumably (iterations, burn-in,
# thinning) -- confirm against the PyMC 2 MCMC.sample signature.
mcmc.sample( 25000, 15000, 1,progress_bar=False )