only size-1 arrays : log function - numpy

# UNQ_C2
# GRADED FUNCTION: compute_cost
def compute_cost(X, y, w, b, lambda_=1):
    """
    Computes the cost over all examples
    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      y : (array_like Shape (m,)) target values
      w : (array_like Shape (n,)) values of the parameters of the model
      b : (scalar) value of the bias parameter of the model
      lambda_ : unused placeholder
    Returns:
      total_cost : (scalar) cost
    """
    m, n = X.shape
    ### START CODE HERE ###
    X = X.transpose()
    predictions = np.dot(w, X) + b
    g = sigmoid(predictions)
    error = (-y*np.math.log(predictions) - (1-y)*math.log(1-predictions))
    cost = 1/m * error
    ### END CODE HERE ###
    return total_cost
m, n = X_train.shape
# Compute and display cost with w initialized to zeroes
initial_w = np.zeros(n)
initial_b = 0.
cost = compute_cost(X_train, y_train, initial_w, initial_b)
print('Cost at initial w (zeros): {:.3f}'.format(cost))
I get this error:
<ipython-input-134-a0c3de514c37> in compute_cost(X, y, w, b, lambda_)
21 g = sigmoid(predictions)
22 print(np.dot(w,X))
---> 23 error = (-y*np.math.log(predictions)-(1-y)*math.log(1-predictions))
24 cost = 1/m * error
25
TypeError: only size-1 arrays can be converted to Python scalars
I'm currently doing a machine learning course assignment.
Does passing an array into the log function cause this error? Any idea how to fix it? I've tried searching the web.
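For what it's worth, math.log (and np.math.log, which is just an alias for it) accepts only Python scalars, which is exactly what the TypeError says, while NumPy's np.log works elementwise on arrays. A minimal sketch of a fix under that reading (note it also takes the log of g, the sigmoid output, rather than of the raw predictions, and sums the per-example errors into a scalar):

import numpy as np

# np.log is elementwise, unlike math.log / np.math.log which take scalars
z = np.dot(X, w) + b                     # shape (m,)
g = sigmoid(z)                           # predicted probabilities, shape (m,)
error = -y * np.log(g) - (1 - y) * np.log(1 - g)
total_cost = np.sum(error) / m           # scalar cost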

Related

Scipy Spatial Distance Sub-module rejects Numpy Array

I have a dataframe named "df" with 4 columns. Three columns are independent variables: x1, x2, and x3. The other variable, y, is the dependent variable.
I would like to calculate the distance, "pdist", between the dependent variable and each of the independent variables, so I first converted each column to a numpy array as follows:
y = df[["y"]].values
x1 = df[["x1"]].values
x2 = df[["x2"]].values
x3 = df[["x3"]].values
When I feed these arrays through this code pipeline I got from GitHub:
import copy
import numpy as np
from scipy.spatial.distance import pdist, squareform

def distance_correlation(Xval, Yval, pval=True, nruns=500):
    X, Y = np.atleast_1d(Xval), np.atleast_1d(Yval)
    # promote 1-D vectors to column matrices
    if np.prod(X.shape) == len(X): X = X[:, None]
    if np.prod(Y.shape) == len(Y): Y = Y[:, None]
    X, Y = np.atleast_2d(X), np.atleast_2d(Y)
    n = X.shape[0]
    if Y.shape[0] != X.shape[0]: raise ValueError('Number of samples must match')
    # pairwise distance matrices
    a, b = squareform(pdist(X)), squareform(pdist(Y))
    # double-centered distance matrices
    A = a - a.mean(axis=0)[None, :] - a.mean(axis=1)[:, None] + a.mean()
    B = b - b.mean(axis=0)[None, :] - b.mean(axis=1)[:, None] + b.mean()
    dcov2_xy = (A * B).sum() / float(n * n)
    dcov2_xx = (A * A).sum() / float(n * n)
    dcov2_yy = (B * B).sum() / float(n * n)
    dcor = np.sqrt(dcov2_xy) / np.sqrt(np.sqrt(dcov2_xx) * np.sqrt(dcov2_yy))
    if pval:
        # permutation test for the p-value
        greater = 0
        for i in range(nruns):
            Y_r = copy.copy(Yval)
            np.random.shuffle(Y_r)
            if distance_correlation(Xval, Y_r, pval=False) > dcor:
                greater += 1
        return (dcor, greater / float(nruns))
    else:
        return dcor
distance_correlation(x1, y, pval=True, nruns=500)
I get this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-32-c720c9df4e97> in <module>
----> 1 distance_correlation(bop_sp500, price, pval=True, nruns=500)
<ipython-input-17-e0b3aea12c32> in distance_correlation(Xval, Yval, pval, nruns)
9 n = X.shape[0]
10 if Y.shape[0] != X.shape[0]:raise ValueError('Number of samples must match')
---> 11 a, b = squareform(pdist(X)),squareform(pdist(Y))
12 A = a - a.mean(axis=0)[None, :] - a.mean(axis=1)[:, None] + a.mean()
13 B = b - b.mean(axis=0)[None, :] - b.mean(axis=1)[:, None] + b.mean()
~\Anaconda3\lib\site-packages\scipy\spatial\distance.py in pdist(X, metric, *args, **kwargs)
1997 s = X.shape
1998 if len(s) != 2:
-> 1999 raise ValueError('A 2-dimensional array must be passed.')
2000
2001 m, n = s
ValueError: A 2-dimensional array must be passed.
Could anyone identify where I am going wrong? I know the error originates from the way I created my numpy arrays, but I have no clue how to fix it.
Please explain it with examples that use my variable definitions; I am new to Python.
OK, so I finally managed to figure out the cause of the problem I faced:
The NumPy arrays being fed into the helper function were 2D arrays,
while the helper function required a "NumPy vector", i.e. a 1D NumPy array.
The easiest way to create one is with the numpy.ravel() function. Hence, for my datasets, the code would be as follows (I have broken the steps down for simplicity):
# Create Arrays
y = df[["y"]].values
x1 = df[["x1"]].values
x2 = df[["x2"]].values
x3 = df[["x3"]].values
# Ravel Them
y = y.ravel()
x1 = x1.ravel()
x2 = x2.ravel()
x3 = x3.ravel()
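For illustration, here is a minimal sketch (with made-up values, not from my dataframe) of what ravel() changes:

import numpy as np

col = np.array([[1.0], [2.0], [3.0]])   # what df[["y"]].values returns: shape (3, 1)
vec = col.ravel()                       # shape (3,), the 1D vector the helper expects

print(col.shape)   # (3, 1)
print(vec.shape)   # (3,)

Note that selecting a column with single brackets, e.g. df["y"].values, already returns a 1D array, so the ravel() step would then be unnecessary.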

Tensorflow 2: Nested TensorArray

What's wrong with this code? Edit: it works on CPU, but fails when run on GPU. It runs for a few iterations, then fails with one of these errors (GitHub issue here):
2019-12-02 12:59:29.727966: F tensorflow/core/framework/tensor_shape.cc:445] Check failed: end <= dims() (1 vs. 0)
Process finished with exit code -1073740791 (0xC0000409)
or
tensorflow.python.framework.errors_impl.InvalidArgumentError: Tried to set a tensor with incompatible shape at a list index. Item element shape: [3,3] list shape: [3]
[[{{node while/body/_1/TensorArrayV2Write/TensorListSetItem}}]] [Op:__inference_computeElement_73]
@tf.function
def computeElement_byBin():
    c = tf.TensorArray(tf.int64, size=1, infer_shape=False, element_shape=(3,))
    const = tf.cast(tf.constant([1, 2, 3]), tf.int64)
    c = c.write(0, const)
    c_c = c.concat()
    return c_c

@tf.function
def computeElement():
    c = tf.TensorArray(tf.int64, size=1, infer_shape=False, element_shape=(3,))
    for x in tf.range(50):
        byBinVariant = computeElement_byBin()
        c = c.write(0, byBinVariant)
    return c.concat()

k = 0
while True:
    k += 1
    r = computeElement()
    print('iteration: %s, result: %s' % (k, r))
I played around with it more and narrowed it down a bit:
@tf.function
def computeElement():
    tarr = tf.TensorArray(tf.int32, size=1, clear_after_read=False)
    tarr = tarr.write(0, [1])
    concat = tarr.concat()
    # PROBLEM HERE
    for x in tf.range(50):
        concat = tarr.concat()
    return concat
If you set tf.config.threading.set_inter_op_parallelism_threads(1) the bug goes away, which means it has to do with parallelization of the unrolled TensorFlow loop. Knowing that TensorFlow unrolls the loop statically when you loop over a Python variable rather than a tensor, I could confirm that this code worked:
@tf.function
def computeElement(arr):
    tarr = tf.TensorArray(tf.int32, size=1)
    tarr = tarr.write(0, [1])
    concat = tarr.concat()
    a = 0
    while a < arr:
        concat = tarr.concat()
        a += 1
    return concat

k = 0
while True:
    k += 1
    r = computeElement(50)
So the solution for now is to loop over a Python variable rather than a tensor.
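To make the distinction concrete, here is a minimal sketch (my addition, assuming TF 2.x): a loop over a Python value is unrolled statically when the function is traced, while a loop over a tensor is compiled into a single tf.while_loop op, whose iterations the runtime may schedule in parallel.

import tensorflow as tf

@tf.function
def unrolled():
    s = tf.constant(0)
    for _ in range(3):      # Python int: AutoGraph unrolls this at trace time
        s += 1
    return s

@tf.function
def graph_loop():
    s = tf.constant(0)
    for _ in tf.range(3):   # tensor: AutoGraph emits one tf.while_loop node
        s += 1
    return s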

matmul function for vector with tensor multiplication in tensorflow

In general, when we multiply a vector v of dimension 1*n with a tensor T of dimension m*n*k, we expect to get a matrix/tensor of dimension m*k (or m*1*k). This means that our tensor has m slices of matrices with dimension n*k; v is multiplied by each matrix and the resulting vectors are stacked together. In order to do this multiplication in tensorflow, I came up with the following formulation. I am just wondering if there is any built-in function that does this standard multiplication directly?
T = tf.Variable(tf.random_normal((m,n,k)), name="tensor")
v = tf.Variable(tf.random_normal((1,n)), name="vector")
c = tf.stack([v,v]) # m times, here set m=2
output = tf.matmul(c,T)
You can do it with:
tf.reduce_sum(tf.expand_dims(v,2)*T,1)
Code:
import tensorflow as tf

m, n, k = 2, 3, 4
T = tf.Variable(tf.random_normal((m,n,k)), name="tensor")
v = tf.Variable(tf.random_normal((1,n)), name="vector")
c = tf.stack([v,v]) # m times, here set m=2
out1 = tf.matmul(c,T)
out2 = tf.reduce_sum(tf.expand_dims(v,2)*T,1)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    n_out1 = sess.run(out1)
    n_out2 = sess.run(out2)
    # both n_out1 and n_out2 match
Not sure if there is a better way, but it sounds like you could use tf.map_fn like this:
output = tf.map_fn(lambda x: tf.matmul(v, x), T)
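As a further option (my addition, not from the original answers), tf.einsum can express the contraction directly. A sketch, reusing v and T from the snippet above:

# multiply v (1*n) with every n*k slice of T (m*n*k), giving shape (m, 1, k)
out3 = tf.einsum('in,mnk->mik', v, T)
# drop the singleton axis to get the m*k form
out3_mk = tf.squeeze(out3, axis=1)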

`scipy.optimize` functions hang even with `maxiter=0`

I am trying to train a simple multi-class logistic regression on the MNIST data (which I downloaded from Kaggle), but the scipy.optimize functions hang.
Here's the code:
import csv
from math import exp
from numpy import *
from scipy.optimize import fmin, fmin_cg, fmin_powell, fmin_bfgs

# Prepare the data
def getIiter(ifname):
    """
    Get the iterator from a csv file with filename ifname
    """
    ifile = open(ifname, 'r')
    iiter = csv.reader(ifile)
    iiter.__next__()
    return iiter

def parseRow(s):
    y = [int(x) for x in s]
    lab = y[0]
    z = y[1:]
    return (lab, z)

def getAllRows(ifname):
    iiter = getIiter(ifname)
    x = []
    l = []
    for row in iiter:
        lab, z = parseRow(row)
        x.append(z)
        l.append(lab)
    return x, l

def cutData(x, y):
    """
    70% training
    30% testing
    """
    m = len(x)
    t = int(m * .7)
    return [(x[:t], y[:t]), (x[t:], y[t:])]

def num2IndMat(l):
    t = array(l)
    tt = [vectorize(int)((t == i)) for i in range(10)]
    return array(tt).T

def readData(ifname):
    x, l = getAllRows(ifname)
    t = [[1] + y for y in x]
    return array(t), num2IndMat(l)

# Calculate the cost function
def sigmoid(x):
    return 1 / (1 + exp(-x))

vSigmoid = vectorize(sigmoid)
vLog = vectorize(log)

def costFunction(theta, x, y):
    sigxt = vSigmoid(dot(x, theta))
    cm = (- y * vLog(sigxt) - (1 - y) * vLog(1 - sigxt)) / m / N
    return sum(cm)

def unflatten(flatTheta):
    return [flatTheta[i * N : (i + 1) * N] for i in range(n + 1)]

def costFunctionFlatTheta(flatTheta):
    return costFunction(unflatten(flatTheta), trainX, trainY)

def costFunctionFlatTheta1(flatTheta):
    return costFunction(flatTheta.reshape(785, 10), trainX, trainY)

x, y = readData('train.csv')
[(trainX, trainY), (testX, testY)] = cutData(x, y)
m = len(trainX)
n = len(trainX[0]) - 1
N = len(trainY[0])
initTheta = zeros(((n + 1), N))
flatInitTheta = ndarray.flatten(initTheta)
flatInitTheta1 = initTheta.reshape(1, -1)
In the last two lines we flatten initTheta because the fmin{,_cg,_bfgs,_powell} functions seem to take only vectors as the initial value argument x0. I also flatten initTheta using reshape, in the hope that this answer can be of help.
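(A quick check, my addition: the two flattenings do not actually produce the same shape.)

print(ndarray.flatten(initTheta).shape)   # (7850,)  -- a true 1-D vector
print(initTheta.reshape(1, -1).shape)     # (1, 7850) -- still a 2-D array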
There is no problem computing the cost function, which takes less than 2 seconds on my computer:
print(costFunctionFlatTheta(flatInitTheta), costFunctionFlatTheta1(flatInitTheta1))
# 0.69314718056 0.69314718056
But all the fmin functions hang, even if I set maxiter=0.
e.g.
newFlatTheta = fmin(costFunctionFlatTheta, flatInitTheta, maxiter=0)
or
newFlatTheta1 = fmin(costFunctionFlatTheta1, flatInitTheta1, maxiter=0)
When I interrupt the program, it seems to hang at lines in optimize.py that call the cost functions, lines like this:
return function(*(wrapper_args + args))
For example, if I use fmin_cg, this would be line 292 in optimize.py (Version 0.5).
How do I solve this problem?
OK I found a way to stop fmin_cg from hanging.
Basically I just need to write a function that computes the gradient of the cost function, and pass it to the fprime parameter of fmin_cg.
def gradient(theta, x, y):
    return dot(x.T, vSigmoid(dot(x, theta)) - y) / m / N

def gradientFlatTheta(flatTheta):
    return ndarray.flatten(gradient(flatTheta.reshape(785, 10), trainX, trainY))
Then
newFlatTheta = fmin_cg(costFunctionFlatTheta, flatInitTheta, fprime=gradientFlatTheta, maxiter=0)
terminates within seconds, and by setting maxiter to a higher number (say 100) one can train the model within a reasonable amount of time.
The documentation of fmin_cg says the gradient will be numerically computed if no fprime is given, which is what I suspect caused the hanging: with 785 * 10 = 7850 parameters, a single numerical gradient requires thousands of cost-function evaluations at nearly 2 seconds each, so the optimizer merely looks stuck.
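As a side note (my addition, assuming a SciPy version recent enough to have it), the unified scipy.optimize.minimize interface expresses the same fix through its jac parameter:

from scipy.optimize import minimize

# jac plays the role of fprime; omitting it falls back to numerical
# differentiation over all 7850 parameters, which is what looked like a hang
res = minimize(costFunctionFlatTheta, flatInitTheta,
               jac=gradientFlatTheta, method='CG',
               options={'maxiter': 100})
newFlatTheta = res.x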
Thanks to this notebook by zgo2016@Kaggle, which helped me find the solution.

hessian of a variable returned by tf.concat() is None

Let x and y be vectors of length N, and let z be a function z = f(x,y). In Tensorflow v1.0.0, tf.hessians(z,x) and tf.hessians(z,y) both return an N by N matrix, which is what I expected.
However, when I concatenate x and y into a vector p of size 2*N using tf.concat and run tf.hessians(z, p), it raises the error "ValueError: None values not supported."
I understand this is because in the computation graph the edges run x,y -> z and x,y -> p, so there is no path from p to z and hence no gradient. To circumvent the problem, I could create p first and slice it into x and y, but I would have to change a ton of my code. Is there a more elegant way?
related question: Slice of a variable returns gradient None
import tensorflow as tf
import numpy as np
N = 2
A = tf.Variable(np.random.rand(N,N).astype(np.float32))
B = tf.Variable(np.random.rand(N,N).astype(np.float32))
x = tf.Variable(tf.random_normal([N]))
y = tf.Variable(tf.random_normal([N]))
# reshape each to N by 1
x_1 = tf.reshape(x, [N,1])
y_1 = tf.reshape(y, [N,1])
# concat x and y to form a vector of length 2*N
p = tf.concat([x,y], axis=0)
# define the function
z = 0.5*tf.matmul(tf.matmul(tf.transpose(x_1), A), x_1) + 0.5*tf.matmul(tf.matmul(tf.transpose(y_1), B), y_1) + 100
# works: hx and hy are both N by N matrices
hx = tf.hessians(z, x)
hy = tf.hessians(z, y)
# this raises "ValueError: None values not supported."
# expecting a matrix of size 2*N by 2*N
hp = tf.hessians(z, p)
You can compute the Hessian from its definition:
gxy = tf.gradients(z, [x, y])
gp = tf.concat([gxy[0], gxy[1]], axis=0)
hp = []
for i in range(2*N):
    hp.append(tf.gradients(gp[i], [x, y]))
Because tf.gradients sums dy/dx over its ys, when computing the second partial derivatives one should slice the gradient vector into scalars and then take the gradient of each element separately. Tested on tf 1.0 with Python 2.
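To round this out (my addition, not part of the original answer), the per-element gradients can be assembled into the full 2N by 2N matrix like so:

# stack the per-element gradients into a full 2N x 2N Hessian
rows = []
for i in range(2 * N):
    gx, gy = tf.gradients(gp[i], [x, y])        # each has shape (N,)
    rows.append(tf.concat([gx, gy], axis=0))    # row i, shape (2N,)
hessian = tf.stack(rows)                        # shape (2N, 2N)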