Tensorflow confusion matrix using one-hot code

I have multi-class classification using RNN and here is my main code for RNN:
def RNN(x, weights, biases):
    x = tf.unstack(x, input_size, 1)
    lstm_cell = rnn.BasicLSTMCell(num_unit, forget_bias=1.0, state_is_tuple=True)
    stacked_lstm = rnn.MultiRNNCell([lstm_cell] * lstm_size, state_is_tuple=True)
    outputs, states = tf.nn.static_rnn(stacked_lstm, x, dtype=tf.float32)
    return tf.matmul(outputs[-1], weights) + biases
logits = RNN(X, weights, biases)
prediction = tf.nn.softmax(logits)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(cost)
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
I have to classify all inputs into 6 classes, and each class is represented by a one-hot label as follows:
happy = [1, 0, 0, 0, 0, 0]
angry = [0, 1, 0, 0, 0, 0]
neutral = [0, 0, 1, 0, 0, 0]
excited = [0, 0, 0, 1, 0, 0]
embarrassed = [0, 0, 0, 0, 1, 0]
sad = [0, 0, 0, 0, 0, 1]
The problem is that I cannot print a confusion matrix using the tf.confusion_matrix() function.
Is there any way to print the confusion matrix using those labels?
If not, how can I convert the one-hot labels to integer indices only when I need to print the confusion matrix?

You cannot generate a confusion matrix from one-hot vectors passed as the labels and predictions parameters; tf.confusion_matrix() expects 1-D tensors of class indices.
To convert a one-hot vector to an ordinary label, use the argmax function:
label = tf.argmax(one_hot_tensor, axis = 1)
After that you can print your confusion_matrix like this:
import tensorflow as tf
num_classes = 2
prediction_arr = tf.constant([1, 1, 1, 1, 0, 0, 0, 0, 1, 1])
labels_arr = tf.constant([0, 1, 1, 1, 1, 1, 1, 1, 0, 0])
confusion_matrix = tf.confusion_matrix(labels_arr, prediction_arr, num_classes)
with tf.Session() as sess:
    print(confusion_matrix.eval())
Output:
[[0 3]
 [4 3]]
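Applied to the code in the question, a minimal sketch would look like this (it assumes the graph from the question is already built; test_x and test_y are hypothetical names for your evaluation batch):
# convert the one-hot ground truth and the softmax output to class indices
true_class = tf.argmax(Y, 1)
predicted_class = tf.argmax(prediction, 1)
cm = tf.confusion_matrix(true_class, predicted_class, num_classes=6)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # feed the same batch you use when evaluating accuracy (hypothetical names)
    print(sess.run(cm, feed_dict={X: test_x, Y: test_y}))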

Related

InvalidArgumentError: Specified a list with shape [1,1] from a tensor with shape [32,1] in tensorflow v2.4 but working well in tensorflow v1.14

I am trying to do time-series forecasting. Training goes smoothly, but when I pass the same dataset to the predict function I get the following error:
InvalidArgumentError: Specified a list with shape [1,1] from a tensor with shape [32,1]
[[node sequential/lstm/TensorArrayUnstack/TensorListFromTensor ]] [Op:__inference_predict_function_55827]
Function call stack:
predict_function
I'm using a stateful LSTM, and the same code and model work fine in tensorflow v1.14 but not in tensorflow v2.4.
My X_train.shape, y_train.shape are ((6191, 10, 1), (6191, 1)),
X_test.shape = (6191, 10, 1) and batch_size = 1.
model = Sequential()
model.add(LSTM(10, batch_input_shape=(batch_size, int(i_shape[0]), int(i_shape[1])),
               activation=activation, stateful=True,
               kernel_regularizer=L1L2(0.01, 0.001)))
Model: "sequential_6"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm_11 (LSTM) (1, 10) 480
_________________________________________________________________
dense_4 (Dense) (1, 1) 11
=================================================================
Total params: 491
Trainable params: 491
Non-trainable params: 0
_________________________________________________________________
None
Let me know if any additional information is required.
I encountered the same error. In my case, I used a Bidirectional wrapper around LSTM.
I resolved the problem by predicting one time step at a time.
Create a function that splits the data into X and Y. (I think you already have one)
import numpy as np

def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence) - 1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)
Create a function that will loop through X and predict.
def get_predictions(model, np_input, look_back):
    X = list()
    for i in range(len(np_input)):
        # print(np_input[i])
        n_features = 1
        testX = np_input[i].reshape((1, look_back, n_features))
        # this returns one prediction with 2 dimensions, so we need to flatten it
        testPredict = model.predict(testX)
        X.append(testPredict.flatten())
    return np.array(X)
Test split_sequence:
import numpy as np
from keras.models import load_model

raw_seq = np.array([0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 1])
n_steps = 24
X, y = split_sequence(raw_seq, n_steps)
for i in range(len(X)):
    print(X[i], y[i])

n_features = 1
testX = X.reshape((X.shape[0], X.shape[1], n_features))
model = load_model("<your_model_file>")
testPredict = model.predict(testX)
print("===Prediction===")
print(testPredict)
In this example, I have 24 time steps (the model, of course, was created with 24 time steps) and predict the 25th element, so the sample input (raw_seq) has a total of 25 elements.
You'll notice that if you add one element to raw_seq, the error comes back. This means that model.predict can only make one prediction at a time.
Test split_sequence and get_predictions:
raw_seq = np.array([0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 1, 1,
                    0, 1, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 1, 2
                    ])
look_back = 24
X, y = split_sequence(raw_seq, look_back)
preds = get_predictions(model, X, look_back)
print(preds)
Running the code gives 26 predictions:
preds.shape = (26, 1)
As expected, predicting over a whole dataset this way takes a long time.
The reason for this error is that you chose a batch_size that does not divide evenly into the number of training samples.
The batch size should be chosen so that the number of samples is divisible by it; a sketch of trimming the data accordingly is shown below. See also here:
this problem is solved here
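As a minimal sketch of that fix, assuming X_test and batch_size from the question (y_test is a hypothetical name for the matching targets), one might trim the arrays so their length is a multiple of the batch size before calling fit or predict:
import numpy as np

def trim_to_batch(arr, batch_size):
    # drop trailing samples that do not fill a complete batch
    n = (len(arr) // batch_size) * batch_size
    return arr[:n]

X_test_trimmed = trim_to_batch(X_test, batch_size)
y_test_trimmed = trim_to_batch(y_test, batch_size)

# stateful models built with batch_input_shape expect the same batch size here
preds = model.predict(X_test_trimmed, batch_size=batch_size)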

Tensorflow tf.metrics.accuracy multi-label always zero

My label looks like this:
label = [0, 1, 0, 0, 1, 1, 0]
In other words, classes 1, 4, and 5 are present for the corresponding sample. I believe this is called a soft class.
I'm calculating my loss with:
logits = tf.layers.dense(encoding, 7, activation=None)
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
    labels=labels,
    logits=logits
)
loss = tf.reduce_mean(cross_entropy)
According to Tensorboard, the loss is decreasing over time, as expected. However, the accuracy is flat at zero:
eval_metric_ops = {
    'accuracy': tf.metrics.accuracy(labels=labels, predictions=logits),
}
tf.summary.scalar('accuracy', eval_metric_ops['accuracy'][1])
How do I calculate the accuracy of my model when using soft classes?
Did you solve this? I think the comment about softmax_cross_entropy_with_logits is incorrect, because you have a multi-label problem (each label is a binary class).
Partial solution:
labels = tf.constant([1, 1, 1, 0, 0, 0])       # example
predictions = tf.constant([0, 1, 0, 0, 1, 0])  # example
is_equal = tf.equal(labels, predictions)
accuracy = tf.reduce_mean(tf.cast(is_equal, tf.float32))
This gives a number, but it still needs to be converted into a tf metric.
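One way to get a streaming metric out of this, sketched here under the assumption that logits and labels have shape [batch, 7] as in the question, is to threshold the sigmoid output and feed the result to tf.metrics.accuracy, which then measures element-wise, per-label accuracy rather than comparing whole label vectors:
# probabilities per label, then hard 0/1 decisions at a 0.5 threshold
probs = tf.sigmoid(logits)
predicted_labels = tf.cast(probs > 0.5, tf.float32)

eval_metric_ops = {
    # compares every label position independently
    'accuracy': tf.metrics.accuracy(labels=labels, predictions=predicted_labels),
}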

use variational_recurrent in tf.contrib.rnn.DropoutWrapper

In the API of tf.contrib.rnn.DropoutWrapper, I am trying to set variational_recurrent=True, in which case input_size is mandatory. As explained in the docs, input_size is a TensorShape object containing the depth(s) of the input tensors.
"depth(s)" is confusing. What is it, please? Is it just the shape of the tensor, as returned by tf.shape()? Or the number of channels, as in the special case of images? But my input tensor is not an image.
And I don't understand why dtype is required when variational_recurrent=True.
Thanks!
input_size for an input of shape tf.TensorShape([200, None, 300]) is just 300.
Play with this example.
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see TF issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import tensorflow as tf
import numpy as np

n_steps = 2
n_inputs = 3
n_neurons = 5
keep_prob = 0.5
learning_rate = 0.001

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))

basic_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)
basic_cell_drop = tf.contrib.rnn.DropoutWrapper(
    basic_cell,
    input_keep_prob=keep_prob,
    variational_recurrent=True,
    dtype=tf.float32,
    input_size=n_inputs)

output_seqs, states = tf.contrib.rnn.static_rnn(
    basic_cell_drop,
    X_seqs,
    dtype=tf.float32)
outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])

init = tf.global_variables_initializer()

X_batch = np.array([
    # t = 0      t = 1
    [[0, 1, 2], [9, 8, 7]],  # instance 1
    [[3, 4, 5], [0, 0, 0]],  # instance 2
    [[6, 7, 8], [6, 5, 4]],  # instance 3
    [[9, 0, 1], [3, 2, 1]],  # instance 4
])

with tf.Session() as sess:
    init.run()
    outputs_val = outputs.eval(feed_dict={X: X_batch})
    print(outputs_val)
See this for more details: https://github.com/tensorflow/tensorflow/issues/7927

simple Recurrent Neural Net from scratch using tensorflow

I've built a simple recurrent neural net with one hidden layer containing 4 nodes. This is my code:
import tensorflow as tf

# hyper parameters
learning_rate = 0.0001
number_of_epochs = 10000

# Computation Graph
W1 = tf.Variable([[1.0, 1.0, 1.0, 1.0]], dtype=tf.float32, name='W1')
W2 = tf.Variable([[1.0], [1.0], [1.0], [1.0]], dtype=tf.float32, name='W2')
WR = tf.Variable([[1.0, 1.0, 1.0, 1.0]], dtype=tf.float32, name='WR')
# b = tf.Variable([[0], [0], [0], [0]], dtype=tf.float32)

prev_val = [[0.0]]

X = tf.placeholder(tf.float32, [None, None], name='X')
labels = tf.placeholder(tf.float32, [None, 1], name='labels')

sess = tf.Session()
sess.run(tf.initialize_all_variables())

z = tf.matmul(X, W1) + tf.matmul(prev_val, WR)  # - b
prev_val = z
predict = tf.matmul(z, W2)

error = tf.reduce_mean((labels - predict) ** 2)
train = tf.train.GradientDescentOptimizer(learning_rate).minimize(error)

time_series = [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
lbsx = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]

for i in range(number_of_epochs):
    for j in range(len(time_series)):
        curr_X = time_series[j]
        lbs = lbsx[j]
        sess.run(train, feed_dict={X: [[curr_X]], labels: [[lbs]]})

print(sess.run(predict, feed_dict={X: [[0]]}))
print(sess.run(predict, feed_dict={X: [[1]]}))
I'm getting output:
[[ 0.]]
[[ 3.12420416e-05]]
With input 1 it should output 0, and vice versa. I'm also confused about the 'previous value': should it be a placeholder? I'd really appreciate your efforts to fix the code.
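One common pattern for the 'previous value' part of this question, sketched here as an illustration rather than a fix of the exact code above, is to feed the previous hidden state through a placeholder and carry the value returned by each sess.run call into the next step. The names WR_hidden, prev_state and prev_np are hypothetical; X, W1, W2, labels, learning_rate, sess, time_series and lbsx are reused from the question's code:
import numpy as np

# hypothetical recurrent weight: maps the 1 x 4 hidden state back into the hidden layer
WR_hidden = tf.Variable(tf.ones([4, 4], dtype=tf.float32), name='WR_hidden')
# placeholder that receives the previous step's hidden state at run time
prev_state = tf.placeholder(tf.float32, [1, 4], name='prev_state')

z = tf.matmul(X, W1) + tf.matmul(prev_state, WR_hidden)
predict = tf.matmul(z, W2)
error = tf.reduce_mean((labels - predict) ** 2)
train = tf.train.GradientDescentOptimizer(learning_rate).minimize(error)

sess.run(tf.global_variables_initializer())

prev_np = np.zeros((1, 4), dtype=np.float32)  # initial hidden state
for j in range(len(time_series)):
    # run one step, carrying the numeric hidden state into the next step
    prev_np, _ = sess.run([z, train],
                          feed_dict={X: [[time_series[j]]],
                                     labels: [[lbsx[j]]],
                                     prev_state: prev_np})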

Out of Memory error at model.compile

I have a relatively large multi-layer regression model that I want to train end-to-end. My training is a two-step procedure in which I first minimize the Euclidean loss and then minimize my custom loss. Effectively, this means the following pseudo-code:
model.compile(optimizer='Adam', loss='mse')
model.fit()
model.compile(optimizer='Adam', loss=my_metric)
model.fit()
I am able to run the first two statements without any problems, but I get an out-of-memory error when my code reaches the second model.compile statement. What should I do differently to avoid this problem?
Edited to include my_metric. Think of y_true and y_pred as 3-dimensional vectors. First I minimize the Euclidean distance between them to initialize the weights, and then I minimize a geodesic loss between them.
# compute geodesic viewpoint loss
def my_metric(y_true, y_pred):
    # compute angles
    angle_true = K.sqrt(K.sum(K.square(y_true), axis=1))
    angle_pred = K.sqrt(K.sum(K.square(y_pred), axis=1))
    # compute axes
    axis_true = K.l2_normalize(y_true, axis=1)
    axis_pred = K.l2_normalize(y_pred, axis=1)
    # convert axes to corresponding skew-symmetric matrices
    proj = tf.constant(np.asarray([[0, -1, 0, 1, 0, 0, 0, 0, 0],
                                   [0, 0, 1, 0, 0, 0, -1, 0, 0],
                                   [0, 0, 0, 0, 0, -1, 0, 1, 0]]), dtype=tf.float32)
    skew_true = K.dot(axis_true, proj)
    skew_pred = K.dot(axis_pred, proj)
    skew_true = K.map_fn(lambda x: K.reshape(x, [3, 3]), skew_true)
    skew_pred = K.map_fn(lambda x: K.reshape(x, [3, 3]), skew_pred)
    # compute rotation matrices and do a dot product
    R = tf.map_fn(my_R, (skew_true, skew_pred, angle_true, angle_pred), dtype=tf.float32)
    # compute the angle error
    theta = K.map_fn(get_theta, R)
    return K.mean(theta)
# function to compute R1^T R2 given the axis-angle representations (\theta_1, v_1) and (\theta_2, v_2)
# x is a list that contains x[0] = v_1, x[1] = v_2, x[2] = \theta_1, x[3] = \theta_2
# note that v_1 and v_2 are the skew-symmetric matrices corresponding to the 3-dim vectors in this function
def my_R(x):
    R1 = K.eye(3) + K.sin(x[2]) * x[0] + (1.0 - K.cos(x[2])) * K.dot(x[0], x[0])
    R2 = K.eye(3) + K.sin(x[3]) * x[1] + (1.0 - K.cos(x[3])) * K.dot(x[1], x[1])
    return K.dot(K.transpose(R1), R2)
# Rodrigues' formula
def get_theta(x):
    return K.abs(tf.acos(K.clip(0.5 * (tf.reduce_sum(tf.diag_part(x)) - 1.0), -1.0 + 1e-7, 1.0 - 1e-7)))
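For reference, the loss above can be evaluated on small constant tensors to confirm that it runs on its own, independently of model.compile; a minimal sketch, assuming the three functions above are defined and the Keras backend is imported as K:
import numpy as np
import tensorflow as tf
from keras import backend as K

# two small batches of 3-dim axis-angle vectors
y_true = K.constant(np.random.rand(4, 3).astype(np.float32))
y_pred = K.constant(np.random.rand(4, 3).astype(np.float32))

loss = my_metric(y_true, y_pred)
with tf.Session() as sess:
    print(sess.run(loss))  # a single scalar angle error in radians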