Return all possible prediction values - tensorflow

This neural network trains on the inputs [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]] with the labelled outputs [[0.0], [1.0], [1.0], [0.0]]:
import numpy as np
import tensorflow as tf
sess = tf.InteractiveSession()
# a batch of inputs of 2 value each
inputs = tf.placeholder(tf.float32, shape=[None, 2])
# a batch of output of 1 value each
desired_outputs = tf.placeholder(tf.float32, shape=[None, 1])
# [!] define the number of hidden units in the first layer
HIDDEN_UNITS = 4
weights_1 = tf.Variable(tf.truncated_normal([2, HIDDEN_UNITS]))
biases_1 = tf.Variable(tf.zeros([HIDDEN_UNITS]))
# connect 2 inputs to every hidden unit. Add bias
layer_1_outputs = tf.nn.sigmoid(tf.matmul(inputs, weights_1) + biases_1)
print(layer_1_outputs)
NUMBER_OUTPUT_NEURONS = 1
biases_2 = tf.Variable(tf.zeros([NUMBER_OUTPUT_NEURONS]))
weights_2 = tf.Variable(tf.truncated_normal([HIDDEN_UNITS, NUMBER_OUTPUT_NEURONS]))
finalLayerOutputs = tf.nn.sigmoid(tf.matmul(layer_1_outputs, weights_2) + biases_2)
tf.global_variables_initializer().run()
logits = tf.nn.sigmoid(tf.matmul(layer_1_outputs, weights_2) + biases_2)
training_inputs = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
training_outputs = [[0.0], [1.0], [1.0], [0.0]]
error_function = 0.5 * tf.reduce_sum(tf.subtract(logits, desired_outputs) * tf.subtract(logits, desired_outputs))
train_step = tf.train.GradientDescentOptimizer(0.05).minimize(error_function)
for i in range(15):
    _, loss = sess.run([train_step, error_function],
                       feed_dict={inputs: np.array(training_inputs),
                                  desired_outputs: np.array(training_outputs)})
print(sess.run(logits, feed_dict={inputs: np.array([[0.0, 1.0]])}))
After training, this network returns [[ 0.61094815]] for the input [[0.0, 1.0]].
Is [[ 0.61094815]] the value with the highest probability that the trained network assigns to the input [[0.0, 1.0]]? Can the lower-probability values also be accessed, and not just the most probable one?
If I increase the number of training epochs I'll get a better prediction, but here I just want to access all potential output values with their probabilities for a given input.
Update:
I have updated the code to use multi-class classification with softmax. But the prediction for [[0.0, 1.0, 0.0, 0.0]] is [array([0])]. Have I updated it correctly?
import numpy as np
import tensorflow as tf
sess = tf.InteractiveSession()
# a batch of inputs of 4 values each
inputs = tf.placeholder(tf.float32, shape=[None, 4])
# a batch of outputs of 3 values each
desired_outputs = tf.placeholder(tf.float32, shape=[None, 3])
# [!] define the number of hidden units in the first layer
HIDDEN_UNITS = 4
weights_1 = tf.Variable(tf.truncated_normal([4, HIDDEN_UNITS]))
biases_1 = tf.Variable(tf.zeros([HIDDEN_UNITS]))
# connect 4 inputs to every hidden unit. Add bias
layer_1_outputs = tf.nn.softmax(tf.matmul(inputs, weights_1) + biases_1)
biases_2 = tf.Variable(tf.zeros([3]))
weights_2 = tf.Variable(tf.truncated_normal([HIDDEN_UNITS, 3]))
finalLayerOutputs = tf.nn.softmax(tf.matmul(layer_1_outputs, weights_2) + biases_2)
tf.global_variables_initializer().run()
logits = tf.nn.softmax(tf.matmul(layer_1_outputs, weights_2) + biases_2)
training_inputs = [[0.0, 0.0 , 0.0, 0.0], [0.0, 1.0 , 0.0, 0.0], [1.0, 0.0 , 0.0, 0.0], [1.0, 1.0 , 0.0, 0.0]]
training_outputs = [[0.0,0.0,0.0], [1.0,0.0,0.0], [1.0,0.0,0.0], [0.0,0.0,1.0]]
error_function = 0.5 * tf.reduce_sum(tf.subtract(logits, desired_outputs) * tf.subtract(logits, desired_outputs))
train_step = tf.train.GradientDescentOptimizer(0.05).minimize(error_function)
for i in range(15):
    _, loss = sess.run([train_step, error_function],
                       feed_dict={inputs: np.array(training_inputs),
                                  desired_outputs: np.array(training_outputs)})
prediction = tf.argmax(logits, 1)
best = sess.run([prediction], feed_dict={inputs: np.array([[0.0, 1.0, 0.0, 0.0]])})
print(best)
Which prints [array([0])]
Update 2:
Replacing
prediction = tf.argmax(logits, 1)
best = sess.run([prediction], feed_dict={inputs: np.array([[0.0, 1.0, 0.0, 0.0]])})
print(best)
with:
prediction = tf.nn.softmax(logits)
best = sess.run([prediction], feed_dict={inputs: np.array([[0.0, 1.0, 0.0, 0.0]])})
print(best)
appears to fix the issue.
So now the full source is:
import numpy as np
import tensorflow as tf
sess = tf.InteractiveSession()
# a batch of inputs of 4 values each
inputs = tf.placeholder(tf.float32, shape=[None, 4])
# a batch of outputs of 3 values each
desired_outputs = tf.placeholder(tf.float32, shape=[None, 3])
# [!] define the number of hidden units in the first layer
HIDDEN_UNITS = 4
weights_1 = tf.Variable(tf.truncated_normal([4, HIDDEN_UNITS]))
biases_1 = tf.Variable(tf.zeros([HIDDEN_UNITS]))
# connect 4 inputs to every hidden unit. Add bias
layer_1_outputs = tf.nn.softmax(tf.matmul(inputs, weights_1) + biases_1)
biases_2 = tf.Variable(tf.zeros([3]))
weights_2 = tf.Variable(tf.truncated_normal([HIDDEN_UNITS, 3]))
finalLayerOutputs = tf.nn.softmax(tf.matmul(layer_1_outputs, weights_2) + biases_2)
tf.global_variables_initializer().run()
logits = tf.nn.softmax(tf.matmul(layer_1_outputs, weights_2) + biases_2)
training_inputs = [[0.0, 0.0 , 0.0, 0.0], [0.0, 1.0 , 0.0, 0.0], [1.0, 0.0 , 0.0, 0.0], [1.0, 1.0 , 0.0, 0.0]]
training_outputs = [[0.0,0.0,0.0], [1.0,0.0,0.0], [1.0,0.0,0.0], [0.0,0.0,1.0]]
error_function = 0.5 * tf.reduce_sum(tf.subtract(logits, desired_outputs) * tf.subtract(logits, desired_outputs))
train_step = tf.train.GradientDescentOptimizer(0.05).minimize(error_function)
for i in range(1500):
    _, loss = sess.run([train_step, error_function],
                       feed_dict={inputs: np.array(training_inputs),
                                  desired_outputs: np.array(training_outputs)})
prediction = tf.nn.softmax(logits)
best = sess.run([prediction], feed_dict={inputs: np.array([[0.0, 1.0, 0.0, 0.0]])})
print(best)
Which prints
[array([[ 0.49810624, 0.24845563, 0.25343812]], dtype=float32)]

Your current network does (logistic) regression, not really classification: given an input x, it tries to evaluate f(x) (where f(x) = x1 XOR x2 here, but the network does not know that before training), which is regression. To do so, it learns a function f1(x) and tries to make it as close as possible to f(x) on all your training samples. [[ 0.61094815]] is just the value of f1([[0.0, 1.0]]). In this setting there is no such thing as a "probability to be in a class", since there is no class; there is only the user (you) choosing to interpret f1(x) as the probability of the output being 1. Since you have only 2 classes, that tells you that the probability of the other class is 1 - 0.61094815 (that is, you are doing classification with the output of the network, but it is not really trained to do that in itself). Used this way, this method is in a sense a (widely used) trick to perform classification, but it only works if you have 2 classes.
A real network for classification would be built a bit differently: your logits would be of shape (batch_size, number_of_classes), so (1, 2) in your case; you apply a softmax on them, and then the prediction is argmax(softmax), with probability max(softmax). Then you can also get the probability of each output according to the network: probability(class i) = softmax[i]. Here the network is really trained to learn the probability of x being in each class.
I'm sorry if my explanation is obscure, or if the difference between regression (between 0 and 1) and classification seems philosophical in a setting with 2 classes, but if you add more classes you'll probably see what I mean.
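A minimal sketch of such a classification head on your XOR data (my illustration, not your code: 2 output classes, one-hot labels, and variable names of my choosing):
import numpy as np
import tensorflow as tf

inputs = tf.placeholder(tf.float32, shape=[None, 2])
labels = tf.placeholder(tf.float32, shape=[None, 2])  # one-hot, 2 classes

# hidden layer with 4 units
w1 = tf.Variable(tf.truncated_normal([2, 4]))
b1 = tf.Variable(tf.zeros([4]))
hidden = tf.nn.sigmoid(tf.matmul(inputs, w1) + b1)

# raw scores ("logits") of shape (batch_size, number_of_classes)
w2 = tf.Variable(tf.truncated_normal([4, 2]))
b2 = tf.Variable(tf.zeros([2]))
logits = tf.matmul(hidden, w2) + b2

probs = tf.nn.softmax(logits)      # probability of each class
prediction = tf.argmax(probs, 1)   # most probable class

loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

xs = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
ys = [[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]]  # one-hot XOR labels

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(2000):
        sess.run(train_step, feed_dict={inputs: xs, labels: ys})
    # probs holds every class probability, prediction the argmax
    print(sess.run([probs, prediction], feed_dict={inputs: [[0.0, 1.0]]}))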
EDIT
Answers to your two updates:
In your training samples, the labels (training_outputs) must be probability distributions, i.e. they must sum to 1 for each sample (99% of the time they are of the form (1, 0, 0), (0, 1, 0) or (0, 0, 1)), so your first output [0.0, 0.0, 0.0] is not valid. If you want to learn XOR on the first two inputs, then the first output should be the same as the last: [0.0, 0.0, 1.0].
Getting [array([0])] from prediction = tf.argmax(logits, 1) is completely normal: logits contains your probabilities, and prediction is the class with the biggest probability, which in your case is class 0: in your training set, [0.0, 1.0, 0.0, 0.0] is associated with the output [1.0, 0.0, 0.0], i.e. it is of class 0 with probability 1 and of the other classes with probability 0. After enough training, print(best) with prediction = tf.argmax(logits, 1) on the input [1.0, 1.0, 0.0, 0.0] should give you [array([2])], 2 being the index of the class for this input in your training set.
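To read off the full distribution as well as the argmax in one run, a small sketch reusing the names from your updated script:
prediction = tf.argmax(logits, 1)
# logits already holds the softmax output in your script, so fetching it
# directly gives the per-class probabilities
probs, best = sess.run([logits, prediction],
                       feed_dict={inputs: np.array([[1.0, 1.0, 0.0, 0.0]])})
print(probs)  # per-class probabilities, shape (1, 3)
print(best)   # index of the most probable class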

Related

Google Cloud ml-engine fails predicting multiple inputs

Predictions are only successful when providing a single instance in instances.json.
Test 1: contents of instances.json:
{"serving_input": [20.0, 0.0, 1.0 ... 0.16474569041197143, 0.04138248072194471], "prediction_id": 0, "keep_prob": 1.0}
Prediction command (same output for local and online prediction):
gcloud ml-engine local predict --model-dir=./model_dir --json-instances=instances.json
Output:
SERVING_OUTPUT                             ARGMAX  PREDICTION_ID  SCORES      TOP_K
[-340.6920166015625, -1153.0877685546875]  0       0              [1.0, 0.0]  [1.0, 0.0]
Test 2: contents of instances.json:
{"serving_input": [20.0, 0.0, 1.0 ... 0.16474569041197143, 0.04138248072194471], "prediction_id": 0, "keep_prob": 1.0}
{"serving_input": [21.0, 2.0, 3.0 ... 3.14159265359, 0.04138248072194471], "prediction_id": 1, "keep_prob": 1.0}
Output:
.. Incompatible shapes: [2] vs. [2,108] .. (_arg_keep_prob_0_1, Model/dropout/random_uniform)
where 108 is the size of the first hidden layer (net_dim=[2015, 108, 2]). (The layer is wrapped in tf.nn.dropout, hence the keep_prob=1.0 input.)
Exporting code:
probabilities = tf.nn.softmax(self.out_layer)
top_k, _ = tf.nn.top_k(probabilities, self.network_dim[-1])
prediction_signature = (
    tf.saved_model.signature_def_utils.predict_signature_def(
        inputs={'serving_input': self.x, 'keep_prob': self.keep_prob,
                'prediction_id': self.prediction_id_in},
        outputs={'serving_output': self.out_layer, 'argmax': tf.argmax(self.out_layer, 1),
                 'prediction_id': self.prediction_id_out, 'scores': probabilities,
                 'top_k': top_k}))
builder.add_meta_graph_and_variables(
    sess,
    tags=[tf.saved_model.tag_constants.SERVING],
    signature_def_map={
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            prediction_signature
    },
    main_op=tf.saved_model.main_op.main_op())
builder.save()
How can I format instances.json to perform a batched prediction (a prediction with multiple input instances)?
The problem is not in the JSON. Check how you are using self.x: I think your code assumes it is a 1D array, when you should treat it as a tensor of shape [?, 108].
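A hedged sketch of what that could look like, with the layer sizes taken from the question's net_dim=[2015, 108, 2] (illustrative only, not the asker's actual code):
import tensorflow as tf

# give the serving input an explicit batch dimension so every JSON line
# in instances.json becomes one row of the batch
x = tf.placeholder(tf.float32, shape=[None, 2015], name='serving_input')

# the serving layer batches every input in the signature, so keep_prob
# arrives with shape [batch_size]; reducing it to a scalar before
# tf.nn.dropout avoids shape clashes like "[2] vs. [2, 108]"
keep_prob = tf.placeholder(tf.float32, shape=[None], name='keep_prob')
keep_prob_scalar = tf.reduce_mean(keep_prob)

hidden = tf.layers.dense(x, 108, activation=tf.nn.relu)  # first hidden layer
hidden = tf.nn.dropout(hidden, keep_prob=keep_prob_scalar)
out_layer = tf.layers.dense(hidden, 2)                   # logits, 2 classes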

Basic Tensorflow: Define Tensor variable using existing variables

I have some very simple tensorflow code to rotate a vector:
import tensorflow as tf
import numpy as np
x = tf.placeholder(tf.float32, shape=(2, 1))
angle = tf.placeholder(tf.float32)
s_a = tf.sin(angle)
c_a = tf.cos(angle)
R = tf.Variable([[c_a, s_a], [-s_a, c_a]], tf.float32, expected_shape=(2,2))
#R = tf.Variable([[1.0, 0.0], [0.0, 1.0]], tf.float32)
rotated_v = tf.matmul(R,x)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    res = sess.run([init, rotated_v], feed_dict={x: np.array([[1.0], [1.0]]), angle: 1.0})
    print(res)
The code works fine when I hand-code the identity matrix. However, in its current form I get this error:
ValueError: initial_value must have a shape specified: Tensor("Variable/initial_value:0", dtype=float32)
I've tried specifying the shape in multiple ways, but I can't make this work.
What am I doing wrong?
I have figured out a way to achieve this (it might not be the best way, but it works).
import tensorflow as tf
import numpy as np
x = tf.placeholder(tf.float32, shape=(2, 1))
angle = tf.placeholder(tf.float32)
s_a = tf.sin(angle)
c_a = tf.cos(angle)
R = tf.Variable([[1.0, 0.0], [0.0, 1.0]], tf.float32)
assignR = tf.assign(R, [[c_a, s_a], [-s_a, c_a]])
rotated_v = tf.matmul(R,x)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    newR = sess.run(assignR, feed_dict={angle: 1.0})
    print(newR)
    print()
    res = sess.run([rotated_v], feed_dict={x: np.array([[1.0], [1.0]])})
    print(res)
The original approach can't work, because s_a and c_a are op outputs whose values are uniquely determined by angle. You can't assign or update these nodes, so training them doesn't make any sense.
This line, on the other hand...
R = tf.Variable([[1.0, 0.0], [0.0, 1.0]], tf.float32)
... is the definition of an independent variable with an initial value equal to the identity matrix. This is perfectly valid. Since this variable is independent, you can assign it a new value built from s_a and c_a. Note that you can't initialize it with s_a and c_a, because the initializer is run before any values are fed into the session (so angle is unknown at that point).
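If R never needs to be trainable state, a simpler alternative (my sketch, not part of the original answer) is to assemble the rotation matrix as a plain tensor from the op outputs:
import tensorflow as tf
import numpy as np

x = tf.placeholder(tf.float32, shape=(2, 1))
angle = tf.placeholder(tf.float32)
s_a, c_a = tf.sin(angle), tf.cos(angle)

# build R as an ordinary (2, 2) tensor; it is recomputed from angle on
# every run, so no Variable and no assign are needed
R = tf.stack([tf.stack([c_a, s_a]), tf.stack([-s_a, c_a])])
rotated_v = tf.matmul(R, x)

with tf.Session() as sess:
    print(sess.run(rotated_v, feed_dict={x: np.array([[1.0], [1.0]]), angle: 1.0}))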

SparseTensor * Vector

How can the following be achieved in tensorflow, when A is a tf.SparseTensor and b is a tf.Variable?
A = np.arange(5**2).reshape((5,5))
b = np.array([1.0, 2.0, 0.0, 0.0, 1.0])
C = A * b
If I try the same notation, I get InvalidArgumentError: Provided indices are out-of-bounds w.r.t. dense side with broadcasted shape.
* works for SparseTensor as well; your problem seems to be related to the SparseTensor itself. You might have provided indices that are out of the range of the shape you gave it. Consider this example:
A_t = tf.SparseTensor(indices=[[0,6],[4,4]], values=[3.2,5.1], dense_shape=(5,5))
Notice that the column index 6 is larger than the specified shape allows (at most 5 columns, i.e. valid indices 0 to 4), and this gives the same error you've shown:
b = np.array([1.0, 2.0, 0.0, 0.0, 1.0])
B_t = tf.Variable(b, dtype=tf.float32)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(A_t * B_t))
InvalidArgumentError (see above for traceback): Provided indices are
out-of-bounds w.r.t. dense side with broadcasted shape
Here is a working example:
A_t = tf.SparseTensor(indices=[[0,3],[4,4]], values=[3.2,5.1], dense_shape=(5,5))
b = np.array([1.0, 2.0, 0.0, 0.0, 1.0])
B_t = tf.Variable(b, dtype=tf.float32)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(A_t * B_t))
    # SparseTensorValue(indices=array([[0, 3],
    #        [4, 4]], dtype=int64), values=array([ 0. ,  5.0999999], dtype=float32),
    #        dense_shape=array([5, 5], dtype=int64))
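As a quick sanity check (my addition, not part of the original answer), densifying the sparse matrix and using ordinary broadcasting gives the same values:
# densify A_t and multiply with plain broadcasting; the nonzero entries
# match the SparseTensorValue printed above
dense_A = tf.sparse_tensor_to_dense(A_t)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(dense_A * B_t))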

Give me a code example usage for tensorflow's tf.metrics.sparse_average_precision_at_k

Can you give me a code example that uses tf.metrics.sparse_average_precision_at_k? I cannot find anything on the Internet... :(
If I have a multi-labeled dataset like this one (each example may have more than one target label):
(total number of classes = 5)
y1 = [class_0, class_1]
y2 = [class_1, class_2]
y3 = [class_0]
and my system predicts:
prediction for y1 -> [0.1, 0.3, 0.2, 0.0, 0.0]
prediction for y2 -> [0.0, 0.3, 0.7, 0.4, 0.4]
prediction for y3 -> [0.1, 0.3, 0.2, 0.3, 0.5]
How can I compute the metric for k=3, for example?
P.S.: Feel free to suggest your own example, if you can't comprehend this one.
EDIT: My code so far:
I really don't get it... Please advise for a single prediction (y1 only) as well as for several predictions at once (with a different number of true classes in each).
import numpy as np
import tensorflow as tf
sess = tf.InteractiveSession()
tf.local_variables_initializer().run()
y1 = tf.constant(np.array([0, 1]))
y2 = tf.constant(np.array([1, 2]))
y3 = tf.constant(np.array([0]))
p1 = tf.constant(np.array([0.1, 0.3, 0.2, 0.0, 0.0]))
p2 = tf.constant(np.array([0.0, 0.3, 0.7, 0.4, 0.4]))
p3 = tf.constant(np.array([0.1, 0.3, 0.2, 0.3, 0.5]))
metric, update = tf.metrics.sparse_average_precision_at_k(tf.cast(y1, tf.int64), p1, 3)
print(sess.run(update))
tf.metrics.sparse_average_precision_at_k will be replaced by tf.metrics.average_precision_at_k. If you browse the TensorFlow code, you will find that when your inputs are y_true and y_pred, this function actually transforms y_pred into y_pred_idx using the top_k function.
y_true is a tensor of shape (batch_size, num_labels), and y_pred is of shape (batch_size, num_classes).
You can also see some discussion in this issue, and this example comes from this issue.
import tensorflow as tf
import numpy as np
y_true = np.array([[2], [1], [0], [3], [0]]).astype(np.int64)
y_true = tf.identity(y_true)
y_pred = np.array([[0.1, 0.2, 0.6, 0.1],
                   [0.8, 0.05, 0.1, 0.05],
                   [0.3, 0.4, 0.1, 0.2],
                   [0.6, 0.25, 0.1, 0.05],
                   [0.1, 0.2, 0.6, 0.1]]).astype(np.float32)
y_pred = tf.identity(y_pred)
_, m_ap = tf.metrics.sparse_average_precision_at_k(y_true, y_pred, 3)
sess = tf.Session()
sess.run(tf.local_variables_initializer())
stream_vars = [i for i in tf.local_variables()]
tf_map = sess.run(m_ap)
print(tf_map)
print((sess.run(stream_vars)))
tmp_rank = tf.nn.top_k(y_pred,3)
print(sess.run(tmp_rank))
The line stream_vars = [i for i in tf.local_variables()] lets you see the two local variables that are created inside the tf.metrics.sparse_average_precision_at_k function.
The line tmp_rank = tf.nn.top_k(y_pred, 3) is there to help you understand which prediction indices tf.metrics.sparse_average_precision_at_k uses. You can change the value of k to see different results; tmp_rank represents the indices that are used in calculating the average precision.
For example:
When k=1, only the first sample's top prediction matches its label, so the mean average precision at 1 is 1/5 = 0.2.
When k=2, the third sample's label additionally appears at rank 2 of its top-k predictions, so the mean average precision at 2 is (1 + 1/2)/5 = 0.3.
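A quick numpy hand-check of those figures (my own verification sketch, computed from the data above):
import numpy as np

def mean_ap_at_k(y_true, y_pred, k):
    # single-label rows: AP@k is 1/rank if the label appears in the
    # top-k predictions, else 0; then average over the batch
    aps = []
    for label, scores in zip(y_true, y_pred):
        topk = np.argsort(-np.asarray(scores), kind='mergesort')[:k]
        hits = np.where(topk == label[0])[0]
        aps.append(1.0 / (hits[0] + 1) if hits.size else 0.0)
    return np.mean(aps)

y_true = [[2], [1], [0], [3], [0]]
y_pred = [[0.1, 0.2, 0.6, 0.1],
          [0.8, 0.05, 0.1, 0.05],
          [0.3, 0.4, 0.1, 0.2],
          [0.6, 0.25, 0.1, 0.05],
          [0.1, 0.2, 0.6, 0.1]]
print(mean_ap_at_k(y_true, y_pred, 1))  # 0.2
print(mean_ap_at_k(y_true, y_pred, 2))  # 0.3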
For several predictions at once, stack the labels and the predictions:
metric, update = tf.metrics.sparse_average_precision_at_k(tf.stack([y1, y2, y3]), tf.stack([p1, p2, p3]), 3)
print(sess.run(update))
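Note that tf.stack requires rows of equal length, and y3 above has only one label. A hedged workaround sketch (my addition) pads the shorter row by repeating a label; check that duplicate labels behave as you expect for this metric before relying on it:
y1 = tf.constant(np.array([0, 1]), dtype=tf.int64)
y2 = tf.constant(np.array([1, 2]), dtype=tf.int64)
y3 = tf.constant(np.array([0, 0]), dtype=tf.int64)  # padded from [0]
labels = tf.stack([y1, y2, y3])  # shape (3, 2)
preds = tf.stack([p1, p2, p3])   # shape (3, 5)
metric, update = tf.metrics.sparse_average_precision_at_k(labels, preds, 3)
sess.run(tf.local_variables_initializer())
print(sess.run(update))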

How to make a histogram of tensor columns in tensorflow

I have a batch of images as a tensor of size [batch_size, w, h].
I wish to get a histogram of the values in each column.
This is what I came up with, but it works only for the first image in the batch and it's also very slow:
global_hist = []
net = tf.squeeze(net)
for i in range(batch_size):
    local_hist = []
    for j in range(1024):
        # note: tf.histogram_fixed_width expects value_range = [min, max],
        # so a list of bin edges like this is rejected
        hist = tf.histogram_fixed_width(tf.slice(net, [i, 0, j], [1, 1024, 1]),
                                        [0.0, 0.2, 0.4, 0.6, 0.8, 1.0], nbins=10)
        local_hist.append(hist)
    global_hist.append(local_hist)
Is there an efficient way to do this?
OK, so I found a solution (though it's rather slow and does not allow fixing the bin edges), but someone may find this useful.
nbins = 10
net = tf.squeeze(net)
for i in range(batch_size):
    local_hist = tf.expand_dims(tf.histogram_fixed_width(
        tf.slice(net, [i, 0, 0], [1, 1024, 1]), [0.0, 1.0],
        nbins=nbins, dtype=tf.float32), [-1])
    for j in range(1, 1024):
        hist = tf.histogram_fixed_width(tf.slice(net, [i, 0, j], [1, 1024, 1]),
                                        [0.0, 1.0], nbins=nbins, dtype=tf.float32)
        hist = tf.expand_dims(hist, [-1])
        local_hist = tf.concat([local_hist, hist], 1)
    if i == 0:
        global_hist = tf.expand_dims(local_hist, [0])
    else:
        global_hist = tf.concat([global_hist, tf.expand_dims(local_hist, [0])], 0)
In addition, I found this link to be very useful:
https://stackoverflow.com/questions/41764199/row-wise-histogram/41768777#41768777
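For reference, here is a vectorized sketch of my own (assuming the values lie in [0, 1] and uniform bins are acceptable) that avoids the Python loops entirely:
import tensorflow as tf

nbins = 10
net = tf.placeholder(tf.float32, shape=[None, 1024, 1024])  # [batch, w, h]

# map every value to a bin index in [0, nbins - 1]
bins = tf.clip_by_value(tf.to_int32(net * nbins), 0, nbins - 1)

# one-hot the bin indices and count along the row axis (axis=1), giving
# one histogram per (image, column): shape [batch, h, nbins]
hists = tf.reduce_sum(tf.one_hot(bins, nbins, dtype=tf.int32), axis=1)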