How to use scatter_update update a tensorflow diagonal weight matrix - tensorflow

I tried to update a tensorflow diagonal weight matrix using scatter_update but with no luck so far. It either prompted shape mismatch or only updated along first row. It is very bizarre API behaviour. Could anyone help me out? Thanks
Example:
dia_mx = tf.Variable(initial_value=np.array([[1.,0.,0.],
[0.,1.,0.],
[0.,0.,1.]]))
new_diagonal_values = np.array([2., 3., 4.])
tf.scatter_update(dia_mx, [[0,0],[1,1],[2,2]], new_diagonal_values)
Get error:
InvalidArgumentError: shape of indices ([3,2]) is not compatible with the shape of updates ([3]) [Op:ResourceScatterUpdate]
Expect new diagonal matrix:
dia_mx = [[2.,0.,0.],
[0.,3.,0.],
[0.,0.,4.]]

To update specific indices in a tensor use tf.scatter_nd_update():
import tensorflow as tf
import numpy as np
dia_mx = tf.Variable(initial_value=np.array([[1.,0.,0.],
[0.,1.,0.],
[0.,0.,1.]]))
updates = [tf.constant(2.), tf.constant(3.), tf.constant(4.)]
indices = tf.constant([[0, 0], [1, 1], [2, 2]])
update_tensor = tf.scatter_nd_update(dia_mx, indices, updates)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
print(update_tensor.eval())
# [[2. 0. 0.]
# [0. 3. 0.]
# [0. 0. 4.]]
tf.scatter_update() applies updates along the first dimension of a tensor. In this particular case it means that updates are applied for a whole row of a matrix at once:
dia_mx = tf.Variable(initial_value=np.array([[1.,0.,0.],
[0.,1.,0.],
[0.,0.,1.]]), dtype=tf.float32)
updates = tf.constant([[2., 0., 0.], [0., 3., 0.], [0., 0., 4.]], dtype=tf.float32)
indices = tf.constant([0, 1, 2])
update_tensor = tf.scatter_update(dia_mx, indices, updates)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
print(update_tensor.eval())
# [[2. 0. 0.]
# [0. 3. 0.]
# [0. 0. 4.]]

Related

What's the difference between dim in Pytorch and axis in Tensorflow?

I have two lines and I want to understand whether they will produce the same output or not?
In tensorflow: tf.norm(my_tensor, ord=2, axis=1)
In pytorch: torch.norm(my_tensor, p=2, dim=1)
Say the shape of my_tensor is [100,2]
Will the above two lines give the same result? Or is the axis attribute different from dim?
Yes, they are the same!
import tensorflow as tf
tensor = [[1., 2.], [4., 5.], [3., 6.], [7., 8.], [5., 2.]]
tensor = tf.convert_to_tensor(tensor, dtype=tf.float32)
t_norm = tf.norm(tensor, ord=2, axis=1)
print(t_norm)
Output
tf.Tensor([ 2.236068 6.4031243 6.708204
10.630146 5.3851647], shape=(5,), dtype=float32)
import torch
tensor = [[1., 2.], [4., 5.], [3., 6.], [7., 8.], [5., 2.]]
tensor = torch.tensor(tensor, dtype=torch.float32)
t_norm = torch.norm(tensor, p=2, dim=1)
print(t_norm)
Output
tensor([ 2.2361, 6.4031, 6.7082, 10.6301, 5.3852])

Translate tensor horizontally, back-fill with zeros

Given a 2D tensor
T = [[1, 2, 3]
[4, 5, 6]]
and a 1D tensor containing horizontal shifts, say, s = [0, -2, 1], how can I obtain the following 3D tensor R?
R[0] = T
R[1] = [[3, 0, 0], # shifted two to the left,
[6, 0, 0]] # padding the rest with zeros
R[2] = [[0, 1, 2], # shifted one to the right,
[0, 4, 5]] # padding the rest with zeros
I know about tf.contrib.image.translate, but that isn't differentiable, so I am looking for some elegant combination of padding/slicing/looping/concatenating operations that accomplishes the same thing.
I have only come up with two ways to use tf.map_fn(). The first method is to fill about 0 in T and slice it.
import tensorflow as tf
T = tf.constant([[1, 2, 3],[4, 5, 6]],dtype=tf.float32)
s = tf.constant([0, -2, 1])
left = tf.reduce_max(s)
right = tf.reduce_min(s)
left_mask = tf.zeros(shape=(tf.shape(T)[0],left))
right_mask = tf.zeros(shape=(tf.shape(T)[0],tf.abs(right)))
tmp_slice = tf.concat([left_mask,T,right_mask],axis=-1)
result = tf.map_fn(lambda x: tmp_slice[:,left-x:left-x+tf.shape(T)[1]],s,dtype=T.dtype)
grads = tf.gradients(ys=result,xs=T)
with tf.Session() as sess:
print(sess.run(result))
print(sess.run(grads))
# print
[[[1. 2. 3.]
[4. 5. 6.]]
[[3. 0. 0.]
[6. 0. 0.]]
[[0. 1. 2.]
[0. 4. 5.]]]
[array([[2., 2., 2.],
[2., 2., 2.]], dtype=float32)]
The second method is to compute a corresponding mask matrix by tf.sequence_mask and tf.roll().Then take the value by tf.where().
import tensorflow as tf
T = tf.constant([[1, 2, 3],[4, 5, 6]],dtype=tf.float32)
s = tf.constant([0, -2, 1])
def mask_f(x):
indices = tf.tile([x], (tf.shape(T)[0],))
mask = tf.sequence_mask(tf.shape(T)[1]-tf.abs(indices),tf.shape(T)[1])
mask = tf.roll(mask,shift=tf.maximum(0,x),axis=-1)
return tf.where(mask,tf.roll(T,shift=x,axis=-1),tf.zeros_like(T))
result = tf.map_fn(lambda x:mask_f(x),s,dtype=T.dtype)
grads = tf.gradients(ys=result,xs=T)
with tf.Session() as sess:
print(sess.run(result))
print(sess.run(grads))
# print
[[[1. 2. 3.]
[4. 5. 6.]]
[[3. 0. 0.]
[6. 0. 0.]]
[[0. 1. 2.]
[0. 4. 5.]]]
[array([[2., 2., 2.],
[2., 2., 2.]], dtype=float32)]
Update
I found new method to achieve it. In essence, horizontal shifts are T multiplied by an offset identity matrix. So we can use np.eye() to create factor.
import tensorflow as tf
import numpy as np
T = tf.constant([[1, 2, 3],[4, 5, 6]],dtype=tf.float32)
s = tf.constant([0, -2, 1])
new_T = tf.tile(tf.expand_dims(T,axis=0),[tf.shape(s)[0],1,1])
s_factor = tf.map_fn(lambda x: tf.py_func(lambda y: np.eye(T.get_shape().as_list()[-1],k=y),[x],tf.float64),s,tf.float64)
result = tf.matmul(new_T,tf.cast(s_factor,new_T.dtype))
grads = tf.gradients(ys=result,xs=T)
with tf.Session() as sess:
print(sess.run(result))
print(sess.run(grads))
# print
[[[1. 2. 3.]
[4. 5. 6.]]
[[3. 0. 0.]
[6. 0. 0.]]
[[0. 1. 2.]
[0. 4. 5.]]]
[array([[2., 2., 2.],
[2., 2., 2.]], dtype=float32)]

How to train model with two kids functions for object detection?

I'm trying to implement the model described by Professor Andrew Ng for object detection (explanation starts at 10:00).
He describes the first element of the output vector as the probability that an object was detected, followed by the coordinates of the bounding box of the object matched (when one is matched). The last part of the output vector is a softmax of all the classes your model knows.
As he explains it, using a simple squared error for the case when there is a detection is fine, and just the squares difference of y^[0] - y[0]. I get that this is a naive approach. I'm just wanting to implement this for the learning experience.
My questions
How do I implement this conditional loss in tensorflow?
How do I handle this conditional about y^[0] when dealing with a batch.
How do I implement this conditional loss in tensorflow?
You can convert the loss function to:
Error = mask[0]*(y^[0]-y[0])**2 + mask[1]*(y^[1]-y[1])**2 ... mask[n]*(y^[n]-y[n])**2),
where mask = [1, 1,...1] for y[0] = 1 and [1, 0, ...0] for y[0] = 0
How do I handle this conditional about y^[0] when dealing with a
batch.
For a batch, you can construct the mask on the fly like:
mask = tf.concat([tf.ones((tf.shape(y)[0],1)),y[:,0][...,None]*y[:,1:]], axis=1)
Code:
y_hat_n = np.array([[3, 3, 3, 3], [3,3,3,3]])
y_1 = np.array([[1, 1, 1, 1], [1,1,1,1]])
y_0 = np.array([[0, 1, 1, 1], [0,1,1,1]])
y = tf.placeholder(tf.float32,[None, 4])
y_hat = tf.placeholder(tf.float32,[None, 4])
mask = tf.concat([tf.ones((tf.shape(y)[0],1)),y[:,0][...,None]*y[:,1:]], axis=1)
error = tf.losses.mean_squared_error(mask*y, mask*y_hat)
with tf.Session() as sess:
print(sess.run([mask,error], {y:y_0, y_hat:y_hat_n}))
print(sess.run([mask,error], {y:y_1, y_hat:y_hat_n}))
# Mask and error
#[array([[1., 0., 0., 0.],
# [1., 0., 0., 0.]], dtype=float32), 2.25]
#[array([[1., 1., 1., 1.],
# [1., 1., 1., 1.]], dtype=float32), 4.0]

How do I create a binary (0 or 1 valued) tensor according to known index in tensorflow?

input: length(placeholder), index(1D tensor)
output: 0-1 1D tensor
example: length 5, index [0,1,3], output tensor should be [1,1,0,1,0]
I have tried scatter_add, which requires Variable which requires known shape, and embedding_lookup from matrix with [length, length], which is not effective when length is large.
Any ideas?
Try tf.sparse_to_dense:
output_size = tf.placeholder(tf.int32, [1])
index = tf.constant([0, 1, 3])
ones = tf.ones([tf.size(index)])
result = tf.sparse_to_dense(index, output_size, ones)
with tf.Session() as sess:
sess.run(result, feed_dict={output_size: [5]})
Outputs: array([ 1., 1., 0., 1., 0.], dtype=float32)

how does tensorflow indexing work

I'm having trouble understanding a basic concept with tensorflow. How does indexing work for tensor read/write operations? In order to make this specific, how can the following numpy examples be translated to tensorflow (using tensors for the arrays, indices and values being assigned):
x = np.zeros((3, 4))
row_indices = np.array([1, 1, 2])
col_indices = np.array([0, 2, 3])
x[row_indices, col_indices] = 2
x
with output:
array([[ 0., 0., 0., 0.],
[ 2., 0., 2., 0.],
[ 0., 0., 0., 2.]])
... and ...
x[row_indices, col_indices] = np.array([5, 4, 3])
x
with output:
array([[ 0., 0., 0., 0.],
[ 5., 0., 4., 0.],
[ 0., 0., 0., 3.]])
... and finally ...
y = x[row_indices, col_indices]
y
with output:
array([ 5., 4., 3.])
There's github issue #206 to support this nicely, meanwhile you have to resort to verbose work-arounds
The first example can be done with tf.select that combines two same-shaped tensors by selecting each element from one or the other
tf.reset_default_graph()
row_indices = tf.constant([1, 1, 2])
col_indices = tf.constant([0, 2, 3])
x = tf.zeros((3, 4))
sess = tf.InteractiveSession()
# get list of ((row1, col1), (row2, col2), ..)
coords = tf.transpose(tf.pack([row_indices, col_indices]))
# get tensor with 1's at positions (row1, col1),...
binary_mask = tf.sparse_to_dense(coords, x.get_shape(), 1)
# convert 1/0 to True/False
binary_mask = tf.cast(binary_mask, tf.bool)
twos = 2*tf.ones(x.get_shape())
# make new x out of old values or 2, depending on mask
x = tf.select(binary_mask, twos, x)
print x.eval()
gives
[[ 0. 0. 0. 0.]
[ 2. 0. 2. 0.]
[ 0. 0. 0. 2.]]
The second one could be done with scatter_update, except scatter_update only supports on linear indices and works on variables. So you could create a temporary variable and use reshaping like this. (to avoid variables you could use dynamic_stitch, see the end)
# get linear indices
linear_indices = row_indices*x.get_shape()[1]+col_indices
# turn 'x' into 1d variable since "scatter_update" supports linear indexing only
x_flat = tf.Variable(tf.reshape(x, [-1]))
# no automatic promotion, so make updates float32 to match x
updates = tf.constant([5, 4, 3], dtype=tf.float32)
sess.run(tf.initialize_all_variables())
sess.run(tf.scatter_update(x_flat, linear_indices, updates))
# convert back into original shape
x = tf.reshape(x_flat, x.get_shape())
print x.eval()
gives
[[ 0. 0. 0. 0.]
[ 5. 0. 4. 0.]
[ 0. 0. 0. 3.]]
Finally the third example is already supported with gather_nd, you write
print tf.gather_nd(x, coords).eval()
To get
[ 5. 4. 3.]
Edit, May 6
The update x[cols,rows]=newvals can be done without using Variables (which occupy memory between session run calls) by using select with sparse_to_dense that takes vector of sparse values, or relying on dynamic_stitch
sess = tf.InteractiveSession()
x = tf.zeros((3, 4))
row_indices = tf.constant([1, 1, 2])
col_indices = tf.constant([0, 2, 3])
# no automatic promotion, so specify float type
replacement_vals = tf.constant([5, 4, 3], dtype=tf.float32)
# convert to linear indexing in row-major form
linear_indices = row_indices*x.get_shape()[1]+col_indices
x_flat = tf.reshape(x, [-1])
# use dynamic stitch, it merges the array by taking value either
# from array1[index1] or array2[index2], if indices conflict,
# the later one is used
unchanged_indices = tf.range(tf.size(x_flat))
changed_indices = linear_indices
x_flat = tf.dynamic_stitch([unchanged_indices, changed_indices],
[x_flat, replacement_vals])
x = tf.reshape(x_flat, x.get_shape())
print x.eval()