Shuffling two tensors in the same order - TensorFlow

As above. I tried these, to no avail:
tf.random.shuffle( (a,b) )
tf.random.shuffle( zip(a,b) )
I used to concatenate them, do the shuffling, then unconcatenate/unpack. But now I'm in a situation where (a) is a rank-4 tensor while (b) is 1D, so there is no way to concatenate.
I also tried passing the seed argument to the shuffle method so that it reproduces the same shuffling when used twice => failed. I also tried doing the shuffling myself with a randomly shuffled range of numbers, but TF is not as flexible as NumPy in fancy indexing and the like => failed.
What I'm doing now is converting everything back to NumPy, using shuffle from sklearn, then going back to tensors by recasting. It is a plainly stupid way. This is supposed to happen inside a graph.

You could just shuffle the indices and then use tf.gather() to extract values corresponding to those shuffled indices:
TF2.x (UPDATE)
import tensorflow as tf
import numpy as np
x = tf.convert_to_tensor(np.arange(5))
y = tf.convert_to_tensor(['a', 'b', 'c', 'd', 'e'])
indices = tf.range(start=0, limit=tf.shape(x)[0], dtype=tf.int32)
shuffled_indices = tf.random.shuffle(indices)
shuffled_x = tf.gather(x, shuffled_indices)
shuffled_y = tf.gather(y, shuffled_indices)
print('before')
print('x', x.numpy())
print('y', y.numpy())
print('after')
print('x', shuffled_x.numpy())
print('y', shuffled_y.numpy())
# before
# x [0 1 2 3 4]
# y [b'a' b'b' b'c' b'd' b'e']
# after
# x [4 0 1 2 3]
# y [b'e' b'a' b'b' b'c' b'd']
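Since the question says this is supposed to happen inside a graph: the same shuffle-indices-then-gather pattern also works under tf.function. A minimal sketch, assuming TF 2.x (the shapes and names here are illustrative, not from the original answer):
@tf.function
def shuffle_together(x, y):
    # One permutation of the leading axis, applied to both tensors
    indices = tf.random.shuffle(tf.range(tf.shape(x)[0]))
    return tf.gather(x, indices), tf.gather(y, indices)

shuffled_x, shuffled_y = shuffle_together(tf.ones((4, 2, 2, 1)), tf.range(4))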
TF1.x
import tensorflow as tf
import numpy as np
x = tf.placeholder(tf.float32, (None, 1, 1, 1))
y = tf.placeholder(tf.int32, (None))
indices = tf.range(start=0, limit=tf.shape(x)[0], dtype=tf.int32)
shuffled_indices = tf.random.shuffle(indices)
shuffled_x = tf.gather(x, shuffled_indices)
shuffled_y = tf.gather(y, shuffled_indices)
Make sure that you compute shuffled_x and shuffled_y in the same session run; otherwise they might get different index orderings (a sketch of this failure mode follows the test below).
# Testing
x_data = np.concatenate([np.zeros((1, 1, 1, 1)),
                         np.ones((1, 1, 1, 1)),
                         2 * np.ones((1, 1, 1, 1))]).astype('float32')
y_data = np.arange(4, 7, 1)
print('Before shuffling:')
print('x:')
print(x_data.squeeze())
print('y:')
print(y_data)
with tf.Session() as sess:
    x_res, y_res = sess.run([shuffled_x, shuffled_y],
                            feed_dict={x: x_data, y: y_data})
print('After shuffling:')
print('x:')
print(x_res.squeeze())
print('y:')
print(y_res)
Before shuffling:
x:
[0. 1. 2.]
y:
[4 5 6]
After shuffling:
x:
[1. 2. 0.]
y:
[5 6 4]
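As promised, a sketch of the failure mode when the two gathers are fetched in separate session runs (hypothetical snippet, reusing the placeholders and test data from above):
with tf.Session() as sess:
    # DON'T: each sess.run re-evaluates tf.random.shuffle, so the two
    # fetches may be computed with two different permutations.
    x_res = sess.run(shuffled_x, feed_dict={x: x_data, y: y_data})
    y_res = sess.run(shuffled_y, feed_dict={x: x_data, y: y_data})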

Related

Tensorflow randomly sample from each row

Suppose I have a tensor A of shape (m, n). I would like to randomly sample k elements (without replacement) from each row, resulting in a tensor B of shape (m, k). How can I do that in TensorFlow?
An example would be:
A: [[1,2,3], [4,5,6], [7,8,9], [10,11,12]]
k: 2
B: [[1,3],[5,6],[9,8],[12,10]]
This is one way to do it:
import tensorflow as tf
with tf.Graph().as_default(), tf.Session() as sess:
    tf.random.set_random_seed(0)
    a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], tf.int32)
    k = tf.constant(2, tf.int32)
    # Transpose, shuffle, slice, undo transpose
    aT = tf.transpose(a)
    aT_shuff = tf.random.shuffle(aT)
    aT_shuff_k = aT_shuff[:k]
    result = tf.transpose(aT_shuff_k)
    print(sess.run(result))
    # [[ 3  1]
    #  [ 6  4]
    #  [ 9  7]
    #  [12 10]]
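Note that this shuffles whole columns, so the same k positions are selected in every row. If each row should be sampled independently (as in the example in the question), one option is to rank random noise per row. A minimal sketch, assuming TF 2.x (tf.argsort and the batch_dims argument of tf.gather):
import tensorflow as tf
a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], tf.int32)
k = 2
noise = tf.random.uniform(tf.shape(a))       # one random key per element
perm = tf.argsort(noise, axis=-1)            # an independent permutation of each row
b = tf.gather(a, perm[:, :k], batch_dims=1)  # k samples per row, shape (m, k)
print(b.numpy())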

Efficient method for finding index of first occurrence of a number in batched data other than for loop

I am doing a task in which I have data in the form of frames stored in batches. The dimension of a batch is (batch_size, 400), and I want to find the index of the first occurrence of the number 1 in each 400-length frame.
Currently I am using a for loop over the batch size, but since the data is very large this is very time consuming.
Any other efficient method using some matrix operation in TensorFlow or NumPy would be appreciated.
In TensorFlow:
import tensorflow as tf

def index_of_first_tf(batch, value):
    eq = tf.equal(batch, value)
    has_value = tf.reduce_any(eq, axis=-1)
    _, idx = tf.math.top_k(tf.cast(eq, tf.int8))
    idx = tf.squeeze(idx, -1)
    return tf.where(has_value, idx, -tf.ones_like(idx))
In NumPy:
import numpy as np

def index_of_first_np(batch, value):
    eq = np.equal(batch, value)
    has_value = np.any(eq, axis=-1)
    idx = np.argmax(eq, axis=-1)
    idx[~has_value] = -1
    return idx
Tests:
import tensorflow as tf
batch = [[0, 1, 2, 3],
         [1, 2, 1, 0],
         [0, 2, 3, 4]]
value = 1
print(index_of_first_np(batch, value))
# [ 1  0 -1]
with tf.Graph().as_default(), tf.Session() as sess:
    print(sess.run(index_of_first_tf(batch, value)))
    # [ 1  0 -1]
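In TF 2.x, tf.argmax can replace the top_k trick, mirroring the NumPy version above. A minimal sketch under that assumption (NumPy guarantees argmax returns the first occurrence among ties; TF's docs are less explicit about tie-breaking, so treat this as a sketch):
import tensorflow as tf

def index_of_first_tf2(batch, value):
    eq = tf.equal(batch, value)
    has_value = tf.reduce_any(eq, axis=-1)
    # argmax over the 0/1 mask lands on the first 1 in each row
    idx = tf.argmax(tf.cast(eq, tf.int32), axis=-1, output_type=tf.int32)
    return tf.where(has_value, idx, -tf.ones_like(idx))

print(index_of_first_tf2([[0, 1, 2, 3], [1, 2, 1, 0], [0, 2, 3, 4]], 1).numpy())
# [ 1  0 -1]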

Keras - pad tensor with values on the borders

I have an image whose size is not even, so when a convolution scales it down by a factor of 2 and I then apply Conv2DTranspose, I don't get back consistent sizes, which is a problem.
So I thought I'd pad the intermediate tensor with an extra row and column, using the same values as on the edges, for minimal disruption. How do I do this in Keras? Is it even possible? What are my alternatives?
With TensorFlow as backend, you could use tf.concat() to append a duplicate of a row/column to your tensor.
Supposing you want to duplicate the last row/column:
import tensorflow as tf
from keras.layers import Lambda, Input
from keras.models import Model
import numpy as np

def duplicate_last_row(tensor):
    return tf.concat((tensor, tf.expand_dims(tensor[:, -1, ...], 1)), axis=1)

def duplicate_last_col(tensor):
    return tf.concat((tensor, tf.expand_dims(tensor[:, :, -1, ...], 2)), axis=2)

# --------------
# Demonstrating with TF:
x = tf.convert_to_tensor([[[1, 2, 3], [4, 5, 6]],
                          [[10, 20, 30], [40, 50, 60]]])
x = duplicate_last_row(duplicate_last_col(x))
with tf.Session() as sess:
    print(sess.run(x))
    # [[[ 1  2  3  3]
    #   [ 4  5  6  6]
    #   [ 4  5  6  6]]
    #
    #  [[10 20 30 30]
    #   [40 50 60 60]
    #   [40 50 60 60]]]

# --------------
# Using as a Keras Layer:
inputs = Input(shape=(5, 5, 3))
padded = Lambda(lambda t: duplicate_last_row(duplicate_last_col(t)))(inputs)
model = Model(inputs=inputs, outputs=padded)
model.compile(optimizer="adam", loss='mse', metrics=['mse'])
batch = np.random.rand(2, 5, 5, 3)
x = model.predict(batch, batch_size=2)
print(x.shape)
# (2, 6, 6, 3)
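Alternatively, tf.pad with mode='SYMMETRIC' mirrors values at the border, so padding by one duplicates the edge row/column in a single call. A minimal sketch of that alternative, using the same rank-3 demo tensor as above:
import tensorflow as tf
x = tf.convert_to_tensor([[[1, 2, 3], [4, 5, 6]],
                          [[10, 20, 30], [40, 50, 60]]])
# Pad one row and one column at the end of the two inner axes; 'SYMMETRIC'
# reflects including the edge, which duplicates the last row/column.
padded = tf.pad(x, paddings=[[0, 0], [0, 1], [0, 1]], mode='SYMMETRIC')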

How do I select certain columns of a 2D tensor in TensorFlow?

As generalized slicing is being worked on in this issue, what would be the best way to achieve an op gathering columns of a 2D tensor (matrix)? For example, for tensor t:
1 2 3 4
5 6 7 8
and indices [1,3], I would like to get:
2 4
6 8
which is equivalent to numpy t[:, [1,3]].
Meanwhile, the gather method has gained an axis parameter.
import tensorflow as tf
params = tf.constant([[1,2,3],[4,5,6]])
indices = [0,2]
op = tf.gather(params, indices, axis=1)
produces the output
[[1 3]
[4 6]]
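Applied to the exact tensor from the question, the same call (a quick check under the same assumption that gather accepts axis):
t = tf.constant([[1, 2, 3, 4],
                 [5, 6, 7, 8]])
op = tf.gather(t, [1, 3], axis=1)
# [[2 4]
#  [6 8]]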
There is a function named tf.nn.embedding_lookup(params, ids) which retrieves the rows of the params tensor.
To achieve what you want, we can first transpose the tensor t from which you want to select certain columns. Then we look up the rows of tf.transpose(t) (i.e., the columns of t). After the selection, we transpose the result back.
import tensorflow as tf
t = tf.constant([[1, 2, 3],
                 [4, 5, 6]])
ind = tf.constant([0, 2])
result = tf.transpose(tf.nn.embedding_lookup(tf.transpose(t), ind))
with tf.Session() as sess:
    print(sess.run(result))
In the meantime, I created a workaround by flattening the input and using gather:
def gather_cols(params, indices, name=None):
    """Gather columns of a 2D tensor.

    Args:
        params: A 2D tensor.
        indices: A 1D tensor. Must be one of the following types: ``int32``, ``int64``.
        name: A name for the operation (optional).

    Returns:
        A 2D Tensor. Has the same type as ``params``.
    """
    with tf.op_scope([params, indices], name, "gather_cols") as scope:
        # Check input
        params = tf.convert_to_tensor(params, name="params")
        indices = tf.convert_to_tensor(indices, name="indices")
        try:
            params.get_shape().assert_has_rank(2)
        except ValueError:
            raise ValueError('\'params\' must be 2D.')
        try:
            indices.get_shape().assert_has_rank(1)
        except ValueError:
            raise ValueError('\'indices\' must be 1D.')

        # Define op
        p_shape = tf.shape(params)
        p_flat = tf.reshape(params, [-1])
        i_flat = tf.reshape(tf.reshape(tf.range(0, p_shape[0]) * p_shape[1],
                                       [-1, 1]) + indices, [-1])
        return tf.reshape(tf.gather(p_flat, i_flat),
                          [p_shape[0], -1])
Which for:
params = tf.constant([[1, 2, 3],
                      [4, 5, 6]])
indices = [0, 2]
op = gather_cols(params, indices)
produces the expected output:
[[1 3]
[4 6]]

reduce_sum by certain dimension

I have two embedding tensors A and B, which look like
[
[1,1,1],
[1,1,1]
]
and
[
[0,0,0],
[1,1,1]
]
What I want to do is calculate the L2 distance d(A, B) element-wise.
First I did a tf.square(tf.sub(lhs, rhs)) to get
[
[1,1,1],
[0,0,0]
]
and then I want to do an element-wise reduce which returns
[
3,
0
]
but tf.reduce_sum does not allow me to reduce by row. Any input would be appreciated. Thanks.
Add the reduction_indices argument with a value of 1, e.g.:
tf.reduce_sum( tf.square( tf.sub( lhs, rhs) ), 1 )
That should produce the result you're looking for. Here is the documentation on reduce_sum().
According to the TensorFlow documentation, reduce_sum takes the following arguments:
tf.reduce_sum(input_tensor, axis=None, keep_dims=False, name=None, reduction_indices=None)
But reduction_indices has been deprecated; it is better to use axis instead. If axis is not set, it reduces along all dimensions.
As an example, this is taken from the documentation:
# 'x' is [[1, 1, 1]
#         [1, 1, 1]]
tf.reduce_sum(x) ==> 6
tf.reduce_sum(x, 0) ==> [2, 2, 2]
tf.reduce_sum(x, 1) ==> [3, 3]
tf.reduce_sum(x, 1, keep_dims=True) ==> [[3], [3]]
tf.reduce_sum(x, [0, 1]) ==> 6
The above requirement can be written in this manner:
import numpy as np
import tensorflow as tf
a = np.array([[1, 7, 1], [1, 1, 1]])
b = np.array([[0, 0, 0], [1, 1, 1]])
xtr = tf.placeholder("float", [None, 3])
xte = tf.placeholder("float", [None, 3])
pred = tf.reduce_sum(tf.square(tf.subtract(xtr, xte)), 1)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    nn_index = sess.run(pred, feed_dict={xtr: a, xte: b})
    print(nn_index)
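For reference, the same row-wise reduction in TF 2.x style, where eager execution removes the placeholders and session (a minimal sketch under that assumption):
import tensorflow as tf
a = tf.constant([[1., 1., 1.], [1., 1., 1.]])
b = tf.constant([[0., 0., 0.], [1., 1., 1.]])
# axis=1 sums the squared differences across each row
print(tf.reduce_sum(tf.square(a - b), axis=1).numpy())  # [3. 0.]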