I have these two matrices and I need to combine them like this:
Here they are in code:
matrix_a = tf.Variable(np.zeros(big_shape, dtype=np.float32))
matrix_b = tf.Variable(np.zeros(small_shape, dtype=np.float32))
#here I need to combine them
How can I do that?
###################################################### EDITED
Thanks to @jdehesa I've written this code:
shape = (batch_size, window_size, window_size, num_channels)
# the variable we're going to optimize over
modifier = tf.Variable(np.zeros(shape, dtype=np.float32))
mask = tf.zeros((batch_size, image_size, image_size, num_channels), tf.float32)
# Get input shapes
modifier_shape = tf.shape(modifier)
mask_shape = tf.shape(mask)
# Make indices grid
oo, ii, jj, kk = tf.meshgrid(tf.range(modifier_shape[0]), tf.range(modifier_shape[1]), tf.range(modifier_shape[2], modifier_shape[3]), indexing='ij')
# Shift indices
ii += y_window
jj += x_window
# Scatter update
mask_to_apply = tf.tensor_scatter_nd_update(mask, tf.stack([oo, ii, jj, kk], axis=-1), modifier)
but now I have this error:
ValueError: Requires start <= limit when delta > 0: 28/1 for 'range_2' (op: 'Range') with input shapes: [], [], [] and with computed input tensors: input[0] = <28>, input[1] = <1>, input[2] = <1>.
Why?
This is a way to do that:
import tensorflow as tf
# Input data (assumes two 2D tensors, `a` at least as big as `b`)
a = tf.zeros((4, 4), tf.int32)
b = tf.ones((2, 2), tf.int32)
# Get input shapes
a_shape = tf.shape(a)
b_shape = tf.shape(b)
# Make indices grid
ii, jj = tf.meshgrid(tf.range(b_shape[0]), tf.range(b_shape[1]), indexing='ij')
# Shift indices
ii += a_shape[0] - b_shape[0]
jj += a_shape[1] - b_shape[1]
# Scatter update
c = tf.tensor_scatter_nd_update(a, tf.stack([ii, jj], axis=-1), b)
tf.print(c)
# [[0 0 0 0]
# [0 0 0 0]
# [0 0 1 1]
# [0 0 1 1]]
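As for the error in the edited question: tf.range(modifier_shape[2], modifier_shape[3]) is a single call that uses the window size (28) as start and the channel count (1) as limit, i.e. an empty range, which tf.range rejects. Each axis needs its own tf.range. A minimal sketch of the likely fix (variable names taken from the edited question):
# One tf.range per axis, so meshgrid gets four index vectors
oo, ii, jj, kk = tf.meshgrid(tf.range(modifier_shape[0]),
                             tf.range(modifier_shape[1]),
                             tf.range(modifier_shape[2]),
                             tf.range(modifier_shape[3]),
                             indexing='ij')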
from __future__ import print_function
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
tf.set_random_seed(777) # reproducibility
sentence = ("if you want to build a ship, don't drum up people together to "
"collect wood and don't assign them tasks and work, but rather "
"teach them to long for the endless immensity of the sea.")
char_set = list(set(sentence))
char_dic = {w: i for i, w in enumerate(char_set)}
data_dim = len(char_set)
hidden_size = len(char_set)
num_classes = len(char_set)
sequence_length = 10 # Any arbitrary number
learning_rate = 0.1
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1: i + sequence_length + 1]
    print(i, x_str, '->', y_str)
    x = [char_dic[c] for c in x_str]  # x str to index
    y = [char_dic[c] for c in y_str]  # y str to index
    dataX.append(x)
    dataY.append(y)
batch_size = len(dataX)
X = tf.placeholder(tf.int32, [None, sequence_length])
Y = tf.placeholder(tf.int32, [None, sequence_length])
# One-hot encoding
X_one_hot = tf.one_hot(X, num_classes)
print(X_one_hot) # check out the shape
def lstm_cell():
    cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
    return cell
multi_cells = rnn.MultiRNNCell([lstm_cell() for _ in range(2)], state_is_tuple=True)
# outputs: unfolding size x hidden size, state = hidden size
outputs, _states = tf.nn.dynamic_rnn(multi_cells, X_one_hot, dtype=tf.float32)
# FC layer
X_for_fc = tf.reshape(outputs, [-1, hidden_size])
outputs = tf.contrib.layers.fully_connected(X_for_fc, num_classes, activation_fn=None)
# reshape out for sequence_loss
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes])
# All weights are 1 (equal weights)
weights = tf.ones([batch_size, sequence_length])
sequence_loss = tf.contrib.seq2seq.sequence_loss(
logits=outputs, targets=Y, weights=weights)
mean_loss = tf.reduce_mean(sequence_loss)
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(mean_loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(500):
    _, l, results = sess.run(
        [train_op, mean_loss, outputs], feed_dict={X: dataX, Y: dataY})
    for j, result in enumerate(results):
        index = np.argmax(result, axis=1)
        print(i, j, ''.join([char_set[t] for t in index]), l)
# Let's print the last char of each result to check it works
results = sess.run(outputs, feed_dict={X: dataX})
for j, result in enumerate(results):
    index = np.argmax(result, axis=1)
    if j == 0:  # print all for the first result to make a sentence
        print(''.join([char_set[t] for t in index]), end='')
    else:  # later windows only add one new character at the end
        print(char_set[index[-1]], end='')
'''
0 167 tttttttttt 3.23111
0 168 tttttttttt 3.23111
0 169 tttttttttt 3.23111
…
499 167 of the se 0.229616
499 168 tf the sea 0.229616
499 169 the sea. 0.229616
g you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
'''
(Please understand that English is not my native language.)
I don't understand the last if/else part of the code above; can anyone explain it?
Why does it call print(''.join([char_set[t] for t in index]), end='') only when j == 0,
and in the else case, why does it call print(char_set[index[-1]], end='')?
Please explain how this code works.
That last bit is just checking whether the network works. It first generates predictions for every training window, then iterates through them. Because consecutive windows are shifted by one character, the first result already contains a full window of the sentence, and each later result contributes exactly one new character at its end; printing the whole first result and then only the last character of each subsequent one therefore reconstructs the entire sentence. That is up to you entirely, if you want to change it.
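To make the reconstruction logic concrete, here is a minimal pure-Python sketch of the same if/else, using a hypothetical list of overlapping windows in place of the network's results:
# Hypothetical overlapping windows, each shifted by one character,
# standing in for the `results` the network produces above
windows = ["if you wan", "f you want", " you want ", "you want t"]
out = []
for j, w in enumerate(windows):
    if j == 0:
        out.append(w)        # first window: take all of it
    else:
        out.append(w[-1])    # later windows: only the last char is new
print(''.join(out))          # if you want t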
tf.unravel_index takes a flat index into an array and a shape, and returns the tuple that represents that index in the array.
with tf.Session() as sess:
    t = tf.unravel_index(tf.constant(15), [2, 3, 4])
    print(sess.run(t))  # [1 0 3]
Is there an inverse? tf.ravel_index doesn't exist, but maybe it's under a different name?
Here is how you can do the equivalent to np.ravel_multi_index in TensorFlow:
import tensorflow as tf
# multi_index is 2D (num dimensions x num indices), dims is 1D
# Does not check for out of bounds indices
def tf_ravel_multi_index(multi_index, dims):
    strides = tf.cumprod(dims, exclusive=True, reverse=True)
    return tf.reduce_sum(multi_index * tf.expand_dims(strides, 1), axis=0)
# Test
with tf.Graph().as_default(), tf.Session() as sess:
    # Shape tensor
    shape = tf.constant([4, 5, 6])
    # Some flat indices
    idx = tf.constant([23, 56, 4, 17])
    # Unravel indices
    unravel = tf.unravel_index(idx, shape)
    # Ravel indices again
    idx2 = tf_ravel_multi_index(unravel, shape)
    # Check result
    print(*sess.run((unravel, idx2)), sep='\n')
# [[0 1 0 0]
# [3 4 0 2]
# [5 2 4 5]]
# [23 56 4 17]
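As a sanity check, the same round trip can be reproduced with NumPy's np.ravel_multi_index, which the function above mirrors (a quick sketch using the same shape and indices):
import numpy as np
shape = (4, 5, 6)
idx = np.array([23, 56, 4, 17])
multi = np.unravel_index(idx, shape)       # per-dimension indices
idx2 = np.ravel_multi_index(multi, shape)  # back to flat indices
print(multi)  # (array([0, 1, 0, 0]), array([3, 4, 0, 2]), array([5, 2, 4, 5]))
print(idx2)   # [23 56  4 17]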
I want to create a batch of all-zero images with several channels, where each image has one given pixel set to one.
If the images are indexed only by channel, the following code does the job just fine:
num_channels = 3
im_size = 2
images = np.zeros((num_channels, im_size, im_size))
# random locations for the ones
pixels = np.random.randint(low=0, high=im_size,
size=(num_channels, 2))
images[np.arange(num_channels), pixels[:, 0], pixels[:, 1]] = 1
However, the analogous code fails if we want to consider the batch too:
batch_size = 4
num_channels = 3
im_size = 2
images = np.zeros((batch_size, num_channels, im_size, im_size))
# random locations for the ones
pixels = np.random.randint(low=0, high=im_size,
size=(batch_size, num_channels, 2))
images[np.arange(batch_size), np.arange(num_channels), pixels[:, :, 0], pixels[:, :, 1]] = 1
which gives the error
IndexError: shape mismatch: indexing arrays could not be broadcast together with shapes (4,) (3,) (4,3) (4,3)
The following code does the job, using an inefficient loop:
batch_size = 4
num_channels = 3
im_size = 2
images = np.zeros((batch_size, num_channels, im_size, im_size))
# random locations for the ones
pixels = np.random.randint(low=0, high=im_size,
size=(batch_size, num_channels, 2))
for k in range(batch_size):
    images[k, np.arange(num_channels), pixels[k, :, 0], pixels[k, :, 1]] = 1
How would you obtain a vectorized solution?
A simple vectorized solution using advanced indexing would be:
I, J = np.arange(batch_size)[:, None], np.arange(num_channels)
images[I, J, pixels[..., 0], pixels[..., 1]] = 1
An alternative, easier way to get those I, J indexers is with np.ogrid:
I, J = np.ogrid[:batch_size, :num_channels]
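Putting it together, a small self-contained sketch with a check that every (batch, channel) image ends up with exactly one nonzero pixel:
import numpy as np

batch_size, num_channels, im_size = 4, 3, 2
images = np.zeros((batch_size, num_channels, im_size, im_size))
pixels = np.random.randint(low=0, high=im_size,
                           size=(batch_size, num_channels, 2))
# I broadcasts over the batch axis, J over the channel axis;
# together they pair element-wise with the (4, 3) pixel coordinates
I, J = np.ogrid[:batch_size, :num_channels]
images[I, J, pixels[..., 0], pixels[..., 1]] = 1
# Each (batch, channel) image contains exactly one pixel set to 1
assert (images.sum(axis=(2, 3)) == 1).all()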
I need to initialize a checkerboard matrix to merge two feature maps in my TensorFlow graph. I was able to do it for a known shape using NumPy alongside TF, like this:
def checkerboard_concat(x1, x2):
    mask1 = np.ones((10, 10, 3))
    mask1[1::2, ::2] = 0
    mask1[::2, 1::2] = 0
    mask2 = np.zeros((10, 10, 3))
    mask2[1::2, ::2] = 1
    mask2[::2, 1::2] = 1
    return x1 * mask1 + x2 * mask2
But I was not able to do it with a dynamic shape. I used tf.shape(), which returns a tensor of shape (N,), but I don't know how to evaluate it dynamically.
I also tried tf.ones_like(x1), but I couldn't use subscripts to modify it like a NumPy array.
Here is a solution based on modulo and XOR operations:
import tensorflow as tf
def make_checkerboard(N):
    """
    Return an NxN checkerboard matrix M, i.e. with M(i,j) == True if (i+j) mod 2 == 1
    :param N: Length of the checkerboard (can be dynamic)
    :return: Boolean tensor of shape NxN
    """
    range_n = tf.range(N)
    odd_ind = tf.cast(tf.mod(range_n, 2), dtype=tf.bool)
    odd_rows = tf.tile(tf.expand_dims(odd_ind, -1), [1, N])
    odd_cols = tf.tile(tf.expand_dims(odd_ind, 0), [N, 1])
    checker = tf.logical_xor(odd_rows, odd_cols)
    return checker
def checkerboard_concat(x1, x2, is_batch=True):
    dynamic_n = tf.shape(x1)[1 if is_batch else 0]
    mask2 = make_checkerboard(dynamic_n)
    mask2 = tf.expand_dims(mask2, -1)  # Expand masks to cover channels
    mask1 = tf.logical_not(mask2)
    return x1 * tf.cast(mask1, dtype=x1.dtype) + x2 * tf.cast(mask2, dtype=x2.dtype)
# Example:
tf.reset_default_graph()
sess = tf.InteractiveSession()
x1 = tf.ones((4,4,3), dtype=tf.int32)
x2 = tf.ones((4,4,3), dtype=tf.int32) * 2
x = checkerboard_concat(x1, x2, is_batch=False)
res = sess.run(x)
print(res[...,0])
# [[1 2 1 2]
# [2 1 2 1]
# [1 2 1 2]
# [2 1 2 1]]
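Since the point of the question was dynamic shapes, the same function also works when the spatial size is only known at run time, e.g. through a placeholder. A small sketch reusing the sess from above (x1_ph and x2_ph are new names here, and square feature maps are assumed, as in the example):
import numpy as np
x1_ph = tf.placeholder(tf.int32, shape=[None, None, 3])
x2_ph = tf.placeholder(tf.int32, shape=[None, None, 3])
x_dyn = checkerboard_concat(x1_ph, x2_ph, is_batch=False)
res = sess.run(x_dyn, feed_dict={x1_ph: np.ones((6, 6, 3), np.int32),
                                 x2_ph: np.full((6, 6, 3), 2, np.int32)})
print(res[..., 0])  # a 6x6 checkerboard of 1s and 2s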
I am building a graph where the input is split into a list of 30 tensors, and I then apply a shared RNN layer to each element of the list.
It takes about a minute until the model is compiled. Does it have to be like this (and why?), or is there anything I am doing wrong?
Code:
import keras
from keras import backend as K
from keras.layers import TimeDistributed
# (`se` is a settings object defined elsewhere; se.action_space is the split size, 30 here)

shared_lstm = keras.layers.LSTM(4, return_sequences=True)
shared_dense = TimeDistributed(keras.layers.Dense(1, activation='sigmoid'))
inp_train = keras.layers.Input([None, se.action_space, 3])
# Split each possible measured label into a list:
inputs_train = [ keras.layers.Lambda(lambda x: x[:, :, i, :])(inp_train) for i in range(se.action_space) ]
# Apply the shared weights on each tensor:
lstm_out_train = [shared_lstm(x) for x in inputs_train]
dense_out_train = [(shared_dense(x)) for x in lstm_out_train]
# Merge the tensors again:
out_train = keras.layers.Lambda(lambda x: K.stack(x, axis=2))(dense_out_train)
# "Pick" the unique element along where the inp_train tensor is == 1.0 (along axis=2, in the next time step, of the first dimension of axis=3)
# (please disregard this line if it seems too complex)
shift_and_pick_layer = keras.layers.Lambda(lambda x: K.sum(x[0][:, :-1, :, 0] * x[1][:, 1:, :, 0], axis=2))
out_train = shift_and_pick_layer([out_train, inp_train])
m_train = keras.models.Model(inp_train, out_train)