How are input tensors with different shapes fed to neural network? - tensorflow

I am following this tutorial on Policy Gradient using Keras,
and can't quite figure out the below.
In the below case, how exactly are input tensors with different shapes fed to the model?
The layers are neither concatenated nor added.
input1.shape = (4, 4)
input2.shape = (4,)
"input" layer has 4 neurons, and accepts input1 + input2 as 4d vector??
The code excerpt (modified to make it simpler) :
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras import backend as K
import numpy as np
# Observation input: one 4-dimensional vector per sample.
input = tf.keras.Input(shape=(4, ))
# Second model input. It is listed in `inputs` below, but no layer consumes
# it, so it takes no part in computing `output`.
advantages = tf.keras.Input(shape=[1])
# Only `input` feeds the dense stack.
dense1 = layers.Dense(32, activation='relu')(input)
dense2 = layers.Dense(32, activation='relu')(dense1)
output = layers.Dense(2, activation='softmax')(dense2)
model = tf.keras.Model(inputs=[input, advantages], outputs=[output])
# *********************************
# Batch of 4 samples for `input`, shape (4, 4).
input1 = np.array(
    [[ 4.52281174e-02, 4.31672811e-02, -4.57789579e-02, 4.35560472e-02],
     [ 4.60914630e-02, -1.51269339e-01, -4.49078369e-02, 3.21451106e-01],
     [ 4.30660763e-02, 4.44624011e-02, -3.84788148e-02, 1.49510297e-02],
     [ 4.39553243e-02, -1.50087194e-01, -3.81797942e-02, 2.95249428e-01]]
)
# One scalar per sample for `advantages`, shape (4,).
input2 = np.array(
    [ 1.60063125, 1.47153674, 1.34113826, 1.20942261]
)
# One-hot targets for the 2-way softmax output, shape (4, 2).
label = np.array(
    [[1, 0],
     [0, 1],
     [1, 0],
     [0, 1]]
)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
model.compile(optimizer=optimizers.Adam(learning_rate=0.0005), loss="binary_crossentropy")
model.train_on_batch([input1, input2], label)

In cases where you want to figure out what kind of graph you have just built, it is helpful to use the model.summary() or tf.keras.utils.plot_model() methods for debugging:
tf.keras.utils.plot_model(model, to_file="test.png", show_shapes=True, show_layer_names=True, show_dtype=True)
This will show you that your input_2 is indeed not used. Since you haven't connected it to the main graph with any operations, it has no weights associated with it (the graph runs but there is nothing to update on the right side):

Related

Tensor construction with a loop over number of batches

I want to create a tensor which is some kind of a transformation matrix (rotation matrix for instance)
My model predicts 2 parameters: x1 and x2
so the output is a tensor of shape (B, 2), where B is the batch size.
however, when I write my loss, I have to know this "B" since I want to iterate over it:
def get_rotation_tensor(x):
    """Build one combined rotation matrix per sample, looping over the batch.

    ``x`` is indexed as ``x[i, 0]`` (angle used in ``roll_mat``) and
    ``x[i, 1]`` (angle used in ``pitch_mat``). The loop bound is the
    module-level ``BATCH_SIZE``, so the batch size must be known in advance.

    Returns ``K.batch_dot(pitch_mat, roll_mat)``.
    """
    # One 3x3 matrix per sample, built from column 0 of x.
    roll_mat = K.stack([[[1, 0, 0],
                         [0, K.cos(x[i, 0]), -K.sin(x[i, 0])],
                         [0, K.sin(x[i, 0]), K.cos(x[i, 0])]]
                        for i in range(BATCH_SIZE)])
    # One 3x3 matrix per sample, built from column 1 of x.
    pitch_mat = K.stack([[[K.cos(x[i, 1]), 0, K.sin(x[i, 1])],
                          [0, 1, 0],
                          [-K.sin(x[i, 1]), 0, K.cos(x[i, 1])]]
                         for i in range(BATCH_SIZE)])
    return K.batch_dot(pitch_mat, roll_mat)
The only solution I could think of is to define BATCH_SIZE in advance, but is there a way to write a general loss function that works for any batch size?
THANKS
I found a solution
def get_rotation_tensor(x):
    """Build one combined rotation matrix per sample without knowing the batch size.

    Column 0 of ``x`` supplies the angle used in ``roll_mat`` and column 1
    the angle used in ``pitch_mat``. Every matrix entry is a whole column of
    values (one per sample), so no Python loop over the batch is needed.

    Returns ``K.batch_dot`` of the pitch and roll matrices after the batch
    axis has been moved to the front.
    """
    roll = x[:, 0]
    pitch = x[:, 1]
    # Constant entries, shaped like one column of x.
    ones = K.ones_like(roll)
    zeros = K.zeros_like(roll)
    # Evaluate each trigonometric term once and reuse it.
    cos_roll, sin_roll = K.cos(roll), K.sin(roll)
    cos_pitch, sin_pitch = K.cos(pitch), K.sin(pitch)
    # Stacking the 3x3 grid of per-sample vectors puts the batch axis last.
    roll_mat = K.stack([[ones, zeros, zeros],
                        [zeros, cos_roll, -sin_roll],
                        [zeros, sin_roll, cos_roll]])
    pitch_mat = K.stack([[cos_pitch, zeros, sin_pitch],
                         [zeros, ones, zeros],
                         [-sin_pitch, zeros, cos_pitch]])
    # Move the last (batch) axis to the front before the batched product.
    return K.batch_dot(K.permute_dimensions(pitch_mat, (2, 0, 1)),
                       K.permute_dimensions(roll_mat, (2, 0, 1)))
Perhaps I'm not fully understanding your issue, but can't you just determine the batch size by the shape of the tensors passed into the loss function. Below is an example that shows the idea. I hope this helps.
# Install TensorFlow
try:
# %tensorflow_version only exists in Colab.
%tensorflow_version 2.x
except Exception:
pass
import tensorflow as tf
print(tf.__version__)
print(tf.executing_eagerly())
# Setup repro section from Keras FAQ with TF1 to TF2 adjustments
import numpy as np
import random as rn
# The below is necessary for starting Numpy generated random numbers
# in a well-defined initial state.
np.random.seed(42)
# The below is necessary for starting core Python generated random numbers
# in a well-defined state.
rn.seed(12345)
# Force TensorFlow to use single thread.
# Multiple threads are a potential source of non-reproducible results.
# For further details, see: https://stackoverflow.com/questions/42022950/
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
inter_op_parallelism_threads=1)
# The below tf.set_random_seed() will make random number generation
# in the TensorFlow backend have a well-defined initial state.
# For further details, see:
# https://www.tensorflow.org/api_docs/python/tf/set_random_seed
tf.compat.v1.set_random_seed(1234)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)
# Rest of code follows ...
# Custom Loss
def my_custom_loss(y_true, y_pred):
    """Demo loss that reads the batch size from the tensors it receives.

    Prints ``y_true``, its columns 0 and 1, ``y_pred`` and the batch size,
    then returns the sum of the natural logs of the strictly positive
    entries of ``y_pred``. ``y_true`` is only printed; it does not enter
    the returned value.
    """
    tf.print('inside my_custom_loss:')
    tf.print('y_true:')
    tf.print(y_true)
    tf.print('y_true column 0:')
    tf.print(y_true[:, 0])
    tf.print('y_true column 1:')
    tf.print(y_true[:, 1])
    tf.print('y_pred:')
    tf.print(y_pred)
    # get length/batch size: tf.shape yields it at run time, so nothing
    # has to be hard-coded.
    batch_size = tf.shape(y_pred)[0]
    tf.print('batch_size:')
    tf.print(batch_size)
    # Keep only predictions > 0 so the log below is defined for every
    # element that is kept.
    y_zeros = tf.zeros_like(y_pred)
    y_mask = tf.math.greater(y_pred, y_zeros)
    res = tf.boolean_mask(y_pred, y_mask)
    logres = tf.math.log(res)
    finres = tf.math.reduce_sum(logres)
    return finres
# Define model: a single Dense unit with linear activation on a 1-feature input.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(1, activation='linear', input_dim=1, name="Dense1"))
# Use the custom loss function instead of a built-in loss.
model.compile(optimizer='rmsprop', loss=my_custom_loss)
print('model.summary():')
print(model.summary())
# Generate dummy data: 5 samples with one feature each.
data = np.array([[2.0],[1.0],[1.0],[3.0],[4.0]])
# Each label is a nested 2x1 list, which is what lets the loss slice
# y_true[:, 0] and y_true[:, 1].
labels = np.array([[[2.0],[1.0]],
[[0.0],[3.0]],
[[0.0],[3.0]],
[[0.0],[3.0]],
[[0.0],[3.0]]])
# Train the model.
# batch_size=3 does not divide the 5 samples evenly, so the loss is called
# with more than one batch size during the epoch.
print('training the model:')
print('-----')
model.fit(data, labels, epochs=1, batch_size=3)
print('done training the model.')
print(data.shape)
print(labels.shape)

Is there a method for Keras to read TFRecord datasets without additional data processing measures?

I am a high school student trying to learn the basics of TensorFlow. I am currently building a model with TFRecords input files, the default dataset file type from TensorFlow, that have been compressed from the original raw data. I am currently using a convoluted way of parsing the data into numpy arrays for Keras to interpret it. While Keras is a part of TF, it should be easily able to read TFRecord datasets. Is there any other way for Keras to understand TFRecord files?
I use the _decodeExampleHelper method to prepare the data for training.
def _decodeExampleHelper(example):
    """Parse one serialized tf.Example into a (features, label) pair.

    Args:
        example: a single serialized tf.Example record.

    Returns:
        A tuple ``({'dense_input': xValues}, yValues)``, where ``xValues``
        is the 7-element float32 feature and ``yValues`` the 3-element
        float32 feature declared in the parsing spec below.
    """
    dataDictionary = {
        'xValues': tf.io.FixedLenFeature([7], tf.float32),
        'yValues': tf.io.FixedLenFeature([3], tf.float32),
    }
    # Parse the input tf.Example proto using the data dictionary
    example = tf.io.parse_single_example(example, dataDictionary)
    xValues = example['xValues']
    yValues = example['yValues']
    # The Keras Sequential network will have "dense" as the name of the first layer; dense_input is the input to this layer
    return {'dense_input': xValues}, yValues
# Read serialized records from the TFRecord file; `workingDirectory` is defined elsewhere.
data = tf.data.TFRecordDataset(workingDirectory + 'training.tfrecords')
# Parse each record individually. No .batch() is applied here, so the dataset
# yields single (features, label) examples rather than batches.
parsedData = data.map(_decodeExampleHelper)
We can see that the parsedData has the correct dimensions in the following code block.
tmp = next(iter(parsedData))
print(tmp)
This outputs the first set of data in the correct dimensions that Keras should be able to interpret.
({'dense_input': <tf.Tensor: id=273, shape=(7,), dtype=float32, numpy=
array([-0.6065675 , -0.610906 , -0.65771157, -0.41417238, 0.89691925,
0.7122903 , 0.27881026], dtype=float32)>}, <tf.Tensor: id=274, shape=(3,), dtype=float32, numpy=array([ 0. , -0.65868723, -0.27960175], dtype=float32)>)
Here is a very simple model with only two layers, which I train with the data I just parsed.
# Two-layer regression network: 7 input features -> 20 ReLU units -> 3 linear outputs.
model = tf.keras.models.Sequential(
[
tf.keras.layers.Dense(20, activation = 'relu', input_shape = (7,)),
tf.keras.layers.Dense(3, activation = 'linear'),
]
)
# NOTE(review): 'accuracy' is tracked alongside a mean-absolute-error
# regression loss - confirm the metric is meaningful here.
model.compile(optimizer = 'adam', loss = 'mean_absolute_error', metrics = ['accuracy'])
# parsedData is passed to fit() exactly as built by data.map(), i.e. unbatched.
model.fit(parsedData, epochs = 1)
The line model.fit(parsedData, epochs = 1) gives an error of ValueError: Error when checking input: expected dense_input to have shape (7,) but got array with shape (1,) despite the dense_input being 7.
What could the problem be in this case? Why can Keras not interpret the tensors from the file correctly?
You need to be batching your data before passing it to Keras and using an Input layer. The following works for me just fine:
import tensorflow as tf
# Build a dataset holding a single (features, label) example; the feature
# dict key matches the name given to the Input layer below.
ds = tf.data.Dataset.from_tensors((
{'dense_input': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]}, [ 0.0, 0.1, -0.1]))
# Repeat the example 32 times and group the copies into one batch of 32,
# so the model receives batched input.
ds = ds.repeat(32).batch(32)
model = tf.keras.models.Sequential(
[
# Explicit Input layer named 'dense_input' with 7 features per example.
tf.keras.Input(shape=(7,), name='dense_input'),
tf.keras.layers.Dense(20, activation = 'relu'),
tf.keras.layers.Dense(3, activation = 'linear'),
]
)
model.compile(optimizer = 'adam', loss = 'mean_absolute_error', metrics = ['accuracy'])
model.fit(ds, epochs = 1)

How to model symmetrical function for regression using tensorflow neural network

I am trying to build a regression model for symmetric input, hoping to model a function with f(x,y)=f(y,x)=F. Unexpectedly, I found that the trained neural network gives different outputs for f(x,y) and f(y,x).
I am using dense neural network with multiple layers with Adagrad for learning on entire training set.
The part of the problem occurs because of random (non-symmetrical) weights initialization.
But it looks like making symmetrical weights on each neuron will lose benefits of using DNN.
Is it possible to solve this with a DNN, and if so, what is the way to do it?
example:
from __future__ import absolute_import, division, print_function
import pathlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import layers
print(tf.__version__)
# Four training points (x, y); the mirrored pair (0, 1) and (1, 0) appears as two separate rows.
train = pd.DataFrame([[0, 0], [0, 1], [1, 0], [1, 1]])
# Targets in the same row order; both mirrored rows share the label 1.
labels = pd.DataFrame([[0], [1], [1], [3]])
def build_model4():
    """Build and compile the small dense regression network used in the example.

    The network takes 2 input features, passes them through three
    4-unit ELU layers and ends in a single ReLU output unit. It is
    compiled with mean squared error as the loss and reports mean
    absolute error and mean squared error as metrics.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    model4 = tf.keras.Sequential([
        layers.Dense(4, activation=tf.nn.elu, input_shape=(2,)),
        layers.Dense(4, activation=tf.nn.elu),
        layers.Dense(4, activation=tf.nn.elu),
        # ReLU on the output keeps predictions non-negative.
        layers.Dense(1, activation=tf.nn.relu)
    ])
    # NOTE(review): `lr` is the older spelling of the learning-rate
    # argument; confirm it is accepted by the installed TensorFlow version.
    optimizer = tf.keras.optimizers.Adagrad(lr=0.05, epsilon=None, decay=0.0)
    model4.compile(loss='mean_squared_error',
                   optimizer=optimizer,
                   metrics=['mean_absolute_error', 'mean_squared_error'])
    return model4
model4 = build_model4()
model4.summary()
EPOCHS = 500
# batch_size=4 matches the four training rows, so every epoch is a single
# update over the entire training set.
history = model4.fit(
train, labels, epochs=EPOCHS, batch_size=4, verbose=0)
# Collect the per-epoch metrics in a DataFrame for inspection.
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()
plt.plot(history.history['mean_squared_error'], label='train')
# Query the model on a mirrored pair of inputs, (1, 2) and (2, 1), to
# compare the two predictions.
test=pd.DataFrame([[1, 2], [2, 1]])
o=model4.predict(test)
print(o)
If your model is inherently asymmetrical, there is a simple way to force symmetry explicitly:
g(x, y) = g(y, x) = 1/2 * (f(x, y) + f(y, x))

Tensorflow tf.nn.embedding_lookup

is there a small neural network in tf.nn.embedding_lookup??
When I train some data, a value of the same index is changing.
So is it trained also? while I'm training my model
I checked the official embedding_lookup code but I can not see any tf.Variables for train embedding parameter.
But when I print all tf.Variables, I can find a Variable that is within the embedding scope.
Thank you.
Yes, the embedding is learned. You can look at the tf.nn.embedding_lookup operation as doing the following matrix multiplication more efficiently:
import tensorflow as tf
import numpy as np
NUM_CATEGORIES, EMBEDDING_SIZE = 5, 3
# Index of the category to look up, supplied at run time through feed_dict.
y = tf.placeholder(name='class_idx', shape=(1,), dtype=tf.int32)
# Fixed seed so the initial embedding matrix is reproducible.
RS = np.random.RandomState(42)
W_em_init = RS.randn(NUM_CATEGORIES, EMBEDDING_SIZE)
# Embedding matrix: one row of EMBEDDING_SIZE values per category. It is an
# ordinary variable, which is why it shows up among the trainable parameters.
W_em = tf.get_variable(name='W_em',
initializer=tf.constant_initializer(W_em_init),
shape=(NUM_CATEGORIES, EMBEDDING_SIZE))
# Using tf.nn.embedding_lookup
y_em_1 = tf.nn.embedding_lookup(W_em, y)
# Using multiplication: one-hot encode the index, then multiply by the embedding matrix
y_one_hot = tf.one_hot(y, depth=NUM_CATEGORIES)
y_em_2 = tf.matmul(y_one_hot, W_em)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Evaluate both formulations for index 1; the expected output is shown below.
# NOTE(review): the fed value is the float 1.0 although the placeholder dtype
# is tf.int32 - an integer 1 would match the declared dtype.
sess.run([y_em_1, y_em_2], feed_dict={y: [1.0]})
# [array([[ 1.5230298 , -0.23415338, -0.23413695]], dtype=float32),
# array([[ 1.5230298 , -0.23415338, -0.23413695]], dtype=float32)]
The variable W_em will be trained in exactly the same way irrespective of whether you use y_em_1 or y_em_2 formulation; y_em_1 is likely to be more efficient, though.

Minimal RNN example in tensorflow

Trying to implement a minimal toy RNN example in tensorflow.
The goal is to learn a mapping from the input data to the target data, similar to this wonderful concise example in theanets.
Update: We're getting there. The only part remaining is to make it converge (and less convoluted). Could someone help to turn the following into running code or provide a simple example?
import tensorflow as tf
from tensorflow.python.ops import rnn_cell
# Toy setup: learn to map input_data to target with an unrolled GRU.
# NOTE(review): the block structure below was reconstructed from a listing
# whose indentation had been lost; verify it against the original post.
init_scale = 0.1  # not referenced anywhere below
num_steps = 7
num_units = 7
input_data = [1, 2, 3, 4, 5, 6, 7]
target = [2, 3, 4, 5, 6, 7, 7]
#target = [1,1,1,1,1,1,1] #converges, but not what we want
batch_size = 1
with tf.Graph().as_default(), tf.Session() as session:
    # Placeholder for the inputs and target of the net
    # inputs = tf.placeholder(tf.int32, [batch_size, num_steps])
    input1 = tf.placeholder(tf.float32, [batch_size, 1])
    # The same placeholder object is reused for every time step, so all
    # unrolled steps see the same fed value.
    inputs = [input1 for _ in range(num_steps)]
    # Declared but never used below: the loss reads the Python list `target`.
    outputs = tf.placeholder(tf.float32, [batch_size, num_steps])
    gru = rnn_cell.GRUCell(num_units)
    initial_state = state = tf.zeros([batch_size, num_units])
    loss = tf.constant(0.0)
    # setup model: unroll
    for time_step in range(num_steps):
        if time_step > 0: tf.get_variable_scope().reuse_variables()
        step_ = inputs[time_step]
        output, state = gru(step_, state)
        # NOTE(review): GRU outputs are presumably bounded in [-1, 1]. With
        # targets 2..7 the smallest per-step sum |1 - target| is 27, and
        # 7 steps give 189 - the plateau reported below. Confirm.
        loss += tf.reduce_sum(abs(output - target)) # all norms work equally well? NO!
    final_state = state
    optimizer = tf.train.AdamOptimizer(0.1) # CONVERGEs sooo much better
    train = optimizer.minimize(loss) # let the optimizer train
    numpy_state = initial_state.eval()
    session.run(tf.initialize_all_variables())
    for epoch in range(10): # now
        for i in range(7): # feed fake 2D matrix of 1 byte at a time ;)
            # Carry the final state of the previous run into the next one.
            feed_dict = {initial_state: numpy_state, input1: [[input_data[i]]]} # no
            numpy_state, current_loss,_ = session.run([final_state, loss,train], feed_dict=feed_dict)
        # NOTE(review): printing once per epoch is assumed; the original
        # nesting of this line could not be recovered.
        print(current_loss) # hopefully going down, always stuck at 189, why!?
I think there are a few problems with your code, but the idea is right.
The main issue is that you're using a single tensor for inputs and outputs, as in:
inputs = tf.placeholder(tf.int32, [batch_size, num_steps]).
In TensorFlow the RNN functions take a list of tensors (because num_steps can vary in some models). So you should construct inputs like this:
inputs = [tf.placeholder(tf.int32, [batch_size, 1]) for _ in xrange(num_steps)]
Then you need to take care of the fact that your inputs are int32s, but a RNN cell works on float vectors - that's what embedding_lookup is for.
And finally you'll need to adapt your feed to put in the input list.
I think the ptb tutorial is a reasonable place to look, but if you want an even more minimal example of an out-of-the-box RNN you can take a look at some of the rnn unit tests, e.g., here.
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/kernel_tests/rnn_test.py#L164