I'm trying to convert my TensorFlow code to TensorFlow eager. The problem is that the forward pass predicts the same actions for different input values in eager mode, while the normal TensorFlow code with a graph works fine. I've only changed the network; the agent is the same one I used with normal TensorFlow. What could be the problem with the network? The forward pass is in the function get_probs().
Another issue is that the eager network is very slow; I think graph execution is 2-3 times faster.
Example probs for one episode in eager
...
[0.31471518 0.33622807 0.34905672]
[0.31472355 0.3363353 0.34894115]
[0.31482834 0.33600125 0.34917045]
[0.31461707 0.33643782 0.34894508]
[0.31466153 0.33620775 0.34913075]
[0.31461093 0.33637658 0.3490125 ]
[0.31452385 0.33623937 0.34923682]
[0.31438416 0.33645296 0.3491629 ]
[0.31471425 0.3363982 0.34888753]
[0.314866 0.33610862 0.34902537]
[0.31489033 0.33622313 0.34888652]
...
Example probs for one episode with tensorflow graph
...
[0.25704077 0.46056205 0.28239718]
[0.20610097 0.49288744 0.30101162]
[0.24638997 0.5338215 0.2197885 ]
[0.22581507 0.51206875 0.2621162 ]
[0.19064051 0.5398092 0.26955026]
[0.24399564 0.4424694 0.313535 ]
[0.25321653 0.48051655 0.26626688]
[0.2241595 0.43447506 0.3413655 ]
[0.20665398 0.5128011 0.28054494]
[0.2943201 0.39530927 0.3103706 ]
...
Network
import tensorflow as tf
from keras.layers import *
import numpy as np

tf.enable_eager_execution()
print(tf.executing_eagerly())


class PGEagerAtariNetwork:
    def __init__(self, state_space, action_space, lr):
        self.state_space = state_space
        self.action_space = action_space

        self.model = tf.keras.Sequential()
        self.model.add(InputLayer(input_shape=(84, 84, 4)))

        # Conv
        self.model.add(Conv2D(filters=32, kernel_size=[8, 8], strides=[4, 4], activation='relu', name='conv1'))
        self.model.add(Conv2D(filters=64, kernel_size=[4, 4], strides=[2, 2], activation='relu', name='conv2'))
        self.model.add(Conv2D(filters=128, kernel_size=[4, 4], strides=[2, 2], activation='relu', name='conv3'))

        # Flatten
        self.model.add(Flatten(name='flatten'))

        # Fully connected
        self.model.add(Dense(units=512, activation='relu', name='fc1'))

        # Logits
        self.model.add(Dense(units=self.action_space, activation=None, name='logits'))
        self.model.summary()

        # Optimizer
        self.optimizer = tf.train.AdamOptimizer(learning_rate=lr)

    def get_probs(self, s):
        s = s[np.newaxis, :]
        logits = self.model(s)
        probs = tf.nn.softmax(logits).numpy().squeeze()
        return probs

    def update_policy(self, s, r, a):
        with tf.GradientTape() as tape:
            loss = self.calc_loss(s, r, a)
        grads = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables),
                                       global_step=tf.train.get_or_create_global_step())

    def calc_loss(self, s, r, a):
        logits = self.model(s)
        policy_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=a, logits=logits)
        policy_loss = tf.reduce_mean(policy_loss * tf.stop_gradient(r))
        loss = tf.reduce_mean(policy_loss)
        return loss
Related
I am trying to create this custom ANN using TensorFlow. Here is an image of the toy network and the code.
import tensorflow as tf
import numpy as np
in = np.array([1, 2, 3, 4], dtype="float32")
y_true = np.array([10, 11], dtype="float32")

# w is vector of weights
# y_pred = np.array([in[0]*w[0]+in[1]*w[0]], [in[2]*w[1]+in[3]*w[1]] )
# y_pred1 = 1 / (1 + tf.math.exp(-y_pred))  # sigmoid activation function

def loss_fun(y_true, y_pred1):
    loss1 = tf.reduce_sum(tf.pow(y_pred1 - y_true, 2))

# model.compile(loss=loss_fun, optimizer='adam', metrics=['accuracy'])
The output of this network goes to another ANN to the right, and I know that part, but I don't know how to create the connections, update w and y_pred, and compile the model. Any help?
Something like this ought to work
import tensorflow as tf
import numpy as np

def y_pred(x, w):
    return [x[0]*w[0] + x[1]*w[0], x[2]*w[1] + x[3]*w[1]]

def loss_fun(y_true, y_pred):
    return tf.reduce_sum(tf.pow(y_pred - y_true, 2))

x = np.array([1, 2, 3, 4], dtype="float32")
y_true = np.array([10, 11], dtype="float32")

w = tf.Variable(initial_value=np.random.normal(size=(2)), name='weights', dtype=tf.float32)
xt = tf.convert_to_tensor(x)
yt = tf.convert_to_tensor(y_true)

sgd_opt = tf.optimizers.SGD()
training_steps = 100
display_steps = 10

for step in range(training_steps):
    with tf.GradientTape() as tape:
        tape.watch(w)
        yp = y_pred(xt, w)
        loss = loss_fun(yt, yp)
    dl_dw = tape.gradient(loss, w)
    sgd_opt.apply_gradients(zip([dl_dw], [w]))
    if step % display_steps == 0:
        print(loss, w)
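Note that a GradientTape watches trainable tf.Variables automatically, so the tape.watch(w) call is not strictly required here; it is harmless to keep it.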
I went over a basic example of TF 2.0 containing very simple code:
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import tensorflow as tf
import cProfile

# Fetch and format the mnist data
(mnist_images, mnist_labels), _ = tf.keras.datasets.mnist.load_data()

dataset = tf.data.Dataset.from_tensor_slices(
    (tf.cast(mnist_images[..., tf.newaxis]/255, tf.float32),
     tf.cast(mnist_labels, tf.int64)))
dataset = dataset.shuffle(1000).batch(32)

# Build the model
mnist_model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(16, [3, 3], activation='relu',
                           input_shape=(None, None, 1)),
    tf.keras.layers.Conv2D(16, [3, 3], activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10)
])

for images, labels in dataset.take(1):
    print("Logits: ", mnist_model(images[0:1]).numpy())

optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

loss_history = []

def train_step(model, images, labels):
    with tf.GradientTape() as tape:
        logits = model(images, training=True)

        # Add asserts to check the shape of the output.
        tf.debugging.assert_equal(logits.shape, (32, 10))

        loss_value = loss_object(labels, logits)

    loss_history.append(loss_value.numpy().mean())
    grads = tape.gradient(loss_value, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

def train(epochs):
    for epoch in range(epochs):
        for (batch, (images, labels)) in enumerate(dataset):
            train_step(mnist_model, images, labels)
        print('Epoch {} finished'.format(epoch))
I trained it, and saved trainable_variables before and after training as follows:
t0 = mnist_model.trainable_variables
train(epochs=3)
t1 = mnist_model.trainable_variables

diff = tf.reduce_mean(tf.abs(t0[0] - t1[0]))
# whether indexing [0] or [1] etc., diff comes out the same
print(diff.numpy())
They are the same!!!
So am I checking something incorrectly? If that is the case, how can I observe the updated variables correctly?
You aren't creating new arrays of variables, just two references to the same objects.
Try this:
t0 = np.array(mnist_model.trainable_variables)
I'm migrating a current Tensorflow 1.x model built with estimators across to Tensorflow 2.0 Keras. The migration has been relatively smooth until it comes to serialising the model for serving.
The model is specified as follows
model = tf.keras.Sequential()
model.add(tf.keras.layers.DenseFeatures(feature_columns))
for units in hidden_layers:
    model.add(tf.keras.layers.Dense(units, activation='relu'))
model.add(tf.keras.layers.Dense(2, activation=None))
I am using the TensorFlow feature columns API, which expects a dictionary of features as input, and I am applying a transformation to those features before they pass into the model.
For example, when training:
def dataset_transformation_function(feature_dict: Dict[str, tf.Tensor]):
    output_dict = feature_dict.copy()
    output_dict['logx1'] = tf.math.log(feature_dict['x1'])
    return output_dict

train_dataset = (
    tf.data.Dataset.from_tensor_slices(
        (train_feature_dict, train_label_vector)
    )
    .shuffle(n_train)
    .batch(batch_size)
    .map(dataset_transformation_function)
    .repeat()
    .prefetch(tf.data.experimental.AUTOTUNE)
)
It appears that to perform the same transformation at serve time I require:
input_tensors = [tf.TensorSpec(name=...), ...]

@tf.function(input_signature=input_tensors)
def dataset_transformation_function(args) -> Dict[str, tf.Tensor]:
    ...
And
tf.saved_model.save(
    model,
    MODEL_DIR,
    signatures=feature_transform,
)
However, I cannot determine the correct signature for the input tensors or the function.
The method I am migrating from is:
def serving_input_fn():
    receiver_tensors = {
        'x1': tf.placeholder(dtype=tf.float32, shape=[None, ], name='x1'),
        'x2': tf.placeholder(dtype=tf.string, shape=[None, ], name='x2'),
    }
    features = dataset_transformation_function(
        receiver_tensors
    )
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

estimator.export_savedmodel(
    MODEL_DIR,
    serving_input_fn,
    as_text=False,
    checkpoint_path=estimator.best_checkpoint,
)
To answer my own question, it seems that the solution is to provide a function which, when called, does the preprocessing and then calls the model. Example here:
# tensorflow 2.0.0
import tensorflow as tf
import numpy as np
from tensorflow import feature_column as fc

hidden_layers = [4, 4]
feature_columns = [fc.numeric_column(name) for name in ['x1', 'x2', 'logx1']]

# construct a simple sequential model
model = tf.keras.Sequential()
model.add(tf.keras.layers.DenseFeatures(feature_columns))
for units in hidden_layers:
    model.add(tf.keras.layers.Dense(units, activation='relu'))
model.add(tf.keras.layers.Dense(2, activation=None))

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss='mae',
    metrics=['mae']
)

x_train = {'x1': np.arange(10), 'x2': np.arange(10), 'logx1': np.log1p(np.arange(10))}
x_predict = {'x1': np.arange(10), 'x2': np.arange(10)}
y = np.random.random(size=10)

model.fit(x=x_train, y=y)
trained_model_predictions = model.predict(x_train)

# preprocessing function for serving
@tf.function()
def serve_predict(x1, x2):
    preprocessed_feature = tf.math.log1p(x1)
    output = {
        'x1': x1,
        'x2': x2,
        'logx1': preprocessed_feature
    }
    prediction = model(output)
    return prediction

serve_predict = serve_predict.get_concrete_function(x1=tf.TensorSpec([None, ]), x2=tf.TensorSpec([None, ]))

tf.saved_model.save(
    model,
    '/tmp/tf',
    signatures=serve_predict
)

# check the models give the same output
loaded = tf.saved_model.load('/tmp/tf')
loaded_model_predictions = loaded.serve_predict(x1=tf.range(10, dtype=tf.float32), x2=tf.range(10, dtype=tf.float32))
np.testing.assert_allclose(trained_model_predictions, loaded_model_predictions, atol=1e-6)
I have a custom TensorFlow op written in C++ that builds successfully, and I call it in TensorFlow code like this:
from libs.customer_op import customer_op

output = customer_op(x, filter=w, rates=[1, 1, rate, rate], padding="SAME", strides=[1, 1, stride, stride])
Now I am using Keras with the TensorFlow backend. Is it possible to call my function above in Keras? Do we need to do some extra registration step?
Update: Thanks to Matias Valdenegro for the suggestion. I have tried it. This is my full code in TensorFlow and what I have done in Keras.
- TensorFlow code:
def my_conv(input, num_o, kernel_size, stride):
    num_x = input.shape[3].value
    offset = slim.conv2d(input, 18, [kernel_size, kernel_size], stride=stride, activation_fn=None, scope='offset', normalizer_fn=None)
    w = tf.get_variable('weights', shape=[num_o, num_x, kernel_size, kernel_size],
                        initializer=tf.contrib.layers.xavier_initializer())
    output = customer_conv(input, filter=w, offset=offset, padding="SAME")
-Keras code:
def my_conv(input, num_o, kernel_size, stride):
    num_x = input.shape[3].value
    offset = KL.Conv2D(18, (kernel_size, kernel_size), strides=(stride, stride))(input)
    w = KI.TruncatedNormal(mean=0.0, stddev=0.05, seed=None)
    output = Lambda(lambda x: deform_conv_op(x, filter=w, offset=offset, padding="SAME"))(input)
    return output
So, this is the place where I call the function:
class CustomerCNN():
    def __init__(self, mode):
        self.mode = mode

    def build(self, mode):
        # Inputs
        input_image = KL.Input(
            shape=config.IMAGE_SHAPE.tolist(), name="input_image")
        f1 = Lambda(lambda x: my_conv(x, 256, 3, 1))(input_image)
With the above solution, I still have two open issues:
How to initialize a weight with shape [num_o, num_x, kernel_size, kernel_size] in Keras?
How to call my custom conv my_conv inside the class CustomerCNN? Do we need one more Lambda function as I did?
You can just call it with a lambda layer:
output = Lambda(lambda x: customer_op(x, filter=w, rates=[1, 1, rate, rate],
                                      padding="SAME", strides=[1, 1, stride, stride]))(input)
I'm trying to create a dice_loss function in TensorFlow.
I'm running into trouble with TensorFlow when executing the following code:
import tensorflow as tf
import tensorlayer as tl

def conv3d(x, inChans, outChans, kernel_size, stride, padding):
    weights = weight_variable([kernel_size, kernel_size, kernel_size, inChans, outChans])
    biases = bias_variable([outChans])
    conv = tf.nn.conv3d(x, weights, strides=[1, stride, stride, stride, 1], padding=padding)
    return tf.nn.bias_add(conv, biases)

def train(loss_val, var_list):
    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
    grads = optimizer.compute_gradients(loss_val, var_list=var_list)
    return optimizer.apply_gradients(grads)

def main(argv=None):
    image = tf.placeholder(tf.float32, shape=[None, SLICE_SIZE, IMAGE_SIZE, IMAGE_SIZE, 1], name="input_image")
    annotation = tf.placeholder(tf.float32, shape=[None, SLICE_SIZE, IMAGE_SIZE, IMAGE_SIZE, 1], name="annotation")

    logits, pred_annotation = vnet.VNet(image)
    loss = 1 - tl.cost.dice_coe(output=pred_annotation, target=annotation, axis=[1, 2, 3, 4])

    trainable_var = tf.trainable_variables()
    train_op = train(loss, trainable_var)

    sess = tf.Session()
    ...

...

def VNet(x):
    ...
    out = tf.nn.elu(BatchNorm3d(conv3d(x, inChans, 2, kernel_size=5, stride=1, padding="SAME")))
    out = conv3d(out, 2, 2, kernel_size=1, stride=1, padding="SAME")
    annotation_pred = tf.to_float(tf.argmax(out, dimension=4, name='prediction'))
    return out, tf.expand_dims(annotation_pred, dim=4)
I get the following error:
ValueError: No gradients provided for any variable: ...
Can someone help me?
When you do annotation_pred = tf.to_float(tf.argmax(out, dimension=4, name='prediction')), you get the index of the max value in your tensor. This index can't be differentiated, so the gradient can't flow through this operation.
Since your loss is defined only by this value, and the gradient can't flow through it, no gradient can be calculated for your network.
I don't know exactly how the dice loss works, but maybe you wanted to use tf.reduce_max instead of tf.argmax, or you have to find a way to use an operation that lets the gradient flow.
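For example, a minimal sketch of that idea (assuming out is the raw two-channel output of VNet and annotation is the binary mask, as in the question) would compute the loss from the softmax probabilities instead of from annotation_pred:
# Sketch only: build the dice loss from continuous probabilities, not argmax,
# so the gradient can flow back into the network.
probs = tf.nn.softmax(out, axis=4)                   # shape [N, D, H, W, 2]
foreground = tf.expand_dims(probs[..., 1], axis=4)   # probability of class 1
loss = 1 - tl.cost.dice_coe(output=foreground, target=annotation, axis=[1, 2, 3, 4])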