CancelledError: [_Derived_]RecvAsync is cancelled - tensorflow

I am having an issue: the same code runs fine on my local machine with CPU and TensorFlow 1.14.0. However, when I run it on GPU with TensorFlow 2.0, I get
CancelledError: [_Derived_]RecvAsync is cancelled. [[{{node Adam/Adam/update/AssignSubVariableOp/_65}}]] [[Reshape_13/_62]] [Op:__inference_distributed_function_3722]
Function call stack: distributed_function
Reproducible code is here:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
print(tf.__version__)
import matplotlib.pyplot as plt
%matplotlib inline
batch_size = 32
num_obs = 100
num_cats = 1 # number of categorical features
n_steps = 10 # number of timesteps in each sample
n_numerical_feats = 18 # number of numerical features in each sample
cat_size = 12 # number of unique categories in each categorical feature
embedding_size = 1 # embedding dimension for each categorical feature
labels = np.random.random(size=(num_obs*n_steps,1)).reshape(-1,n_steps,1)
print(labels.shape)
#(100, 10, 1)
#18 numerical variables
num_data = np.random.random(size=(num_obs*n_steps,n_numerical_feats))
print(num_data.shape)
#(1000, 18)
#Reshaping numeric features to fit into an LSTM network
features = num_data.reshape(-1,n_steps, n_numerical_feats)
print(features.shape)
#(100, 10, 18)
#one categorical variable with 12 levels
cat_data = np.random.randint(0,cat_size,num_obs*n_steps)
print(cat_data.shape)
#(1000,)
idx = cat_data.reshape(-1, n_steps)
print(idx.shape)
#(100, 10)
numerical_inputs = keras.layers.Input(shape=(n_steps, n_numerical_feats), name='numerical_inputs', dtype='float32')
#<tf.Tensor 'numerical_inputs:0' shape=(None, 10, 18) dtype=float32>
cat_input = keras.layers.Input(shape=(n_steps,), name='cat_input')
#<tf.Tensor 'cat_input:0' shape=(None, 10) dtype=float32>
cat_embedded = keras.layers.Embedding(cat_size, embedding_size, embeddings_initializer='uniform')(cat_input)
#<tf.Tensor 'embedding_1/Identity:0' shape=(None, 10, 1) dtype=float32>
merged = keras.layers.concatenate([numerical_inputs, cat_embedded])
#<tf.Tensor 'concatenate_1/Identity:0' shape=(None, 10, 19) dtype=float32>
lstm_out = keras.layers.LSTM(64, return_sequences=True)(merged)
#<tf.Tensor 'lstm_2/Identity:0' shape=(None, 10, 64) dtype=float32>
Dense_layer1 = keras.layers.Dense(32, activation='relu', use_bias=True)(lstm_out)
#<tf.Tensor 'dense_4/Identity:0' shape=(None, 10, 32) dtype=float32>
Dense_layer2 = keras.layers.Dense(1, activation='linear', use_bias=True)(Dense_layer1)
#<tf.Tensor 'dense_5/Identity:0' shape=(None, 10, 1) dtype=float32>
model = keras.models.Model(inputs=[numerical_inputs, cat_input], outputs=Dense_layer2)
#compile model
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mae', 'mse'])
EPOCHS = 5
#fit the model
#you can use input layer names instead
history = model.fit([features, idx],
                    y=labels,
                    epochs=EPOCHS,
                    batch_size=batch_size)
Does anyone have similar issues? Obviously this is a bug, but I do not know how to work around it because I want to keep using TensorFlow 2.0.

I found that tensorflow-gpu 2.0.0 was compiled against cuDNN 7.6.0.
I then updated my cuDNN from 7.4.2 to 7.6.4 and the problem was solved.
To summarize:
Update cuDNN to 7.6.x;
Use TF_FORCE_GPU_ALLOW_GROWTH=true to force-allow GPU memory growth.
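If it helps, the environment variable can also be set from inside Python; a minimal sketch (assuming it runs before TensorFlow initializes the GPU):
import os
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'  # must be set before TF touches the GPU
import tensorflow as tf  # imported only after the variable is set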

I have faced similar issues; these steps may help you with code on TF 2.0:
Check the GPU memory and make sure nothing else is running on it.
Run the script below before importing Keras or TensorFlow (restart the runtime, then execute it first).
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:  # guard against CPU-only environments
    tf.config.experimental.set_memory_growth(gpus[0], True)
Try reducing your model size and batch size, if possible, until it works.

Related

Keras incompatible shapes NN

So I have this neural network and I am feeding examples "X" and labels "Y" whose shapes are:
X.shape = (10,10,2)
Y.shape = (10,10,2)
The code for the model looks like:
import tensorflow as tf
from convert import process
import numpy as np
X, Y, rate = process('songs/song1.wav')
X = np.array(X[:10])
Y = np.array(Y[:10])
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128))
model.add(tf.keras.layers.Dense(128))
model.add(tf.keras.layers.Dense(20))
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.fit(X, Y, epochs=2)
Now for some reason, once I run this, I get the error:
ValueError: Shapes (None, 10, 2) and (None, 20) are incompatible
I am confused because I fed it data where each example of both "X" and "Y" has shape (10, 2). So why is it saying that I passed it (None, 10, 2) and (None, 20)?
Your last layer uses a linear activation whereas you chose the categorical_crossentropy loss. Set either
model.add(tf.keras.layers.Dense(20, activation='softmax'))
....loss='categorical_crossentropy')
or,
model.add(tf.keras.layers.Dense(20))
....loss='mse')
Also check your data shape, especially the labels (y).
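For instance, if each (10, 2) label should really be one 20-value target, a minimal sketch (assuming flattening the labels is what you intend):
# Flatten each (10, 2) label into a 20-vector so it matches the Dense(20) output
Y = Y.reshape(len(Y), -1)  # (10, 10, 2) -> (10, 20)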

How to solve "KeyError: '/conv2d_1/kernel:0'"

I am trying to use Colab to run the gym package with Pac-Man, since the hardware in Colab is more powerful than my notebook. The program simulates successfully in Jupyter on my notebook using TensorFlow 1.14. However, errors keep appearing when I run it in Google Colab, even though I have debugged and changed parts of the code so that it can be used with TensorFlow 2.0. Below is my code:
#First we import all the necessary libraries
import numpy as np
import gym
import tensorflow as tf
from tensorflow import keras
from keras.layers import Flatten, Conv2D, Dense
#from tensorflow.contrib.layers import Flatten, conv2d, Dense
from collections import deque, Counter
import random
from datetime import datetime
#Now we define a function called preprocess_observation for preprocessing our input game screen.
#We reduce the image size and convert the image into greyscale.
color = np.array([210, 164, 74]).mean()
def preprocess_observation(obs):
    # Crop and resize the image
    img = obs[1:176:2, ::2]
    # Convert the image to greyscale
    img = img.mean(axis=2)
    # Improve image contrast
    img[img == color] = 0
    # Next we normalize the image from -1 to +1
    img = (img - 128) / 128
    return img.reshape(88, 80, 1)
#Let us initialize our gym environment
env = gym.make('MsPacman-v0')
n_outputs = env.action_space.n
print(n_outputs)
print(env.env.get_action_meanings())
observation = env.reset()
import tensorflow as tf
import matplotlib.pyplot as plt
for i in range(22):
    if i > 20:
        plt.imshow(observation)
        plt.show()
    observation, _, _, _ = env.step(1)
#Okay, Now we define a function called q_network for building our Q network. We input the game state to the Q network
#and get the Q values for all the actions in that state.
#We build Q network with three convolutional layers with same padding followed by a fully connected layer.
tf.compat.v1.reset_default_graph()
def q_network(X, name_scope):
    # Initialize layers
    initializer = tf.compat.v1.keras.initializers.VarianceScaling(scale=2.0)
    with tf.compat.v1.variable_scope(name_scope) as scope:
        # initialize the convolutional layers
        #layer_1 = tf.keras.layers.Conv2D(X, 32, kernel_size=(8,8), stride=4, padding='SAME', weights_initializer=initializer)
        layer_1_set = Conv2D(32, (8, 8), strides=4, padding="SAME", kernel_initializer=initializer)
        layer_1 = layer_1_set(X)
        tf.compat.v1.summary.histogram('layer_1', layer_1)
        #layer_2 = tf.keras.layers.Conv2D(layer_1, 64, kernel_size=(4,4), stride=2, padding='SAME', weights_initializer=initializer)
        layer_2_set = Conv2D(64, (4, 4), strides=2, padding="SAME", kernel_initializer=initializer)
        layer_2 = layer_2_set(layer_1)
        tf.compat.v1.summary.histogram('layer_2', layer_2)
        #layer_3 = tf.keras.layers.Conv2D(layer_2, 64, kernel_size=(3,3), stride=1, padding='SAME', weights_initializer=initializer)
        layer_3_set = Conv2D(64, (3, 3), strides=1, padding="SAME", kernel_initializer=initializer)
        layer_3 = layer_3_set(layer_2)
        tf.compat.v1.summary.histogram('layer_3', layer_3)
        flatten_layer = Flatten()  # instantiate the layer
        flat = flatten_layer(layer_3)
        fc_set = Dense(128, kernel_initializer=initializer)
        fc = fc_set(flat)
        tf.compat.v1.summary.histogram('fc', fc)
        # Add final output layer
        output_set = Dense(n_outputs, activation=None, kernel_initializer=initializer)
        output = output_set(fc)
        tf.compat.v1.summary.histogram('output', output)
        vars = {v.name[len(scope.name):]: v for v in tf.compat.v1.get_collection(key=tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)}
        # Return both variables and outputs together
        return vars, output
#Next we define a function called epsilon_greedy for performing epsilon greedy policy. In epsilon greedy policy we either select the best action
#with probability 1 - epsilon or a random action with probability epsilon.
#We use decaying epsilon greedy policy where value of epsilon will be decaying over time
#as we don't want to explore forever. So over time our policy will be exploiting only good actions.
epsilon = 0.5
eps_min = 0.05
eps_max = 1.0
eps_decay_steps = 500000
def epsilon_greedy(action, step):
    p = np.random.random(1).squeeze()
    epsilon = max(eps_min, eps_max - (eps_max - eps_min) * step / eps_decay_steps)
    if np.random.rand() < epsilon:
        return np.random.randint(n_outputs)
    else:
        return action
#Now, we initialize our experience replay buffer of length 20000 which holds the experience.
#We store all the agent's experience i.e (state, action, rewards) in the
#experience replay buffer and we sample from this minibatch of experience for training the network.
buffer_len = 20000
exp_buffer = deque(maxlen=buffer_len)
# Now we define our network hyperparameters,
num_episodes = 800
batch_size = 48
input_shape = (None, 88, 80, 1)
learning_rate = 0.001
X_shape = (None, 88, 80, 1)
discount_factor = 0.97
global_step = 0
copy_steps = 100
steps_train = 4
start_steps = 2000
logdir = 'logs'
tf.compat.v1.reset_default_graph()
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
# Now we define the placeholder for our input i.e game state
X = tf.placeholder(tf.float32, shape=X_shape)
#X = tf.Variable(tf.float32, tf.ones(shape=X_shape))
# we define a boolean called in_training_model to toggle the training
in_training_mode = tf.placeholder(tf.bool)
# we build our Q network, which takes the input X and generates Q values for all the actions in the state
mainQ, mainQ_outputs = q_network(X, 'mainQ')
# similarly we build our target Q network, for policy evaluation
targetQ, targetQ_outputs = q_network(X, 'targetQ')
# define the placeholder for our action values
X_action = tf.placeholder(tf.int32, shape=(None,))
Q_action = tf.reduce_sum(targetQ_outputs * tf.one_hot(X_action, n_outputs), axis=-1, keepdims=True)
#Copy the primary Q network parameters to the target Q network
copy_op = [tf.compat.v1.assign(main_name, targetQ[var_name]) for var_name, main_name in mainQ.items()]
copy_target_to_main = tf.group(*copy_op)
#Compute and optimize loss using gradient descent optimizer
# define a placeholder for our output i.e action
y = tf.placeholder(tf.float32, shape=(None,1))
# now we calculate the loss which is the difference between actual value and predicted value
loss = tf.reduce_mean(tf.square(y - Q_action))
# we use adam optimizer for minimizing the loss
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
loss_summary = tf.summary.scalar('LOSS', loss)
merge_summary = tf.summary.merge_all()
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
OK, up to here everything runs. The error comes out when I run this cell in Colab:
#Copy the primary Q network parameters to the target Q network
copy_op = [tf.compat.v1.assign(main_name, targetQ[var_name]) for var_name, main_name in mainQ.items()]
copy_target_to_main = tf.group(*copy_op)
The error gives:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-13-58715282cea8> in <module>()
----> 1 copy_op = [tf.compat.v1.assign(main_name, targetQ[var_name]) for var_name, main_name in mainQ.items()]
2 copy_target_to_main = tf.group(*copy_op)
<ipython-input-13-58715282cea8> in <listcomp>(.0)
----> 1 copy_op = [tf.compat.v1.assign(main_name, targetQ[var_name]) for var_name, main_name in mainQ.items()]
2 copy_target_to_main = tf.group(*copy_op)
KeyError: '/conv2d_1/kernel:0'
I have two questions.
First, how do I solve the error stated above?
Second, in TensorFlow 2.0 and above the placeholder command is replaced by tf.Variable, so I rewrote the code
X = tf.placeholder(tf.float32, shape=X_shape)
to become
X = tf.Variable(tf.float32, tf.ones(shape=X_shape))
and still got an error, so I had to use the commands below:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
X = tf.placeholder(tf.float32, shape=X_shape)
but I get a warning like this:
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
Instructions for updating: non-resource variables are not supported in the long term
I have searched Stack Overflow intensively by keyword, yet I can't find a solution. I am really looking forward to any advice. Thank you very much.

Keras fit_generator with images from directory and a constant tensor

I have a simple CNN with input images of shape (5,5,3). As a first step I want to add a constant tensor to the input.
According to the answer in my previous SO question, I have to define the constant tensor as an input layer (const_input), so that I can Add() it to the image data (raw_input). The model is compiled without errors:
from __future__ import print_function
import tensorflow as tf
import numpy as np
import keras
from keras import backend as K
from keras.models import Model
from keras.layers import Input, Add
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator
# Python 2.7.10 - keras version 2.2.0 - tf.VERSION '1.8.0'
cnn_layer1 = Conv2D(32, (4, 4), activation='relu')
cnn_layer2 = MaxPooling2D(pool_size=(2, 2))
cnn_layer3 = Dense(64, activation='relu')
cnn_layer4 = Dropout(0.1)
cnn_output = Dense(2, activation='softmax')
raw_input = Input(shape=(5, 5, 3))
const_input = Input(shape=(5, 5, 3))
pre_proc = Add()([raw_input, const_input])
lay1 = cnn_layer1(pre_proc)
lay2 = cnn_layer2(lay1)
lay3 = Flatten()(lay2)
lay4 = cnn_layer3(lay3)
lay5 = cnn_layer4(lay4)
lay_out = cnn_output(lay5)
model = Model(inputs=[raw_input, const_input], outputs=lay_out)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
Now I try to provide the constant tensor as an input along with the images that are read from directory:
batch_size = 10
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    'cd_data/train',
    target_size=(5, 5),
    classes=['cat', 'dog'],
    batch_size=batch_size)
validation_generator = validation_datagen.flow_from_directory(
    'cd_data/validation',
    target_size=(5, 5),
    classes=['cat', 'dog'],
    batch_size=batch_size)
const_array = np.array(
[[[5.0,0.0,0.0],[0.0,0.0,-3.0],[-10.0,0.0,0.0],[0.0,0.0,4.0],[-20.0,0.0,0.0]],
[[-15.0,0.0,12.0],[0.0,4.0,0.0],[-3.0,0.0,10.0],[-18.0,0.0,0.0],[20.0,0.0,-6.0]],
[[0.0,0.0,6.0],[0.0,-2.0,-6.0],[0.0,0.0,2.0],[0.0,0.0,-9.0],[7.0,-6.0,0.0]],
[[-3.0,4.0,0.0],[11.0,-12.0,0.0],[0.0,0.0,0.0],[0.0,0.0,7.0],[0.0,0.0,2.0]],
[[0.0,0.0,0.0],[0.0,1.0,-2.0],[4.0,0.0,3.0],[0.0,0.0,0.0],[0.0,0.0,0.0]]])
def merge_generator():
    while True:
        next_image = train_generator.next()
        yield [next_image[0], const_array], next_image[1]
train_gen_with_const = merge_generator()
Executing fit_generator leads to the error below:
model.fit_generator(
    train_gen_with_const,
    steps_per_epoch=2,
    epochs=1,
    verbose=2,  # one line per epoch
    validation_data=validation_generator,
    validation_steps=2)
ValueError: Error when checking input: expected input_2 to have 4 dimensions, but got array with shape (5, 5, 3)
I tried to provide the missing batch dimension like this:
const_batch = np.broadcast_to(const_array, (batch_size, 5, 5, 3))
def merge_generator():
    while True:
        next_image = train_generator.next()
        yield [next_image[0], const_batch], next_image[1]
but this leads to
ValueError: All input arrays (x) should have the same number of samples. Got array shapes: [(2, 5, 5, 3), (10, 5, 5, 3)]
What is the right way to provide this constant tensor input?
Any help is highly appreciated!
The problem lies with your validation_data= argument: your model expects two input arrays, whereas validation_generator supplies only one. You already fixed this for training with train_gen_with_const; just extend the same wrapping to validation (and build const_batch per batch, so the last, smaller batch of an epoch also matches):
def merge_generator():  # const_batch is built inside the function so it matches each batch's actual size
    while True:
        next_image = train_generator.next()
        const_batch = np.broadcast_to(const_array, (len(next_image[0]), 5, 5, 3))
        yield [next_image[0], const_batch], next_image[1]
def val_merge_generator():
    while True:
        next_image = validation_generator.next()
        const_batch = np.broadcast_to(const_array, (len(next_image[0]), 5, 5, 3))
        yield [next_image[0], const_batch], next_image[1]
Remember that internally fit_generator calls train_on_batch(x, y) and evaluate(x, y), so both generators must yield x and y with the same dimensionality.
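With both generators wrapped, the fit call from the question becomes (a sketch reusing the question's arguments):
model.fit_generator(
    merge_generator(),                       # yields [images, const_batch], labels
    steps_per_epoch=2,
    epochs=1,
    verbose=2,
    validation_data=val_merge_generator(),   # wrapped validation generator
    validation_steps=2)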

keras trainable attribute not compatible with tensorflow?

It seems that the Keras trainable attribute is ignored by TensorFlow, which makes it very inconvenient to use Keras as a syntactic shortcut in TensorFlow.
For example:
import keras
import tensorflow as tf
import numpy as np
import keras.backend as K
Conv2 = keras.layers.Conv2D(filters=16, kernel_size=3, padding='same')
Conv2.trainable = False  # This layer has been set to not trainable.
A=keras.layers.Input(batch_shape=(1,16,16,3))
B = Conv2(A)
x = np.random.randn(1, 16, 16,3)
y = np.random.randn(1,16, 16, 16)
True_y = tf.placeholder(shape=(1,16,16,16), dtype=tf.float32)
loss = tf.reduce_sum((B - True_y) ** 2)
opt_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)
print(tf.trainable_variables())
# [<tf.Variable 'conv2d_1/kernel:0' shape=(3, 3, 3, 16) dtype=float32_ref>, <tf.Variable 'conv2d_1/bias:0' shape=(16,) dtype=float32_ref>]
sess = K.get_session()
for _ in range(10):
    out = sess.run([opt_op, loss], feed_dict={A: x, True_y: y})
    print(out[1])
OutPut:
5173.94
4968.7754
4785.889
4624.289
4482.1
4357.5757
4249.1504
4155.329
4074.634
4005.6482
The output simply shows that the loss is decreasing, i.e. the weights are still being trained despite trainable = False.
I read the blog "Keras as a simplified interface to TensorFlow", but it mentions nothing about this trainable problem.
Any suggestion is appreciated.
Your conclusion is basically correct. Keras is a wrapper around TensorFlow, but not all Keras functionality transfers directly into TensorFlow, so you need to be careful when you mix Keras and raw TF.
Specifically, in this case, if you want to call the minimize function yourself, you need to specify which variables you want to train on using the var_list argument of minimize.
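For example, a minimal sketch of that pattern (hypothetical: it adds a second, trainable layer Conv3 on top of the frozen Conv2, since in the snippet above every variable belongs to the frozen layer):
Conv3 = keras.layers.Conv2D(filters=16, kernel_size=3, padding='same')
C = Conv3(B)  # hypothetical trainable layer stacked on the frozen one
loss2 = tf.reduce_sum((C - True_y) ** 2)
# layer.trainable_weights honors the trainable flag: Conv2 contributes no
# variables here because Conv2.trainable is False.
train_vars = Conv2.trainable_weights + Conv3.trainable_weights
opt_op2 = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss2, var_list=train_vars)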

Tensorflow vgg16 prediction dramatically slow

I use a vgg.h5 model + Keras (TensorFlow backend on GPU) for real-time object classification. It works well.
Then I tried to use a pure TensorFlow graph with the weights from vgg.h5:
I parsed vgg.h5 with h5py and received the weights for all layers in numpy.array format
I built a graph (I store the kernels and biases in tf.Variable)
But I can't obtain the output prediction vector. After investigating I found that all convolutional layers work, but the first fully connected layer (fc1, with a 25088 x 4096 weight matrix) in VGG16 takes about 5 minutes to compute. That is not acceptable for real-time classification.
So, maybe someone has experience building VGG16 from scratch in TensorFlow and can help? Why does TensorFlow as the Keras backend work well, while pure TensorFlow (with the same weights) cannot compute the fully connected output in reasonable time? Is there any additional optimisation in Keras for the fully connected (Dense) layers?
Here is a test variant of your code, instrumented with printing shapes of tensors in several places:
import tensorflow as tf
import numpy as np
import time

with tf.Session() as sess:
    # mock the previous layer's output with a placeholder
    pool5_input = tf.placeholder(dtype=tf.float32, shape=(None, 7, 7, 512))
    # insert a print operation to print the shape
    pool5 = tf.Print(pool5_input, [tf.shape(pool5_input)], "pool5 shape is ", summarize=4)
    layer_name = 'fc1'
    wd = tf.Variable(np.ones((25088, 4096), dtype='float32'), trainable=False, name=layer_name + '_wd')
    bd = tf.Variable(np.ones((4096,), dtype='float32'), trainable=False, name=layer_name + '_bd')
    layer_shape = [-1, wd.get_shape().as_list()[0]]
    print('layer_shape:', layer_shape)
    fc1_flat = tf.reshape(pool5, shape=layer_shape)
    fc1_flat = tf.Print(fc1_flat, [tf.shape(fc1_flat)], "fc1_flat shape is ")
    fc1 = tf.nn.relu(tf.nn.bias_add(tf.matmul(fc1_flat, wd, name=layer_name), bd))
    fc1 = tf.Print(fc1, [tf.shape(fc1)], "fc1 shape is ")
    sess.run(tf.global_variables_initializer())
    # evaluate the network for an input of shape (minibatch_size, 7, 7, 512)
    minibatch_size = 32
    start = time.time()
    output = sess.run(fc1, feed_dict={pool5_input: np.ones((minibatch_size, 7, 7, 512), dtype='float32')})
    elapsed = time.time() - start
    print("time to evaluate fully connected layer for minibatch size %d: %.3f seconds" % (minibatch_size, elapsed))
    print("output shape is", output.shape)
I get the following output:
layer_shape: [-1, 25088]
...: I tensorflow/core/kernels/logging_ops.cc:79] pool5 shape is [32 7 7 512]
...: I tensorflow/core/kernels/logging_ops.cc:79] fc1_flat shape is [32 25088]
...: I tensorflow/core/kernels/logging_ops.cc:79] fc1 shape is [32 4096]
time to evaluate fully connected layer for minibatch size 32: 0.329 seconds
output shape is (32, 4096)
So for me, it takes less than a second (on a GPU) for a minibatch size of 32.
You could insert similar tf.Print() statements into your code and verify that you have the same (or similar) dimensions. By multiplying the sizes of the dimensions you can see how much memory is used at each stage.
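For example, the fc1 weight matrix alone holds 25088 x 4096 = 102,760,448 float32 values, i.e. roughly 411 MB (102,760,448 x 4 bytes); a multi-minute evaluation of a single matmul that size is usually a sign that the computation is not running where you expect (for instance on the CPU, or with the graph being rebuilt on every call).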