Tensorflow Executor failed to create kernel on GPU - tensorflow

I am trying to train a network with TensorFlow on a GPU, but this warning is thrown during the training process.
I checked the free memory of the GPU; it seems fine.
E tensorflow/core/common_runtime/executor.cc:623] Executor failed to create kernel. Invalid argument: Default AvgPoolingOp only supports NHWC on device type CPU
    [[{{node vgg_src/pool1}} = AvgPool[T=DT_FLOAT, data_format="NCHW", ksize=[1, 1, 2, 2], padding="SAME", strides=[1, 1, 2, 2], _device="/job:localhost/replica:0/task:0/device:GPU:0"](vgg_src/conv1_2/Relu)]]
Although I can still train and run the network, I want to know what causes this problem, and how could I fix it?
--
Update
Added the code of my model, which is a modified VGG-16 network.
import os
import tensorflow as tf
import numpy as np
import pdb

vgg_mean = [0.485, 0.456, 0.406]
vgg_std = [0.229, 0.224, 0.225]

data = None
dir_path = os.path.dirname(os.path.realpath(__file__))
# dir_path = os.path.normpath(os.path.join(dir_path, os.pardir))
weights_path = os.path.join(dir_path, 'models', 'vgg16_onnx.npy')

class Model():
    def __init__(self, vgg16_npy_path=None):
        global data
        if vgg16_npy_path is None:
            path = weights_path
            print(path)
            if os.path.exists(path):
                vgg16_npy_path = path
            else:
                print("VGG16 weights were not found in the project directory!")
                exit(0)
        if data is None:
            data = np.load(vgg16_npy_path, encoding='latin1')
            self.data_dict = data.item()
            print("VGG16 weights loaded")
        else:
            self.data_dict = data.item()

    def build(self, bgr_input):
        '''notice that opencv load image with bgr order, but the pretrained model is designed for rgb'''
        blue, green, red = tf.split(axis=3, num_or_size_splits=3, value=bgr_input)
        rgb = tf.concat(axis=3, values=[
            (red - vgg_mean[0]) / vgg_std[0],
            (green - vgg_mean[1]) / vgg_std[1],
            (blue - vgg_mean[2]) / vgg_std[2],
        ])

        self.conv1_1 = self.conv_layer(rgb, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.avg_pool(self.conv1_2, 'pool1')

        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.avg_pool(self.conv2_2, 'pool2')

        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.pool3 = self.avg_pool(self.conv3_3, 'pool3')

        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.pool4 = self.avg_pool(self.conv4_3, 'pool4')

        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.pool5 = self.avg_pool(self.conv5_3, 'pool5')

        self.fc6 = self.fc_layer(self.pool5, 'fc6')
        self.fc7 = self.fc_layer(self.fc6, 'fc7')
        self.fc8 = self.fc_layer(self.fc7, 'fc8')

        self.data_dict = None

    def avg_pool(self, bottom, name):
        return tf.nn.avg_pool(bottom,
                              ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def max_pool(self, bottom, name):
        return tf.nn.max_pool(bottom,
                              ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, name, stride=1):
        with tf.variable_scope(name):
            filt = self.get_conv_filter(name)
            conv = tf.nn.conv2d(bottom, filt, [1, stride, stride, 1], padding='SAME')
            conv_biases = self.get_bias(name)
            bias = tf.nn.bias_add(conv, conv_biases)

            mean = self.get_mean(name)
            variance = self.get_variance(name)
            offset = self.get_beta(name)
            scale = self.get_gamma(name)
            norm = tf.nn.batch_normalization(bias, mean, variance, offset, scale, 1e-20)

            relu = tf.nn.relu(norm)
            return relu

    def fc_layer(self, bottom, name):
        with tf.variable_scope(name):
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])

            weights = self.get_fc_weight(name)
            biases = self.get_bias(name)

            # Fully connected layer. Note that the '+' operation automatically
            # broadcasts the biases.
            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
            return fc

    def get_mean(self, name):
        return tf.constant(self.data_dict[name][4], name="mean")

    def get_variance(self, name):
        return tf.constant(self.data_dict[name][5], name="variance")

    def get_gamma(self, name):
        return tf.constant(self.data_dict[name][2], name="gamma")

    def get_beta(self, name):
        return tf.constant(self.data_dict[name][3], name="beta")

    def get_conv_filter(self, name):
        return tf.constant(np.rollaxis(np.rollaxis(np.rollaxis(self.data_dict[name][0], 1), 2), 3), name="filter")

    def get_bias(self, name):
        return tf.constant(self.data_dict[name][1], name="biases")

    def get_fc_weight(self, name):
        return tf.constant(np.rollaxis(self.data_dict[name][0], 1), name="weights")

This is not a GPU-memory-size issue.
Read this thread:
https://github.com/tensorpack/tensorpack/issues/263
You may need to change your code accordingly:
TensorFlow only supports NHWC for this op on the CPU.
Check out these two lines from that thread; you may need to change the data_format parameter in the same way:
- with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling], data_format='NCHW'), \
+ with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling], data_format='NHWC')
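The same idea applies to the model code above: the data layout of the tensors and the order of ksize/strides handed to the pooling op have to agree. A minimal sketch of the two options, assuming TF 1.x graph mode (the helper names below are made up for illustration; they are not from the original model):

import tensorflow as tf

def avg_pool_nhwc(bottom, name):
    # If `bottom` is NCHW, transpose to NHWC, pool with NHWC-ordered
    # ksize/strides, then transpose back.
    nhwc = tf.transpose(bottom, [0, 2, 3, 1])
    pooled = tf.nn.avg_pool(nhwc, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                            padding='SAME', name=name)
    return tf.transpose(pooled, [0, 3, 1, 2])

def avg_pool_nchw(bottom, name):
    # Or keep NCHW and say so explicitly, with ksize/strides in NCHW order
    # (this kernel variant is only registered for GPU).
    return tf.nn.avg_pool(bottom, ksize=[1, 1, 2, 2], strides=[1, 1, 2, 2],
                          padding='SAME', data_format='NCHW', name=name)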

Related

Custom layer with tf.extract_image_patches extremely slow

I'm new to tensorflow. I'm trying to implement a custom pooling layer, using OWA operators (https://github.com/jiforcen/ordered-weighted-pooling). For that I'm using tf.extract_image_patches, but this operation is extremely slow when the input data dimensions are large, as raised in Issue #13017.
I believe that the pooling layer that I implemented has a behavior very similar to what is performed by the tf.keras.layers.MaxPooling2D layer. Inspecting the code of MaxPooling2D I saw that it calls the gen_nn_ops.max_pool method.
I tried to take a look at what's inside gen_nn_ops.max_pool, but I can't find it in the repository. From what I've googled I can't find the source code because it's automatically generated by bazel. If I build from source, I'll see this file inside bazel-genfiles, right? This file contains automatically generated Python wrappers to underlying C++ implementations.
Is it possible to create a custom pooling operation in C++ and use it in my Python code? I'm adding the custom layer code that I implemented.
import tensorflow as tf
from keras import backend as K
from tensorflow.keras.constraints import Constraint
from tensorflow.python.util.tf_export import keras_export
from tensorflow.python.keras.utils import conv_utils
# import skimage.measure

#keras_export('keras.constraints.UnitSumNonNeg', 'keras.constraints.unit_sum_non_neg')
class UnitSumNonNeg(Constraint):
    """Limits weights to be non-negative and with sum equal to one

    Also available via the shortcut function `keras.constraints.unit_sum_non_neg`.
    """
    def __call__(self, w):
        aux = w * tf.cast(tf.math.greater_equal(w, 0.), w.dtype)
        return aux / (K.epsilon() + tf.reduce_sum(aux, axis=[0], keepdims=True))

class OWAPoolingNew(tf.keras.layers.Layer):
    def __init__(self,
                 pool_size=(2, 2),
                 strides=None,
                 padding='valid',
                 data_format=None,
                 name=None,
                 sort=True,
                 train=True,
                 seed=None,
                 all_channels=False,
                 **kwargs):
        super(OWAPoolingNew, self).__init__(name=name, **kwargs)
        self.pool_size = pool_size
        self.strides = pool_size if strides is None else strides
        self.padding = padding
        self.data_format = conv_utils.normalize_data_format('channels_last')
        self.sort = sort
        self.train = train
        self.seed = seed if seed is not None else 10
        self.all_channels = all_channels

    def build(self, input_shape):
        if self.all_channels:
            # one weight vector per channel
            weights_shape = (self.pool_size[0] * self.pool_size[1], input_shape[-1])
        else:
            weights_shape = (self.pool_size[0] * self.pool_size[1], 1)
        tf.random.set_seed(self.seed)
        kernel = tf.random.uniform(shape=weights_shape)
        kernel /= tf.reduce_sum(kernel, axis=[0], keepdims=True)
        self.kernel = tf.Variable(initial_value=kernel, trainable=self.train, dtype='float32', constraint=UnitSumNonNeg())

    # def owapool(self, a, axis=[]):
    #     a = tf.reshape(a, shape=a.shape[0:4]+(-1,))
    #     a = tf.sort(a, direction='DESCENDING', axis=-1)
    #     return tf.reduce_sum(tf.math.multiply(self.kernel, a), axis=-1)

    def call(self, inputs):
        _, height, width, channels = inputs.get_shape().as_list()
        if self.padding.upper() == 'SAME':  # Complete size to pad 'SAME'
            pad_bottom = self.pool_size[0] * height % self.pool_size[0]
            pad_right = self.pool_size[1] * width % self.pool_size[1]
            paddings = tf.constant([[0, 0], [0, pad_bottom], [0, pad_right], [0, 0]])
            inputs = tf.pad(inputs, paddings, "CONSTANT")
        # Extract pooling regions
        stride = [1, self.strides[0], self.strides[1], 1]
        ksize = [1, self.pool_size[0], self.pool_size[1], 1]
        x_tensor_p = tf.image.extract_patches(inputs, sizes=ksize, strides=stride,
                                              rates=[1, 1, 1, 1], padding='SAME')
        _, pool_height, pool_width, elems = x_tensor_p.get_shape().as_list()
        # Extract pooling regions for each channel
        elems = int(elems / channels)
        # Reshape the extracted patches so the window values are grouped per channel
        inputs = tf.reshape(x_tensor_p, [-1, pool_height, pool_width, elems, channels])
        # Sort values for pooling
        if self.sort:
            inputs = tf.sort(inputs, axis=-2, direction='DESCENDING', name=None)
        outputs = tf.reduce_sum(tf.math.multiply(self.kernel, inputs), axis=-2)
        return outputs
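As a side note on the claim that the layer behaves like tf.keras.layers.MaxPooling2D: that equivalence can be checked directly, since tf.image.extract_patches flattens each window with the channel index varying fastest. A minimal sketch, assuming TF 2.x eager execution (variable names here are arbitrary):

import numpy as np
import tensorflow as tf

x = tf.constant(np.random.rand(1, 8, 8, 3), dtype=tf.float32)

# Reference: built-in max pooling, 2x2 window, stride 2.
ref = tf.nn.max_pool2d(x, ksize=2, strides=2, padding='VALID')

# Same thing via extract_patches: each patch holds 2*2*3 values with the
# channel index innermost, so reshape to [..., window, channels] and
# reduce over the window axis.
patches = tf.image.extract_patches(x, sizes=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                                   rates=[1, 1, 1, 1], padding='VALID')
n, ph, pw, e = patches.shape
patches = tf.reshape(patches, [n, ph, pw, e // 3, 3])
mine = tf.reduce_max(patches, axis=-2)

print(np.allclose(ref.numpy(), mine.numpy()))  # expected: True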

Tensorflow: Implemented Hourglass model for human pose estimation but it's not converging

I am new to TensorFlow. I have to build a human pose estimator. I implemented the stacked hourglass model, but it's not converging.
Here's my code. I am not an expert in TensorFlow, so maybe my code has a fault or I didn't understand the paper properly.
x_image is the input image (-1, 256, 256, 3) and y_true is the set of heat maps (-1, 128, 128, 12). Each heatmap has one peak, which specifies the position of a joint.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
tf.reset_default_graph()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

def init_weights(shape):
    init_random_dist = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(init_random_dist)

def init_bias(shape):
    init_bias_vals = tf.constant(0.1, shape=shape)
    return tf.Variable(init_bias_vals)

def conv2d(x, W):
    # x --> [batch, H, W, Channels]
    # W --> [filter H, filter W, Channels In, Channels Out]
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2by2(x):
    # x --> [batch, h, w, c]
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

def convolutional_layer(input_x, shape):
    W = init_weights(shape)
    b = init_bias([shape[3]])
    return tf.nn.relu(tf.nn.bias_add(conv2d(input_x, W), b))

def residual_layer(convo):
    convo_1 = convolutional_layer(convo, [1, 1, 256, 128])
    convo_2 = convolutional_layer(convo_1, [3, 3, 128, 128])
    convo_3 = convolutional_layer(convo_2, [1, 1, 128, 256])
    return convo_3

def hourglass(convo, size):
    if size == 4.0:
        convo = residual_layer(convo)
        convo = residual_layer(convo)
        convo = residual_layer(convo)
        return convo
    convo = residual_layer(convo)
    convo_1 = tf.nn.max_pool(convo, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    convo = hourglass(convo_1, size / 2.0)
    convo_1 = residual_layer(convo_1)
    convo = tf.add(convo, convo_1)
    convo = tf.image.resize_nearest_neighbor(convo, (int(size), int(size)))
    return convo

x = tf.compat.v1.placeholder(tf.float32, shape=[None, 256, 256, 3])
y_true = tf.placeholder(tf.float32, shape=[None, 128, 128, 12])
x_image = tf.reshape(x, [-1, 256, 256, 3])

# input
convo = convolutional_layer(x_image, shape=[7, 7, 3, 256])
convo = hourglass(convo, 256.0)
convo = convolutional_layer(convo, shape=[3, 3, 256, 12])
y_pred = tf.nn.max_pool(convo, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

loss = tf.reduce_mean(tf.squared_difference(y_pred, y_true))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train = optimizer.minimize(loss)
init = tf.global_variables_initializer()
It is a bit difficult to say without knowing the full context and data. It seems you have just one image? That is most likely not enough for training.
Maybe you can also draw the model that you want to achieve in tensorflow?
You could try to change the learning rate to some other value (e.g. larger) to make it converge. Possibly you need to implement a scheduler for the learning rate to dynamically adapt it depending on the situation (see for an example: https://www.tensorflow.org/tutorials/text/transformer?hl=en).
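For example, a minimal sketch of a decaying learning rate in the question's TF1-style graph code (the decay numbers below are arbitrary placeholders, not tuned for this model; `loss` is the tensor already defined above):

global_step = tf.Variable(0, trainable=False, name='global_step')

# Start at 1e-3 and multiply the learning rate by 0.96 every 1000 steps.
learning_rate = tf.train.exponential_decay(
    learning_rate=0.001,
    global_step=global_step,
    decay_steps=1000,
    decay_rate=0.96,
    staircase=True)

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
# Passing global_step makes minimize() increment it after every update.
train = optimizer.minimize(loss, global_step=global_step)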

different results in inference between python and c++ opencv Mat::

I'm building a re-identification network that implements a triplet-loss function, and up to that point everything is fine. The network works fine in Python: I implemented it in Keras with TensorFlow as the backend, then converted the .hd5 to a .pb file to run inference in TensorFlow C++. The problem is that, with the same images, the results differ between Python and C++, and I don't know why. Can anyone help me?
Here is the model in Python:
import keras
import keras.applications
import keras.layers as layer
import tensorflow as tf
from keras import backend as K
from keras.backend.tensorflow_backend import set_session
from keras.models import Model as md

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tf.Session(config=config)
set_session(sess)

class Model:
    def __init__(self, shape):
        self.shape = shape
        self.params = {
            'optimizer': 'sgd',
            'first_neuron': 12,
            'first_max_pooling': 2,
            'second_neuron': 12,
            'second_max_pooling': 2,
            'third_neuron': 20,
            'third_max_pooling': 3,
            'dense_neuron': 64,
            'final_neuron': 128,
        }
        self.feature_model = self.create_features_model()
        self.triplet_model = self.create_model()

    def create_features_model(self):
        # Define the vision modules
        img_input = layer.Input(shape=(self.shape))
        x = layer.Conv2D(self.params['first_neuron'], (3, 3), activation='relu')(img_input)
        x = layer.MaxPooling2D((self.params['first_max_pooling'], self.params['first_max_pooling']))(x)
        x = layer.Conv2D(self.params['second_neuron'], (3, 3), activation='relu')(x)
        x = layer.MaxPooling2D((self.params['second_max_pooling'], self.params['second_max_pooling']))(x)
        x = layer.Conv2D(self.params['third_neuron'], (3, 3), activation='relu')(x)
        x = layer.MaxPooling2D((self.params['third_max_pooling'], self.params['third_max_pooling']))(x)
        x = layer.Flatten()(x)
        x = layer.Dense(self.params['dense_neuron'], activation='relu')(x)
        x = layer.Dense(self.params['final_neuron'], activation='relu')(x)
        out = layer.Lambda(lambda x: K.l2_normalize(x, axis=1), name='t_emb_1_lnorm')(x)
        features_model = md(img_input, out)
        features_model.summary()
        return features_model

    def create_model(self):
        base_model = self.feature_model
        # triplet framework, shared weights
        input_shape = (self.shape)
        input_target = layer.Input(shape=input_shape, name='input_target')
        input_positive = layer.Input(shape=input_shape, name='input_pos')
        input_negative = layer.Input(shape=input_shape, name='input_neg')
        net_target = base_model(input_target)
        net_positive = base_model(input_positive)
        net_negative = base_model(input_negative)
        # The Lambda layer produces output using the given function. Here it is the Euclidean distance.
        positive_distance = layer.Lambda(self.euclidean_distance, name='pos_dist')([net_target, net_positive])
        negative_distance = layer.Lambda(self.euclidean_distance, name='neg_dist')([net_target, net_negative])
        diference = layer.Lambda(self.euclidean_distance, name='dif')([net_positive, net_negative])
        # This lambda layer simply stacks outputs so both distances are available to the objective
        distances = layer.Lambda(lambda vects: K.stack(vects, axis=1), name='distance')(
            [positive_distance, negative_distance, diference])
        model = md([input_target, input_positive, input_negative], distances, name='result')
        # Setting up optimizer designed for variable learning rate
        model.compile(optimizer=keras.optimizers.Adam(lr=0.001, decay=0.00002),
                      loss=self.triplet_loss, metrics=[self.accuracy])
        return model

    def triplet_loss(self, _, y_pred):
        margin = K.constant(0.5)
        return K.mean(K.maximum(K.constant(0), K.square(y_pred[:, 0, 0]) - 0.5 * (
            K.square(y_pred[:, 1, 0]) + K.square(y_pred[:, 2, 0])) + margin))

    def accuracy(self, _, y_pred):
        return K.mean(y_pred[:, 0, 0] < y_pred[:, 1, 0])

    def lnorm(self, x):
        return K.l2_normalize(x, axis=-1)

    def euclidean_distance(self, vects):
        x, y = vects
        return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))
This is how I run inference in Python:
from model import Model as model
from keras.utils import HDF5Matrix
import numpy as np
import cv2
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tf.Session(config=config)
set_session(sess)
def load_datasets(in_h5_path, partition='train'):
    if partition == 'train':
        target = HDF5Matrix(datapath=in_h5_path, dataset="targets")
        positive = HDF5Matrix(datapath=in_h5_path, dataset="positives")
        negative = HDF5Matrix(datapath=in_h5_path, dataset="negatives")
        return target, positive, negative
    else:
        print("Invalid 'partition' parameter: Valid values: ['train', 'test']")
tar = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/tra1.png")
nega = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/dec1.png")
tar = cv2.resize(tar, (32, 32), interpolation=cv2.INTER_CUBIC)
nega = cv2.resize(nega, (32, 32), interpolation=cv2.INTER_CUBIC)
t1 = np.array(tar).reshape((1, 32, 32, 3))
t2 = np.array(nega).reshape((1, 32, 32, 3))
target, positive, negative = load_datasets('/home/amejia/PycharmProjects/lossDatasetGenerator/test/test32.h5')
net = model((32, 32, 3))
net.triplet_model.load_weights("/home/amejia/PycharmProjects/triplet_loss/simple-grande.hdf5")
enter = [t1, t2, t1]
a = net.triplet_model.predict(x=enter, batch_size=1)
print(a)
This is how I run inference in C++:
tensorflow::Tensor target(tensorflow::DT_FLOAT,
                          tensorflow::TensorShape({1, image_size, image_size, 3}));
tensorflow::Tensor positive(tensorflow::DT_FLOAT,
                            tensorflow::TensorShape({1, image_size, image_size, 3}));

img_to_float2(tracks, detections, target, positive, frame);

std::vector<std::pair<std::string, tensorflow::Tensor>> Input = {{"input_target:0", target},
                                                                 {"input_pos:0", positive},
                                                                 {"input_neg:0", target}};
std::vector<tensorflow::Tensor> Outputs;
tensorflow::Status Status = session->Run(Input, {"distance/stack:0"}, {}, &Outputs);

auto data = Outputs[0].flat<float>();
std::cout << Outputs[0].DebugString() << std::endl;
And this is the function that fills the input tensors:
void LossModel::img_to_float2(Track &tracks, Detection &detections, tensorflow::Tensor &tracksTensor,
                              tensorflow::Tensor &detectionsTensor, cv::Mat &frame) {
    auto *tar = tracksTensor.flat<float>().data();
    auto *dec = detectionsTensor.flat<float>().data();

    cv::Mat detectionImg = frame(detections.getBox()).clone();
    resize(detectionImg, detectionImg, cv::Size(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE), 0, 0,
           cv::INTER_CUBIC);
    cv::Mat resizedImage(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE, CV_32FC3, dec);
    detectionImg.convertTo(resizedImage, CV_32FC3);

    cv::Mat trackImg = tracks.get_img().clone();
    resize(trackImg, trackImg, cv::Size(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE), 0, 0,
           cv::INTER_CUBIC);
    cv::Mat resizedImage2(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE, CV_32FC3, tar);
    trackImg.convertTo(resizedImage2, CV_32FC3);
}
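One way to narrow down where the two pipelines diverge is to compare the exact values being fed on each side before blaming the graph. A minimal Python-side sketch (the file path is the one from the question; on the C++ side the corresponding tensor can be printed with DebugString(), which is already in the code above):

import cv2
import numpy as np

img = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/tra1.png")
img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_CUBIC)
t1 = img.reshape((1, 32, 32, 3)).astype(np.float32)

# Summary stats plus a small corner of the tensor; the C++ pipeline should
# reproduce these numbers for the same image if the channel order (BGR),
# resize interpolation, and dtype/scale really match.
print(t1.dtype, t1.shape, t1.min(), t1.max(), t1.mean())
print(t1[0, :2, :2, :])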

Tensorflow code runs fine with CPU but when I try to run code on GPU using jupyter kernel dies

My kernel dies each time I run this code on the GPU. The problem cannot be CUDA- or cuDNN-related, since I can run other code on the GPU.
#define shapes
weight_shapes = [(8, 8, 4, 32), (4, 4, 32, 64), (3, 3, 64, 64), (3136, 512),
                 (512, action_size), (32), (64), (64), (512), (action_size)]

def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='VALID')
    x = tf.nn.bias_add(x, b)
    #x = tf.layers.batch_normalization(x, training=True, trainable=True, epsilon=1e-5, name='batch_norm1')
    return tf.nn.relu(x)

def dense(x, W, b):
    return tf.add(tf.matmul(x, W), b)

def policy(inputs_, W):
    with tf.name_scope("conv1"):
        # Input is 84x84x4
        conv1 = conv2d(inputs_, W[0], W[5], strides=4)
    with tf.name_scope("conv2"):
        conv2 = conv2d(conv1, W[1], W[6], strides=2)
    with tf.name_scope("conv3"):
        conv3 = conv2d(conv2, W[2], W[7], strides=1)
        ## --> [7, 7, 64]
    with tf.name_scope("flatten"):
        flatten = tf.contrib.layers.flatten(conv3)
        ## --> [3136]
    with tf.name_scope("fc1"):
        fc1 = tf.nn.relu(dense(flatten, W[3], W[8]))
    with tf.name_scope("logits"):
        logits = dense(fc1, W[4], W[9])
    return logits
And the main part of code is:
################################
### build the network policy####
################################
class PGNetwork:
    def __init__(self, state_size, action_size, learning_rate):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.weights = [
            tf.get_variable('wc1', shape=weight_shapes[0], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('wc2', shape=weight_shapes[1], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('wc3', shape=weight_shapes[2], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('wd1', shape=weight_shapes[3], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('wd2', shape=weight_shapes[4], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('bc1', shape=weight_shapes[5], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('bc2', shape=weight_shapes[6], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('bc3', shape=weight_shapes[7], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('bd1', shape=weight_shapes[8], initializer=tf.contrib.layers.xavier_initializer()),
            tf.get_variable('bd2', shape=weight_shapes[9], initializer=tf.contrib.layers.xavier_initializer())
        ]
        with tf.name_scope("inputs"):
            # We create the placeholders
            # *state_size means that we take each elements of state_size in tuple hence is like if we wrote
            # [None, 84, 84, 4] #the first argument is related to batch size
            self.inputs_ = tf.placeholder(tf.float32, [None, *state_size], name="inputs_")
            self.actions = tf.placeholder(tf.int32, [None, action_size], name="actions")
            self.discounted_episode_rewards_ = tf.placeholder(tf.float32, [None, ], name="discounted_episode_rewards_")
            self.flat_multiplier_tensor = tf.placeholder(tf.float32, shape=[None])
            # Add this placeholder for having this variable in tensorboard
            self.mean_reward_ = tf.placeholder(tf.float32, name="mean_reward")
        with tf.variable_scope('PGNetwork'):
            self.logits = policy(self.inputs_, self.weights)
        with tf.name_scope("softmax"):
            self.action_distribution = tf.nn.softmax(self.logits)
        with tf.name_scope("sample_gradient"):
            self.split_inputs = tf.unstack(self.inputs_, num=batch_size_zero_padded, axis=0)
            self.split_actions = tf.unstack(self.actions, num=batch_size_zero_padded, axis=0)
            self.intermediate = [tf.expand_dims(self.split_inputs[i], 0) for i in range(batch_size_zero_padded)]
And then, when I try to run the following code, the kernel dies with "The kernel appears to have died. It will restart automatically.":
# Reset the graph
tf.reset_default_graph()
# Instantiate the PGNetwork
PGNetwork = PGNetwork(state_size, action_size, learning_rate)
# Initialize Session
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
I would appreciate any idea on what might be going wrong! I am using python 3.6 with tensorflow-gpu 1.9.0.
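Not a diagnosis, but when a TF 1.x session dies silently in Jupyter at initialization time, a common first check is to let the session grow GPU memory on demand instead of grabbing it all up front, and to log device placement so the crash point shows up in the notebook's console. A minimal sketch for the TF 1.9 setup described above:

import tensorflow as tf

config = tf.ConfigProto(log_device_placement=True)
config.gpu_options.allow_growth = True          # allocate GPU memory on demand
# config.gpu_options.per_process_gpu_memory_fraction = 0.8  # or cap the fraction

sess = tf.Session(config=config)
init = tf.global_variables_initializer()
sess.run(init)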

why if we use "tf.make_template()" in training stage, we must use tf.make_template() again in testing stage

I defined a model function named "drrn_model". While training my model, I use it like this:
shared_model = tf.make_template('shared_model', drrn_model)
train_output = shared_model(train_input, is_training=True)
It begins training step by step, and I can restore a .ckpt file into the model when I want to continue training from an old checkpoint.
But there is a problem when I test my trained model.
I use the code below directly without using tf.make_template:
train_output = drrn_model(train_input, is_training=False)
Then the terminal gave me a lot of NotFoundError messages like "Key LastLayer/Variable_2 not found in checkpoint".
But when I use
shared_model = tf.make_template('shared_model', drrn_model)
output_tensor = shared_model(input_tensor,is_training=False)
it tests normally.
So why must we use tf.make_template() again in the testing stage? What is the difference between calling drrn_model directly and calling it through make_template when we construct the model?
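A quick way to see what tf.make_template changes is to print the variable names each construction produces; the checkpoint keys come from those names, which is where the NotFoundError comes from. A minimal sketch with a toy model function (not the real drrn_model), using the compat.v1 API:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def toy_model(x):
    # stands in for drrn_model: creates one variable under a named scope
    with tf.variable_scope("LastLayer"):
        w = tf.get_variable("conv_w", [3, 3, 1, 1])
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="SAME")

x = tf.placeholder(tf.float32, [None, 8, 8, 1])
shared_model = tf.make_template('shared_model', toy_model)
_ = shared_model(x)
print([v.name for v in tf.global_variables()])
# -> ['shared_model/LastLayer/conv_w:0'] : the template adds its own scope,
#    and these prefixed names are what the checkpoint stores

tf.reset_default_graph()
x = tf.placeholder(tf.float32, [None, 8, 8, 1])
_ = toy_model(x)
print([v.name for v in tf.global_variables()])
# -> ['LastLayer/conv_w:0'] : no 'shared_model/' prefix, so restoring the
#    checkpoint saved above cannot find matching keys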
And there is another question, about the BN layer in TensorFlow.
I have tried many ways, but the output is always wrong (always worse than the version without the BN layer).
There is my newest version of model with BN layer:
tensor = None

def drrn_model(input_tensor, is_training):
    with tf.device("/gpu:0"):
        with tf.variable_scope("FirstLayer"):
            conv_0_w = tf.get_variable("conv_w", [3, 3, 1, 128], initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
            tensor = tf.nn.conv2d(tf.nn.relu(batchnorm(input_tensor, is_training=is_training)), conv_0_w, strides=[1, 1, 1, 1], padding="SAME")
            first_layer = tensor

        ### recursion ###
        with tf.variable_scope("recycle", reuse=False):
            tensor = drrnblock(first_layer, tensor, is_training)
        for i in range(1, 10):
            with tf.variable_scope("recycle", reuse=True):
                tensor = drrnblock(first_layer, tensor, is_training)

        ### end layer ###
        with tf.variable_scope("LastLayer"):
            conv_end_w = tf.get_variable("conv_w", [3, 3, 128, 1], initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
            conv_end_layer = tf.nn.conv2d(tf.nn.relu(batchnorm(tensor, is_training=is_training)), conv_end_w, strides=[1, 1, 1, 1], padding='SAME')

        tensor = tf.add(input_tensor, conv_end_layer)
        return tensor

def drrnblock(first_layer, input_layer, is_training):
    conv1_w = tf.get_variable("conv1__w", [3, 3, 128, 128], initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
    conv1_layer = tf.nn.conv2d(tf.nn.relu(batchnorm(input_layer, is_training=is_training)), conv1_w, strides=[1, 1, 1, 1], padding="SAME")

    conv2_w = tf.get_variable("conv2__w", [3, 3, 128, 128], initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / 9)))
    conv2_layer = tf.nn.conv2d(tf.nn.relu(batchnorm(conv1_layer, is_training=is_training)), conv2_w, strides=[1, 1, 1, 1], padding="SAME")

    tensor = tf.add(first_layer, conv2_layer)
    return tensor

def batchnorm(inputs, is_training, decay=0.999):  # there is my BN layer
    scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
    beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
    pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
    pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)

    if is_training:
        batch_mean, batch_var = tf.nn.moments(inputs, [0, 1, 2])
        print("batch_mean.shape: ", batch_mean.shape)
        train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
        train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
        with tf.control_dependencies([train_mean, train_var]):
            return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, scale, variance_epsilon=1e-3)
    else:
        return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, scale, variance_epsilon=1e-3)
Please tell me what is wrong in my code.
Thanks a lot!!
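For comparison only (this is not the code from the question): a commonly used TF 1.x batch-norm pattern creates its moving-average variables through tf.get_variable inside the current variable scope, so they are shared and restored consistently (including under tf.make_template), and updates them through the UPDATE_OPS collection. A minimal sketch under those assumptions:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def bn_layer(inputs, is_training):
    # tf.layers.batch_normalization creates its variables with
    # tf.get_variable, so they pick up the enclosing variable scope
    # (important when the model is wrapped with tf.make_template).
    return tf.layers.batch_normalization(inputs, training=is_training,
                                         momentum=0.999, epsilon=1e-3)

# During training, the moving mean/variance are only updated if the
# UPDATE_OPS collection is run together with the train op, e.g.:
# update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
# with tf.control_dependencies(update_ops):
#     train_op = optimizer.minimize(loss)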