Custom layer with tf.extract_image_patches extremely slow - tensorflow

I'm new to tensorflow. I'm trying to implement a custom pooling layer using OWA operators (https://github.com/jiforcen/ordered-weighted-pooling). For that I'm using tf.extract_image_patches, but this operation is extremely slow when the input dimensions are large, as raised in Issue #13017.
I believe the pooling layer I implemented behaves very similarly to the tf.keras.layers.MaxPooling2D layer. Inspecting the code of MaxPooling2D, I saw that it calls the gen_nn_ops.max_pool method.
I tried to take a look at what's inside gen_nn_ops.max_pool, but I can't find it in the repository. From what I've googled, the source code isn't there because it's automatically generated by bazel. If I build from source, I'll see this file inside bazel-genfiles, right? It contains automatically generated Python wrappers to the underlying C++ implementations.
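(For what it's worth, the generated wrapper module can at least be located and inspected at runtime in an installed TensorFlow; a quick check:)

    from tensorflow.python.ops import gen_nn_ops

    print(gen_nn_ops.__file__)          # path of the generated Python wrapper module
    print(gen_nn_ops.max_pool.__doc__)  # docstring of the generated max_pool wrapper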
Is it possible to create a custom pooling operation in C++ and use it in my Python code?
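If I understand the custom-op workflow correctly, after compiling a C++ kernel into a shared library, the Python side would look roughly like the sketch below; owa_pool_ops.so and owa_pool are hypothetical names used only for illustration:

    import tensorflow as tf

    # Load the compiled custom op library (hypothetical file name).
    owa_module = tf.load_op_library('./owa_pool_ops.so')

    # Call the custom op like any other TF op (hypothetical op name/signature);
    # inputs would be a [batch, height, width, channels] tensor defined elsewhere.
    # outputs = owa_module.owa_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1])

Here is the custom layer code that I implemented: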
import tensorflow as tf
from keras import backend as K
from tensorflow.keras.constraints import Constraint
from tensorflow.python.util.tf_export import keras_export
from tensorflow.python.keras.utils import conv_utils
# import skimage.measure

# keras_export('keras.constraints.UnitSumNonNeg', 'keras.constraints.unit_sum_non_neg')
class UnitSumNonNeg(Constraint):
    """Constrains weights to be non-negative and to sum to one.

    Also available via the shortcut function `keras.constraints.unit_sum_non_neg`.
    """
    def __call__(self, w):
        aux = w * tf.cast(tf.math.greater_equal(w, 0.), w.dtype)
        return aux / (K.epsilon() + tf.reduce_sum(aux, axis=[0], keepdims=True))

class OWAPoolingNew(tf.keras.layers.Layer):
    def __init__(self,
                 pool_size=(2, 2),
                 strides=None,
                 padding='valid',
                 data_format=None,
                 name=None,
                 sort=True,
                 train=True,
                 seed=None,
                 all_channels=False,
                 **kwargs):
        super(OWAPoolingNew, self).__init__(name=name, **kwargs)
        self.pool_size = pool_size
        self.strides = pool_size if strides is None else strides
        self.padding = padding
        self.data_format = conv_utils.normalize_data_format('channels_last')
        self.sort = sort
        self.train = train
        self.seed = seed if seed is not None else 10
        self.all_channels = all_channels

    def build(self, input_shape):
        if self.all_channels:
            weights_shape = (self.pool_size[0] * self.pool_size[1], input_shape[-1])
        else:
            weights_shape = (self.pool_size[0] * self.pool_size[1], 1)
        tf.random.set_seed(self.seed)
        kernel = tf.random.uniform(shape=weights_shape)
        kernel /= tf.reduce_sum(kernel, axis=[0], keepdims=True)
        self.kernel = tf.Variable(initial_value=kernel, trainable=self.train,
                                  dtype='float32', constraint=UnitSumNonNeg())

    # def owapool(self, a, axis=[]):
    #     a = tf.reshape(a, shape=a.shape[0:4] + (-1,))
    #     a = tf.sort(a, direction='DESCENDING', axis=-1)
    #     return tf.reduce_sum(tf.math.multiply(self.kernel, a), axis=-1)

    def call(self, inputs):
        _, height, width, channels = inputs.get_shape().as_list()
        if self.padding.upper() == 'SAME':
            # Pad so that height and width become multiples of the pool size
            pad_bottom = (self.pool_size[0] - height % self.pool_size[0]) % self.pool_size[0]
            pad_right = (self.pool_size[1] - width % self.pool_size[1]) % self.pool_size[1]
            paddings = tf.constant([[0, 0], [0, pad_bottom], [0, pad_right], [0, 0]])
            inputs = tf.pad(inputs, paddings, "CONSTANT")
        # Extract pooling regions
        stride = [1, self.strides[0], self.strides[1], 1]
        ksize = [1, self.pool_size[0], self.pool_size[1], 1]
        x_tensor_p = tf.image.extract_patches(inputs, sizes=ksize, strides=stride,
                                              rates=[1, 1, 1, 1], padding='SAME')
        _, pool_height, pool_width, elems = x_tensor_p.get_shape().as_list()
        # Separate the pooling regions per channel
        elems = int(elems / channels)
        # Reshape the extracted patches (not the original inputs)
        inputs = tf.reshape(x_tensor_p, [-1, pool_height, pool_width, elems, channels])
        # Sort values for pooling
        if self.sort:
            inputs = tf.sort(inputs, axis=-2, direction='DESCENDING', name=None)
        outputs = tf.reduce_sum(tf.math.multiply(self.kernel, inputs), axis=-2)
        return outputs
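As a point of comparison, for the common case of non-overlapping windows (strides equal to pool_size) the patch extraction can also be written as a reshape plus transpose, avoiding tf.image.extract_patches entirely. A minimal sketch, assuming static height and width divisible by the pool size k:

    import tensorflow as tf

    def extract_nonoverlapping_patches(inputs, k):
        # Rearranges [B, H, W, C] into [B, H//k, W//k, k*k, C] with reshapes only.
        _, height, width, channels = inputs.get_shape().as_list()
        x = tf.reshape(inputs, [-1, height // k, k, width // k, k, channels])
        x = tf.transpose(x, [0, 1, 3, 2, 4, 5])  # gather each k x k window together
        return tf.reshape(x, [-1, height // k, width // k, k * k, channels])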

Related

Tensorflow: Implemented Hourglass model for human pose estimation but it's not converging

I am new to tensorflow. I have to build a human pose estimator. I implemented a stacked hourglass model but it's not converging.
Here's my code. I am not an expert in tensorflow, so I thought maybe my code has a fault or I didn't understand the paper properly.
x_image is the input image (-1, 256, 256, 3) and y_true is the set of heat maps (-1, 128, 128, 12). Each heatmap has one peak, which marks the position of a joint.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
tf.reset_default_graph()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

def init_weights(shape):
    init_random_dist = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(init_random_dist)

def init_bias(shape):
    init_bias_vals = tf.constant(0.1, shape=shape)
    return tf.Variable(init_bias_vals)

def conv2d(x, W):
    # x --> [batch, H, W, Channels]
    # W --> [filter H, filter W, Channels In, Channels Out]
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2by2(x):
    # x --> [batch, h, w, c]
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

def convolutional_layer(input_x, shape):
    W = init_weights(shape)
    b = init_bias([shape[3]])
    return tf.nn.relu(tf.nn.bias_add(conv2d(input_x, W), b))

def residual_layer(convo):
    convo_1 = convolutional_layer(convo, [1, 1, 256, 128])
    convo_2 = convolutional_layer(convo_1, [3, 3, 128, 128])
    convo_3 = convolutional_layer(convo_2, [1, 1, 128, 256])
    return convo_3

def hourglass(convo, size):
    if size == 4.0:
        convo = residual_layer(convo)
        convo = residual_layer(convo)
        convo = residual_layer(convo)
        return convo
    convo = residual_layer(convo)
    convo_1 = tf.nn.max_pool(convo, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    convo = hourglass(convo_1, size / 2.0)
    convo_1 = residual_layer(convo_1)
    convo = tf.add(convo, convo_1)
    convo = tf.image.resize_nearest_neighbor(convo, (int(size), int(size)))
    return convo

x = tf.compat.v1.placeholder(tf.float32, shape=[None, 256, 256, 3])
y_true = tf.placeholder(tf.float32, shape=[None, 128, 128, 12])
x_image = tf.reshape(x, [-1, 256, 256, 3])

# input
convo = convolutional_layer(x_image, shape=[7, 7, 3, 256])
convo = hourglass(convo, 256.0)
convo = convolutional_layer(convo, shape=[3, 3, 256, 12])
y_pred = tf.nn.max_pool(convo, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

loss = tf.reduce_mean(tf.squared_difference(y_pred, y_true))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train = optimizer.minimize(loss)
init = tf.global_variables_initializer()
It is a bit difficult to say without knowing the full context and the data. It seems you have just one image? That is most likely not enough for training.
Maybe you can also draw the model that you want to achieve in tensorflow?
You could try changing the learning rate (e.g. to a larger value) to make it converge. Possibly you need to implement a scheduler that adapts the learning rate dynamically depending on the situation (see for example: https://www.tensorflow.org/tutorials/text/transformer?hl=en). A minimal sketch of a decaying schedule follows.
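Since the question's code uses the TF1 API, one way to sketch this (an illustration with assumed decay values, not tuned for this model) is tf.train.exponential_decay:

    # Replace the fixed learning rate with a decaying schedule (a sketch).
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(
        0.001,              # initial learning rate
        global_step,
        decay_steps=1000,   # assumed decay interval
        decay_rate=0.96,    # assumed decay factor
        staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train = optimizer.minimize(loss, global_step=global_step)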

Gradients are None for Custom Convolution Layer

I have implemented the basic MNIST model with a custom convolution layer, as shown below. The problem is that the gradients are always 'None' for the custom layer, so no learning happens during back propagation, as the grads have None values.
I have debugged the outputs of the layers during the forward pass and they are OK.
Here is the sample code; for simplicity I have passed an image of ones and have just returned the matrix from the custom layer.
I have tried my best but could not make it work; any help is very much appreciated in advance.
The following code is executable and raises this warning:
tensorflow:Gradients do not exist for variables ['cnn/custom_conv2d/kernel:0', 'cnn/custom_conv2d/bias:0', 'cnn/custom_conv2d_1/kernel:0', 'cnn/custom_conv2d_1/bias:0', 'cnn/custom_conv2d_2/kernel:0', 'cnn/custom_conv2d_2/bias:0'] when minimizing the loss.
import numpy as np
import tensorflow as tf

class CustomConv2D(tf.keras.layers.Conv2D):
    def __init__(self, filters,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 data_format=None,
                 dilation_rate=(1, 1),
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='glorot_uniform',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 __name__='CustomConv2D',
                 **kwargs):
        super(CustomConv2D, self).__init__(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs)

    def call(self, input):
        (unrolled_mat, filters, shape) = self.prepare(input)
        # unrolled_mat = unrolled inputs
        # filters = unrolled kernels of the layer
        # convolution through unrolling
        conv_result = tf.tensordot(unrolled_mat, filters, axes=1)
        result = tf.convert_to_tensor(tf.reshape(conv_result, shape))
        return result

    def prepare(self, matrix):
        batches, rows, cols, channels = matrix.shape
        kernel_size = self.kernel_size[0]
        unrolled_matrices = None
        for batch in range(batches):
            unrolled_maps = None
            for chanel in range(channels):
                unrolled_map = self.unroll(batch, cols, kernel_size, matrix, rows, chanel)
                if unrolled_maps is None:
                    unrolled_maps = unrolled_map
                else:
                    unrolled_maps = np.append(unrolled_maps, unrolled_map, axis=1)
            unrolled_maps = np.reshape(unrolled_maps, (-1, unrolled_maps.shape[0], unrolled_maps.shape[1]))
            if unrolled_matrices is None:
                unrolled_matrices = unrolled_maps
            else:
                unrolled_matrices = np.concatenate((unrolled_matrices, unrolled_maps))
        kernels = self.get_weights()
        kernels = np.reshape(kernels[0], (unrolled_matrices[0].shape[1], -1))
        shp = (batches, rows - (kernel_size - 1), cols - (kernel_size - 1), self.filters)
        matrix = unrolled_matrices
        return (matrix, kernels, shp)

    def unroll(self, batch, cols, kernel_size, matrix, rows, chanel):
        unrolled_feature_map = None
        for x in range(0, rows - (kernel_size - 1)):
            for y in range(0, (cols - (kernel_size - 1))):
                temp_row = None  # flattened kernel at a single position
                for k in range(kernel_size):
                    for l in range(kernel_size):
                        if temp_row is None:
                            temp_row = matrix[batch, x + k, y + l, chanel]
                        else:
                            temp_row = np.append(temp_row, matrix[batch, x + k, y + l, chanel])
                if unrolled_feature_map is None:
                    # first row of the unrolled matrix
                    unrolled_feature_map = np.reshape(temp_row, (-1, kernel_size * kernel_size))
                else:
                    # concatenate subsequent rows to the unrolled matrix
                    unrolled_feature_map = np.concatenate(
                        (unrolled_feature_map, np.reshape(temp_row, (-1, kernel_size * kernel_size))))
        unrolled_feature_map = np.reshape(unrolled_feature_map,
                                          (unrolled_feature_map.shape[0], unrolled_feature_map.shape[1]))
        matrix = unrolled_feature_map
        return matrix

class CNN(tf.keras.Model):
    def __init__(self):
        super(CNN, self).__init__()
        self.learning_rate = 0.001
        self.momentum = 0.9
        self.optimizer = tf.keras.optimizers.Adam(self.learning_rate, self.momentum)
        self.conv1 = CustomConv2D(filters=6, kernel_size=3, activation='relu')  # valid means no padding
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=2)
        self.conv2 = CustomConv2D(filters=16, kernel_size=3, activation='relu')
        self.pool2 = tf.keras.layers.MaxPool2D(pool_size=2)
        self.conv3 = CustomConv2D(filters=120, kernel_size=3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.fc1 = tf.keras.layers.Dense(units=82, kernel_initializer='glorot_uniform')
        self.fc2 = tf.keras.layers.Dense(units=10, activation='softmax', kernel_initializer='glorot_uniform')

    def call(self, x):
        x = self.conv1(x)    # shape (32, 26, 26, 6)
        x = self.pool1(x)    # shape (32, 13, 13, 6)
        x = self.conv2(x)    # shape (32, 11, 11, 16)
        x = self.pool2(x)    # shape (32, 5, 5, 16)
        x = self.conv3(x)    # shape (32, 3, 3, 120)
        x = self.flatten(x)  # shape (32, 1080)
        x = self.fc1(x)      # shape (32, 82)
        x = self.fc2(x)      # shape (32, 10)
        return x

    def feedForward(self, image, label):
        accuracy_object = tf.metrics.Accuracy()
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        with tf.GradientTape() as tape:
            feedForwardCompuation = self(image, training=True)
            self.loss_value = loss_object(label, feedForwardCompuation)
        grads = tape.gradient(self.loss_value, self.variables)
        self.optimizer.apply_gradients(zip(grads, self.variables))
        accuracy = accuracy_object(tf.argmax(feedForwardCompuation, axis=1, output_type=tf.int32), label)

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype('float32')
y_train = y_train.astype('float32')
image = x_train[0].reshape((1, 28, 28, 1))
label = y_train[0]
cnn = CNN()
cnn.feedForward(image, label)
UPDATE: I am not using the built-in TF conv function; rather I am implementing my own custom convolution operation via the matrix-unrolling method (unrolled map × unrolled filters). But tape.gradient returns None for the custom layers, whereas when I use the built-in conv2d function of TF it works fine!
I have added the actual code of the operation above.
(A snapshot of the grads while debugging was attached as an image.)
The problem is that the convolution operation is not happening in the class CustomConv2D. Neither the call method nor the customConv method performs a convolution operation; they just return the input as it is.
Replacing the line return self.customConv(matrix) in the call method of the CustomConv2D class with return super(tf.keras.layers.Conv2D, self).call(matrix) will perform the actual convolution operation.
One more change is to invoke the call method of the CNN class by adding the line _ = cnn(X_reshaped) before the line cnn.feedForward(image, label).
With these two changes, the gradients will be present. A minimal sketch of both changes follows.
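A sketch of the two changes, assuming the CNN model from the question is in scope; the idiomatic super().call(...) form delegates to the built-in, differentiable Conv2D convolution (the numpy-based unrolling is invisible to the gradient tape):

    import tensorflow as tf

    class CustomConv2D(tf.keras.layers.Conv2D):
        # Delegate to the built-in Conv2D convolution, which is differentiable.
        def call(self, inputs):
            return super().call(inputs)

    (x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
    X_reshaped = x_train[:1].reshape((1, 28, 28, 1)).astype('float32')
    label = y_train[:1].astype('float32')

    cnn = CNN()           # the CNN model from the question, assumed in scope
    _ = cnn(X_reshaped)   # build the model (and its variables) once before training
    cnn.feedForward(X_reshaped, label)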

Cannot get GradientTape to give non null results

I am trying to manually implement a very simple RNN using tensorflow 2. I modeled my code on the example for building models manually on the tensorflow website. The code, stripped to the bare essentials for this purpose, is:
import numpy as np
import tensorflow as tf

class ModelSimple(object):
    def __init__(self):
        # Initialize the weights and bias to random values
        self.W = tf.Variable(tf.random.normal([]))
        self.b = tf.Variable(tf.random.normal([]))

    def __call__(self, x):
        return self.W * x + self.b

def loss(predicted_y, target_y):
    return tf.reduce_mean(tf.square(predicted_y - target_y))

NUM_EXAMPLES = 1000
inputs = tf.random.normal(shape=[NUM_EXAMPLES])
outputs = tf.zeros(NUM_EXAMPLES)

model = ModelSimple()
with tf.GradientTape() as t:
    t.watch([model.W, model.b])
    current_loss = loss(model(inputs), outputs)
dW, db = t.gradient(current_loss, [model.W, model.b])
print(dW, db)
This gives nice tensors for dW and db. Then I try to do the same for the RNN described above:
class ModelRNN(object):
    def __init__(self, n_inputs, n_neurons):
        self.n_inputs = n_inputs
        self.n_neurons = n_neurons
        # weights for the new input
        self.Wx = tf.Variable(tf.random.normal(shape=[self.n_inputs, self.n_neurons], dtype=tf.float32))
        # weights for the previous output
        self.Wy = tf.Variable(tf.random.normal(shape=[self.n_neurons, self.n_neurons], dtype=tf.float32))
        # bias weights
        self.b = tf.Variable(tf.zeros([1, self.n_neurons], dtype=tf.float32))

    def __call__(self, X_batch):
        # get the shape of the input
        batch_size, num_time_steps, _ = X_batch.get_shape()
        # we loop through the time steps, and the output of the previous step
        # feeds into the next one; this variable tracks it, initialized to zero
        y_last = tf.Variable(tf.zeros([batch_size, self.n_neurons], dtype=tf.float32))
        # the outputs are stored in this tensor
        Ys = tf.Variable(tf.zeros([batch_size, num_time_steps, self.n_neurons], dtype=tf.float32))
        for t in range(num_time_steps):
            Xt = X_batch[:, t, :]
            yt = tf.tanh(tf.matmul(y_last, self.Wy) +
                         tf.matmul(Xt, self.Wx) +
                         self.b)
            y_last.assign(yt)
            Ys[:, t, :].assign(yt)
        return Ys

inputs = tf.convert_to_tensor(np.array([
    # t = 0      t = 1
    [[0, 1, 2], [9, 8, 7]],  # instance 1
    [[3, 4, 5], [0, 0, 0]],  # instance 2
    [[6, 7, 8], [6, 5, 4]],  # instance 3
    [[9, 0, 1], [3, 2, 1]],  # instance 4
], dtype=np.float32))
outputs = tf.Variable(tf.zeros((4, 2, 5), dtype=np.float32))

model = ModelRNN(3, 5)
with tf.GradientTape() as t:
    t.watch([model.Wx, model.Wy, model.b])
    current_loss = loss(model(inputs), outputs)
dWx, dWy, db = t.gradient(current_loss, [model.Wx, model.Wy, model.b])
print(dWx, dWy, db)
and it turns out dWx, dWy and db are all None. I have tried several things (including watching them with the GradientTape, despite them being variables) and yet I keep getting None. What am I doing wrong?
It looks like this is related to this issue:
Tensorflow cannot get gradient wrt a Variable, but can wrt a Tensor
Replacing the assigns with a Python list and tf.stack results in a gradient being returned:
Ys = []
for t in range(num_time_steps):
    Xt = X_batch[:, t, :]
    yt = tf.tanh(tf.matmul(y_last, self.Wy) +
                 tf.matmul(Xt, self.Wx) +
                 self.b)
    y_last.assign(yt)
    Ys.append(yt)
return tf.stack(Ys, axis=1)
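For completeness, a variant of the same loop using tf.TensorArray also keeps gradients flowing; a sketch, assuming y_last is initialized as a plain tensor (tf.zeros) rather than a tf.Variable:

    Ys = tf.TensorArray(tf.float32, size=num_time_steps)
    for t in range(num_time_steps):
        Xt = X_batch[:, t, :]
        yt = tf.tanh(tf.matmul(y_last, self.Wy) +
                     tf.matmul(Xt, self.Wx) +
                     self.b)
        y_last = yt           # plain rebinding instead of Variable.assign
        Ys = Ys.write(t, yt)  # TensorArray writes are differentiable
    return tf.transpose(Ys.stack(), [1, 0, 2])  # [time, batch, units] -> [batch, time, units]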

Tensorflow Executor failed to create kernel on GPU

I am trying to train a network using tensorflow on a GPU, but this warning is thrown during the training process.
I checked the free memory size of the GPU; it seems fine.
E tensorflow/core/common_runtime/executor.cc:623] Executor failed to create kernel. Invalid argument: Default AvgPoolingOp only supports NHWC on device type CPU
[[{{node vgg_src/pool1}} = AvgPool[T=DT_FLOAT, data_format="NCHW", ksize=[1, 1, 2, 2], padding="SAME", strides=[1, 1, 2, 2], _device="/job:localhost/replica:0/task:0/device:GPU:0"](vgg_src/conv1_2/Relu)]]
Although I can train and run the network properly, I still want to know what causes this problem, and how I could fix it.
Update
Here is the code of my model, which is a modified VGG-16 network.
import os
import tensorflow as tf
import numpy as np
import pdb

vgg_mean = [0.485, 0.456, 0.406]
vgg_std = [0.229, 0.224, 0.225]
data = None
dir_path = os.path.dirname(os.path.realpath(__file__))
# dir_path = os.path.normpath(os.path.join(dir_path, os.pardir))
weights_path = os.path.join(dir_path, 'models', 'vgg16_onnx.npy')

class Model():
    def __init__(self, vgg16_npy_path=None):
        global data
        if vgg16_npy_path is None:
            path = weights_path
            print(path)
            if os.path.exists(path):
                vgg16_npy_path = path
            else:
                print("VGG16 weights were not found in the project directory!")
                exit(0)
        if data is None:
            data = np.load(vgg16_npy_path, encoding='latin1')
            self.data_dict = data.item()
            print("VGG16 weights loaded")
        else:
            self.data_dict = data.item()

    def build(self, bgr_input):
        '''Note that opencv loads images in BGR order, but the pretrained model expects RGB.'''
        blue, green, red = tf.split(axis=3, num_or_size_splits=3, value=bgr_input)
        rgb = tf.concat(axis=3, values=[
            (red - vgg_mean[0]) / vgg_std[0],
            (green - vgg_mean[1]) / vgg_std[1],
            (blue - vgg_mean[2]) / vgg_std[2],
        ])
        self.conv1_1 = self.conv_layer(rgb, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.avg_pool(self.conv1_2, 'pool1')
        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.avg_pool(self.conv2_2, 'pool2')
        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.pool3 = self.avg_pool(self.conv3_3, 'pool3')
        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.pool4 = self.avg_pool(self.conv4_3, 'pool4')
        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.pool5 = self.avg_pool(self.conv5_3, 'pool5')
        self.fc6 = self.fc_layer(self.pool5, 'fc6')
        self.fc7 = self.fc_layer(self.fc6, 'fc7')
        self.fc8 = self.fc_layer(self.fc7, 'fc8')
        self.data_dict = None

    def avg_pool(self, bottom, name):
        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                              padding='SAME', name=name)

    def max_pool(self, bottom, name):
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                              padding='SAME', name=name)

    def conv_layer(self, bottom, name, stride=1):
        with tf.variable_scope(name):
            filt = self.get_conv_filter(name)
            conv = tf.nn.conv2d(bottom, filt, [1, stride, stride, 1], padding='SAME')
            conv_biases = self.get_bias(name)
            bias = tf.nn.bias_add(conv, conv_biases)
            mean = self.get_mean(name)
            variance = self.get_variance(name)
            offset = self.get_beta(name)
            scale = self.get_gamma(name)
            norm = tf.nn.batch_normalization(bias, mean, variance, offset, scale, 1e-20)
            relu = tf.nn.relu(norm)
            return relu

    def fc_layer(self, bottom, name):
        with tf.variable_scope(name):
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])
            weights = self.get_fc_weight(name)
            biases = self.get_bias(name)
            # Fully connected layer. Note that '+' automatically broadcasts the biases.
            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
            return fc

    def get_mean(self, name):
        return tf.constant(self.data_dict[name][4], name="mean")

    def get_variance(self, name):
        return tf.constant(self.data_dict[name][5], name="variance")

    def get_gamma(self, name):
        return tf.constant(self.data_dict[name][2], name="gamma")

    def get_beta(self, name):
        return tf.constant(self.data_dict[name][3], name="beta")

    def get_conv_filter(self, name):
        return tf.constant(np.rollaxis(np.rollaxis(np.rollaxis(self.data_dict[name][0], 1), 2), 3), name="filter")

    def get_bias(self, name):
        return tf.constant(self.data_dict[name][1], name="biases")

    def get_fc_weight(self, name):
        return tf.constant(np.rollaxis(self.data_dict[name][0], 1), name="weights")
This is not a GPU-memory size issue. Read this thread:
https://github.com/tensorpack/tensorpack/issues/263
You may need to change your code according to that thread: TensorFlow only supports NHWC for pooling on CPU. Check out these two lines from the thread; you may need to change the data_format parameter in the same way:
- with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling], data_format='NCHW'), \
+ with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling], data_format='NHWC')
An equivalent fix in plain TensorFlow is sketched below.
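A minimal sketch of the same idea without tensorpack, assuming x_nchw is a [batch, channels, height, width] tensor from the existing graph: transpose to NHWC around any pooling op that may be placed on the CPU.

    import tensorflow as tf

    # NCHW -> NHWC, pool, then back; CPU pooling kernels only support NHWC.
    x_nhwc = tf.transpose(x_nchw, [0, 2, 3, 1])
    pooled = tf.nn.avg_pool(x_nhwc, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                            padding='SAME', data_format='NHWC')
    pooled_nchw = tf.transpose(pooled, [0, 3, 1, 2])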

Re-use speechT example with LSTM network

From the example at https://github.com/timediv/speechT, I'm trying to adapt the code to use an LSTM network, but I have failed so far; please help. I tried many combinations but I always got errors, e.g. "Input must be a sequence" or similar. I need to add an LSTM network to the example for speech recognition, and after trying for a couple of weeks I am still stuck on the coding problem. An example of using an LSTM network with this sample would be very helpful.
class InputBatchLoader(BaseInputLoader):

    def __init__(self, input_size, batch_size, data_generator_creator, max_steps=None):
        super().__init__(input_size)
        self.batch_size = batch_size
        self.data_generator_creator = data_generator_creator
        self.steps_left = max_steps
        with tf.device("/cpu:0"):
            # Define input and label placeholders
            self.inputs = tf.placeholder(tf.float32, [batch_size, None, input_size], name='inputs')
            self.sequence_lengths = tf.placeholder(tf.int32, [batch_size], name='sequence_lengths')
            self.labels = tf.sparse_placeholder(tf.int32, name='labels')
            # Queue for inputs and labels
            self.queue = tf.FIFOQueue(dtypes=[tf.float32, tf.int32, tf.string],
                                      capacity=100)
            # queues do not support sparse tensors yet, so we need to serialize...
            serialized_labels = tf.serialize_many_sparse(self.labels)
            self.enqueue_op = self.queue.enqueue([self.inputs,
                                                  self.sequence_lengths,
                                                  serialized_labels])

class Wav2LetterLSTMModel(SpeechModel):  # Added Sep 14, 2017 to create the LSTM model

    def __init__(self, input_loader: BaseInputLoader, input_size: int, num_classes: int):
        super().__init__(input_loader, input_size, num_classes)

    def _create_network(self, num_classes):
        cellsize = 256
        num_layers = 3
        inputs = self.inputs
        lstm_cell = rnn.BasicLSTMCell(cellsize, forget_bias=1.0)
        outputs, states = tf.nn.dynamic_rnn(lstm_cell, inputs, dtype=tf.float32)
        return tf.transpose(outputs, (1, 0, 2))

def create_default_model(flags, input_size: int, speech_input: BaseInputLoader) -> SpeechModel:
    model = Wav2LetterLSTMModel(input_loader=speech_input,
                                input_size=input_size,
                                num_classes=speecht.vocabulary.SIZE + 1)  # Added Sep 14, 2017, to use the LSTM model
    # TODO how can we restore only selected variables so we do not need to always create the full network?
    if flags.command == 'train':
        model.add_training_ops(learning_rate=flags.learning_rate,
                               learning_rate_decay_factor=flags.learning_rate_decay_factor,
                               max_gradient_norm=flags.max_gradient_norm,
                               momentum=flags.momentum)
        model.add_decoding_ops()
    elif flags.command == 'export':
        model.add_training_ops()
        model.add_decoding_ops()
    else:
        model.add_training_ops()
        model.add_decoding_ops(language_model=flags.language_model,
                               lm_weight=flags.lm_weight,
                               word_count_weight=flags.word_count_weight,
                               valid_word_count_weight=flags.valid_word_count_weight)
    model.finalize(log_dir=flags.log_dir,
                   run_name=flags.run_name,
                   run_type=flags.run_type)
    return model
In the end I used this:
XT = tf.transpose(inputs, [1, 0, 2])
XR = tf.reshape(XT, [-1, self.input_size])
X_split = tf.split(XR, cellsize, 0)
lstm = rnn.BasicLSTMCell(cellsize, forget_bias=1.0, state_is_tuple=True)
outputs, _states = rnn.static_rnn(lstm, X_split, dtype=tf.float32)
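For reference, the "Input must be a sequence" error typically comes from passing a single tensor to static_rnn, which expects a Python list of per-timestep tensors. A sketch (feature size and cell size assumed) of feeding one padded [batch, time, features] tensor to dynamic_rnn instead, which avoids the split entirely:

    import tensorflow as tf
    from tensorflow.contrib import rnn  # TF1-era module, as in the question

    inputs = tf.placeholder(tf.float32, [None, None, 128], name='inputs')  # 128 features assumed
    sequence_lengths = tf.placeholder(tf.int32, [None], name='sequence_lengths')

    cell = rnn.BasicLSTMCell(256, forget_bias=1.0, state_is_tuple=True)
    outputs, state = tf.nn.dynamic_rnn(cell, inputs,
                                       sequence_length=sequence_lengths,
                                       dtype=tf.float32)
    # _create_network in the question returns time-major output, hence the transpose
    outputs_time_major = tf.transpose(outputs, [1, 0, 2])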