I'm trying to create a char cnn using Keras. That type of cnn requires you to use Convolutional1D layer. But all the ways I try to add them to my model, it gives me errors at creation stage. Here is my code:
def char_cnn(n_vocab, max_len, n_classes):
conv_layers = [[256, 7, 3],
[256, 7, 3],
[256, 3, None],
[256, 3, None],
[256, 3, None],
[256, 3, 3]]
fully_layers = [1024, 1024]
th = 1e-6
embedding_size = 128
inputs = Input(shape=(max_len,), name='sent_input', dtype='int64')
# Embedding layer
x = Embedding(n_vocab, embedding_size, input_length=max_len)(inputs)
# Convolution layers
for cl in conv_layers:
x = Convolution1D(cl[0], cl[1])(x)
x = ThresholdedReLU(th)(x)
if not cl[2] is None:
x = MaxPooling1D(cl[2])(x)
x = Flatten()(x)
#Fully connected layers
for fl in fully_layers:
x = Dense(fl)(x)
x = ThresholdedReLU(th)(x)
x = Dropout(0.5)(x)
predictions = Dense(n_classes, activation='softmax')(x)
model = Model(input=inputs, output=predictions)
model.compile(optimizer='adam', loss='categorical_crossentropy')
return model
And here is the error I receive when I try to call char_cnn function
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
685 graph_def_version, node_def_str, input_shapes, input_tensors,
--> 686 input_tensors_as_shapes, status)
687 except errors.InvalidArgumentError as err:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
515 compat.as_text(c_api.TF_Message(self.status.status)),
--> 516 c_api.TF_GetCode(self.status.status))
517 # Delete the underlying status object from memory otherwise it stays alive
InvalidArgumentError: Negative dimension size caused by subtracting 3 from 1 for 'conv1d_26/convolution/Conv2D' (op: 'Conv2D') with input shapes: [?,1,1,256], [1,3,256,256].
How to fix it?
Your downsampling is too aggressive and the key argument here is max_len: when it's too small, the sequence becomes too short to perform either a convolution or a max-pooling. You set pool_size=3, hence it shrinks the sequence by a factor of 3 after each pooling (see the example below). I suggest you try pool_size=2.
The minimal max_len that this network can handle is max_len=123. In this case x shape is transformed in the following way (according to conv_layers):
(?, 123, 128)
(?, 39, 256)
(?, 11, 256)
(?, 9, 256)
(?, 7, 256)
(?, 5, 256)
Setting a smaller value, like max_len=120 causes x.shape=(?, 4, 256) before the last layer and this can't be performed.
I'm trying to make this old code work. Tensorflow has deleted train and next_batch.
I adjusted the code as much as I can but faced a stone hard wall here.
from keras.datasets import mnist
import keras as K
(Xtr, Ytr), (Xte, Yte) = mnist.load_data()
#Xtr, Ytr = mnist.ds_train.batch(5000) #5000 for training (nn candidates)
#Xte, Yte = mnist.test.next_batch(200) #200 for testing
# tf Graph Input
xtr = tf.placeholder("float", [None, 784])
xte = tf.placeholder("float", [784])
# Nearest Neighbor calculation using L1 Distance
# Calculate L1 Distance
distance = tf.reduce_sum(tf.abs(tf.add(xtr, tf.negative(xte))), reduction_indices=1)
# Prediction: Get min distance index (Nearest neighbor)
pred = tf.arg_min(distance, 0)
accuracy = 0.
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
xtr = tf.reshape(xtr, (-1, 28, 28))
xte = tf.reshape(xtr, (-1, 28, 28))
Training start
# Start training
with tf.Session() as sess:
# loop over test data
for i in range(Xte.size):
# Get nearest neighbor\
n_index = sess.run(pred, feed_dict={xtr: Xtr, xte: Xte[i, :]})
# Get nearest neighbor class label and compare it to its true label
print("Test", i, "Prediction:", np.argmax(Ytr[nn_index]), "True Class:", np.argmax(Yte[i]))
# Calculate accuracy
if np.argmax(Ytr[nn_index]) == np.argmax(Yte[i]):
accuracy += 1./len(Xte)
it seems to be impossible to have an index in an array size in loop.
However, The OUTPUT is like this
(10000, 28, 28)
(28, 28)
ValueError Traceback (most recent call last)
<ipython-input-31-4c49a9166101> in <module>
9 Xtee = Xte[i, :, :]
10 print(Xtee.shape)
---> 11 nn_index = sess.run(pred, feed_dict={xtr: Xtr, xte: Xte[50, :]})
12 # Get nearest neighbor class label and compare it to its true label
13 print("Test", i, "Prediction:", np.argmax(Ytr[nn_index]), "True Class:", np.argmax(Yte[i]))
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1163 not subfeed_t.get_shape().is_compatible_with(np_val.shape)):
1164 raise ValueError(
-> 1165 f'Cannot feed value of shape {str(np_val.shape)} for Tensor '
1166 f'{subfeed_t.name}, which has shape '
1167 f'{str(subfeed_t.get_shape())}')
ValueError: Cannot feed value of shape (28, 28) for Tensor Reshape_3:0, which has shape (?, 28, 28)
I have an array with a shape of [274 documents, 439 equal length sentences per document, 384-dimensional sbert embeddings per sentence]. I'm trying to fit a CNN model that predicts a binary value per document.
Below is the model architecture:
embedding_layer = Embedding(274, 384, input_length=439)
sequence_input = Input(shape=(439,))
embedded_sequences = embedding_layer(sequence_input)
# first conv filter
embedded_sequences = Reshape((439, 384, 1))(embedded_sequences)
x = Conv2D(100, (5, 384), activation='relu')(embedded_sequences)
x = MaxPooling2D((439 - 5 + 1, 1))(x)
# second conv filter
y = Conv2D(100, (4, 384), activation='relu')(embedded_sequences)
y = MaxPooling2D((439 - 4 + 1, 1))(y)
# third conv filter
z = Conv2D(100, (3, 384), activation='relu')(embedded_sequences)
z = MaxPooling2D((439 - 3 + 1, 1))(z)
# concatenate the convolutional layers
alpha = concatenate([x,y,z])
# flatten the concatenated values
alpha = Flatten()(alpha)
# add dropout
alpha = Dropout(0.5)(alpha)
# make predictions
preds = Dense(274, activation='softmax')(alpha)
# build model
model = Model(sequence_input, preds)
adadelta = optimizers.Adadelta()
model.fit(x=X_train_sent_emb_3m, y=y_train_sent_emb_3m, epochs=25 , validation_data=(X_test_sent_emb_3m, y_test_sent_emb_3m))
The model compiles but when I run the fit call I'm getting the following error message:
Epoch 1/25
WARNING:tensorflow:Model was constructed with shape (None, 439) for input KerasTensor(type_spec=TensorSpec(shape=(None, 439), dtype=tf.float32, name='input_15'), name='input_15', description="created by layer 'input_15'"), but it was called on an input with incompatible shape (None, 439, 384).
ValueError: total size of new array must be unchanged, input_shape = [439, 384, 384], output_shape = [439, 384, 1]
Any suggestions on what I need to change to make the model work for the shape of the data?
I have tensor (None, 196) and after reshaping, it becomes (None, 14, 14).
And now, I want to copy it to channel axis, so that the shape should be (None, 14, 14, 512). Lastly, I want to copy to timestep axis, so it becomes (None, 10, 14, 14, 512). I accomplish those steps using this snippet code:
def replicate(tensor, input_target):
batch_size = K.shape(tensor)[0]
nf, h, w, c = input_target
x = K.reshape(tensor, [batch_size, 1, h, w, 1])
# Replicate to channel dimension
x = K.tile(x, [batch_size, 1, 1, 1, c])
# Replicate to timesteps dimension
x = K.tile(x, [batch_size, nf, 1, 1, 1])
return x
x = ...
x = Lambda(replicate, arguments={'input_target':input_shape})(x)
another_x = Input(shape=input_shape) # shape (10, 14, 14, 512)
x = layers.multiply([x, another_x])
x = ...
I plot the model and the output shape is just like I want it to be. But, the problem arises in model training. I set the batch size to 2. This the the error message:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [8,10,14,14,512] vs. [2,10,14,14,512]
[[{{node multiply_1/mul}} = Mul[T=DT_FLOAT, _class=["loc:#training/Adam/gradients/multiply_1/mul_grad/Sum"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](Lambda_2/Tile_1, _arg_another_x_0_0/_189)]]
[[{{node metrics/top_k_categorical_accuracy/Mean_1/_265}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_6346_metrics/top_k_categorical_accuracy/Mean_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Looks like, K.tile() increases the batch size from 2 to 8. When I set the batch size to 10, it becomes 1000.
So, my question is how to achieve the result as I want? Is it good way to use tile()? Or, should I use repeat_elements()? Thanks!
I am using Tensorflow 1.12.0 and Keras 2.2.4.
As a rule of thumb, try to avoid bringing batch size to the transformations happening in the Lambda layer.
When you use tile operation, you only set only the dimension that needs to change (for example you had batch_size value in your tile operation which is wrong). Also I am using tf.tile instead of K.tile (TF 1.12 doesn't have tile in the Keras backend it seems).
def replicate(tensor, input_target):
_, nf, h, w, c = input_target
x = K.reshape(tensor, [-1, 1, h, w, 1])
# Replicate to channel dimension
# You can combine below lines to tf.tile(x, [1, nf, 1, 1, c]) as well
x = tf.tile(x, [1, 1, 1, 1, c])
# Replicate to timesteps dimension
x = tf.tile(x, [1, nf, 1, 1, 1])
return x
Simple example
input_shape= [None, 10, 14, 14, 512]
x = Input(shape=(196,))
x = Lambda(replicate, arguments={'input_target':input_shape})(x)
Which gives
>>> (?, 10, 14, 14, 512)
I am trying to use Tensorflow's 2.0 new MirroredStrategy but I am receiving an error saying:
ValueError: We currently do not support distribution strategy with a `Sequential` model that is created without `input_shape`/`input_dim` set in its first layer or a subclassed model.
class Model(kr.Model):
def __init__(self, input_shape, conv_sizes, num_outputs):
self.num_outputs = num_outputs
rows, cols, depth = input_shape
self.one_hot = kl.Lambda(lambda x: tf.one_hot(tf.cast(x, 'int32'), num_outputs), input_shape=(rows, cols))
self.concat = kl.Concatenate(axis=-1)
vision_layers = []
for i, (filters, kernel, stride) in enumerate(conv_sizes):
if not i:
depth += num_outputs - 1
vision_layers += [kl.Conv2D(filters, kernel, stride, activation='relu',
input_shape=(rows, cols, depth))]
vision_layers += [kl.Conv2D(filters, kernel, stride, activation='relu')]
vision_layers += [kl.MaxPool2D(pool_size=(2, 2))]
flatten = kl.Flatten()
dense = kl.Dense(num_outputs)
self.net = kr.Sequential(vision_layers+[flatten]+[dense])
self.build(input_shape=(None, ) + input_shape)
def call(self, inputs):
one_hot = self.one_hot(inputs[:, :, :, -1])
return self.net(self.concat([inputs[:, :, :, :-1], one_hot]))
Reproduction code:
model_args = {'conv_sizes': [(32, (2, 2), 1), (32, (2, 2), 1), (32, (2, 2), 1)],
'input_shape': (50, 50, 6),
'num_outputs': 5}
def dummy_loss(values, targets):
return tf.reduce_sum(values-targets, axis=-1)
mirrored_strategy = tf.distribute.MirroredStrategy()
with mirrored_strategy.scope():
model = Model(**model_args)
model.compile(optimizer=kr.optimizers.Adam(learning_rate=0.01), loss=dummy_loss)
Traceback (most recent call last):
File "/home/joao/anaconda3/envs/tf2/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3296, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-4-dc492e7c638b>", line 18, in <module>
model.compile(optimizer=kr.optimizers.Adam(learning_rate=0.01), loss=dummy_loss)
File "/home/joao/anaconda3/envs/tf2/lib/python3.6/site-packages/tensorflow/python/training/tracking/base.py", line 456, in _method_wrapper
result = method(self, *args, **kwargs)
File "/home/joao/anaconda3/envs/tf2/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 263, in compile
'We currently do not support distribution strategy with a '
ValueError: We currently do not support distribution strategy with a `Sequential` model that is created without `input_shape`/`input_dim` set in its first layer or a subclassed model.
Model Summary (model.summary()):
Model: "model_1"
Layer (type) Output Shape Param #
lambda (Lambda) multiple 0
concatenate (Concatenate) multiple 0
sequential (Sequential) (None, 5) 13573
Total params: 13,573
Trainable params: 13,573
Non-trainable params: 0
I would do away with the Sequential approach and use the Model class directly:
def create_model(input_shape, conv_sizes, fc_sizes, num_outputs):
num_outputs = num_outputs
rows, cols, depth = input_shape
input_layer = kl.Input(shape=(rows, cols, depth))
actions = tf.slice(input_layer, [0, 0, 0, depth - 1], [-1, rows, cols, 1])
non_actions = tf.slice(input_layer, [0, 0, 0, 0], [-1, rows, cols, depth - 1])
one_hot = kl.Lambda(lambda x: tf.one_hot(tf.cast(x, 'int32'), num_outputs),
input_shape=(rows, cols))(actions)
concat = kl.Concatenate(axis=-1)([non_actions, tf.reshape(one_hot, (-1, rows, cols, num_outputs))])
vision_layer = concat
for i, (filters, kernel, stride) in enumerate(conv_sizes):
vision_layer = kl.Conv2D(filters, kernel, stride, activation='relu')(vision_layer)
vision_layer = kl.MaxPool2D(pool_size=(2, 2))(vision_layer)
flatten = kl.Flatten()(vision_layer)
dense = kl.Dense(num_outputs)(flatten)
return kr.Model(inputs=input_layer, outputs=[dense])
This seems like a trivial question, but I've been unable to find the answer.
I have batched sequences of images of shape:
[batch_size, number_of_frames, frame_height, frame_width, number_of_channels]
and I would like to pass each frame through a few convolutional and pooling layers. However, TensorFlow's conv2d layer accepts 4D inputs of shape:
[batch_size, frame_height, frame_width, number_of_channels]
My first attempt was to use tf.map_fn over axis=1, but I discovered that this function does not propagate gradients.
My second attempt was to use tf.unstack over the first dimension and then use tf.while_loop. However, my batch_size and number_of_frames are dynamically determined (i.e. both are None), and tf.unstack raises {ValueError} Cannot infer num from shape (?, ?, 30, 30, 3) if num is unspecified. I tried specifying num=tf.shape(self.observations)[1], but this raises {TypeError} Expected int for argument 'num' not <tf.Tensor 'A2C/infer/strided_slice:0' shape=() dtype=int32>.
Since all the images (num_of_frames) are passed to the same convolutional model, you can stack both batch and frames together and do the normal convolution. Can be achieved by just using tf.resize as shown below:
# input with size [batch_size, frame_height, frame_width, number_of_channels
x = tf.placeholder(tf.float32,[None, None,32,32,3])
# reshape for the conv input
x_reshapped = tf.reshape(x,[-1, 32, 32, 3])
x_reshapped output size will be (50, 32, 32, 3)
# define your conv network
y = tf.layers.conv2d(x_reshapped,5,kernel_size=(3,3),padding='SAME')
#(50, 32, 32, 3)
#Get back the input shape
out = tf.reshape(x,[-1, tf.shape(x)[1], 32, 32, 3])
The output size would be same as the input: (10, 5, 32, 32, 3
with tf.Session() as sess:
print(sess.run(out, {x:np.random.normal(size=(10,5,32,32,3))}).shape)
#(10, 5, 32, 32, 3)