I am getting OOM while running a pre-trained BERT model on a new dataset of 20k tweets - tensorflow

I have a pre-trained model with an accuracy of 96% after 2 epochs, and I am trying to use that model on a new dataset of 20k tweets for sentiment analysis. While doing that, I get the error below.
I didn't face any issues while training the model on the same amount of data, so I'm not sure why I get this error when using the trained model for inference.
ResourceExhaustedError: OOM when allocating tensor with shape[1079190,768] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:ResourceGather]
Code:
import tensorflow as tf
import pandas as pd
from transformers import BertTokenizer, TFBertForSequenceClassification
from transformers import InputExample, InputFeatures
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model.summary()
Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
bert (TFBertMainLayer) multiple 109482240
_________________________________________________________________
dropout_37 (Dropout) multiple 0
_________________________________________________________________
classifier (Dense) multiple 1538
=================================================================
Total params: 109,483,778
Trainable params: 109,483,778
Non-trainable params: 0
train = tf.keras.preprocessing.text_dataset_from_directory('aclImdb/train', batch_size=30000,
                                                           validation_split=0.2, subset='training', seed=123)
test = tf.keras.preprocessing.text_dataset_from_directory('aclImdb/train', batch_size=30000,
                                                          validation_split=0.2, subset='validation', seed=123)
Found 25000 files belonging to 2 classes.
Using 20000 files for training.
Found 25000 files belonging to 2 classes.
Using 5000 files for validation.
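(The batch_size of 30000 exceeds the 25000 files in the directory, so a single take(1) below yields the entire split at once, which is then converted to a pandas DataFrame.)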
for data in train.take(1):
    train_feat = data[0].numpy()
    train_lab = data[1].numpy()

train = pd.DataFrame([train_feat, train_lab]).T
train.columns = ['DATA_COLUMN', 'LABEL_COLUMN']
train['DATA_COLUMN'] = train['DATA_COLUMN'].str.decode('utf-8')

for data in test.take(1):
    test_feat = data[0].numpy()
    test_lab = data[1].numpy()

test = pd.DataFrame([test_feat, test_lab]).T
test.columns = ['DATA_COLUMN', 'LABEL_COLUMN']
test['DATA_COLUMN'] = test['DATA_COLUMN'].str.decode('utf-8')
test.head()
def convert_data_to_examples(train, test, DATA_COLUMN, LABEL_COLUMN):
    train_InputExamples = train.apply(lambda x: InputExample(guid=None,  # Globally unique ID for bookkeeping, unused in this case
                                                             text_a=x[DATA_COLUMN],
                                                             text_b=None,
                                                             label=x[LABEL_COLUMN]), axis=1)
    validation_InputExamples = test.apply(lambda x: InputExample(guid=None,
                                                                 text_a=x[DATA_COLUMN],
                                                                 text_b=None,
                                                                 label=x[LABEL_COLUMN]), axis=1)
    return train_InputExamples, validation_InputExamples
train_InputExamples, validation_InputExamples = convert_data_to_examples(train, test,
                                                                         'DATA_COLUMN', 'LABEL_COLUMN')
def convert_examples_to_tf_dataset(examples, tokenizer, max_length=128):
    features = []  # -> will hold InputFeatures to be converted later
    for e in examples:
        # The documentation for this method is very good, so please take a look at it
        input_dict = tokenizer.encode_plus(
            e.text_a,
            add_special_tokens=True,
            max_length=max_length,  # truncates if len(s) > max_length
            return_token_type_ids=True,
            return_attention_mask=True,
            pad_to_max_length=True,  # pads to the right by default; deprecated in newer versions in favor of padding='max_length'
            truncation=True
        )
        input_ids, token_type_ids, attention_mask = (input_dict["input_ids"],
                                                     input_dict["token_type_ids"],
                                                     input_dict['attention_mask'])
        features.append(
            InputFeatures(
                input_ids=input_ids, attention_mask=attention_mask,
                token_type_ids=token_type_ids, label=e.label
            )
        )

    def gen():
        for f in features:
            yield (
                {
                    "input_ids": f.input_ids,
                    "attention_mask": f.attention_mask,
                    "token_type_ids": f.token_type_ids,
                },
                f.label,
            )

    return tf.data.Dataset.from_generator(
        gen,
        ({"input_ids": tf.int32, "attention_mask": tf.int32, "token_type_ids": tf.int32}, tf.int64),
        (
            {
                "input_ids": tf.TensorShape([None]),
                "attention_mask": tf.TensorShape([None]),
                "token_type_ids": tf.TensorShape([None]),
            },
            tf.TensorShape([]),
        ),
    )
DATA_COLUMN = 'DATA_COLUMN'
LABEL_COLUMN = 'LABEL_COLUMN'
# We can call the functions we created above with the following lines:
train_InputExamples,validation_InputExamples = convert_data_to_examples(train,test,DATA_COLUMN,LABEL_COLUMN)
train_data = convert_examples_to_tf_dataset(list(train_InputExamples),tokenizer)
train_data = train_data.shuffle(100).batch(32).repeat(2)
validation_data = convert_examples_to_tf_dataset(list(validation_InputExamples),tokenizer)
validation_data = validation_data.shuffle(100).batch(32)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')])
model.fit(train_data, epochs=2, validation_data=validation_data)
# this is my new dataset with 20k rows, on which I want to run the pretrained model:
tweets_list = statement_df['sentiment'].tolist()
# this part of the code serves that purpose
tf_batch = tokenizer(tweets_list, max_length=128, padding=True, truncation=True, return_tensors='tf')
# print(tf_batch)
tf_outputs = model(tf_batch)  # this line throws the OOM error
tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
labels = ['Negative','Positive']
label = tf.argmax(tf_predictions, axis=1)
label = label.numpy()
for i in range(len(tweets_list)):
    print(tweets_list[i], ": \n", labels[label[i]])
Error:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
199 """Call target, and fall back on dispatchers if there is a TypeError."""
200 try:
--> 201 return target(*args, **kwargs)
202 except (TypeError, ValueError):
203 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/array_ops.py in gather_v2(params, indices, validate_indices, axis, batch_dims, name)
4830 name=name,
4831 axis=axis,
-> 4832 batch_dims=batch_dims)
4833
4834
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
199 """Call target, and fall back on dispatchers if there is a TypeError."""
200 try:
--> 201 return target(*args, **kwargs)
202 except (TypeError, ValueError):
203 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/array_ops.py in gather(***failed resolving arguments***)
4811 # TODO(apassos) find a less bad way of detecting resource variables
4812 # without introducing a circular dependency.
-> 4813 return params.sparse_read(indices, name=name)
4814 except AttributeError:
4815 return gen_array_ops.gather_v2(params, indices, axis, name=name)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/resource_variable_ops.py in sparse_read(self, indices, name)
701 variable_accessed(self)
702 value = gen_resource_variable_ops.resource_gather(
--> 703 self._handle, indices, dtype=self._dtype, name=name)
704
705 if self._dtype == dtypes.variant:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_resource_variable_ops.py in resource_gather(resource, indices, dtype, batch_dims, validate_indices, name)
547 return _result
548 except _core._NotOkStatusException as e:
--> 549 _ops.raise_from_not_ok_status(e, name)
550 except _core._FallbackException:
551 pass
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py in raise_from_not_ok_status(e, name)
6860 message = e.message + (" name: " + name if name is not None else "")
6861 # pylint: disable=protected-access
-> 6862 six.raise_from(core._status_to_exception(e.code, message), None)
6863 # pylint: enable=protected-access
6864
/usr/local/lib/python3.7/dist-packages/six.py in raise_from(value, from_value)
ResourceExhaustedError: OOM when allocating tensor with shape[1079190,768] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:ResourceGather]
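The shape in the error, [1079190, 768], corresponds to the embedding lookup for all 20k tweets at once: roughly a million token positions times BERT's 768-dimensional hidden size, several GB of float32 before the model has even started its attention layers. A minimal sketch of batched inference, reusing the model, tokenizer and labels defined above (the batch size of 64 is an arbitrary value to tune for the available GPU memory):

batch_size = 64  # assumption: adjust to fit GPU memory
predicted = []
for start in range(0, len(tweets_list), batch_size):
    chunk = tweets_list[start:start + batch_size]
    tf_batch = tokenizer(chunk, max_length=128, padding=True, truncation=True, return_tensors='tf')
    tf_outputs = model(tf_batch)  # only batch_size sequences on the GPU at a time
    tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
    predicted.extend(tf.argmax(tf_predictions, axis=1).numpy())

for tweet, idx in zip(tweets_list, predicted):
    print(tweet, ": \n", labels[idx])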

Related

ValueError: Input 0 of layer is incompatible with the layer: expected shape=(None, 224, 224, 3), found shape=(224, 224, 3), what is the problem?

I am trying to build a machine learning model using a pre-trained VGG16 with tensorflow, but I keep getting the same problem with the shape of the input. Compared to other public code, the only difference is that I use a tf.data.Dataset to feed the data, instead of the DirectoryIterator of tf.image.
Here is my code:
zip_ref = ZipFile(zip_file, 'r')
zip_ref.extractall(repository_dir)
zip_ref.close()

train_dir = os.path.join(repository_dir, "seg_train", "seg_train")
test_dir = os.path.join(repository_dir, "seg_test", "seg_test")
os.system(f"rm -r {os.path.join(repository_dir, 'seg_pred')}")

# load variables
validation_percentage = 0.2
label_mode = "int"

# for our model purposes
img_size = (224, 224)
color_mode = 'rgb'

data_train, data_val = image_dataset_from_directory(
    train_dir,
    batch_size=None,
    label_mode=label_mode,
    color_mode=color_mode,
    image_size=img_size,
    validation_split=validation_percentage,
    subset="both",
    seed=123,
)
data_test = image_dataset_from_directory(
    test_dir,
    batch_size=None,
    label_mode=label_mode,
    color_mode=color_mode,
    image_size=img_size,
)

classes = data_train.class_names
print(classes)

scale = 1.0 / 255
normalization_layer = tf.keras.layers.Rescaling(scale)
data_train_norm = data_train.map(lambda x, y: (normalization_layer(x), y))
data_val_norm = data_val.map(lambda x, y: (normalization_layer(x), y))
data_test_norm = data_test.map(lambda x, y: (normalization_layer(x), y))

input_size = None
for img, label in data_train_norm.take(1).as_numpy_iterator():
    input_size = img.shape
print(input_size)

base_model = VGG16(
    input_shape=input_size,  # Shape of our images
    include_top=False,       # Leave out the last fully connected layer
    weights='imagenet'
)

# we do not train the parameters
for layer in base_model.layers:
    layer.trainable = False

# Flatten the output layer to 1 dimension
x = layers.Flatten()(base_model.output)
# https://medium.com/analytics-vidhya/car-brand-classification-using-vgg16-transfer-learning-f219a0f09765
# FC layer, very simple, with a softmax activation unit
x = layers.Dense(len(classes), activation="softmax")(x)
landscapeModel01 = Model(inputs=base_model.input, outputs=x, name="landscapeModel01")

loss = "sparse_categorical_crossentropy"
optimizer = "adam"
landscapeModel01.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=["loss", "accuracy"]
)

# fit data
shuffle = True  # variable
epochs = 50  # variable, depending on whether it is able to converge
batch_size = 200
print(landscapeModel01.input)
landscapeModel01.fit(
    data_train_norm,
    validation_data=data_val_norm,
    epochs=epochs,
    shuffle=shuffle,
    batch_size=batch_size
)
and this is the error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In [10], line 8
4 batch_size = 200
6 print(landscapeModel01.input)
----> 8 landscapeModel01.fit(
9 data_train_norm,
10 validation_data=data_val_norm,
11 epochs=epochs,
12 shuffle=shuffle,
13 batch_size=batch_size
14 )
File ~/anaconda3/envs/faa/lib/python3.10/site-packages/keras/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File /tmp/__autograph_generated_file8y_bf523.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
ValueError: in user code:
File "/home/renan/anaconda3/envs/faa/lib/python3.10/site-packages/keras/engine/training.py", line 1160, in train_function *
return step_function(self, iterator)
File "/home/renan/anaconda3/envs/faa/lib/python3.10/site-packages/keras/engine/training.py", line 1146, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/home/renan/anaconda3/envs/faa/lib/python3.10/site-packages/keras/engine/training.py", line 1135, in run_step **
outputs = model.train_step(data)
File "/home/renan/anaconda3/envs/faa/lib/python3.10/site-packages/keras/engine/training.py", line 993, in train_step
y_pred = self(x, training=True)
File "/home/renan/anaconda3/envs/faa/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/home/renan/anaconda3/envs/faa/lib/python3.10/site-packages/keras/engine/input_spec.py", line 295, in assert_input_compatibility
raise ValueError(
ValueError: Input 0 of layer "landscapeModel01" is incompatible with the layer: expected shape=(None, 224, 224, 3), found shape=(224, 224, 3)
What can I fix to make the code work?
Versions:
tensorflow==2.10.0
EDIT
I just found the solution: I was loading images with a batch size equal to None, but the trained model demanded that the images had a batch dimension, even if it was 1.
Solution
I just needed to load the images in image_dataset_from_directory with a batch_size parameter different from None. Since my investigation did not include data augmentation at this stage, I chose 1.
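A minimal sketch of that fix, reusing the directories and variables defined above:

data_train, data_val = image_dataset_from_directory(
    train_dir,
    batch_size=1,  # any value other than None adds the batch axis
    label_mode=label_mode,
    color_mode=color_mode,
    image_size=img_size,
    validation_split=validation_percentage,
    subset="both",
    seed=123,
)
# Note: with a batched dataset, img.shape in the input_size loop above
# includes the batch axis, so VGG16's input_shape should be img.shape[1:].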

Fine-Tune Universal Sentence Encoder Large with TF2

Below is my code for fine-tuning the Universal Sentence Encoder Multilingual Large 2. I am not able to resolve the resulting error. I tried adding a tf.keras.layers.Input layer which results in the same error. Any suggestion on how to successfully build a fine-tuning sequential model for USEM2 will be much appreciated.
import tensorflow as tf
import tensorflow_text
import tensorflow_hub as hub
module_url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/2"
embedding_layer = hub.KerasLayer(module_url, trainable=True, input_shape=[None,], dtype=tf.string)
hidden_layer = tf.keras.layers.Dense(32, activation='relu')
output_layer = tf.keras.layers.Dense(5, activation='softmax')
model = tf.keras.models.Sequential()
model.add(embedding_layer)
model.add(hidden_layer)
model.add(output_layer)
model.summary()
WARNING:tensorflow:Entity <tensorflow.python.saved_model.function_deserialization.RestoredFunction object at 0x7fdf34216390> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Shape must be rank 1 but is rank 2 for 'text_preprocessor_1/SentenceTokenizer/SentencepieceTokenizeOp' (op: 'SentencepieceTokenizeOp') with input shapes: [], [?,?], [], [], [], [], [].
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-61-7ea0d071abf8> in <module>
1 model = tf.keras.models.Sequential()
2
----> 3 model.add(embedding_layer)
4 model.add(hidden_layer)
5 model.add(output)
~/pyenv36/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~/pyenv36/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/sequential.py in add(self, layer)
176 # and create the node connecting the current layer
177 # to the input layer we just created.
--> 178 layer(x)
179 set_inputs = True
180
~/pyenv36/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
840 not base_layer_utils.is_in_eager_or_tf_function()):
841 with auto_control_deps.AutomaticControlDependencies() as acd:
--> 842 outputs = call_fn(cast_inputs, *args, **kwargs)
843 # Wrap Tensors in `outputs` in `tf.identity` to avoid
844 # circular dependencies.
~/pyenv36/lib/python3.6/site-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
235 except Exception as e: # pylint:disable=broad-except
236 if hasattr(e, 'ag_error_metadata'):
--> 237 raise e.ag_error_metadata.to_exception(e)
238 else:
239 raise
ValueError: in converted code:
relative to /home/neubig/pyenv36/lib/python3.6/site-packages:
tensorflow_hub/keras_layer.py:209 call *
result = f()
tensorflow_core/python/saved_model/load.py:436 _call_attribute
return instance.__call__(*args, **kwargs)
tensorflow_core/python/eager/def_function.py:457 __call__
result = self._call(*args, **kwds)
tensorflow_core/python/eager/def_function.py:494 _call
results = self._stateful_fn(*args, **kwds)
tensorflow_core/python/eager/function.py:1823 __call__
return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
tensorflow_core/python/eager/function.py:1141 _filtered_call
self.captured_inputs)
tensorflow_core/python/eager/function.py:1230 _call_flat
flat_outputs = forward_function.call(ctx, args)
tensorflow_core/python/eager/function.py:540 call
executor_type=executor_type)
tensorflow_core/python/ops/functional_ops.py:859 partitioned_call
executor_type=executor_type)
tensorflow_core/python/ops/gen_functional_ops.py:672 stateful_partitioned_call
executor_type=executor_type, name=name)
tensorflow_core/python/framework/op_def_library.py:793 _apply_op_helper
op_def=op_def)
tensorflow_core/python/framework/func_graph.py:548 create_op
compute_device)
tensorflow_core/python/framework/ops.py:3429 _create_op_internal
op_def=op_def)
tensorflow_core/python/framework/ops.py:1773 __init__
control_input_ops)
tensorflow_core/python/framework/ops.py:1613 _create_c_op
raise ValueError(str(e))
ValueError: Shape must be rank 1 but is rank 2 for 'text_preprocessor_1/SentenceTokenizer/SentencepieceTokenizeOp' (op: 'SentencepieceTokenizeOp') with input shapes: [], [?,?], [], [], [], [], [].
As far as I know, the Universal Sentence Encoder Multilingual in tf.hub does not support trainable=True so far.
However, these code snippets can make the model do inference:
Using V2
module_url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/2"
embedding_layer = hub.KerasLayer(module_url)
hidden_layer = tf.keras.layers.Dense(32, activation='relu')
output_layer = tf.keras.layers.Dense(5, activation='softmax')
inputs = tf.keras.layers.Input(shape=(1,), dtype=tf.string)
x = embedding_layer(tf.squeeze(tf.cast(inputs, tf.string)))["outputs"]
x = hidden_layer(x)
outputs = output_layer(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
Using V3
module_url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3"
embedding_layer = hub.KerasLayer(module_url)
hidden_layer = tf.keras.layers.Dense(32, activation='relu')
output_layer = tf.keras.layers.Dense(5, activation='softmax')
inputs = tf.keras.layers.Input(shape=(1,), dtype=tf.string)
x = embedding_layer(tf.squeeze(tf.cast(inputs, tf.string)))
x = hidden_layer(x)
outputs = output_layer(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
Inference
model.predict([["hello tf2"]])
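Note that the tf.squeeze is what addresses the original error: the module's SentencePiece tokenizer expects a rank-1 batch of strings, while a Keras Input(shape=(1,)) produces a rank-2 [batch, 1] tensor.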

How to write a custom resize layer that takes a resize value from an Input layer?

I am trying to add a custom resize layer that does not have a fixed resize value, instead, it takes a scale value from an input layer.
I found this but it has a fixed resize value: Add a resizing layer to a keras sequential model
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
import tensorflow.keras.backend as K

class Resize(Layer):
    def __init__(self):
        super(Resize, self).__init__()

    def build(self, input_shape):
        super(Resize, self).build(input_shape)

    def call(self, x, size):
        out = tf.image.resize(x, size=size)
        return out

    def get_output_shape_for(self, input_shape):
        return (None, None, 3)

inp = Input((10, 10, 3))
size = Input((1,), dtype='int32')
out = Resize()(inp, size=(100, 100))  # (inp, size=(size,size))
model = Model([inp, size], out)
model.summary()
When I try this:
inp = Input((10, 10, 3))
size = Input((1,), dtype='int32')
out = Resize()(inp, size=(size, size))
model = Model([inp, size], out)
model.summary()
Error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/keras/api/_v1/keras/models/__init__.py in <module>()
2 size = Input((1,), dtype='int32')
3
----> 4 out = Resize()(inp, size=(size,size)) #(inp, size=(size,size))
5
6 model = Model([inp,size], out)
~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
634 outputs = base_layer_utils.mark_as_return(outputs, acd)
635 else:
--> 636 outputs = call_fn(inputs, *args, **kwargs)
637
638 except TypeError as e:
~/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
147 except Exception as e: # pylint:disable=broad-except
148 if hasattr(e, 'ag_error_metadata'):
--> 149 raise e.ag_error_metadata.to_exception(type(e))
150 else:
151 raise
ValueError: in converted code:
<ipython-input-1-ab7021ffbc7d>:14 call *
out = tf.image.resize(x,size=size)
/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/ops/image_ops_impl.py:1182 resize_images
skip_resize_if_same=True)
/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/ops/image_ops_impl.py:1045 _resize_images_common
raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: '
ValueError: 'size' must be a 1-D Tensor of 2 elements: new_height, new_width
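The error arises because (size, size) is a pair of Keras tensors, each of shape (batch, 1); stacked together they are not the 1-D tensor of two elements that tf.image.resize expects.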
One workaround is to set size=Input(tensor=K.variable([2,2], dtype=tf.int32)).
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
import tensorflow.keras.backend as K

class Resize(Layer):
    def __init__(self):
        super(Resize, self).__init__()

    def build(self, input_shape):
        super(Resize, self).build(input_shape)

    def call(self, inputs):
        x = inputs[0]
        size = inputs[1]
        out = tf.image.resize(x, size=size)
        return out

    def get_output_shape_for(self, input_shape):
        return (None, None, 3)

inp = Input((10, 10, 3))
var_size = K.variable([2, 2], dtype=tf.int32)
size = Input(tensor=var_size, name='size')
out = Resize()([inp, size])
model = Model([inp, size], out)
model.summary()
# Model: "model"
# __________________________________________________________________________________________________
# Layer (type)           Output Shape           Param #    Connected to
# ==================================================================================================
# input_1 (InputLayer)   [(None, 10, 10, 3)]    0
# __________________________________________________________________________________________________
# input_2 (InputLayer)   [(2,)]                 0
# __________________________________________________________________________________________________
# resize (Resize)        (None, None, None, 3)  0          input_1[0][0]
#                                                          input_2[0][0]
# ==================================================================================================
# Total params: 0
# Trainable params: 0
# Non-trainable params: 0

input_mat = np.random.randn(100, 10, 10, 3)
K.set_value(var_size, [5, 5])
res = model.predict(input_mat)  # the tensor-backed 'size' input is fed automatically
# res.shape (100,5,5,3)
K.set_value(var_size, [3, 3])
res = model.predict(input_mat)
# res.shape (100,3,3,3)
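The design choice here: size is a graph variable rather than a per-sample input, so it is changed with K.set_value between predict calls and applies to the whole batch, which is what lets tf.image.resize see a concrete 1-D tensor of two elements.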

How do you write a custom activation function in python for Keras?

I'm trying to write a custom activation function for use with Keras. I cannot write it with tensorflow primitives, as that does not properly compute the derivative. I followed How to make a custom activation function with only Python in Tensorflow? and it works very well for creating a tensorflow function. However, when I tried putting it into Keras as an activation function for the classic MNIST demo, I got errors. I also tried the tf_spiky function from the above reference.
Here is the sample code
tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(512, activation=tf_spiky),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax)])
Here's my entire error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-48-73a57f81db19> in <module>
3 tf.keras.layers.Dense(512, activation=tf_spiky),
4 tf.keras.layers.Dropout(0.2),
----> 5 tf.keras.layers.Dense(10, activation=tf.nn.softmax)])
6 x=tf.keras.layers.Activation(tf_spiky)
7 y=tf.keras.layers.Flatten(input_shape=(28, 28))
/opt/conda/lib/python3.6/site-packages/tensorflow/python/training/checkpointable/base.py in _method_wrapper(self, *args, **kwargs)
472 self._setattr_tracking = False # pylint: disable=protected-access
473 try:
--> 474 method(self, *args, **kwargs)
475 finally:
476 self._setattr_tracking = previous_value # pylint: disable=protected-access
/opt/conda/lib/python3.6/site-packages/tensorflow/python/keras/engine/sequential.py in __init__(self, layers, name)
106 if layers:
107 for layer in layers:
--> 108 self.add(layer)
109
110 #property
/opt/conda/lib/python3.6/site-packages/tensorflow/python/training/checkpointable/base.py in _method_wrapper(self, *args, **kwargs)
472 self._setattr_tracking = False # pylint: disable=protected-access
473 try:
--> 474 method(self, *args, **kwargs)
475 finally:
476 self._setattr_tracking = previous_value # pylint: disable=protected-access
/opt/conda/lib/python3.6/site-packages/tensorflow/python/keras/engine/sequential.py in add(self, layer)
173 # If the model is being built continuously on top of an input layer:
174 # refresh its output.
--> 175 output_tensor = layer(self.outputs[0])
176 if isinstance(output_tensor, list):
177 raise TypeError('All layers in a Sequential model '
/opt/conda/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
728
729 # Check input assumptions set before layer building, e.g. input rank.
--> 730 self._assert_input_compatibility(inputs)
731 if input_list and self._dtype is None:
732 try:
/opt/conda/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py in _assert_input_compatibility(self, inputs)
1463 if x.shape.ndims is None:
1464 raise ValueError('Input ' + str(input_index) + ' of layer ' +
-> 1465 self.name + ' is incompatible with the layer: '
1466 'its rank is undefined, but the layer requires a '
1467 'defined rank.')
ValueError: Input 0 of layer dense_1 is incompatible with the layer: its rank is undefined, but the layer requires a defined rank.
From this I gather the last Dense layer is unable to get the dimensions of the output after the activation function, or something like that. I did see in the tensorflow code that many activation functions register a shape. But either I'm not doing that correctly or I'm going in the wrong direction. I'm guessing something needs to be done to the tensorflow function to make it an activation function that Keras can use.
I would appreciate any help you can give.
As requested, here is the sample code for tf_spiky; it works as described in the above reference. However, once put into Keras I get the errors shown. This is pretty much as shown in the "How to make a custom activation function with only Python in Tensorflow?" stackoverflow article.
import numpy as np

def spiky(x):
    print(x)
    r = x % 1
    if r <= 0.5:
        return r
    else:
        return 0

def d_spiky(x):
    r = x % 1
    if r <= 0.5:
        return 1
    else:
        return 0

np_spiky = np.vectorize(spiky)
np_d_spiky = np.vectorize(d_spiky)
np_d_spiky_32 = lambda x: np_d_spiky(x).astype(np.float32)

import tensorflow as tf
from tensorflow.python.framework import ops

def tf_d_spiky(x, name=None):
    with tf.name_scope(name, "d_spiky", [x]) as name:
        y = tf.py_func(np_d_spiky_32,
                       [x],
                       [tf.float32],
                       name=name,
                       stateful=False)
        return y[0]

def py_func(func, inp, Tout, stateful=True, name=None, grad=None):
    # Need to generate a unique name to avoid duplicates:
    rnd_name = 'PyFuncGrad' + str(np.random.randint(0, 1E+8))
    tf.RegisterGradient(rnd_name)(grad)  # see _MySquareGrad for grad example
    g = tf.get_default_graph()
    with g.gradient_override_map({"PyFunc": rnd_name}):
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)

def spikygrad(op, grad):
    x = op.inputs[0]
    n_gr = tf_d_spiky(x)
    return grad * n_gr

np_spiky_32 = lambda x: np_spiky(x).astype(np.float32)

def tf_spiky(x, name=None):
    with tf.name_scope(name, "spiky", [x]) as name:
        y = py_func(np_spiky_32,
                    [x],
                    [tf.float32],
                    name=name,
                    grad=spikygrad)  # <-- here's the call to the gradient
        return y[0]
The solution is in this post: Output from TensorFlow `py_func` has unknown rank/shape.
The easiest fix is to add y[0].set_shape(x.get_shape()) before the return statement in the definition of tf_spiky.
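Applied to the code above, that fix is a one-line change (a sketch):

def tf_spiky(x, name=None):
    with tf.name_scope(name, "spiky", [x]) as name:
        y = py_func(np_spiky_32,
                    [x],
                    [tf.float32],
                    name=name,
                    grad=spikygrad)
        y[0].set_shape(x.get_shape())  # restore the shape information py_func loses
        return y[0]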
Perhaps someone out there knows how to properly work with tensorflow shape functions. Digging around, I found an unchanged_shape shape function in tensorflow.python.framework.common_shapes, which would be appropriate here, but I don't know how to attach it to the tf_spiky function. It seems a Python decorator is in order here. It would probably be a service to others to explain customizing tensorflow functions with shape functions.

How to condition the inputs of a dynamic RNN decoder on the encoder's final hidden state with ScheduledOutputTrainingHelper?

I'm trying to use tensorflow to code an RNN encoder and decoder with variable-length sequence inputs, so I hope both the encoder and decoder can be dynamic. Additionally, the decoder inputs are conditioned on the encoder's final hidden state (context vector), similar to the Related Paper (see picture (a) on page 3). The decoder runs in full inference mode during training, feeding its previous outputs and the context vector as inputs at each step.
import tensorflow as tf
import copy
import math
from tensorflow.python.layers.core import Dense

class RNNEncoder_Decoder(object):
    def __init__(self, input_dim,
                 context_dim, output_dim, hidden_dim,
                 layers_stacked_count, learning_rate):
        self.graph = tf.get_default_graph()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.context_dim = context_dim
        self.hidden_dim = hidden_dim
        self.layers_stacked_count = layers_stacked_count
        self.learning_rate = learning_rate
        self.sampling_probability = tf.constant(dtype=tf.float32, value=1.0)

        # [batch_size, sequence_length, input_dimension]
        self.enc_inp = tf.placeholder(tf.float32, [None, None, self.input_dim], name='encoder_inputs')
        self.expected_out = tf.placeholder(tf.float32, [None, None, self.input_dim], name='expected_outs')
        # full inference during training
        self.dec_inp = tf.zeros_like(self.expected_out, dtype=tf.float32, name='decoder_inputs')

        seq_length = tf.reduce_sum(tf.sign(tf.reduce_max(tf.abs(self.enc_inp), 2)), 1)
        self.seq_length = tf.cast(seq_length, tf.int32)

        with tf.variable_scope('RNNEncoderDecoder'):
            with tf.variable_scope("Encoder") as encoder_varscope:
                # create encoder LSTM cells
                encoder_cells = []
                for i in range(self.layers_stacked_count):
                    with tf.variable_scope('EncoderCell_{}'.format(i)):
                        encoder_cells.append(tf.nn.rnn_cell.LSTMCell(self.hidden_dim,
                                                                     use_peepholes=True))
                self.encoder_cell = tf.nn.rnn_cell.MultiRNNCell(encoder_cells)

                # running the dynamic rnn encoder
                _, enc_state = tf.nn.dynamic_rnn(cell=self.encoder_cell,
                                                 initial_state=None,
                                                 dtype=tf.float32,
                                                 inputs=self.enc_inp,
                                                 sequence_length=self.seq_length)

                # extract the top layer hidden state as the feature representation
                self.context_vector = enc_state[-1].h

            cell_state0 = tf.zeros_like(enc_state[0].c, dtype=tf.float32)
            hidden_state0 = tf.zeros_like(enc_state[0].h, dtype=tf.float32)
            dec_init_state = (enc_state[1],  # pass the top layer state of the encoder to the bottom layer of the decoder
                              tf.nn.rnn_cell.LSTMStateTuple(cell_state0, hidden_state0))

            # condition the decoder inputs on the extracted features,
            # with a shape that matches the decoder inputs in all but (potentially) the final dimension:
            # tile the context vector from [batch_size, context_dim] to [batch_size, decoder_sequence_length, context_dim]
            context_vector_shape = tf.shape(self.context_vector)
            context_vector_reshaped = tf.reshape(self.context_vector,
                                                 [context_vector_shape[0], 1, context_vector_shape[1]])
            enc_inp_shape = tf.shape(self.enc_inp)
            self.auxiliary_inputs = tf.tile(context_vector_reshaped,
                                            multiples=[1, enc_inp_shape[1], 1])

            with tf.variable_scope("Decoder") as decoder_varscope:
                # create decoder LSTM cells
                decoder_cells = []
                for i in range(self.layers_stacked_count):
                    with tf.variable_scope('DecoderCell_{}'.format(i)):
                        decoder_cells.append(tf.nn.rnn_cell.LSTMCell(self.hidden_dim,
                                                                     use_peepholes=True))
                self.decoder_cell = tf.nn.rnn_cell.MultiRNNCell(decoder_cells)

                dec_out_dense = Dense(units=self.output_dim,
                                      activation=None,
                                      use_bias=False,
                                      kernel_initializer=tf.truncated_normal_initializer(
                                          dtype=tf.float32,
                                          stddev=1.0 / math.sqrt(float(self.hidden_dim))),
                                      name='dec_outp_linear_projection')

                training_helper = tf.contrib.seq2seq.ScheduledOutputTrainingHelper(
                    inputs=self.dec_inp,
                    sequence_length=self.seq_length,
                    auxiliary_inputs=self.auxiliary_inputs,  # conditioning on the inputs
                    sampling_probability=1.0,  # for full inference
                    name='feeding_conditional_input')

                decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=dec_init_state,
                    output_layer=dec_out_dense)

                outputs, _, final_seq_lengths = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,
                                                                                  impute_finished=True)
                self.outputs = outputs

        ### optimize loss part

    def get_decoder_prediction(self, X, session):
        feed_dict = {
            self.enc_inp: X
        }
        feed_dict.update({self.expected_out: X})
        run = [self.outputs]
        return session.run(run, feed_dict=feed_dict)

context_dim = 32
output_dim = input_dim = 1
hidden_dim = 32
layers_stacked_count = 2
learning_rate = 0.01

test = RNNEncoder_Decoder(input_dim=input_dim,
                          context_dim=context_dim,
                          output_dim=output_dim,
                          hidden_dim=hidden_dim,
                          layers_stacked_count=layers_stacked_count,
                          learning_rate=learning_rate)
Without "auxiliary_inputs = self.auxiliary_inputs", it running successfully,
But with auxiliary_inputs = self.auxiliary_inputs I got following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-3-02522a01f0d8> in <module>()
9 hidden_dim=hidden_dim,
10 layers_stacked_count=layers_stacked_count,
---> 11 learning_rate=learning_rate
12 )
<ipython-input-2-86494b8d99fa> in __init__(self, input_dim, context_dim, output_dim, hidden_dim, layers_stacked_count, learning_rate)
98
99 outputs, _ , final_seq_lengths = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,
--> 100 impute_finished = True
101 )
102 self.outputs = outputs
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope)
284 ],
285 parallel_iterations=parallel_iterations,
--> 286 swap_memory=swap_memory)
287
288 final_outputs_ta = res[1]
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name)
2773 context = WhileContext(parallel_iterations, back_prop, swap_memory, name)
2774 ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, context)
-> 2775 result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
2776 return result
2777
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
2602 self.Enter()
2603 original_body_result, exit_vars = self._BuildLoop(
-> 2604 pred, body, original_loop_vars, loop_vars, shape_invariants)
2605 finally:
2606 self.Exit()
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2552 structure=original_loop_vars,
2553 flat_sequence=vars_for_body_with_tensor_arrays)
-> 2554 body_result = body(*packed_vars_for_body)
2555 if not nest.is_sequence(body_result):
2556 body_result = [body_result]
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in body(time, outputs_ta, state, inputs, finished, sequence_lengths)
232 """
233 (next_outputs, decoder_state, next_inputs,
--> 234 decoder_finished) = decoder.step(time, inputs, state)
235 next_finished = math_ops.logical_or(decoder_finished, finished)
236 if maximum_iterations is not None:
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py in step(self, time, inputs, state, name)
137 """
138 with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)):
--> 139 cell_outputs, cell_state = self._cell(inputs, state)
140 if self._output_layer is not None:
141 cell_outputs = self._output_layer(cell_outputs)
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
178 with vs.variable_scope(vs.get_variable_scope(),
179 custom_getter=self._rnn_get_variable):
--> 180 return super(RNNCell, self).__call__(inputs, state)
181
182 def _rnn_get_variable(self, getter, *args, **kwargs):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
448 # Check input assumptions set after layer building, e.g. input shape.
449 self._assert_input_compatibility(inputs)
--> 450 outputs = self.call(inputs, *args, **kwargs)
451
452 # Apply activity regularization.
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
936 [-1, cell.state_size])
937 cur_state_pos += cell.state_size
--> 938 cur_inp, new_state = cell(cur_inp, cur_state)
939 new_states.append(new_state)
940
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
178 with vs.variable_scope(vs.get_variable_scope(),
179 custom_getter=self._rnn_get_variable):
--> 180 return super(RNNCell, self).__call__(inputs, state)
181
182 def _rnn_get_variable(self, getter, *args, **kwargs):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
448 # Check input assumptions set after layer building, e.g. input shape.
449 self._assert_input_compatibility(inputs)
--> 450 outputs = self.call(inputs, *args, **kwargs)
451
452 # Apply activity regularization.
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
554 input_size = inputs.get_shape().with_rank(2)[1]
555 if input_size.value is None:
--> 556 raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
557 scope = vs.get_variable_scope()
558 with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
ValueError: Could not infer input size from inputs.get_shape()[-1]
I'm just getting started with tensorflow, so could anyone help me with:
Is this a correct way to condition the decoder inputs on the last hidden state of the encoder?
And why does the decoder input size become None after I feed the auxiliary_inputs, as the error shows?
I just found the mistake I made:
Using context_vector_shape (a dynamic tf.shape tensor) to define the shape of the auxiliary_inputs tensor leaves every dimension statically unknown, i.e. (?, ?, ?), which leads to "ValueError: Could not infer input size from inputs.get_shape()[-1]".
Directly defining the shape of the auxiliary_inputs tensor as (?, ?, context_dim) solves the problem.
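A minimal sketch of that fix, assuming context_dim equals the encoder hidden size behind self.context_vector, as in the test configuration above:

# tf.reshape with a dynamic tf.shape() erases the static shape, so the
# LSTM cell cannot infer its input size; restore the last dimension:
context_vector_reshaped = tf.expand_dims(self.context_vector, axis=1)
self.auxiliary_inputs = tf.tile(context_vector_reshaped,
                                multiples=[1, tf.shape(self.enc_inp)[1], 1])
self.auxiliary_inputs.set_shape([None, None, self.context_dim])  # (?, ?, context_dim)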