how to condition encoder final hidden state on the inputs of RNN dynamic decoder with ScheduledOutputTrainingHelper? - dynamic

I'm trying to use tensorflow to code RDD encoder and decoder and with different length sequence inputs, so hope both encoder and decoder can be dynamic. Additionally, a decoder inputs is conditioned by the encoder final hidden states (context vector), which is similar to the Related Paper see picture a in page 3. The decoder is trying to fully inference during training with feeding previous outputs and context vector as inputs at each step.
import tensorflow as tf
import copy
import math
from tensorflow.python.layers.core import Dense
class RNNEncoder_Decoder(object):
def __init__(self,input_dim,
context_dim,output_dim,hidden_dim,
layers_stacked_count,learning_rate):
self.graph = tf.get_default_graph()
self.input_dim = input_dim
self.output_dim = output_dim
self.context_dim = context_dim
self.hidden_dim = hidden_dim
self.layers_stacked_count = layers_stacked_count
self.learning_rate = learning_rate
self.sampling_probability = tf.constant(dtype=tf.float32,value=1.0)
# [batch_size,sequence_length,input_dimension]
self.enc_inp = tf.placeholder(tf.float32, [None,None,self.input_dim], name='encoder_inputs')
self.expected_out = tf.placeholder(tf.float32, [None,None,self.input_dim], name='expected_outs')
# fullly inference during trianing
self.dec_inp = tf.zeros_like(self.expected_out,dtype=tf.float32,name='decoder_inputs')
seq_length = tf.reduce_sum(tf.sign(tf.reduce_max(tf.abs(self.enc_inp), 2)), 1)
self.seq_length = tf.cast(seq_length, tf.int32)
with tf.variable_scope('RNNEncoderDecoder'):
with tf.variable_scope("Enocder") as encoder_varscope:
# create encoder LSTM cell
encoder_cells = []
for i in range(self.layers_stacked_count):
with tf.variable_scope('EncoderCell_{}'.format(i)):
encoder_cells.append(tf.nn.rnn_cell.LSTMCell(self.hidden_dim,
use_peepholes=True))
self.encoder_cell = tf.nn.rnn_cell.MultiRNNCell(encoder_cells)
# ruuning dynamic rnn encoder
_, enc_state = tf.nn.dynamic_rnn(cell = self.encoder_cell,
initial_state=None,
dtype=tf.float32,
inputs = self.enc_inp,
sequence_length = self.seq_length
)
# extract top layer hidden state as feature representation
self.context_vector = enc_state[-1].h
cell_state0 = tf.zeros_like(enc_state[0].c,dtype=tf.float32)
hidden_state0 = tf.zeros_like(enc_state[0].h,dtype=tf.float32)
dec_init_state = (enc_state[1], # pass the top layer state of enocder to the bottom layer of decoder
tf.nn.rnn_cell.LSTMStateTuple(cell_state0, hidden_state0))
# condition extracted features on decoder inputs
# with a shape that matches decoder inputs in all but (potentially) the final dimension.
# tile context vector from [batch_size,context_dim] to [batch_size,decoder_sequence_length,context_dim]
context_vector_shape = tf.shape(self.context_vector)
context_vector_reshaped = tf.reshape(self.context_vector,
[context_vector_shape[0], 1, context_vector_shape[1]]
)
enc_inp_shape = tf.shape(self.enc_inp)
self.auxiliary_inputs = tf.tile(context_vector_reshaped,
multiples=[1,enc_inp_shape[1],1]
)
with tf.variable_scope("Deocder") as decoder_varscope:
# create decoder LSTM cell
decoder_cells = []
for i in range(self.layers_stacked_count):
with tf.variable_scope('DecoderCell_{}'.format(i)):
decoder_cells.append(tf.nn.rnn_cell.LSTMCell(self.hidden_dim,
use_peepholes=True))
self.decoder_cell = tf.nn.rnn_cell.MultiRNNCell(decoder_cells)
dec_out_dense = Dense(units = self.output_dim,
activation = None,
use_bias = False,
kernel_initializer = tf.truncated_normal_initializer(
dtype=tf.float32,
stddev = 1.0 / math.sqrt(float(self.hidden_dim))
),
name = 'dec_outp_linear_projection'
)
training_helper = tf.contrib.seq2seq.ScheduledOutputTrainingHelper(
inputs = self.dec_inp,
sequence_length = self.seq_length,
auxiliary_inputs = self.auxiliary_inputs, # condtional on inputs
sampling_probability = 1.0, # for fullly inference
name = 'feeding_conditional_input'
)
decoder = tf.contrib.seq2seq.BasicDecoder(
cell = self.decoder_cell,
helper = training_helper,
initial_state = dec_init_state,
output_layer = dec_out_dense
)
outputs, _ , final_seq_lengths = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,
impute_finished = True
)
self.outputs = outputs
### optimize loss part
def get_decoder_prediction(self,X,session):
feed_dict = {
self.enc_inp:X
}
feed_dict.update({self.expected_out:X})
run = [self.outputs]
return session.run(run,feed_dict=feed_dict)
context_dim = 32
output_dim = input_dim = 1
hidden_dim = 32
layers_stacked_count = 2
learning_rate = 0.01
test = RNNEncoder_Decoder(input_dim=input_dim,
context_dim=context_dim,
output_dim=output_dim,
hidden_dim=hidden_dim,
layers_stacked_count=layers_stacked_count,
learning_rate=learning_rate
)
Without "auxiliary_inputs = self.auxiliary_inputs", it running successfully,
But with auxiliary_inputs = self.auxiliary_inputs I got following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-3-02522a01f0d8> in <module>()
9 hidden_dim=hidden_dim,
10 layers_stacked_count=layers_stacked_count,
---> 11 learning_rate=learning_rate
12 )
<ipython-input-2-86494b8d99fa> in __init__(self, input_dim, context_dim, output_dim, hidden_dim, layers_stacked_count, learning_rate)
98
99 outputs, _ , final_seq_lengths = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,
--> 100 impute_finished = True
101 )
102 self.outputs = outputs
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope)
284 ],
285 parallel_iterations=parallel_iterations,
--> 286 swap_memory=swap_memory)
287
288 final_outputs_ta = res[1]
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name)
2773 context = WhileContext(parallel_iterations, back_prop, swap_memory, name)
2774 ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, context)
-> 2775 result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
2776 return result
2777
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
2602 self.Enter()
2603 original_body_result, exit_vars = self._BuildLoop(
-> 2604 pred, body, original_loop_vars, loop_vars, shape_invariants)
2605 finally:
2606 self.Exit()
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2552 structure=original_loop_vars,
2553 flat_sequence=vars_for_body_with_tensor_arrays)
-> 2554 body_result = body(*packed_vars_for_body)
2555 if not nest.is_sequence(body_result):
2556 body_result = [body_result]
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in body(time, outputs_ta, state, inputs, finished, sequence_lengths)
232 """
233 (next_outputs, decoder_state, next_inputs,
--> 234 decoder_finished) = decoder.step(time, inputs, state)
235 next_finished = math_ops.logical_or(decoder_finished, finished)
236 if maximum_iterations is not None:
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py in step(self, time, inputs, state, name)
137 """
138 with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)):
--> 139 cell_outputs, cell_state = self._cell(inputs, state)
140 if self._output_layer is not None:
141 cell_outputs = self._output_layer(cell_outputs)
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
178 with vs.variable_scope(vs.get_variable_scope(),
179 custom_getter=self._rnn_get_variable):
--> 180 return super(RNNCell, self).__call__(inputs, state)
181
182 def _rnn_get_variable(self, getter, *args, **kwargs):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
448 # Check input assumptions set after layer building, e.g. input shape.
449 self._assert_input_compatibility(inputs)
--> 450 outputs = self.call(inputs, *args, **kwargs)
451
452 # Apply activity regularization.
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
936 [-1, cell.state_size])
937 cur_state_pos += cell.state_size
--> 938 cur_inp, new_state = cell(cur_inp, cur_state)
939 new_states.append(new_state)
940
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
178 with vs.variable_scope(vs.get_variable_scope(),
179 custom_getter=self._rnn_get_variable):
--> 180 return super(RNNCell, self).__call__(inputs, state)
181
182 def _rnn_get_variable(self, getter, *args, **kwargs):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
448 # Check input assumptions set after layer building, e.g. input shape.
449 self._assert_input_compatibility(inputs)
--> 450 outputs = self.call(inputs, *args, **kwargs)
451
452 # Apply activity regularization.
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
554 input_size = inputs.get_shape().with_rank(2)[1]
555 if input_size.value is None:
--> 556 raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
557 scope = vs.get_variable_scope()
558 with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
ValueError: Could not infer input size from inputs.get_shape()[-1]
I'm just getting start to use tensforflow, so could anyone help me with:
Is this a correct way to condition the last hidden state of encoder on the inputs of decoder?
and why the inputs of decoder become None after I feed the auxiliary_inputs as the error?

Just Find the mistake I made:
using "context_vector_shape" to define the shape of auxiliary_inputs tensor will result no dimension size in all as (?,?,?), which lead to "ValueError: Could not infer input size from inputs.get_shape()[-1]",
directly define the shape of auxiliary_inputs tensor as (?,?,context_dim) will solve this question.

Related

Tensorflow mixed_precision error `x` and `y` must have the same dtype, got tf.float16 != tf.float32

mixed_precision.set_global_policy(policy="mixed_float16") gives an error when I add this line
error =
TypeError Traceback (most recent call
last) in
5 #mixed_precision.set_global_policy(policy="float32")
6 input_shape = (224, 224, 3)
----> 7 base_model = tf.keras.applications.EfficientNetB0(include_top=False)
8 base_model.trainable = False # freeze base model layers
9
4 frames
/usr/local/lib/python3.7/dist-packages/keras/applications/efficientnet.py
in EfficientNetB0(include_top, weights, input_tensor, input_shape,
pooling, classes, classifier_activation, **kwargs)
559 classes=classes,
560 classifier_activation=classifier_activation,
--> 561 **kwargs)
562
563
/usr/local/lib/python3.7/dist-packages/keras/applications/efficientnet.py
in EfficientNet(width_coefficient, depth_coefficient, default_size,
dropout_rate, drop_connect_rate, depth_divisor, activation,
blocks_args, model_name, include_top, weights, input_tensor,
input_shape, pooling, classes, classifier_activation)
332 # original implementation.
333 # See https://github.com/tensorflow/tensorflow/issues/49930 for more details
--> 334 x = x / tf.math.sqrt(IMAGENET_STDDEV_RGB)
335
336 x = layers.ZeroPadding2D(
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/traceback_utils.py
in error_handler(*args, **kwargs)
151 except Exception as e:
152 filtered_tb = _process_traceback_frames(e.traceback)
--> 153 raise e.with_traceback(filtered_tb) from None
154 finally:
155 del filtered_tb
/usr/local/lib/python3.7/dist-packages/keras/layers/core/tf_op_layer.py
in handle(self, op, args, kwargs)
105 isinstance(x, keras_tensor.KerasTensor)
106 for x in tf.nest.flatten([args, kwargs])):
--> 107 return TFOpLambda(op)(*args, **kwargs)
108 else:
109 return self.NOT_SUPPORTED
/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py
in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.traceback)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
TypeError: Exception encountered when calling layer
"tf.math.truediv_3" (type TFOpLambda).
x and y must have the same dtype, got tf.float16 != tf.float32.
Call arguments received by layer "tf.math.truediv_3" (type
TFOpLambda): • x=tf.Tensor(shape=(None, None, None, 3),
dtype=float16) • y=tf.Tensor(shape=(3,), dtype=float32) •
name=None
this is code =
from tensorflow.keras import layers
# Create base model
mixed_precision.set_global_policy(policy="mixed_float16")
input_shape = (224, 224, 3)
base_model = tf.keras.applications.EfficientNetB0(include_top=False)
base_model.trainable = False # freeze base model layers
# Create Functional model
inputs = layers.Input(shape=input_shape, name="input_layer")
# Note: EfficientNetBX models have rescaling built-in but if your model didn't you could have a layer like below
# x = layers.Rescaling(1./255)(x)
x = base_model(inputs, training=False) # set base_model to inference mode only
x = layers.GlobalAveragePooling2D(name="pooling_layer")(x)
x = layers.Dense(len(class_names))(x) # want one output neuron per class
# Separate activation of output layer so we can output float32 activations
outputs = layers.Activation("softmax", dtype=tf.float32, name="softmax_float32")(x)
model = tf.keras.Model(inputs, outputs)
# Compile the model
model.compile(loss="sparse_categorical_crossentropy", # Use sparse_categorical_crossentropy when labels are *not* one-hot
optimizer=tf.keras.optimizers.Adam(),
metrics=["accuracy"])
When I change this line with float32 instead of mixed_float16,like
this mixed_precision.set_global_policy(policy="float32") the
error goes away. I want to use Mixed_precision, how can I do it?

Why flatten() is not working in co-lab whereas it worked in kaggle-notebook posted by other user?

I am working on a project for pneumonia detection. I have looked over kaggle for notebooks on the same. there was a user who stacked two pretrained model densenet169 and mobilenet. I copies whole kaggle notebook from the user where he didn't get any error, but when I ran it in google colab I get this error in this part:
part where error is:
from keras.layers.merge import concatenate
from keras.layers import Input
import tensorflow as tf
input_shape = (224,224,3)
input_layer = Input(shape = (224, 224, 3))
#first model
base_mobilenet = MobileNetV2(weights = 'imagenet', include_top = False, input_shape = input_shape)
base_densenet = DenseNet169(weights = 'imagenet', include_top = False, input_shape = input_shape)
for layer in base_mobilenet.layers:
layer.trainable = False
for layer in base_densenet.layers:
layer.trainable = False
model_mobilenet = base_mobilenet(input_layer)
model_mobilenet = GlobalAveragePooling2D()(model_mobilenet)
output_mobilenet = Flatten()(model_mobilenet)
model_densenet = base_densenet(input_layer)
model_densenet = GlobalAveragePooling2D()(model_densenet)
output_densenet = Flatten()(model_densenet)
merged = tf.keras.layers.Concatenate()([output_mobilenet, output_densenet])
x = BatchNormalization()(merged)
x = Dense(256,activation = 'relu')(x)
x = Dropout(0.5)(x)
x = BatchNormalization()(x)
x = Dense(128,activation = 'relu')(x)
x = Dropout(0.5)(x)
x = Dense(1, activation = 'sigmoid')(x)
stacked_model = tf.keras.models.Model(inputs = input_layer, outputs = x)
Error Traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-35-69c389bc7252> in <module>()
18 model_mobilenet = base_mobilenet(input_layer)
19 model_mobilenet = GlobalAveragePooling2D()(model_mobilenet)
---> 20 output_mobilenet = Flatten(data_format=None)(model_mobilenet)
21
22 model_densenet = base_densenet(input_layer)
5 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
1028 with autocast_variable.enable_auto_cast_variables(
1029 self._compute_dtype_object):
-> 1030 outputs = call_fn(inputs, *args, **kwargs)
1031
1032 if self._activity_regularizer:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in call(self, inputs)
672 # Full static shape is guaranteed to be available.
673 # Performance: Using `constant_op` is much faster than passing a list.
--> 674 flattened_shape = constant_op.constant([inputs.shape[0], -1])
675 return array_ops.reshape(inputs, flattened_shape)
676 else:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
263 """
264 return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 265 allow_broadcast=True)
266
267
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
274 with trace.Trace("tf.constant"):
275 return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
--> 276 return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
277
278 g = ops.get_default_graph()
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/constant_op.py in _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
299 def _constant_eager_impl(ctx, value, dtype, shape, verify_shape):
300 """Implementation of eager constant."""
--> 301 t = convert_to_eager_tensor(value, ctx, dtype)
302 if shape is None:
303 return t
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
96 dtype = dtypes.as_dtype(dtype).as_datatype_enum
97 ctx.ensure_initialized()
---> 98 return ops.EagerTensor(value, ctx.device_name, dtype)
99
100
ValueError: Attempt to convert a value (None) with an unsupported type (<class 'NoneType'>) to a Tensor.
You have mixed up your imports a bit.
Here is a fixed version of your code
from tensorflow.keras.layers import concatenate
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Flatten, BatchNormalization, Dense, Dropout
from tensorflow.keras.applications import MobileNetV2, DenseNet169
import tensorflow as tf
input_shape = (224,224,3)
input_layer = Input(shape = (224, 224, 3))
#first model
base_mobilenet = MobileNetV2(weights = 'imagenet', include_top = False, input_shape = input_shape)
base_densenet = DenseNet169(weights = 'imagenet', include_top = False, input_shape = input_shape)
for layer in base_mobilenet.layers:
layer.trainable = False
for layer in base_densenet.layers:
layer.trainable = False
model_mobilenet = base_mobilenet(input_layer)
model_mobilenet = GlobalAveragePooling2D()(model_mobilenet)
output_mobilenet = Flatten()(model_mobilenet)
model_densenet = base_densenet(input_layer)
model_densenet = GlobalAveragePooling2D()(model_densenet)
output_densenet = Flatten()(model_densenet)
merged = tf.keras.layers.Concatenate()([output_mobilenet, output_densenet])
x = BatchNormalization()(merged)
x = Dense(256,activation = 'relu')(x)
x = Dropout(0.5)(x)
x = BatchNormalization()(x)
x = Dense(128,activation = 'relu')(x)
x = Dropout(0.5)(x)
x = Dense(1, activation = 'sigmoid')(x)
stacked_model = tf.keras.models.Model(inputs = input_layer, outputs = x)

I am getting OOM while running PRE TRAINED Bert Model with new dataset with 20k

I have pre trained model with Accuracy of 96 with 2 epochs and I am trying to use that model on new dataset of 20k tweets for sentiment analysis. while doing that I am getting below error.
I haven't faced any issues while training model with same size of data but not sure why I am getting while using that model.
ResourceExhaustedError: OOM when allocating tensor with shape[1079190,768] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:ResourceGather]
Code:
from transformers import BertTokenizer, TFBertForSequenceClassification
from transformers import InputExample,InputFeatures
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model.summary()
Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
bert (TFBertMainLayer) multiple 109482240
_________________________________________________________________
dropout_37 (Dropout) multiple 0
_________________________________________________________________
classifier (Dense) multiple 1538
=================================================================
Total params: 109,483,778
Trainable params: 109,483,778
Non-trainable params: 0
train = tf.keras.preprocessing.text_dataset_from_directory('aclImdb/train',batch_size=30000,validation_split=0.2,
subset='training',seed=123)
test = tf.keras.preprocessing.text_dataset_from_directory('aclImdb/train',batch_size=30000,validation_split=0.2,
subset='validation',seed=123)
Found 25000 files belonging to 2 classes.
Using 20000 files for training.
Found 25000 files belonging to 2 classes.
Using 5000 files for validation.
for data in train.take(1):
train_feat = data[0].numpy()
train_lab = data[1].numpy()
train = pd.DataFrame([train_feat,train_lab]).T
train.columns = ['DATA_COLUMN','LABEL_COLUMN']
train['DATA_COLUMN'] = train['DATA_COLUMN'].str.decode('utf-8')
for data in test.take(1):
test_feat = data[0].numpy()
test_lab = data[1].numpy()
test = pd.DataFrame([test_feat,test_lab]).T
test.columns = ['DATA_COLUMN','LABEL_COLUMN']
test['DATA_COLUMN'] = test['DATA_COLUMN'].str.decode('utf-8')
test.head()
def convert_data_to_examples(train, test, DATA_COLUMN, LABEL_COLUMN):
train_InputExamples = train.apply(lambda x: InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this case
text_a = x[DATA_COLUMN],
text_b = None,
label = x[LABEL_COLUMN]), axis = 1)
validation_InputExamples = test.apply(lambda x: InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this case
text_a = x[DATA_COLUMN],
text_b = None,
label = x[LABEL_COLUMN]), axis = 1)
return train_InputExamples, validation_InputExamples
train_InputExamples, validation_InputExamples = convert_data_to_examples(train,
test,
'DATA_COLUMN',
'LABEL_COLUMN')
def convert_examples_to_tf_dataset(examples, tokenizer, max_length=128):
features = [] # -> will hold InputFeatures to be converted later
for e in examples:
# Documentation is really strong for this method, so please take a look at it
input_dict = tokenizer.encode_plus(
e.text_a,
add_special_tokens=True,
max_length=max_length, # truncates if len(s) > max_length
return_token_type_ids=True,
return_attention_mask=True,
pad_to_max_length=True, # pads to the right by default # CHECK THIS for pad_to_max_length
truncation=True
)
input_ids, token_type_ids, attention_mask = (input_dict["input_ids"],
input_dict["token_type_ids"], input_dict['attention_mask'])
features.append(
InputFeatures(
input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, label=e.label
)
)
def gen():
for f in features:
yield (
{
"input_ids": f.input_ids,
"attention_mask": f.attention_mask,
"token_type_ids": f.token_type_ids,
},
f.label,
)
return tf.data.Dataset.from_generator(
gen,
({"input_ids": tf.int32, "attention_mask": tf.int32, "token_type_ids": tf.int32}, tf.int64),
(
{
"input_ids": tf.TensorShape([None]),
"attention_mask": tf.TensorShape([None]),
"token_type_ids": tf.TensorShape([None]),
},
tf.TensorShape([]),
),
)
DATA_COLUMN = 'DATA_COLUMN'
LABEL_COLUMN = 'LABEL_COLUMN'
# We can call the functions we created above with the following lines:
train_InputExamples,validation_InputExamples = convert_data_to_examples(train,test,DATA_COLUMN,LABEL_COLUMN)
train_data = convert_examples_to_tf_dataset(list(train_InputExamples),tokenizer)
train_data = train_data.shuffle(100).batch(32).repeat(2)
validation_data = convert_examples_to_tf_dataset(list(validation_InputExamples),tokenizer)
validation_data = validation_data.shuffle(100).batch(32)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0),
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')])
model.fit(train_data, epochs=2, validation_data=validation_data)
#this is my new data with 20k rows on which I want to run pretrained model:
tweets_list = statement_df['sentiment'].tolist()
#this part of the code is serving that purpose
tf_batch = tokenizer(tweets_list, max_length=128, padding=True, truncation=True, return_tensors='tf')
#print(tf_batch)
tf_outputs = model(tf_batch) # this line is thrown OOM issues
tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
labels = ['Negative','Positive']
label = tf.argmax(tf_predictions, axis=1)
label = label.numpy()
for i in range(len(tweets_list)):
print(tweets_list[i], ": \n", labels[label[i]])
Error:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
199 """Call target, and fall back on dispatchers if there is a TypeError."""
200 try:
--> 201 return target(*args, **kwargs)
202 except (TypeError, ValueError):
203 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/array_ops.py in gather_v2(params, indices, validate_indices, axis, batch_dims, name)
4830 name=name,
4831 axis=axis,
-> 4832 batch_dims=batch_dims)
4833
4834
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
199 """Call target, and fall back on dispatchers if there is a TypeError."""
200 try:
--> 201 return target(*args, **kwargs)
202 except (TypeError, ValueError):
203 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/array_ops.py in gather(***failed resolving arguments***)
4811 # TODO(apassos) find a less bad way of detecting resource variables
4812 # without introducing a circular dependency.
-> 4813 return params.sparse_read(indices, name=name)
4814 except AttributeError:
4815 return gen_array_ops.gather_v2(params, indices, axis, name=name)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/resource_variable_ops.py in sparse_read(self, indices, name)
701 variable_accessed(self)
702 value = gen_resource_variable_ops.resource_gather(
--> 703 self._handle, indices, dtype=self._dtype, name=name)
704
705 if self._dtype == dtypes.variant:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_resource_variable_ops.py in resource_gather(resource, indices, dtype, batch_dims, validate_indices, name)
547 return _result
548 except _core._NotOkStatusException as e:
--> 549 _ops.raise_from_not_ok_status(e, name)
550 except _core._FallbackException:
551 pass
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py in raise_from_not_ok_status(e, name)
6860 message = e.message + (" name: " + name if name is not None else "")
6861 # pylint: disable=protected-access
-> 6862 six.raise_from(core._status_to_exception(e.code, message), None)
6863 # pylint: enable=protected-access
6864
/usr/local/lib/python3.7/dist-packages/six.py in raise_from(value, from_value)
ResourceExhaustedError: OOM when allocating tensor with shape[1079190,768] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:ResourceGather]

`_UserObject` object has no attribute `call_and return_conditional_losses`

I am trying to use multistep training. The prediction of the first neural network is used as input for the second neural network. So I need to load the first neural network and call prediction while using the second neural network. I need the first neural network's graph /function and use it in the second neural network.
#neural network class
class Linear(Model):
def __init__(self, n_layers, activate = "tanh", dtype = "float32"):
super(Linear, self).__init__()
self.activation = activate
self.title=title
self.model = Sequential()
self.n_layers=n_layers
#self.model._set_inputs(inputs)
num_layers = len(n_layers)
for l in range(0, num_layers-1):
name = "layer_" + str(l)
m = Dense(n_layers[l+1], input_shape=(n_layers[l],), activation = self.activation, name = name, dtype=dtype)
self.model.add(m)
name = "layer_" + str(num_layers-1)
m = Dense(n_layers[-1], input_shape=(n_layers[-2],), name = name, dtype=dtype)
self.model.add(m)
#tf.function
def __call__(self, X):
Y = self.model(X)
#Pred=self.model.predict(X)
return Y
#Saving and loading
def save(self,name):
tf.saved_model.save(self.model_NN,'saved_model/')
def load(self,name):
restored_saved_model=keras.models.load_model('saved_model/')
return restored_saved_model
#I am training models one after another and using pretection of first model as input of second. So I need to load and save.
def train(self, epoch,multistepping):
for l in range(0, self.number_of_NN):
print("making neural network object",l)
self.NN_list.append(self.net)
for l in range(0,self.number_of_NN):
model = self.pde(self.NN_list[l],self.D,self.dt,self.q, self.cond_i,self.cond_b, self.lr, self.lr_schedule, self.dtype)
print(" training model number ",l)
model.train(epoch[l])
model.save(l)
model.title=l
#model_old=copy.copy(model)
#Bring it outside
def function(xy):
#load weights from previous model
if l>0:
temp_model=model.load(l-1)
x_1 = xy[:, 0][:, None]
y_1 = xy[:, 1][:, None]
U = temp_model.predict(x_1, y_1)
U=U[:,-1]
return np.asarray(U)
if multistepping==1:
self.cond_i.u_func = function
#The error message I am prompted
~/Desktop/V3/v3/v2/Ishrak/pde_d_Poisson_2D_v3.py in load(self, name)
131
132 def load(self,name):
--> 133 restored_saved_model=keras.models.load_model('saved_model/')
134 return restored_saved_model
135 #Have to check
~/anaconda3/envs/tf2.1/lib/python3.7/site-packages/tensorflow_core/python/keras/saving/save.py in load_model(filepath, custom_objects, compile)
148 if isinstance(filepath, six.string_types):
149 loader_impl.parse_saved_model(filepath)
--> 150 return saved_model_load.load(filepath, compile)
151
152 raise IOError(
~/anaconda3/envs/tf2.1/lib/python3.7/site-packages/tensorflow_core/python/keras/saving/saved_model/load.py in load(path, compile)
87 # TODO(kathywu): Add saving/loading of optimizer, compiled losses and metrics.
88 # TODO(kathywu): Add code to load from objects that contain all endpoints
---> 89 model = tf_load.load_internal(path, loader_cls=KerasObjectLoader)
90
91 # pylint: disable=protected-access
~/anaconda3/envs/tf2.1/lib/python3.7/site-packages/tensorflow_core/python/saved_model/load.py in load_internal(export_dir, tags, loader_cls)
550 loader = loader_cls(object_graph_proto,
551 saved_model_proto,
--> 552 export_dir)
553 root = loader.get(0)
554 root.tensorflow_version = meta_graph_def.meta_info_def.tensorflow_version
~/anaconda3/envs/tf2.1/lib/python3.7/site-packages/tensorflow_core/python/keras/saving/saved_model/load.py in __init__(self, *args, **kwargs)
117 def __init__(self, *args, **kwargs):
118 super(KerasObjectLoader, self).__init__(*args, **kwargs)
--> 119 self._finalize()
120
121 def _finalize(self):
~/anaconda3/envs/tf2.1/lib/python3.7/site-packages/tensorflow_core/python/keras/saving/saved_model/load.py in _finalize(self)
137 for node in self._nodes:
138 if isinstance(node, RevivedNetwork):
--> 139 call_fn = node.keras_api.call_and_return_conditional_losses
140 if call_fn.input_signature is None:
141 inputs = infer_inputs_from_restored_call_function(call_fn)
AttributeError: '_UserObject' object has no attribute 'call_and_return_conditional_losses'
How do I save and load a TensorFlow model in this scenario?

combined multiple tensorflow dataset?

i have image dataset that i apply couple augmentation methods and create a separate dataset for each method, then i concatenate all datasets to one and used in CNN, however i am getting this error
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in call(self, ctx, args)
415 attrs=("executor_type", executor_type,
416 "config_proto", config),
--> 417 ctx=ctx)
418 # Replace empty list with None
419 outputs = outputs or None
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 if any(ops._is_keras_symbolic_tensor(x) for x in inputs):
/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)
InvalidArgumentError: logits and labels must have the same first dimension, got logits shape [100352,3] and labels shape [32]
[[{{node loss/dense_1_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_keras_scratch_graph_2206]
code:
path_train_ds = tf.data.Dataset.from_tensor_slices(X_train)
label_train_ds = tf.data.Dataset.from_tensor_slices(tf.cast(y_train, tf.int64))
image_train_ds = path_train_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)
image_label_train_ds = tf.data.Dataset.zip((image_train_ds, label_train_ds))
def random_flip_up_down(img,label):
tf_img = tf.image.random_flip_up_down(img,1)
return tf_img,label
image_label_train_random_flip_up_down=image_label_train_ds.map(random_flip_up_down)
def random_saturation(img,label):
tf_img = tf.image.random_saturation(img,0.3,0.8,1)
return tf_img,label
image_label_train_random_saturation =image_label_train_ds.map(random_saturation)
def concet(ds):
ds0 = ds[0]
for ds1 in ds[1:]:
ds0 = ds0.concatenate(ds1)
return ds0
ds =[image_label_train_random_flip_up_down,image_label_train_random_saturation]
image_label_ds_aug = concet(ds)
model_4.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])
history_4 = model_4.fit(image_label_ds_aug, epochs=8, steps_per_epoch=math.ceil(10000/BATCH_SIZE),
validation_data=(image_label_test_ds),callbacks = [MetricsCheckpoint('logs')])