`_UserObject` object has no attribute `call_and return_conditional_losses` - tensorflow2.0

I am trying to use multistep training. The prediction of the first neural network is used as input for the second neural network. So I need to load the first neural network and call prediction while using the second neural network. I need the first neural network's graph /function and use it in the second neural network.
#neural network class
class Linear(Model):
def __init__(self, n_layers, activate = "tanh", dtype = "float32"):
super(Linear, self).__init__()
self.activation = activate
self.title=title
self.model = Sequential()
self.n_layers=n_layers
#self.model._set_inputs(inputs)
num_layers = len(n_layers)
for l in range(0, num_layers-1):
name = "layer_" + str(l)
m = Dense(n_layers[l+1], input_shape=(n_layers[l],), activation = self.activation, name = name, dtype=dtype)
self.model.add(m)
name = "layer_" + str(num_layers-1)
m = Dense(n_layers[-1], input_shape=(n_layers[-2],), name = name, dtype=dtype)
self.model.add(m)
#tf.function
def __call__(self, X):
Y = self.model(X)
#Pred=self.model.predict(X)
return Y
#Saving and loading
def save(self,name):
tf.saved_model.save(self.model_NN,'saved_model/')
def load(self,name):
restored_saved_model=keras.models.load_model('saved_model/')
return restored_saved_model
#I am training models one after another and using pretection of first model as input of second. So I need to load and save.
def train(self, epoch,multistepping):
for l in range(0, self.number_of_NN):
print("making neural network object",l)
self.NN_list.append(self.net)
for l in range(0,self.number_of_NN):
model = self.pde(self.NN_list[l],self.D,self.dt,self.q, self.cond_i,self.cond_b, self.lr, self.lr_schedule, self.dtype)
print(" training model number ",l)
model.train(epoch[l])
model.save(l)
model.title=l
#model_old=copy.copy(model)
#Bring it outside
def function(xy):
#load weights from previous model
if l>0:
temp_model=model.load(l-1)
x_1 = xy[:, 0][:, None]
y_1 = xy[:, 1][:, None]
U = temp_model.predict(x_1, y_1)
U=U[:,-1]
return np.asarray(U)
if multistepping==1:
self.cond_i.u_func = function
#The error message I am prompted
~/Desktop/V3/v3/v2/Ishrak/pde_d_Poisson_2D_v3.py in load(self, name)
131
132 def load(self,name):
--> 133 restored_saved_model=keras.models.load_model('saved_model/')
134 return restored_saved_model
135 #Have to check
~/anaconda3/envs/tf2.1/lib/python3.7/site-packages/tensorflow_core/python/keras/saving/save.py in load_model(filepath, custom_objects, compile)
148 if isinstance(filepath, six.string_types):
149 loader_impl.parse_saved_model(filepath)
--> 150 return saved_model_load.load(filepath, compile)
151
152 raise IOError(
~/anaconda3/envs/tf2.1/lib/python3.7/site-packages/tensorflow_core/python/keras/saving/saved_model/load.py in load(path, compile)
87 # TODO(kathywu): Add saving/loading of optimizer, compiled losses and metrics.
88 # TODO(kathywu): Add code to load from objects that contain all endpoints
---> 89 model = tf_load.load_internal(path, loader_cls=KerasObjectLoader)
90
91 # pylint: disable=protected-access
~/anaconda3/envs/tf2.1/lib/python3.7/site-packages/tensorflow_core/python/saved_model/load.py in load_internal(export_dir, tags, loader_cls)
550 loader = loader_cls(object_graph_proto,
551 saved_model_proto,
--> 552 export_dir)
553 root = loader.get(0)
554 root.tensorflow_version = meta_graph_def.meta_info_def.tensorflow_version
~/anaconda3/envs/tf2.1/lib/python3.7/site-packages/tensorflow_core/python/keras/saving/saved_model/load.py in __init__(self, *args, **kwargs)
117 def __init__(self, *args, **kwargs):
118 super(KerasObjectLoader, self).__init__(*args, **kwargs)
--> 119 self._finalize()
120
121 def _finalize(self):
~/anaconda3/envs/tf2.1/lib/python3.7/site-packages/tensorflow_core/python/keras/saving/saved_model/load.py in _finalize(self)
137 for node in self._nodes:
138 if isinstance(node, RevivedNetwork):
--> 139 call_fn = node.keras_api.call_and_return_conditional_losses
140 if call_fn.input_signature is None:
141 inputs = infer_inputs_from_restored_call_function(call_fn)
AttributeError: '_UserObject' object has no attribute 'call_and_return_conditional_losses'
How do I save and load a TensorFlow model in this scenario?

Related

Tensorflow mixed_precision error `x` and `y` must have the same dtype, got tf.float16 != tf.float32

mixed_precision.set_global_policy(policy="mixed_float16") gives an error when I add this line
error =
TypeError Traceback (most recent call
last) in
5 #mixed_precision.set_global_policy(policy="float32")
6 input_shape = (224, 224, 3)
----> 7 base_model = tf.keras.applications.EfficientNetB0(include_top=False)
8 base_model.trainable = False # freeze base model layers
9
4 frames
/usr/local/lib/python3.7/dist-packages/keras/applications/efficientnet.py
in EfficientNetB0(include_top, weights, input_tensor, input_shape,
pooling, classes, classifier_activation, **kwargs)
559 classes=classes,
560 classifier_activation=classifier_activation,
--> 561 **kwargs)
562
563
/usr/local/lib/python3.7/dist-packages/keras/applications/efficientnet.py
in EfficientNet(width_coefficient, depth_coefficient, default_size,
dropout_rate, drop_connect_rate, depth_divisor, activation,
blocks_args, model_name, include_top, weights, input_tensor,
input_shape, pooling, classes, classifier_activation)
332 # original implementation.
333 # See https://github.com/tensorflow/tensorflow/issues/49930 for more details
--> 334 x = x / tf.math.sqrt(IMAGENET_STDDEV_RGB)
335
336 x = layers.ZeroPadding2D(
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/traceback_utils.py
in error_handler(*args, **kwargs)
151 except Exception as e:
152 filtered_tb = _process_traceback_frames(e.traceback)
--> 153 raise e.with_traceback(filtered_tb) from None
154 finally:
155 del filtered_tb
/usr/local/lib/python3.7/dist-packages/keras/layers/core/tf_op_layer.py
in handle(self, op, args, kwargs)
105 isinstance(x, keras_tensor.KerasTensor)
106 for x in tf.nest.flatten([args, kwargs])):
--> 107 return TFOpLambda(op)(*args, **kwargs)
108 else:
109 return self.NOT_SUPPORTED
/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py
in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.traceback)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
TypeError: Exception encountered when calling layer
"tf.math.truediv_3" (type TFOpLambda).
x and y must have the same dtype, got tf.float16 != tf.float32.
Call arguments received by layer "tf.math.truediv_3" (type
TFOpLambda): • x=tf.Tensor(shape=(None, None, None, 3),
dtype=float16) • y=tf.Tensor(shape=(3,), dtype=float32) •
name=None
this is code =
from tensorflow.keras import layers
# Create base model
mixed_precision.set_global_policy(policy="mixed_float16")
input_shape = (224, 224, 3)
base_model = tf.keras.applications.EfficientNetB0(include_top=False)
base_model.trainable = False # freeze base model layers
# Create Functional model
inputs = layers.Input(shape=input_shape, name="input_layer")
# Note: EfficientNetBX models have rescaling built-in but if your model didn't you could have a layer like below
# x = layers.Rescaling(1./255)(x)
x = base_model(inputs, training=False) # set base_model to inference mode only
x = layers.GlobalAveragePooling2D(name="pooling_layer")(x)
x = layers.Dense(len(class_names))(x) # want one output neuron per class
# Separate activation of output layer so we can output float32 activations
outputs = layers.Activation("softmax", dtype=tf.float32, name="softmax_float32")(x)
model = tf.keras.Model(inputs, outputs)
# Compile the model
model.compile(loss="sparse_categorical_crossentropy", # Use sparse_categorical_crossentropy when labels are *not* one-hot
optimizer=tf.keras.optimizers.Adam(),
metrics=["accuracy"])
When I change this line with float32 instead of mixed_float16,like
this mixed_precision.set_global_policy(policy="float32") the
error goes away. I want to use Mixed_precision, how can I do it?

IndexError when trying to run Pytorch Network

I'm trying to train my first CNN. I split the training images into train and validation data by randomly choosing indices and using Subset and DataLoader. The validation and training splits don't have any of the same indices, so that's not the problem. They also cover the entire dataset.
train = datasets.ImageFolder('train_images', transform=transform)
torch.manual_seed(37)
val_split = random.sample(range(len(img_sizes)), int(0.1 * len(img_sizes)))
train_split = [x for x in range(len(img_sizes)) if x not in val_split]
train_data = Subset(train, train_split)
val_data = Subset(train, val_split)
train_loader = DataLoader(train_data, batch_size = 10, shuffle = True)
val_loader = DataLoader(val_data, batch_size = 10, shuffle = False)
However, when I try to enumerate through the train_loader, I get this index out of range error:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_8652\2928585573.py in <module>
13
14 # Run the training batches
---> 15 for b, (X_train, y_train) in enumerate(train_loader):
16
17 # Apply the model
D:\dum\envs\pytorchenv\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
558 if self.num_workers == 0: # same-process loading
559 indices = next(self.sample_iter) # may raise StopIteration
--> 560 batch = self.collate_fn([self.dataset[i] for i in indices])
561 if self.pin_memory:
562 batch = _utils.pin_memory.pin_memory_batch(batch)
D:\dum\envs\pytorchenv\lib\site-packages\torch\utils\data\dataloader.py in <listcomp>(.0)
558 if self.num_workers == 0: # same-process loading
559 indices = next(self.sample_iter) # may raise StopIteration
--> 560 batch = self.collate_fn([self.dataset[i] for i in indices])
561 if self.pin_memory:
562 batch = _utils.pin_memory.pin_memory_batch(batch)
D:\dum\envs\pytorchenv\lib\site-packages\torch\utils\data\dataset.py in __getitem__(self, idx)
105
106 def __getitem__(self, idx):
--> 107 return self.dataset[self.indices[idx]]
108
109 def __len__(self):
D:\dum\envs\pytorchenv\lib\site-packages\torchvision\datasets\folder.py in __getitem__(self, index)
129 tuple: (sample, target) where target is class_index of the target class.
130 """
--> 131 path, target = self.samples[index]
132 sample = self.loader(path)
133 if self.transform is not None:
IndexError: list index out of range
Anyone know what the problem is?

AttributeError: 'numpy.ndarray' object has no attribute 'op'

I am have a time series data and I am trying to build and train an LSTM model over it. I have 1 input and 1 Output corresponding to my model. I am trying to build a Many to Many model where Input length is exactly equal to output length.
The shape of my inputs are
print(np.shape(X))
(1700,70,401)
#(examples, Timestep, Features)
Shape of my output is
print(np.shape(Y_1))
(1700,70,3)
#(examples, Timestep, Features)
Now When I am trying to approach this problem via sequential API everything is running fine.
model = Sequential()
model.add(LSTM(32, input_shape=(70,401), return_sequences=True))
model.add(Dense(3,activation='softmax'))
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),loss=tf.keras.losses.CategoricalCrossentropy())
model.fit(X, Y_1, epochs=2,verbose=1)
But When I am approaching it from the functional API approach then it is showing the error
AttributeError: 'numpy.ndarray' object has no attribute 'op'
input_layer = Input(shape=(70,401))
hidden = LSTM(32,return_sequences=True)(input_layer)
output_1 = Dense(3, activation='softmax')(hidden)
# output_2 = Dense(np.shape(Y_2)[2], activation='softmax')(hidden)
model_lstm = Model(inputs=X, outputs = Y_1)
My question is How do I resolve the error?
I can not use the sequential API to solve the problem because I want to use Multiple Outputs to train i.e. I have 2 different outputs on which I want to train(But for the scope of this question let's just assume I have one set of input and one set of output)!!
The Entire error that I am getting is
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-66-df3a5a1656f0> in <module>
----> 1 model_lstm = Model(X, Y_1)
/root/anaconda3/envs/TensorPy36/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in __init__(self, *args, **kwargs)
144
145 def __init__(self, *args, **kwargs):
--> 146 super(Model, self).__init__(*args, **kwargs)
147 _keras_api_gauge.get_cell('model').set(True)
148 # initializing _distribution_strategy here since it is possible to call
/root/anaconda3/envs/TensorPy36/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/network.py in __init__(self, *args, **kwargs)
165 'inputs' in kwargs and 'outputs' in kwargs):
166 # Graph network
--> 167 self._init_graph_network(*args, **kwargs)
168 else:
169 # Subclassed network
/root/anaconda3/envs/TensorPy36/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
/root/anaconda3/envs/TensorPy36/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/network.py in _init_graph_network(self, inputs, outputs, name, **kwargs)
268
269 if any(not hasattr(tensor, '_keras_history') for tensor in self.outputs):
--> 270 base_layer_utils.create_keras_history(self._nested_outputs)
271
272 self._base_init(name=name, **kwargs)
/root/anaconda3/envs/TensorPy36/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py in create_keras_history(tensors)
182 keras_tensors: The Tensors found that came from a Keras Layer.
183 """
--> 184 _, created_layers = _create_keras_history_helper(tensors, set(), [])
185 return created_layers
186
/root/anaconda3/envs/TensorPy36/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py in _create_keras_history_helper(tensors, processed_ops, created_layers)
208 if getattr(tensor, '_keras_history', None) is not None:
209 continue
--> 210 op = tensor.op # The Op that created this Tensor.
211 if op not in processed_ops:
212 # Recursively set `_keras_history`.
AttributeError: 'numpy.ndarray' object has no attribute 'op'
Update
I tried type cast X and Y_1 to the tensor objects as suggested in the comments. It is perfectly working in the case of Sequential API but failing for Fnctional API.
X_tensor = tf.convert_to_tensor(X, dtype=tf.float32)
y_tensor=tf.convert_to_tensor(Y_1, dtype=tf.int32)
model_lstm = Model(X_tensor, y_tensor)
Error
AttributeError: Tensor.op is meaningless when eager execution is enabled.
AttributeError Traceback (most recent call last)
<ipython-input-100-d090ea2b5a90> in <module>
----> 1 model_lstm = Model(X_tensor, y_tensor)
/root/anaconda3/envs/TensorPy36/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in __init__(self, *args, **kwargs)
144
145 def __init__(self, *args, **kwargs):
--> 146 super(Model, self).__init__(*args, **kwargs)
147 _keras_api_gauge.get_cell('model').set(True)
148 # initializing _distribution_strategy here since it is possible to call
/root/anaconda3/envs/TensorPy36/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/network.py in __init__(self, *args, **kwargs)
165 'inputs' in kwargs and 'outputs' in kwargs):
166 # Graph network
--> 167 self._init_graph_network(*args, **kwargs)
168 else:
169 # Subclassed network
/root/anaconda3/envs/TensorPy36/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
/root/anaconda3/envs/TensorPy36/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/network.py in _init_graph_network(self, inputs, outputs, name, **kwargs)
268
269 if any(not hasattr(tensor, '_keras_history') for tensor in self.outputs):
--> 270 base_layer_utils.create_keras_history(self._nested_outputs)
271
272 self._base_init(name=name, **kwargs)
/root/anaconda3/envs/TensorPy36/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py in create_keras_history(tensors)
182 keras_tensors: The Tensors found that came from a Keras Layer.
183 """
--> 184 _, created_layers = _create_keras_history_helper(tensors, set(), [])
185 return created_layers
186
/root/anaconda3/envs/TensorPy36/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py in _create_keras_history_helper(tensors, processed_ops, created_layers)
208 if getattr(tensor, '_keras_history', None) is not None:
209 continue
--> 210 op = tensor.op # The Op that created this Tensor.
211 if op not in processed_ops:
212 # Recursively set `_keras_history`.
/root/anaconda3/envs/TensorPy36/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py in op(self)
1078 def op(self):
1079 raise AttributeError(
-> 1080 "Tensor.op is meaningless when eager execution is enabled.")
1081
1082 #property
AttributeError: Tensor.op is meaningless when eager execution is enabled.
I made a mistake in the code itself while executing the Model part of in the functional API version.
model_lstm = Model(inputs=X, outputs = Y_1)
I have given the variables inside the above part of the code only!! While in this part we just define what our model is going to be. Hence Here we will just write what is our input layer that needs to be considered and what will be my output layer here!! Input layer while constructing my model will be input_layer in the code and output layer will be output_1. Hence code should be
model_lstm = Model(inputs=input_layer, outputs = output_1)
and after that we can do
model_lstm.fit(X,Y_1)
This will work perfectly fine now!!

How do you write a custom activation function in python for Keras?

I'm trying to write a custom activation function for use with Keras. I can not write it with tensorflow primitives as it does properly compute the derivative. I followed How to make a custom activation function with only Python in Tensorflow? and it works very we in creating a tensorflow function. However, when I tried putting it into Keras as an activation function for the classic MNIST demo. I got errors. I also tried the tf_spiky function from the above reference.
Here is the sample code
tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(512, activation=tf_spiky),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation=tf.nn.softmax)])
Here's my entire error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-48-73a57f81db19> in <module>
3 tf.keras.layers.Dense(512, activation=tf_spiky),
4 tf.keras.layers.Dropout(0.2),
----> 5 tf.keras.layers.Dense(10, activation=tf.nn.softmax)])
6 x=tf.keras.layers.Activation(tf_spiky)
7 y=tf.keras.layers.Flatten(input_shape=(28, 28))
/opt/conda/lib/python3.6/site-packages/tensorflow/python/training/checkpointable/base.py in _method_wrapper(self, *args, **kwargs)
472 self._setattr_tracking = False # pylint: disable=protected-access
473 try:
--> 474 method(self, *args, **kwargs)
475 finally:
476 self._setattr_tracking = previous_value # pylint: disable=protected-access
/opt/conda/lib/python3.6/site-packages/tensorflow/python/keras/engine/sequential.py in __init__(self, layers, name)
106 if layers:
107 for layer in layers:
--> 108 self.add(layer)
109
110 #property
/opt/conda/lib/python3.6/site-packages/tensorflow/python/training/checkpointable/base.py in _method_wrapper(self, *args, **kwargs)
472 self._setattr_tracking = False # pylint: disable=protected-access
473 try:
--> 474 method(self, *args, **kwargs)
475 finally:
476 self._setattr_tracking = previous_value # pylint: disable=protected-access
/opt/conda/lib/python3.6/site-packages/tensorflow/python/keras/engine/sequential.py in add(self, layer)
173 # If the model is being built continuously on top of an input layer:
174 # refresh its output.
--> 175 output_tensor = layer(self.outputs[0])
176 if isinstance(output_tensor, list):
177 raise TypeError('All layers in a Sequential model '
/opt/conda/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
728
729 # Check input assumptions set before layer building, e.g. input rank.
--> 730 self._assert_input_compatibility(inputs)
731 if input_list and self._dtype is None:
732 try:
/opt/conda/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py in _assert_input_compatibility(self, inputs)
1463 if x.shape.ndims is None:
1464 raise ValueError('Input ' + str(input_index) + ' of layer ' +
-> 1465 self.name + ' is incompatible with the layer: '
1466 'its rank is undefined, but the layer requires a '
1467 'defined rank.')
ValueError: Input 0 of layer dense_1 is incompatible with the layer: its rank is undefined, but the layer requires a defined rank.
From this I gather the last Dense layer is unable to get the dimensions of the output after the activation function or something to that. I did see in the tensorflow code that many activation functions register a shape. But either I'm not doing that correctly or I'm going in the wrong direction. But I'm guessing something needs to be done to the tensorflow function to make it an activation function that Keras can use.
I would appreciate any help you can give.
As requested here is the sample codes for tf_spiky, it works as described in the above reference. However, once put into Keras I get the errors shown. This is pretty much as shown in the *How to make a custom activation function with only Python in Tensorflow?" stackoverflow article.
def spiky(x):
print(x)
r = x % 1
if r <= 0.5:
return r
else:
return 0
def d_spiky(x):
r = x % 1
if r <= 0.5:
return 1
else:
return 0
np_spiky = np.vectorize(spiky)
np_d_spiky = np.vectorize(d_spiky)
np_d_spiky_32 = lambda x: np_d_spiky(x).astype(np.float32)
import tensorflow as tf
from tensorflow.python.framework import ops
def tf_d_spiky(x,name=None):
with tf.name_scope(name, "d_spiky", [x]) as name:
y = tf.py_func(np_d_spiky_32,
[x],
[tf.float32],
name=name,
stateful=False)
return y[0]
def py_func(func, inp, Tout, stateful=True, name=None, grad=None):
# Need to generate a unique name to avoid duplicates:
rnd_name = 'PyFuncGrad' + str(np.random.randint(0, 1E+8))
tf.RegisterGradient(rnd_name)(grad) # see _MySquareGrad for grad example
g = tf.get_default_graph()
with g.gradient_override_map({"PyFunc": rnd_name}):
return tf.py_func(func, inp, Tout, stateful=stateful, name=name)
def spikygrad(op, grad):
x = op.inputs[0]
n_gr = tf_d_spiky(x)
return grad * n_gr
np_spiky_32 = lambda x: np_spiky(x).astype(np.float32)
def tf_spiky(x, name=None):
with tf.name_scope(name, "spiky", [x]) as name:
y = py_func(np_spiky_32,
[x],
[tf.float32],
name=name,
grad=spikygrad) # <-- here's the call to the gradient
return y[0]
The solution is in this post Output from TensorFlow `py_func` has unknown rank/shape
The easiest fix is to add y[0].set_shape(x.get_shape()) before the return statement in the definition of tf_spiky.
Perhaps someone out there knows how to properly work with tensorflow shape functions. Digging around I found a unchanged_shape shape function in tensorflow.python.framework.common_shapes, which be appropriate here, but I don't know how to attach it to the tf_spiky function. Seems a python decorator is in order here. It would probably be a service to others to explain customizing tensorflow functions with shape functions.

how to condition encoder final hidden state on the inputs of RNN dynamic decoder with ScheduledOutputTrainingHelper?

I'm trying to use tensorflow to code RDD encoder and decoder and with different length sequence inputs, so hope both encoder and decoder can be dynamic. Additionally, a decoder inputs is conditioned by the encoder final hidden states (context vector), which is similar to the Related Paper see picture a in page 3. The decoder is trying to fully inference during training with feeding previous outputs and context vector as inputs at each step.
import tensorflow as tf
import copy
import math
from tensorflow.python.layers.core import Dense
class RNNEncoder_Decoder(object):
def __init__(self,input_dim,
context_dim,output_dim,hidden_dim,
layers_stacked_count,learning_rate):
self.graph = tf.get_default_graph()
self.input_dim = input_dim
self.output_dim = output_dim
self.context_dim = context_dim
self.hidden_dim = hidden_dim
self.layers_stacked_count = layers_stacked_count
self.learning_rate = learning_rate
self.sampling_probability = tf.constant(dtype=tf.float32,value=1.0)
# [batch_size,sequence_length,input_dimension]
self.enc_inp = tf.placeholder(tf.float32, [None,None,self.input_dim], name='encoder_inputs')
self.expected_out = tf.placeholder(tf.float32, [None,None,self.input_dim], name='expected_outs')
# fullly inference during trianing
self.dec_inp = tf.zeros_like(self.expected_out,dtype=tf.float32,name='decoder_inputs')
seq_length = tf.reduce_sum(tf.sign(tf.reduce_max(tf.abs(self.enc_inp), 2)), 1)
self.seq_length = tf.cast(seq_length, tf.int32)
with tf.variable_scope('RNNEncoderDecoder'):
with tf.variable_scope("Enocder") as encoder_varscope:
# create encoder LSTM cell
encoder_cells = []
for i in range(self.layers_stacked_count):
with tf.variable_scope('EncoderCell_{}'.format(i)):
encoder_cells.append(tf.nn.rnn_cell.LSTMCell(self.hidden_dim,
use_peepholes=True))
self.encoder_cell = tf.nn.rnn_cell.MultiRNNCell(encoder_cells)
# ruuning dynamic rnn encoder
_, enc_state = tf.nn.dynamic_rnn(cell = self.encoder_cell,
initial_state=None,
dtype=tf.float32,
inputs = self.enc_inp,
sequence_length = self.seq_length
)
# extract top layer hidden state as feature representation
self.context_vector = enc_state[-1].h
cell_state0 = tf.zeros_like(enc_state[0].c,dtype=tf.float32)
hidden_state0 = tf.zeros_like(enc_state[0].h,dtype=tf.float32)
dec_init_state = (enc_state[1], # pass the top layer state of enocder to the bottom layer of decoder
tf.nn.rnn_cell.LSTMStateTuple(cell_state0, hidden_state0))
# condition extracted features on decoder inputs
# with a shape that matches decoder inputs in all but (potentially) the final dimension.
# tile context vector from [batch_size,context_dim] to [batch_size,decoder_sequence_length,context_dim]
context_vector_shape = tf.shape(self.context_vector)
context_vector_reshaped = tf.reshape(self.context_vector,
[context_vector_shape[0], 1, context_vector_shape[1]]
)
enc_inp_shape = tf.shape(self.enc_inp)
self.auxiliary_inputs = tf.tile(context_vector_reshaped,
multiples=[1,enc_inp_shape[1],1]
)
with tf.variable_scope("Deocder") as decoder_varscope:
# create decoder LSTM cell
decoder_cells = []
for i in range(self.layers_stacked_count):
with tf.variable_scope('DecoderCell_{}'.format(i)):
decoder_cells.append(tf.nn.rnn_cell.LSTMCell(self.hidden_dim,
use_peepholes=True))
self.decoder_cell = tf.nn.rnn_cell.MultiRNNCell(decoder_cells)
dec_out_dense = Dense(units = self.output_dim,
activation = None,
use_bias = False,
kernel_initializer = tf.truncated_normal_initializer(
dtype=tf.float32,
stddev = 1.0 / math.sqrt(float(self.hidden_dim))
),
name = 'dec_outp_linear_projection'
)
training_helper = tf.contrib.seq2seq.ScheduledOutputTrainingHelper(
inputs = self.dec_inp,
sequence_length = self.seq_length,
auxiliary_inputs = self.auxiliary_inputs, # condtional on inputs
sampling_probability = 1.0, # for fullly inference
name = 'feeding_conditional_input'
)
decoder = tf.contrib.seq2seq.BasicDecoder(
cell = self.decoder_cell,
helper = training_helper,
initial_state = dec_init_state,
output_layer = dec_out_dense
)
outputs, _ , final_seq_lengths = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,
impute_finished = True
)
self.outputs = outputs
### optimize loss part
def get_decoder_prediction(self,X,session):
feed_dict = {
self.enc_inp:X
}
feed_dict.update({self.expected_out:X})
run = [self.outputs]
return session.run(run,feed_dict=feed_dict)
context_dim = 32
output_dim = input_dim = 1
hidden_dim = 32
layers_stacked_count = 2
learning_rate = 0.01
test = RNNEncoder_Decoder(input_dim=input_dim,
context_dim=context_dim,
output_dim=output_dim,
hidden_dim=hidden_dim,
layers_stacked_count=layers_stacked_count,
learning_rate=learning_rate
)
Without "auxiliary_inputs = self.auxiliary_inputs", it running successfully,
But with auxiliary_inputs = self.auxiliary_inputs I got following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-3-02522a01f0d8> in <module>()
9 hidden_dim=hidden_dim,
10 layers_stacked_count=layers_stacked_count,
---> 11 learning_rate=learning_rate
12 )
<ipython-input-2-86494b8d99fa> in __init__(self, input_dim, context_dim, output_dim, hidden_dim, layers_stacked_count, learning_rate)
98
99 outputs, _ , final_seq_lengths = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,
--> 100 impute_finished = True
101 )
102 self.outputs = outputs
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope)
284 ],
285 parallel_iterations=parallel_iterations,
--> 286 swap_memory=swap_memory)
287
288 final_outputs_ta = res[1]
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name)
2773 context = WhileContext(parallel_iterations, back_prop, swap_memory, name)
2774 ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, context)
-> 2775 result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
2776 return result
2777
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
2602 self.Enter()
2603 original_body_result, exit_vars = self._BuildLoop(
-> 2604 pred, body, original_loop_vars, loop_vars, shape_invariants)
2605 finally:
2606 self.Exit()
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2552 structure=original_loop_vars,
2553 flat_sequence=vars_for_body_with_tensor_arrays)
-> 2554 body_result = body(*packed_vars_for_body)
2555 if not nest.is_sequence(body_result):
2556 body_result = [body_result]
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in body(time, outputs_ta, state, inputs, finished, sequence_lengths)
232 """
233 (next_outputs, decoder_state, next_inputs,
--> 234 decoder_finished) = decoder.step(time, inputs, state)
235 next_finished = math_ops.logical_or(decoder_finished, finished)
236 if maximum_iterations is not None:
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py in step(self, time, inputs, state, name)
137 """
138 with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)):
--> 139 cell_outputs, cell_state = self._cell(inputs, state)
140 if self._output_layer is not None:
141 cell_outputs = self._output_layer(cell_outputs)
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
178 with vs.variable_scope(vs.get_variable_scope(),
179 custom_getter=self._rnn_get_variable):
--> 180 return super(RNNCell, self).__call__(inputs, state)
181
182 def _rnn_get_variable(self, getter, *args, **kwargs):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
448 # Check input assumptions set after layer building, e.g. input shape.
449 self._assert_input_compatibility(inputs)
--> 450 outputs = self.call(inputs, *args, **kwargs)
451
452 # Apply activity regularization.
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
936 [-1, cell.state_size])
937 cur_state_pos += cell.state_size
--> 938 cur_inp, new_state = cell(cur_inp, cur_state)
939 new_states.append(new_state)
940
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
178 with vs.variable_scope(vs.get_variable_scope(),
179 custom_getter=self._rnn_get_variable):
--> 180 return super(RNNCell, self).__call__(inputs, state)
181
182 def _rnn_get_variable(self, getter, *args, **kwargs):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
448 # Check input assumptions set after layer building, e.g. input shape.
449 self._assert_input_compatibility(inputs)
--> 450 outputs = self.call(inputs, *args, **kwargs)
451
452 # Apply activity regularization.
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
554 input_size = inputs.get_shape().with_rank(2)[1]
555 if input_size.value is None:
--> 556 raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
557 scope = vs.get_variable_scope()
558 with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
ValueError: Could not infer input size from inputs.get_shape()[-1]
I'm just getting start to use tensforflow, so could anyone help me with:
Is this a correct way to condition the last hidden state of encoder on the inputs of decoder?
and why the inputs of decoder become None after I feed the auxiliary_inputs as the error?
Just Find the mistake I made:
using "context_vector_shape" to define the shape of auxiliary_inputs tensor will result no dimension size in all as (?,?,?), which lead to "ValueError: Could not infer input size from inputs.get_shape()[-1]",
directly define the shape of auxiliary_inputs tensor as (?,?,context_dim) will solve this question.