Unable to create group (name already exists) - tensorflow

import tensorflow as tf
from ..models.encoder import encoder_build
from ..models.decoder import decoder_build
def compute_attention_maps(inputs,name,upsample=False):
attMap = tf.reduce_sum(tf.square(inputs),axis=-1,keepdims=True,name= str(name)+"reducSum")
if upsample:
attMap = tf.keras.layers.UpSampling2D(size=(2, 2),
interpolation='bilinear',
name = str(name)+"bilinear")(attMap)
attMap = tf.squeeze(attMap,axis=-1,name = str(name)+"squeeze")
attMap = tf.reshape(attMap,
(tf.shape(attMap)[0],tf.shape(attMap)[1]*tf.shape(attMap)[2]),
name = str(name)+"reshape")
attMap = tf.nn.softmax(attMap,
axis=-1,
name = str(name)+"spatialSoftmax")
return attMap
def compute_mse(x,y,name):
diff = tf.math.squared_difference(x,y,name = str(name)+"squError")
diff = tf.reduce_mean(diff,axis=0, name = str(name)+"mean")
diff = tf.reduce_sum(diff, name = str(name)+"sum")
return diff
def compute_distillation(attention_inputs):
inp1,inp2,inp3,inp4 = attention_inputs
attMap1 = compute_attention_maps(inp1,"attmap1_")
attMap2_upsample = compute_attention_maps(inp2,"attmap2UP_",upsample=True)
attMap2 = compute_attention_maps(inp2,"attmap2_")
attMap3_upsample = compute_attention_maps(inp3,"attmap3UP_",upsample=True)
attMap3 = compute_attention_maps(inp3,"attmap3_")
attMap4 = compute_attention_maps(inp4,"attmap4_")
distillation1 = compute_mse(attMap1,attMap2_upsample,"distil1_")
distillation2 = compute_mse(attMap2,attMap3_upsample,"distil2_")
distillation3 = compute_mse(attMap3,attMap4,"distil3_")
return tf.math.add_n([distillation1,distillation2,distillation3], name="distill_loss")
if __name__ == '__main__':
inputs = tf.keras.layers.Input(shape=(None, None, 3), name='image')
encoderTuple = encoder_build(inputs) # import from encoder.py file
attention_inputs = encoderTuple[1]
outputs = decoder_build(encoderTuple) # import from decoder.py file
model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
model.add_loss(compute_distillation(attention_inputs))
model.summary()
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.00001, clipnorm=0.001),
loss='binary_crossentropy',
metrics=['accuracy'])
model.fit(x = train_generator,
epochs=epochs,
verbose=1,
callbacks=callbacks,
validation_data=validation_generator,
shuffle=True)
i have created keras segmentation model for lane detection (https://arxiv.org/pdf/1908.00821.pdf). I'm able to compile, start training and save models for each epoch without any errors. But if i add my custom loss to model model.add_loss(compute_distillation(attention_inputs)) model gets trained for 1 epoch, after that model is not saving and displaying below error. How to resolve this error?
374/375 [============================>.] - ETA: 0s - loss: 4.4717 - acc: 0.9781Epoch 1/50
78/78[============================>.] - ETA: 37:38 - val_loss: 4.5855 - val_acc: 0.9758
Epoch 00001: saving model to /workspace/work/enet_sad_naiveresize/snapshot/enetNRSAD_Tusimple_L_4.4718_VL_4.5855.h5
Traceback (most recent call last):
File "/workspace/work/enet_sad_naiveresize/bin/train.py", line 82, in <module>
shuffle=True)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py", line 727, in fit
use_multiprocessing=use_multiprocessing)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_generator.py", line 603, in fit
steps_name='steps_per_epoch')
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_generator.py", line 332, in model_iteration
callbacks.on_epoch_end(epoch, epoch_logs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py", line 299, in on_epoch_end
callback.on_epoch_end(epoch, logs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py", line 968, in on_epoch_end
self._save_model(epoch=epoch, logs=logs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py", line 1015, in _save_model
self.model.save(filepath, overwrite=True)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/network.py", line 1171, in save
signatures)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/save.py", line 109, in save_model
model, filepath, overwrite, include_optimizer)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/hdf5_format.py", line 103, in save_model_to_hdf5
save_weights_to_hdf5_group(model_weights_group, model_layers)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/hdf5_format.py", line 619, in save_weights_to_hdf5_group
g = f.create_group(layer.name)
File "/usr/local/lib/python3.6/dist-packages/h5py/_hl/group.py", line 68, in create_group
gid = h5g.create(self.id, name, lcpl=lcpl, gcpl=gcpl)
File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
File "h5py/h5g.pyx", line 161, in h5py.h5g.create
ValueError: Unable to create group (name already exists)

The issue is because you are stacking layers (and naming them wrongly) in compute_distillation function, by calling other functions such as compute_attention_maps and compute_mse.
You would have got a similar layer if you hadn't named also and the fact that the error persists even after you named them is because the h5 models expects names in a certain format as explained here https://github.com/keras-team/keras/issues/12195.
A good solution would be to use keras lambda layers in compute_distilation function to create attMap1, attMap2, etc. or define your own custom AttentionMap layer as shown below.
class AttentionMaps(tf.keras.layers.Layer):
def __init__(self, upsample=False):
super(AttentionMaps, self).__init__()
self.upsample = upsample
def call(self, inputs):
attMap = tf.reduce_sum(
tf.square(inputs),
axis=-1,
keepdims=True
)
if self.upsample:
attMap = tf.keras.layers.UpSampling2D(
size=(2, 2),
interpolation='bilinear'
)(attMap)
attMap = tf.squeeze(attMap,axis=-1)
attMap = tf.reshape(
attMap,
(tf.shape(attMap)[0],tf.shape(attMap)[1]*tf.shape(attMap)[2]))
attMap = tf.nn.softmax(attMap,
axis=-1,)
return attMap
This custom layer can then be added to your model as per the example below. The names ofthe layers are no longer required, so I removed them.
def compute_distillation(attention_inputs):
inp1,inp2,inp3,inp4 = attention_inputs
attention_layer_1 = AttentionMaps()
attMap1 = attention_layer_1(inp1)
attention_layer_2 = AttentionMaps(upsample=True)
attMap2_upsample = attention_layer_2(inp2)
attention_layer_3 = AttentionMaps()
attMap2 = attention_layer_3(inp2)
attention_layer_4 = AttentionMaps(upsample=True)
attMap3_upsample = attention_layer_4(inp3)
attention_layer_5 = AttentionMaps()
attMap3 = attention_layer_5(inp3)
attention_layer_6 = AttentionMaps(upsample=True)
attMap4_upsample = attention_layer_6(inp4)
distillation1 = compute_mse(attMap1,attMap2_upsample)
distillation2 = compute_mse(attMap2,attMap3_upsample)
distillation3 = compute_mse(attMap3,attMap4_upsample)
return tf.math.add_n([distillation1,distillation2,distillation3], name="distill_loss")

There are a few issues opened in the keras github related to this.
https://github.com/keras-team/keras/issues/6005
https://github.com/keras-team/keras/issues/12195
This issue is not due to the custom loss function but due to the way the model is defined.
You could try out the solutions provided in the above mentioned links such as saving the model weights as a tf file rather than h5 or avoid adding the same instance of activation layer at multiple places in the model. If that doesn't resolve your issue please update the question to include the models.

Related

Keras model compiles well outside SageMaker. But as soon as i try to train it in SageMaker with the Tensorflow instance i get an error

Here is the error: ValueError: Output tensors to a Model must be the output of a TensorFlow Layer (thus holding past layer metadata)
I try to train and deploy a multi-input Keras model with AWS Sagemaker, but there seem to be some showstopper issues with the needed libraries that expect single input for Keras models.
I have 3 categorical inputs variables and one numeric variable. The target variable is also of type categorical.I have no test or validation data. I am only interested in the training without errors.
I merged the arrays after data preparation as follows and then stored them in s3
input_train = np.column_stack((input_cat1, input_cat2, input_num, input_cat3))
training_input_path = sage_maker_session.upload_data('data/training.npz', key_prefix=prefix + training_folder)
print(training_input_path)
s3://sagemaker-eu-central-1-xxxxxxxxxxxxx/user_tracking/training/training.npz
In the train.py script (entry_point), I again fetched the file from s3. And I compiled the Train.py file again without problems, as if I were outside SageMaker.
%%writefile train.py
### import library ###
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type=int, default=60)
parser.add_argument('--batch-size', type=int, default=50)
parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
#parser.add_argument('--model-dir', type=str)
parser.add_argument('--training', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
#parser.add_argument('--training', type=str, default='data')
args, _ = parser.parse_known_args()
epochs = args.epochs
batch_size = args.batch_size
model_dir = args.model_dir
training_dir = args.training
input_train =np.load(os.path.join(training_dir, 'training.npz'))['train_input']
target =np.load(os.path.join(training_dir, 'training.npz'))['train_output']
input_cat1 = input_train[:,0].astype(np.int32)
input_cat2 = input_train[:,1].astype(np.int32)
input_cat3 = input_train[:,3:].astype(np.int32)
input_num = input_train[:,2].astype(np.float32)
n_steps = 2 # number of timesteps in each sample
num_unique_os = 5 #len(le_betriebsystem.classes_)+1
num_unique_browser = 10 #len(le_browser.classes_)+1
num_unique_actions = 210 #len(le_actionen.classes_)+1
#numeric Input
numerical_input = tf.keras.Input(shape=(1,), name='numeric_input')
#categorical Input
os_input = tf.keras.Input(shape=(1,), name='os_input')
browser_input = tf.keras.Input(shape=(1,), name='browser_input')
action_input= tf.keras.Input(shape=(max_seq_len,), name='action_input')
emb_os = tf.keras.layers.Embedding(num_unique_os, 32)(os_input)
emb_browser = tf.keras.layers.Embedding(num_unique_browser, 32)(browser_input)
emb_actions = tf.keras.layers.Embedding(num_unique_actions, 64)(action_input)
actions_repr = tf.keras.layers.LSTM(300, return_sequences=True)(emb_actions)
actions_repr = tf.keras.layers.LSTM(200)(emb_actions)
emb_os = tf.squeeze(emb_os, axis=1)
emb_browser = tf.squeeze(emb_browser, axis=1)
activity_repr = tf.keras.layers.Concatenate()([emb_os, emb_browser, actions_repr,
numerical_input])
x = tf.keras.layers.RepeatVector(n_steps)(activity_repr)
x = tf.keras.layers.LSTM(288, return_sequences=True)(x)
next_n_actions = tf.keras.layers.Dense(num_unique_actions-1, activation='softmax')(x)
model = tf.keras.Model(inputs=[numerical_input, os_input, browser_input, action_input], outputs =
next_n_actions)
model.summary()
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])
history = model.fit({'numeric_input': input_num,
'os_input': input_cat1,
'browser_input': input_cat2,
'action_input': input_cat3}, target, batch_size=50, epochs=130)
tf.saved_model.simple_save(
tf.keras.backend.get_session(),
os.path.join(model_dir, '1'),
inputs={'inputs': model.input},
outputs={t.name: t for t in model.outputs})
I received this:
Model Sommary
Meric Tendency
when trying to do the whole thing again with the Tensorflow instance, the following error occurred:
Traceback (most recent call last): File "train.py", line 105, in model = tf.keras.Model(inputs=[numerical_input, os_input, browser_input, action_input], outputs = next_n_actions) File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py", line 121, in init super(Model, self).init(*args, **kwargs) File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/network.py", line 80, in init self._init_graph_network(*args, **kwargs) File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/checkpointable/base.py", line 474, in _method_wrapper method(self, *args, **kwargs) File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/network.py", line 224, in _init_graph_network '(thus holding past layer metadata). Found: ' + str(x)) ValueError: Output tensors to a Model must be the output of a TensorFlow Layer (thus holding past layer metadata). Found: Tensor("dense/truediv:0", shape=(?, 2, 209), dtype=float32) 2021-03-08 21:52:04,761 sagemaker-containers ERROR ExecuteUserScriptError: Command "/usr/bin/python train.py --batch-size 50 --epochs 150--model_dir s3://sagemaker-eu-central-1-xxxxxxxxxxxxxxxxx/sagemaker-tensorflow-scriptmode
I used the Tensorflow versions '2.0.4' and '1.15.4' respectly with the kernels conda_tensorflow_p36 and conda_tensorflow2_p36
For more of Code: https://gitlab.com/patricksardin08/data-science/-/tree/master/
Please i need your Helps. I'm here around the clock if anyone wants me to explain the question in more detail.

Keras load_weights() not loading checkpoints

I have been following the RNN tutorial of Tensorflow
https://www.tensorflow.org/tutorials/text/text_generation
The model.load_weights() is not working, and is throwing the error
Traceback (most recent call last):
File "C:/Users/swati.srivastava/PycharmProjects/TensorFlow Practice/main.py", line 1232, in <module>
model.load_weights(tf.train.load_checkpoint("./training_checkpoints/ckpt_" + str(checkpoint_num)))
File "C:\Users\swati.srivastava\PycharmProjects\TensorFlow Practice\venv\lib\site-packages\tensorflow\python\keras\engine\training.py", line 2260, in load_weights
filepath, save_format = _detect_save_format(filepath)
File "C:\Users\swati.srivastava\PycharmProjects\TensorFlow Practice\venv\lib\site-packages\tensorflow\python\keras\engine\training.py", line 2868, in _detect_save_format
if saving_utils.is_hdf5_filepath(filepath):
File "C:\Users\swati.srivastava\PycharmProjects\TensorFlow Practice\venv\lib\site-packages\tensorflow\python\keras\saving\saving_utils.py", line 327, in is_hdf5_filepath
return (filepath.endswith('.h5') or filepath.endswith('.keras') or
AttributeError: 'tensorflow.python.util._pywrap_checkpoint_reader.C' object has no attribute 'endswith'
Process finished with exit code 1
My code is
BATCH_SIZE = 64
VOCAB_SIZE = len(vocab)
EMBEDDING_DIM = 256
RNN_UNITS = 1024
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
model = tf.keras.Sequential([
tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
tf.keras.layers.LSTM(rnn_units,
return_sequences=True,
stateful=True,
recurrent_initializer='glorot_uniform'),
tf.keras.layers.Dense(vocab_size)
])
return model
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath=checkpoint_prefix,
save_weights_only=True
)
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1)
checkpoint_num = 2
model.load_weights(tf.train.load_checkpoint("./training_checkpoints/ckpt_" + str(checkpoint_num)))
model.build(tf.TensorShape([1, None]))
My project directory looks like
which means that the training checkpoints are created, and exist. None of the checkpoint files are empty.
The only solution I could find was at https://github.com/tensorflow/tensorflow/issues/38745, where it says to do save_weights_only=True, which I have already done.
I think it is some sort of version conflict, but am not sure.
Edit: Added the checkpoint_callback snippet. training_checkpoints directory is created as can be seen in the project directory image
The issue is that load_weights api expects an HDF5 format file, but as per the your code, you do not provide it.
I am assuming that you are using ModelCheckpoint API for checkpoint creation like below:
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
save_weights_only=True,
verbose=1)
Then you will just provide the checkpoint path which is in your case './training_checkpoints/'
model.load_weights(checkpoint_path)
You can look at the documentation for more details.

tflite converter from a custom keras layer

I'm getting a TypeError when trying to convert a keras .h5 file to tflite.
The new layer is a gaussian kernel (Radial Basis Layer).
To be able to save and load the keras model I defined also the get_config() method in the custom layer. So I'm able to save and load the model correctly.
class RBFLayer(Layer):
def __init__(self, output_dim, centers=None, tol = 1e-6, gamma=0, **kwargs):
super(RBFLayer, self).__init__(**kwargs)
self.centers_ = centers
self.output_dim= output_dim
self.gamma_ = gamma
self.tol_ = tol
def build(self, input_shape):
self.mu = K.variable(self.centers_, name='centers')
self.gamma = K.variable(self.gamma_, name='gamma')
self.tol = K.constant(self.tol_,name='tol')
super(RBFLayer, self).build(input_shape)
def call(self, inputs): #Kernel radial
a,b = self.mu.shape
diff = K.reshape( K.tile(inputs,(1,a))-K.reshape(self.mu,(1,-1)), (-1,a,b))
l2 = K.sum(K.pow(diff, 2), axis=-1)
res = K.exp(-1 * self.gamma * l2)
mask = K.greater( res, self.tol)
return K.switch(mask, res, K.zeros_like(res))
def compute_output_shape(self, input_shape):
return (input_shape[0], self.output_dim)
def get_config(self):
config = {
'output_dim': self.output_dim,
'centers': self.centers_,
'gamma': self.gamma_,
'tol': self.tol_
}
base_config = super(RBFLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
Now I want to save the model to tflite. I use TFLiteConverter from keras file including also 'custom_objects'.
def save_tflite(self, base_name):
file =base_name +'.h5'
converter = tf.lite.TFLiteConverter.from_keras_model_file(file, custom_objects={'RBFLayer':RBFLayer})
tflite_model = converter.convert()
open(base_name+".tflite", "wb").write(tflite_model)
I expect to get the tflite model file including the K.variables used while training the complete model (centers, tol, gamma).
When converting I get these error messages:
airgorbn.save_tflite( base_name)
Traceback (most recent call last):
File "<ipython-input-7-cdaa1ec46233>", line 1, in <module>
airgorbn.save_tflite( base_name)
File "C:/Users/AIRFI/Hospital/keras_RadialBasis.py", line 158, in save_tflite
converter = tf.lite.TFLiteConverter.from_keras_model_file(file, custom_objects={'RBFLayer':RBFLayer})
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\lite\python\lite.py", line 747, in from_keras_model_file
keras_model = _keras.models.load_model(model_file, custom_objects)
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\saving\save.py", line 146, in load_model
return hdf5_format.load_model_from_hdf5(filepath, custom_objects, compile)
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\saving\hdf5_format.py", line 212, in load_model_from_hdf5
custom_objects=custom_objects)
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\saving\model_config.py", line 55, in model_from_config
return deserialize(config, custom_objects=custom_objects)
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\layers\serialization.py", line 89, in deserialize
printable_module_name='layer')
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\utils\generic_utils.py", line 192, in deserialize_keras_object
list(custom_objects.items())))
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\sequential.py", line 353, in from_config
model.add(layer)
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\training\tracking\base.py", line 457, in _method_wrapper
result = method(self, *args, **kwargs)
File "C:\Users\AIRFI\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\sequential.py", line 154, in add
'Found: ' + str(layer))
TypeError: The added layer must be an instance of class Layer. Found: <__main__.RBFLayer object at 0x0000017D3A75AC50>
you need to define that layer as a custom op.
refer to this https://www.tensorflow.org/lite/guide/ops_custom

String input to categorical column via dataset

I'm trying to learn how to use the Estimator API, using input_fn to provide Dataset backed input to a feature_column generated input layer.
My code looks like
import tensorflow as tf import random
tf.logging.set_verbosity(tf.logging.DEBUG)
def input_fn():
def gen():
for i in range(100000):
for j in range(10):
yield {"in": str(j)}, [str(j+1)]
data = tf.data.Dataset.from_generator(gen, ({"in": tf.string}, tf.string))
data = data.batch(10)
iterator = data.make_one_shot_iterator()
return iterator.get_next()
vocabulary_feature_column = tf.feature_column.categorical_column_with_vocabulary_list(
key="in",
vocabulary_list=map(lambda i: str(i), range(11)))
embedding_column = tf.feature_column.embedding_column(
categorical_column=vocabulary_feature_column,
dimension=2)
with tf.Session() as sess:
print(sess.run(input_fn()))
classifier = tf.estimator.DNNClassifier(
feature_columns = [embedding_column],
hidden_units = [5,5],
n_classes = 11,
model_dir = '/tmp/predict/snap')
classifier.train(
input_fn=input_fn)
but running it I get
Traceback (most recent call last):
File "predict.py", line 33, in
input_fn=input_fn)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 302, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 711, in _train_model
features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 694, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn.py", line 334, in _model_fn
config=config)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn.py", line 190, in _dnn_model_fn
logits = logit_fn(features=features, mode=mode)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn.py", line 89, in dnn_logit_fn
features=features, feature_columns=feature_columns)
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 230, in input_layer
trainable=trainable)
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 1837, in _get_dense_tensor
inputs, weight_collections=weight_collections, trainable=trainable)
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 2123, in _get_sparse_tensors
return _CategoricalColumn.IdWeightPair(inputs.get(self), None)
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 1533, in get
transformed = column._transform_feature(self) # pylint: disable=protected-access
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 2091, in _transform_feature
input_tensor = _to_sparse_input(inputs.get(self.key))
File "/usr/lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 1631, in _to_sparse_input
raise ValueError('Undefined input_tensor shape.')
ValueError: Undefined input_tensor shape.
Looking at the tf sources I get the impression I that the categorical_column_with_vocabulary_list expects a tensor as output instead of a string, but I have a hard time understanding how to make my input_fn provide that the right way.
Does anyone have any idea what I'm doing wrong here?
As a comparison, the following code works perfectly fine: https://pastebin.com/28QUNAjA
EDIT
I noticed that replacing tf.data.Dataset.from_generator with tf.data.Dataset.from_tensor_slices makes the code run.
I.e. the following actually works:
import tensorflow as tf
import random
tf.logging.set_verbosity(tf.logging.DEBUG)
def input_fn():
data = tf.data.Dataset.from_tensor_slices(({"in": map(lambda i: str(i), range(10))}, range(1,11)))
data = data.repeat(1000)
data = data.batch(10)
iterator = data.make_one_shot_iterator()
return iterator.get_next()
vocabulary_feature_column = tf.feature_column.categorical_column_with_vocabulary_list(
key="in",
vocabulary_list=map(lambda i: str(i), range(11)))
embedding_column = tf.feature_column.embedding_column(
categorical_column=vocabulary_feature_column,
dimension=2)
with tf.Session() as sess:
print(sess.run(input_fn()))
classifier = tf.estimator.DNNClassifier(
feature_columns = [embedding_column],
hidden_units = [5,5],
n_classes = 11,
model_dir = '/usr/local/google/home/zond/tmp/predict/snap')
classifier.train(
input_fn=input_fn)
This ought to be a bug, so I created https://github.com/tensorflow/tensorflow/issues/15178.

Cannot convert partially known tensor in TensorFlow/TFLearn

I'm a beginner to TensorFlow and still trying to figure out how it works, so I'm not sure if the error is a problem with my architecture or something more basic -- here I'm trying to train a siamese neural network (we feed a left and right input into left and right NN with identical weights, and try to map it to feature vectors that have small distance if the inputs are similar and large distance if the inputs are different).
The error I get occurs at the regression step:
File "siamese.py", line 59, in <module>
network = regression(y_pred, optimizer='adam',
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tflearn/models/dnn.py", line 63, in __init__
best_val_accuracy=best_val_accuracy)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tflearn/helpers/trainer.py", line 120, in __init__
clip_gradients)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tflearn/helpers/trainer.py", line 646, in initialize_training_ops
ema_num_updates=self.training_steps)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tflearn/summaries.py", line 236, in add_loss_summaries
loss_averages_op = loss_averages.apply([loss] + other_losses)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/training/moving_averages.py", line 292, in apply
colocate_with_primary=(var.op.type == "Variable"))
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/training/slot_creator.py", line 106, in create_zeros_slot
val = array_ops.zeros(primary.get_shape().as_list(), dtype=dtype)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/array_ops.py", line 1071, in zeros
shape = ops.convert_to_tensor(shape, dtype=dtypes.int32, name="shape")
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 628, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 198, in _tensor_shape_tensor_conversion_function
"Cannot convert a partially known TensorShape to a Tensor: %s" % s)
ValueError: Cannot convert a partially known TensorShape to a Tensor: (?,)
I don't know how to resolve this problem if the first dimension needs to be None for the batch size (correct me if I'm wrong).
Relevant parts of the code are below:
BATCH_SIZE=100
def contrastive_loss(y_pred, y_true, margin=1.0):
return tf.mul(1-y_true, tf.square(y_pred)) + tf.mul(y_true, tf.square(tf.maximum((margin-y_pred),0)))
## Load dataset
f = h5py.File('./data/paired_training_data.hdf','r')
X1 = f["train_X1"]
X2 = f["train_X2"]
Y = f["train_Y_paired"]
## Inputs: 1 example (phoneme pair), dropout probability
inp_sound1 = input_data(shape=[None, 1, N_MFCC_CHANNELS, N_IN_CHANNELS])
networkL = conv_1d(inp_sound1, reuse=None, scope="conv1d")
networkL = max_pool_1x6(networkL)
networkL = fully_connected(networkL, n_units=N_FULLY_CONN, activation='relu', scope="fc1")
networkL = dropout(networkL, .5) # unshared?
networkL = fully_connected(networkL, n_units=N_FULLY_CONN, activation='relu', scope="fc2")
inp_sound2 = input_data(shape=[None, 1, N_MFCC_CHANNELS, N_IN_CHANNELS])
networkR = conv_1d(inp_sound2, reuse=True, scope="conv1d")
networkR = max_pool_1x6(networkR)
networkR = fully_connected(networkR, n_units=N_FULLY_CONN, activation='relu', reuse=True, scope="fc1")
networkR = dropout(networkR, .5)
networkR = fully_connected(networkR, n_units=N_FULLY_CONN, activation='relu', reuse=True, scope="fc2")
l2_loss = tf.reduce_sum(tf.square(tf.sub(networkL, networkR)), 1)
y_pred = tf.sqrt(l2_loss)
#y_true = input_data(shape=[None])
## Training
network = regression(y_pred, optimizer='adam',
loss=contrastive_loss, learning_rate=0.0001, to_one_hot=False)
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit([X1, X2], Y, n_epoch=10, batch_size=BATCH_SIZE, show_metric=True, validation_set=0.1)
Any help -- especially with understanding how to debug these issues on my own in the future -- would be greatly appreciated!
It looks like TensorFlow cannot infer the shape of your contrastive_loss. Try to call set_shape in your contrastive_loss function if you know its output shape in advance:
def contrastive_loss(y_pred, y_true, margin=1.0):
loss = tf.mul(1-y_true, tf.square(y_pred)) + tf.mul(y_true, tf.square(tf.maximum((margin-y_pred),0)))
loss.set_shape([...])
return loss