keras.callbacks.Tensorboard doesn't work in Tensorflow eager execution - tensorflow

I've been trying to visualise a simple model in Tensorboard. The model is build in Tensorflow Eager Execution mode, using keras.
When fitting the model I get the following error:
>>> history = model.fit(x_train, y_train, epochs=1, batch_size=BatchSize, callbacks=[tensorbrd])
Traceback (most recent call last):
File "C:\...\tensorflow\python\ops\gen_logging_ops.py", line 322, in histogram_summary
"HistogramSummary", name, _ctx._post_execution_callbacks, tag, values)
tensorflow.python.eager.core._FallbackException: This function does not handle the case of the path where all inputs are not already EagerTensors.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\...\tensorflow\python\keras\engine\training.py", line 1348, in fit
validation_steps=validation_steps)
File "C:\...\tensorflow\python\keras\engine\training_eager.py", line 990, in fit_loop
callbacks.set_model(callback_model)
File "C:\...\tensorflow\python\keras\callbacks.py", line 71, in set_model
callback.set_model(model)
File "C:\...\tensorflow\python\keras\callbacks.py", line 781, in set_model
tf_summary.histogram('{}_out'.format(layer.name), layer.output)
File "C:\...\tensorflow\python\summary\summary.py", line 187, in histogram
tag=tag, values=values, name=scope)
File "C:\...\tensorflow\python\ops\gen_logging_ops.py", line 326, in histogram_summary
tag, values, name=name, ctx=_ctx)
File "C:\...\tensorflow\python\ops\gen_logging_ops.py", line 340, in histogram_summary_eager_fallback
_attr_T, (values,) = _execute.args_to_matching_eager([values], _ctx, _dtypes.float32)
File "C:\...\tensorflow\python\eager\execute.py", line 191, in args_to_matching_eager
t, dtype, preferred_dtype=default_dtype, ctx=ctx))
File "C:\...\tensorflow\python\framework\ops.py", line 1094, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "C:\...\tensorflow\python\framework\constant_op.py", line 217, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "C:\...\tensorflow\python\framework\constant_op.py", line 167, in constant
t = convert_to_eager_tensor(value, ctx, dtype)
File "C:\...\tensorflow\python\framework\constant_op.py", line 113, in convert_to_eager_tensor
return ops.EagerTensor(value, context=handle, device=device, dtype=dtype)
ValueError: Attempt to convert a value (<DeferredTensor 'None' shape=(?, 5000) dtype=float32>) with an unsupported type (<class 'tensorflow.python.keras.engine.base_layer.DeferredTensor'>) to a Tensor.
This is based on Tensorflow 1.10 code:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import os
import h5py
tf.enable_eager_execution()
BatchSize = 256
model = keras.Sequential([
keras.layers.Flatten(input_shape=(1000,5)),
keras.layers.Dense(1, activation=tf.nn.relu)
])
model.compile(optimizer=tf.train.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999),
loss='mean_squared_error',
metrics=[ 'mean_squared_error'])
tensorbrd = keras.callbacks.TensorBoard(log_dir=SUMMARIES_FOLDER, histogram_freq=1, write_graph=False, write_images=False)
history = model.fit(x_train, y_train, epochs=1, batch_size=BatchSize, callbacks=[tensorbrd])
Am I making a mistake or is Tensorboard not compatible in this way?
Any help is greatly appreciated!

Related

Error when trying to fit the U-net model in Colab with GPU but works fine without GPU

I get the "Graph execution error" trying to use U-net for segmentation in colab, it is just fine if I don't use GPU in runtime..
I saw a similar issue here, but I couldn't figure out how that can help me with my code:
enter link description here
!wget https://documents.epfl.ch/groups/c/cv/cvlab-unit/www/data/%20ElectronMicroscopy_Hippocampus/training.tif
!wget https://documents.epfl.ch/groups/c/cv/cvlab-unit/www/data/%20ElectronMicroscopy_Hippocampus/training_groundtruth.tif
!pip install patchify
!pip install tensorflow-gpu
# !pip install tensorflow
!pip install keras
!pip install segmentation-models
!pip install multipagetiff
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt
from patchify import patchify
from PIL import Image
import segmentation_models as sm
import multipagetiff as mtif
from matplotlib import pyplot as plt
import glob
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import tensorflow.keras as keras
img = mtif.read_stack("/content/training.tif", units='um')
msk = mtif.read_stack("/content/training_groundtruth.tif", units='um')
scaler = MinMaxScaler()
patch_size = 64 # the size of patches
image_dataset = [] # preparing dataset(images)
for k in range(0, 1):
backtorgb = cv2.cvtColor(img[k,:,:],cv2.COLOR_GRAY2RGB) #converting image data to RGB(3channels)
patches_img = patchify(backtorgb, (patch_size, patch_size, 3), step=patch_size) # extracing patches with no overlap
for i in range(patches_img.shape[0]):
for j in range(patches_img.shape[1]):
single_patch_img = patches_img[i,j,:,:]
single_patch_img = scaler.fit_transform(single_patch_img.reshape(-1, single_patch_img.shape[-1])).reshape(single_patch_img.shape)
image_dataset.append(single_patch_img)
image_dataset = np.array(image_dataset)
image_dataset = image_dataset[:,0,:,:,:]
mask_dataset = [] # preparing dataset(masks)
for k in range(0, 1):
patches_mask = patchify(msk[k,:,:], (patch_size, patch_size), step=patch_size) # extracing patches with no overlap
for i in range(patches_mask.shape[0]):
for j in range(patches_mask.shape[1]):
single_patch_mask = patches_mask[i,j,:,:]
mask_dataset.append(single_patch_mask)
'''#Normalize images ''' # these data are already normalized but this step is crucial
#D not normalize masks, just rescale to 0 to 1.
mask_dataset = np.expand_dims((np.array(mask_dataset)),3) /255.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(image_dataset, mask_dataset, test_size = 0.10, random_state = 0)
activation='softmax'
n_classes=1
LR = 0.0001
optim = keras.optimizers.Adam(LR)
# Segmentation models losses can be combined together by '+' and scaled by integer or float factor
# set class weights for dice_loss (car: 1.; pedestrian: 2.; background: 0.5;)
dice_loss = sm.losses.DiceLoss(class_weights=np.array([0.25, 0.25, 0.25]))
focal_loss = sm.losses.CategoricalFocalLoss()
total_loss = dice_loss + (1 * focal_loss)
metrics = [sm.metrics.IOUScore(threshold=0.5), sm.metrics.FScore(threshold=0.5)]
###Model 1
BACKBONE1 = 'resnet34'
preprocess_input1 = sm.get_preprocessing(BACKBONE1)
# preprocessing input
X_train1 = preprocess_input1(X_train)
X_test1 = preprocess_input1(X_test)
# define model
model1 = sm.Unet(BACKBONE1, encoder_weights='imagenet', classes=n_classes, activation=activation)
model1.compile(optim, total_loss, metrics=metrics)
# print(model1.summary())
history1=model1.fit(X_train1,
y_train,
batch_size=8,
epochs=5,
verbose=10,
validation_data=(X_test1, y_test))
model_standard.save('mitochondria_standard_Unet.hdf5')
and this is the error I get, I tried different input size and also defined Unet in a very classic fashion.. but they weren't the case ...
Epoch 1/5
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
<ipython-input-7-b10395c761fe> in <module>()
28 epochs=5,
29 verbose=10,
---> 30 validation_data=(X_test1, y_test))
31 model_standard.save('mitochondria_standard_Unet.hdf5')
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InternalError: Graph execution error:
Detected at node 'model_3/bn_data/FusedBatchNormV3' defined at (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 846, in launch_instance
app.start()
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 499, in start
self.io_loop.start()
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
self._run_once()
File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
handle._run()
File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 122, in _handle_events
handler_func(fileobj, events)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 577, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 606, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py", line 556, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-7-b10395c761fe>", line 30, in <module>
validation_data=(X_test1, y_test))
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1409, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1051, in train_function
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1040, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1030, in run_step
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 889, in train_step
y_pred = self(x, training=True)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 490, in __call__
return super().__call__(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/base_layer.py", line 1014, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 92, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py", line 459, in call
inputs, training=training, mask=mask)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py", line 596, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/base_layer.py", line 1014, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 92, in error_handler
return fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/layers/normalization/batch_normalization.py", line 750, in call
outputs = self._fused_batch_norm(inputs, training=training)
File "/usr/local/lib/python3.7/dist-packages/keras/layers/normalization/batch_normalization.py", line 595, in _fused_batch_norm
training, _fused_batch_norm_training, _fused_batch_norm_inference)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/control_flow_util.py", line 106, in smart_cond
pred, true_fn=true_fn, false_fn=false_fn, name=name)
File "/usr/local/lib/python3.7/dist-packages/keras/layers/normalization/batch_normalization.py", line 581, in _fused_batch_norm_training
exponential_avg_factor=exponential_avg_factor)
Node: 'model_3/bn_data/FusedBatchNormV3'
cuDNN launch failure : input shape ([8,3,256,256])
[[{{node model_3/bn_data/FusedBatchNormV3}}]] [Op:__inference_train_function_14008]

InvalidArgumentError: Graph execution error: Node: 'categorical_crossentropy/remove_squeezable_dimensions/Squeeze'

I want to train U-Net for a semantic segmentation task using cross entropy as a loss function. I have RGB images and masks. The mask is one hot encoded. When I try to train the model, I got this error.
seed=123
batch_size= 32
n_classes=2
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
#Define a function to perform additional preprocessing after datagen.
#For example, scale images, convert masks to categorical, etc.
def preprocess_data(img, mask, num_class):
#Scale images
img = img / 255. #This can be done in ImageDataGenerator but showing it outside as an example
#Convert mask to one-hot
labelencoder = LabelEncoder()
n, h, w, c = mask.shape
print(mask.shape)
mask = mask.reshape(-1,1)
mask = labelencoder.fit_transform(mask)
mask = mask.reshape(n, h, w, c)
mask = to_categorical(mask, num_class)
print(mask.shape)
return (img, mask)
#Define the generator.
#We are not doing any rotation or zoom to make sure mask values are not interpolated.
#It is important to keep pixel values in mask as 0, 1, 2, 3, .....
from tensorflow.keras.preprocessing.image import ImageDataGenerator
def trainGenerator(train_img_path, train_mask_path, num_class):
img_data_gen_args = dict(horizontal_flip=True,
vertical_flip=True,
fill_mode='reflect')
image_datagen = ImageDataGenerator(**img_data_gen_args)
mask_datagen = ImageDataGenerator(**img_data_gen_args)
image_generator = image_datagen.flow_from_directory(
train_img_path,
class_mode = None,
target_size=(256,256),
batch_size = batch_size,
seed = seed)
mask_generator = mask_datagen.flow_from_directory(
train_mask_path,
class_mode = None,
target_size=(256,256),
batch_size = batch_size,
seed = seed)
train_generator = zip(image_generator, mask_generator)
for (img, mask) in train_generator:
img, mask = preprocess_data(img, mask, num_class)
yield (img, mask)`
model.compile(optimizer='adam', loss=tf.keras.losses.CategoricalCrossentropy(), metrics=['accuracy'])
model.fit(train_img_gen,
steps_per_epoch=steps_per_epoch,
epochs=3,
verbose=1,
validation_data=val_img_gen,
validation_steps=val_steps_per_epoch)
--------------------------------------------------------------------------- InvalidArgumentError Traceback (most recent call
last) Input In [131], in <cell line: 1>()
----> 1 history=model.fit(train_img_gen,
2 steps_per_epoch=steps_per_epoch,
3 epochs=3,
4 verbose=1,
5 validation_data=val_img_gen,
6 validation_steps=val_steps_per_epoch)
File
/opt/conda/lib/python3.9/site-packages/keras/utils/traceback_utils.py:67,
in filter_traceback..error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.traceback)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
File
/opt/conda/lib/python3.9/site-packages/tensorflow/python/eager/execute.py:54,
in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
52 try:
53 ctx.ensure_initialized()
---> 54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InvalidArgumentError: Graph execution error:
Detected at node
'categorical_crossentropy/remove_squeezable_dimensions/Squeeze'
defined at (most recent call last):
File "/opt/conda/lib/python3.9/runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/opt/conda/lib/python3.9/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/opt/conda/lib/python3.9/site-packages/ipykernel_launcher.py", line
17, in
app.launch_new_instance()
File "/opt/conda/lib/python3.9/site-packages/traitlets/config/application.py",
line 972, in launch_instance
app.start()
File "/opt/conda/lib/python3.9/site-packages/ipykernel/kernelapp.py", line
712, in start
self.io_loop.start()
File "/opt/conda/lib/python3.9/site-packages/tornado/platform/asyncio.py",
line 199, in start
self.asyncio_loop.run_forever()
File "/opt/conda/lib/python3.9/asyncio/base_events.py", line 596, in run_forever
self._run_once()
File "/opt/conda/lib/python3.9/asyncio/base_events.py", line 1890, in _run_once
handle._run()
File "/opt/conda/lib/python3.9/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "/opt/conda/lib/python3.9/site-packages/ipykernel/kernelbase.py", line
504, in dispatch_queue
await self.process_one()
File "/opt/conda/lib/python3.9/site-packages/ipykernel/kernelbase.py", line
493, in process_one
await dispatch(*args)
File "/opt/conda/lib/python3.9/site-packages/ipykernel/kernelbase.py", line
400, in dispatch_shell
await result
File "/opt/conda/lib/python3.9/site-packages/ipykernel/kernelbase.py", line
724, in execute_request
reply_content = await reply_content
File "/opt/conda/lib/python3.9/site-packages/ipykernel/ipkernel.py", line
383, in do_execute
res = shell.run_cell(
File "/opt/conda/lib/python3.9/site-packages/ipykernel/zmqshell.py", line
528, in run_cell
return super().run_cell(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/IPython/core/interactiveshell.py",
line 2881, in run_cell
result = self._run_cell(
File "/opt/conda/lib/python3.9/site-packages/IPython/core/interactiveshell.py",
line 2936, in _run_cell
return runner(coro)
File "/opt/conda/lib/python3.9/site-packages/IPython/core/async_helpers.py",
line 129, in pseudo_sync_runner
coro.send(None)
File "/opt/conda/lib/python3.9/site-packages/IPython/core/interactiveshell.py",
line 3135, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "/opt/conda/lib/python3.9/site-packages/IPython/core/interactiveshell.py",
line 3338, in run_ast_nodes
if await self.run_code(code, result, async=asy):
File "/opt/conda/lib/python3.9/site-packages/IPython/core/interactiveshell.py",
line 3398, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "/tmp/ipykernel_68/434058273.py", line 1, in <cell line: 1>
history=model.fit(train_img_gen,
File "/opt/conda/lib/python3.9/site-packages/keras/utils/traceback_utils.py",
line 64, in error_handler
return fn(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py",
line 1384, in fit
tmp_logs = self.train_function(iterator)
File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py",
line 1021, in train_function
return step_function(self, iterator)
File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py",
line 1010, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py",
line 1000, in run_step
outputs = model.train_step(data)
File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py",
line 860, in train_step
loss = self.compute_loss(x, y, y_pred, sample_weight)
File "/opt/conda/lib/python3.9/site-packages/keras/engine/training.py",
line 918, in compute_loss
return self.compiled_loss(
File "/opt/conda/lib/python3.9/site-packages/keras/engine/compile_utils.py",
line 201, in call
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "/opt/conda/lib/python3.9/site-packages/keras/losses.py", line 141, in call
losses = call_fn(y_true, y_pred)
File "/opt/conda/lib/python3.9/site-packages/keras/losses.py", line 242, in call
y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(y_pred, y_true)
File "/opt/conda/lib/python3.9/site-packages/keras/utils/losses_utils.py",
line 187, in squeeze_or_expand_dimensions
y_true, y_pred = remove_squeezable_dimensions(
File "/opt/conda/lib/python3.9/site-packages/keras/utils/losses_utils.py",
line 130, in remove_squeezable_dimensions
labels = tf.squeeze(labels, [-1]) Node: 'categorical_crossentropy/remove_squeezable_dimensions/Squeeze' Can
not squeeze dim[4], expected a dimension of 1, got 2 [[{{node
categorical_crossentropy/remove_squeezable_dimensions/Squeeze}}]]
[Op:__inference_train_function_34035]

tensorflow training with feature_column_lib.input_layer

I'm trying to train a model with an input layer similar to DNNClassifier.
I used feature_column_lib.input_layer, which is how DNNClassifier constructs its input layer.
However, I got an error when I tried to optimize the loss of my graph. I feel it has something to do with the categorical feature spec, when I removed the categorical feature, it works fine. Is there a way to solve this?
Thanks.
tensorflow/core/framework/op_kernel.cc:1318] OP_REQUIRES failed at sparse_to_dense_op.cc:126 : Invalid argument: indices[1] = [1,0] is out of bounds: need 0 <= index < [1,1]
Traceback (most recent call last):
File "tf_exp.py", line 120, in <module>
print sess.run(loss)
File "lib/python2.7/site-packages/tensorflow/python/client/session.py", line 900, in run
run_metadata_ptr)
File "lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run
run_metadata)
File "lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: indices[1] = [1,0] is out of bounds: need 0 <= index < [1,1]
[[Node: input_layer/a4_indicator/SparseToDense = SparseToDense[T=DT_INT64, Tindices=DT_INT64, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](input_layer/a4_indicator/to_sparse_input/indices, input_layer/a4_indicator/to_sparse_input/dense_shape, input_layer/a4_indicator/Select, input_layer/a4_indicator/SparseToDense/default_value)]]
Caused by op u'input_layer/a4_indicator/SparseToDense', defined at:
File "tf_exp.py", line 102, in <module>
features=features, feature_columns=feature_columns)
File "lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 277, in input_layer
trainable, cols_to_vars)
File "lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 202, in _internal_input_layer
trainable=trainable)
File "lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 3332, in _get_dense_tensor
return inputs.get(self)
File "lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 2175, in get
transformed = column._transform_feature(self) # pylint: disable=protected-access
File "lib/python2.7/site-packages/tensorflow/python/feature_column/feature_column.py", line 3277, in _transform_feature
id_tensor, default_value=-1)
File "lib/python2.7/site-packages/tensorflow/python/ops/sparse_ops.py", line 996, in sparse_tensor_to_dense
name=name)
File "lib/python2.7/site-packages/tensorflow/python/ops/sparse_ops.py", line 776, in sparse_to_dense
name=name)
File "lib/python2.7/site-packages/tensorflow/python/ops/gen_sparse_ops.py", line 2824, in sparse_to_dense
name=name)
File "lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 3414, in create_op
op_def=op_def)
File "lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1740, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): indices[1] = [1,0] is out of bounds: need 0 <= index < [1,1]
[[Node: input_layer/a4_indicator/SparseToDense = SparseToDense[T=DT_INT64, Tindices=DT_INT64, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](input_layer/a4_indicator/to_sparse_input/indices, input_layer/a4_indicator/to_sparse_input/dense_shape, input_layer/a4_indicator/Select, input_layer/a4_indicator/SparseToDense/default_value)]]
Here is my program
dataset = tf.data.TextLineDataset('test_input.txt')
dataset = dataset.shuffle(5)
dataset = dataset.batch(5)
dataset = dataset.repeat(configs.get('epochs',10))
train_data_iterator = dataset.make_one_shot_iterator()
features, labels= _parse_example(train_data_iterator.get_next())
feature_columns = []
feature_columns.append(tf.feature_column.numeric_column(key='f0', dtype=tf.float32))
feature_columns.append(tf.feature_column.numeric_column(key='f1', dtype=tf.float32))
feature_columns.append(tf.feature_column.numeric_column(key='f2', dtype=tf.float32))
feature_columns.append(tf.feature_column.numeric_column(key='f3', dtype=tf.float32))
feature_columns.append(tf.feature_column.indicator_column(
tf.feature_column.categorical_column_with_identity(key='a4', num_buckets=11,
default_value=0)))
input_layer = feature_column_lib.input_layer(
features=features, feature_columns=feature_columns)
logits = tf.layers.dense(inputs=input_layer, units=1)
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss, global_step = global_step)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
print sess.run(loss)
Figured it out, the real problem is labels dimension didn't match with logits.

Error while adding Conv1D layer

I am trying to train on a data containing sequences of 43 records of 3-dimensional vectors. While trying to add this Conv1D layer here:
model = Sequential()
model.add(Conv1D(input_shape=(43, 3),
filters=16,
kernel_size=4,
padding='same')) # This is line 24 of bcl_model_builder.py
model.add(BatchNormalization())
model.add(LeakyReLU())
model.add(Dropout(0.5))
I am getting the following error. And I got no clue what went wrong here:
Traceback (most recent call last):
File "/home/shx/programs/pycharm-community-2017.1.3/helpers/pydev/pydevd.py", line 1585, in <module>
globals = debugger.run(setup['file'], None, None, is_module)
File "/home/shx/programs/pycharm-community-2017.1.3/helpers/pydev/pydevd.py", line 1015, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File "/home/shx/programs/pycharm-community-2017.1.3/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "/home/shx/PycharmProjects/FBS/bcl/bcl_train_model.py", line 34, in <module>
model = mb.build_model()
File "/home/shx/PycharmProjects/FBS/bcl/bcl_model_builder.py", line 24, in build_model
padding='same'))
File "/home/shx/pyenvs/finainpy_env1/lib/python3.6/site-packages/keras/models.py", line 442, in add
layer(x)
File "/home/shx/pyenvs/finainpy_env1/lib/python3.6/site-packages/keras/engine/topology.py", line 602, in __call__
output = self.call(inputs, **kwargs)
File "/home/shx/pyenvs/finainpy_env1/lib/python3.6/site-packages/keras/layers/convolutional.py", line 156, in call
dilation_rate=self.dilation_rate[0])
File "/home/shx/pyenvs/finainpy_env1/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 3124, in conv1d
data_format=tf_data_format)
File "/home/shx/pyenvs/finainpy_env1/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 672, in convolution
op=op)
File "/home/shx/pyenvs/finainpy_env1/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 338, in with_space_to_batch
return op(input, num_spatial_dims, padding)
File "/home/shx/pyenvs/finainpy_env1/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 664, in op
name=name)
File "/home/shx/pyenvs/finainpy_env1/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 116, in _non_atrous_convolution
name=scope)
File "/home/shx/pyenvs/finainpy_env1/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 2013, in conv1d
data_format=data_format)
File "/home/shx/pyenvs/finainpy_env1/lib/python3.6/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 397, in conv2d
data_format=data_format, name=name)
File "/home/shx/pyenvs/finainpy_env1/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 589, in apply_op
param_name=input_name)
File "/home/shx/pyenvs/finainpy_env1/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 60, in _SatisfiesTypeConstraint
", ".join(dtypes.as_dtype(x).name for x in allowed_list)))
TypeError: Value passed to parameter 'input' has DataType float64 not in list of allowed values: float16, float32
After giving so much thought on float64 in the error log, I realized that keras itself was configured that way in my machine
{
"floatx": "float64",
"epsilon": 1e-07,
"backend": "tensorflow",
"image_data_format": "channels_last"
}
I just had to update the property floatx to float32.

Training model from logits and checkpoint

I am new in tensor flow and I am trying to train the mobile net_v1. To do that, I first created the tfrecords' file for multi-class from a txt file.( example : namefile label1 label2 ...)
import sys, os
import tensorflow as tf
import cv2
import numpy as np
import matplotlib.pyplot as plt
# function
def load_image(addr):
# read an image and resize to (224, 224)
# cv2 load images as BGR, convert it to RGB
img = cv2.imread(addr)
img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = img.astype(np.float32)
return img
def _int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[*value]))
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def loadData(inputs):
addrs = []
labels = []
f = open(inputs, 'r')
data = [ln.split(' ') for ln in f ]
f.close()
print(data)
for i in range(0, len(data)):
addrs.append(data[i][0].rstrip())
l = []
for j in range(1,len(data[i])):
if(data[i][j].rstrip().isdigit() == True):
l.append(int(data[i][j].rstrip()))
print(l)
labels.append(l)
return addrs, labels
def CreateTrainFile(input_filename, train_filename,):
path = '/home/rd/Documents/RD2/Databases/Faces/'
# load file and label
train_addrs, train_labels = loadData(input_filename)
print(train_labels)
# open the TFRecords file
writer = tf.python_io.TFRecordWriter(train_filename)
for i in range(len(train_addrs)):
# print how many images are saved every 1000 images
if not i % 1000:
print('Train data: {}/{}'.format(i, len(train_addrs)))
sys.stdout.flush()
# Load the image
img = load_image(train_addrs[i])
label = train_labels[i]
print('label : ', _int64_feature(label))
# Create a feature
feature = {'train/label': _int64_feature(label),
'train/image': _bytes_feature(tf.compat.as_bytes(img.tostring()))}
# Create an example protocol buffer
example = tf.train.Example(features=tf.train.Features(feature=feature))
# Serialize to string and write on the file
writer.write(example.SerializeToString())
writer.close()
sys.stdout.flush()
# open the TFRecords file
def CreateValidationFile(val_filename):
writer = tf.python_io.TFRecordWriter(val_filename)
for i in range(len(val_addrs)):
# print how many images are saved every 1000 images
if not i % 1000:
print('Val data: {}/{}'.format(i, len(val_addrs)))
sys.stdout.flush()
# Load the image
img = load_image(val_addrs[i])
label = val_labels[i]
# Create a feature
feature = {'val/label': _int64_feature(label),
'val/image': _bytes_feature(tf.compat.as_bytes(img.tostring()))}
# Create an example protocol buffer
example = tf.train.Example(features=tf.train.Features(feature=feature))
# Serialize to string and write on the file
writer.write(example.SerializeToString())
writer.close()
sys.stdout.flush()
# open the TFRecords file
def CreateTestFile(test_filename):
writer = tf.python_io.TFRecordWriter(test_filename)
for i in range(len(test_addrs)):
# print how many images are saved every 1000 images
if not i % 1000:
print('Test data: {}/{}'.format(i, len(test_addrs)))
sys.stdout.flush()
# Load the image
img = load_image(test_addrs[i])
label = test_labels[i]
# Create a feature
feature = {'test/label': _int64_feature(label),
'test/image': _bytes_feature(tf.compat.as_bytes(img.tostring()))}
# Create an example protocol buffer
example = tf.train.Example(features=tf.train.Features(feature=feature))
# Serialize to string and write on the file
writer.write(example.SerializeToString())
writer.close()
sys.stdout.flush()
def ReadRecordFileTrain(data_path):
#data_path = 'train.tfrecords' # address to save the hdf5 file
with tf.Session() as sess:
feature = {'train/image': tf.FixedLenFeature([], tf.string),
'train/label': tf.FixedLenFeature([], tf.int64)}
# Create a list of filenames and pass it to a queue
filename_queue = tf.train.string_input_producer([data_path], num_epochs=1)
# Define a reader and read the next record
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
# Decode the record read by the reader
features = tf.parse_single_example(serialized_example, features=feature)
# Convert the image data from string back to the numbers
image = tf.decode_raw(features['train/image'], tf.float32)
# Cast label data into int32
label = tf.cast(features['train/label'], tf.int32)
# Reshape image data into the original shape
image = tf.reshape(image, [224, 224, 3])
# Any preprocessing here ...
# Creates batches by randomly shuffling tensors
images, labels = tf.train.shuffle_batch([image, label], batch_size=2, capacity=30, num_threads=1, min_after_dequeue=10)
return images, labels
def main():
train_filename = 'train.tfrecords' # address to save the TFRecords file
#test_filename = 'test.tfrecords' # address to save the TFRecords file
#val_filename = 'val.tfrecords' # address to save the TFRecords file
CreateTrainFile("data.txt", train_filename)
main()
and to read the tf records :
def ReadRecordFileTrain(data_path):
#data_path = 'train.tfrecords' # address to save the hdf5 file
with tf.Session() as sess:
feature = {'train/image': tf.FixedLenFeature([], tf.string),
'train/label': tf.FixedLenFeature([2], tf.int64)}
# Create a list of filenames and pass it to a queue
filename_queue = tf.train.string_input_producer([data_path], num_epochs=1)
# Define a reader and read the next record
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
# Decode the record read by the reader
features = tf.parse_single_example(serialized_example, features=feature)
# Convert the image data from string back to the numbers
image = tf.decode_raw(features['train/image'], tf.float32)
print('label1 :', features['train/label'] )
# Cast label data into int32
label = tf.cast(features['train/label'], tf.int32)
print('label load:', label)
# Reshape image data into the original shape
image = tf.reshape(image, [224, 224, 3])
# Any preprocessing here ...
# Creates batches by randomly shuffling tensors
images, labels = tf.train.batch([image, label], batch_size=2, capacity=30, num_threads=1)
return images, labels
I suppose it works but I am not sure ( I don't have any errors when I called these functions.)
Then, I load the model and its weight. Call the loss function and try to start the training, but it fails at this moment.
g = tf.Graph()
with g.as_default():
# size of the folder
inputs = tf.placeholder(tf.float32, [1, 224, 224, 3])
# load dataset
images, labels = ReadRecordFileTrain('train.tfrecords')
print('load dataset done')
print('labels = ', labels)
print('data = ', images)
print(tf.shape(labels))
# load network
network, end_points= mobilenet.mobilenet_v1(images, num_classes=2, depth_multiplier=0.25 )
print('load network done')
print('network : ', network)
variables_to_restore = slim.get_variables_to_restore(exclude=["MobilenetV1/Logits/Conv2d_1c_1x1"])
load_checkpoint = "modele_mobilenet_v1_025/mobilenet_v1_0.25_224.ckpt"
init_fn = slim.assign_from_checkpoint_fn(load_checkpoint, variables_to_restore)
print('custom network done')
# Specify the loss function:
tf.losses.softmax_cross_entropy(labels, network)
total_loss = tf.losses.get_total_loss()
#tf.scalar_summary('losses/total_loss', total_loss)
# Specify the optimization scheme:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=.001)
# create_train_op that ensures that when we evaluate it to get the loss,
# the update_ops are done and the gradient updates are computed.
train_tensor = slim.learning.create_train_op(total_loss, optimizer)
print('loss and optimizer chosen')
# Actually runs training.
save_checkpoint = 'model/modelcheck'
# start training
learning = slim.learning.train(train_tensor, save_checkpoint, init_fn=init_fn, number_of_steps=1000)
The error message :
label1 : Tensor("ParseSingleExample/Squeeze_train/label:0", shape=(2,), dtype=int64)
label load: Tensor("Cast:0", shape=(2,), dtype=int32)
load dataset done
labels = Tensor("batch:1", shape=(2, 2), dtype=int32)
data = Tensor("batch:0", shape=(2, 224, 224, 3), dtype=float32)
Tensor("Shape:0", shape=(2,), dtype=int32)
load network done
network : Tensor("MobilenetV1/Logits/SpatialSqueeze:0", shape=(2, 2), dtype=float32)
custom network done
loss and optimizer chosen
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1039, in _do_call
return fn(*args)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1021, in _run_fn
status, run_metadata)
File "/usr/lib/python3.5/contextlib.py", line 66, in __exit__
next(self.gen)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Assign requires shapes of both tensors to match. lhs shape= [1,1,256,2] rhs shape= [1,1,256,1]
[[Node: save_1/Assign_109 = Assign[T=DT_FLOAT, _class=["loc:#MobilenetV1/Logits/Conv2d_1c_1x1/weights"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/cpu:0"](MobilenetV1/Logits/Conv2d_1c_1x1/weights, save_1/RestoreV2_109)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "test.py", line 106, in <module>
main()
File "test.py", line 103, in main
learning = slim.learning.train(train_tensor, save_checkpoint, init_fn=init_fn, number_of_steps=1000)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/slim/python/slim/learning.py", line 725, in train
master, start_standard_services=False, config=session_config) as sess:
File "/usr/lib/python3.5/contextlib.py", line 59, in __enter__
return next(self.gen)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/supervisor.py", line 960, in managed_session
self.stop(close_summary_writer=close_summary_writer)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/supervisor.py", line 788, in stop
stop_grace_period_secs=self._stop_grace_secs)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/coordinator.py", line 389, in join
six.reraise(*self._exc_info_to_raise)
File "/usr/lib/python3/dist-packages/six.py", line 686, in reraise
raise value
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/supervisor.py", line 949, in managed_session
start_standard_services=start_standard_services)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/supervisor.py", line 706, in prepare_or_wait_for_session
init_feed_dict=self._init_feed_dict, init_fn=self._init_fn)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/session_manager.py", line 256, in prepare_session
config=config)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/session_manager.py", line 188, in _restore_checkpoint
saver.restore(sess, ckpt.model_checkpoint_path)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py", line 1457, in restore
{self.saver_def.filename_tensor_name: save_path})
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 778, in run
run_metadata_ptr)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 982, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1032, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1052, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Assign requires shapes of both tensors to match. lhs shape= [1,1,256,2] rhs shape= [1,1,256,1]
[[Node: save_1/Assign_109 = Assign[T=DT_FLOAT, _class=["loc:#MobilenetV1/Logits/Conv2d_1c_1x1/weights"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/cpu:0"](MobilenetV1/Logits/Conv2d_1c_1x1/weights, save_1/RestoreV2_109)]]
Caused by op 'save_1/Assign_109', defined at:
File "test.py", line 106, in <module>
main()
File "test.py", line 103, in main
learning = slim.learning.train(train_tensor, save_checkpoint, init_fn=init_fn, number_of_steps=1000)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/slim/python/slim/learning.py", line 642, in train
saver = saver or tf_saver.Saver()
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py", line 1056, in __init__
self.build()
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py", line 1086, in build
restore_sequentially=self._restore_sequentially)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py", line 691, in build
restore_sequentially, reshape)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py", line 419, in _AddRestoreOps
assign_ops.append(saveable.restore(tensors, shapes))
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py", line 155, in restore
self.op.get_shape().is_fully_defined())
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/state_ops.py", line 270, in assign
validate_shape=validate_shape)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_state_ops.py", line 47, in assign
use_locking=use_locking, name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Assign requires shapes of both tensors to match. lhs shape= [1,1,256,2] rhs shape= [1,1,256,1]
[[Node: save_1/Assign_109 = Assign[T=DT_FLOAT, _class=["loc:#MobilenetV1/Logits/Conv2d_1c_1x1/weights"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/cpu:0"](MobilenetV1/Logits/Conv2d_1c_1x1/weights, save_1/RestoreV2_109)]]
I don't understand where the problem comes from and how to solve it.
InvalidArgumentError: Assign requires shapes of both tensors to match.
lhs shape= [1,1,256,2] rhs shape= [1,1,256,1]
I used to get this error when my model saved in the model directory has conflicts with my current running model. Try deleting your model directory and start training again.
It seems to solve the error but now, when I want to execute it with a tf.Session it failed. I was wondering if the problem comes from my graph or am I doing something wrong in the tf.Session ?
def evaluation(logits, labels):
with tf.name_scope('Accuracy'):
# Operation comparing prediction with true label
correct_prediction = tf.equal(tf.argmax(logits,1), tf.argmax(labels, 1))
# Operation calculating the accuracy of the predictions
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Summary operation for the accuracy
#tf.scalar_summary('train_accuracy', accuracy)
return accuracy
g = tf.Graph()
with g.as_default():
# size of the folder
inputs = tf.placeholder(tf.float32, [1, 224, 224, 3])
# load dataset
images, labels = ReadRecordFileTrain('train.tfrecords')
print('load dataset done')
print('labels = ', labels)
print('data = ', images)
print(tf.shape(labels))
# load network
network, end_points= mobilenet.mobilenet_v1(images, num_classes=2, depth_multiplier=0.25 )
print('load network done')
print('network : ', network)
variables_to_restore = slim.get_variables_to_restore(exclude=["MobilenetV1/Logits/Conv2d_1c_1x1"])
load_checkpoint = "modele_mobilenet_v1_025/mobilenet_v1_0.25_224.ckpt"
init_fn = slim.assign_from_checkpoint_fn(load_checkpoint, variables_to_restore)
print('custom network done')
# Specify the loss function:
tf.losses.softmax_cross_entropy(labels, network)
total_loss = tf.losses.get_total_loss()
#tf.scalar_summary('losses/total_loss', total_loss)
# Specify the optimization scheme:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=.001)
# create_train_op that ensures that when we evaluate it to get the loss,
# the update_ops are done and the gradient updates are computed.
train_tensor = slim.learning.create_train_op(total_loss, optimizer)
print('loss and optimizer chosen')
# Actually runs training.
save_checkpoint = 'model/modelcheck'
# start training
learning = slim.learning.train(train_tensor, save_checkpoint, init_fn=init_fn, number_of_steps=1000)
accuracy = evaluation(network, labels)
with tf.Session(graph=g) as sess:
sess.run(network)
print('network load')
sess.run(total_loss)
sess.run(accuracy)
sess.run(train_tensor)
sess.run(learning)
The error :
label1 : Tensor("ParseSingleExample/Squeeze_train/label:0", shape=(2,), dtype=int64)
label load: Tensor("Cast:0", shape=(2,), dtype=int32)
load dataset done
labels = Tensor("batch:1", shape=(4, 2), dtype=int32)
data = Tensor("batch:0", shape=(4, 224, 224, 3), dtype=float32)
Tensor("Shape:0", shape=(2,), dtype=int32)
load network done
network : Tensor("MobilenetV1/Logits/SpatialSqueeze:0", shape=(4, 2), dtype=float32)
custom network done
loss and optimizer chosen
end of graph
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1039, in _do_call
return fn(*args)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1021, in _run_fn
status, run_metadata)
File "/usr/lib/python3.5/contextlib.py", line 66, in __exit__
next(self.gen)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value MobilenetV1/Conv2d_3_depthwise/BatchNorm/beta
[[Node: MobilenetV1/Conv2d_3_depthwise/BatchNorm/beta/read = Identity[T=DT_FLOAT, _class=["loc:#MobilenetV1/Conv2d_3_depthwise/BatchNorm/beta"], _device="/job:localhost/replica:0/task:0/cpu:0"](MobilenetV1/Conv2d_3_depthwise/BatchNorm/beta)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "test.py", line 113, in <module>
main()
File "test.py", line 105, in main
sess.run(network)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 778, in run
run_metadata_ptr)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 982, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1032, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1052, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value MobilenetV1/Conv2d_3_depthwise/BatchNorm/beta
[[Node: MobilenetV1/Conv2d_3_depthwise/BatchNorm/beta/read = Identity[T=DT_FLOAT, _class=["loc:#MobilenetV1/Conv2d_3_depthwise/BatchNorm/beta"], _device="/job:localhost/replica:0/task:0/cpu:0"](MobilenetV1/Conv2d_3_depthwise/BatchNorm/beta)]]
Caused by op 'MobilenetV1/Conv2d_3_depthwise/BatchNorm/beta/read', defined at:
File "test.py", line 113, in <module>
main()
File "test.py", line 67, in main
network, end_points= mobilenet.mobilenet_v1(images, num_classes=2, depth_multiplier=0.25 )
File "/home/rd/Documents/RD2/users/Ludovic/tensorflow_mobilenet/mobilenet_v1.py", line 301, in mobilenet_v1
conv_defs=conv_defs)
File "/home/rd/Documents/RD2/users/Ludovic/tensorflow_mobilenet/mobilenet_v1.py", line 228, in mobilenet_v1_base
scope=end_point)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1891, in separable_convolution2d
outputs = normalizer_fn(outputs, **normalizer_params)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 528, in batch_norm
outputs = layer.apply(inputs, training=is_training)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/layers/base.py", line 320, in apply
return self.__call__(inputs, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/layers/base.py", line 286, in __call__
self.build(input_shapes[0])
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/layers/normalization.py", line 125, in build
trainable=True)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variable_scope.py", line 1049, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variable_scope.py", line 948, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variable_scope.py", line 349, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variable_scope.py", line 1389, in wrapped_custom_getter
*args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/layers/base.py", line 275, in variable_getter
variable_getter=functools.partial(getter, **kwargs))
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/layers/base.py", line 228, in _add_variable
trainable=trainable and self.trainable)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variable_scope.py", line 1389, in wrapped_custom_getter
*args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1334, in layer_variable_getter
return _model_variable_getter(getter, *args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1326, in _model_variable_getter
custom_getter=getter, use_resource=use_resource)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 262, in model_variable
use_resource=use_resource)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 217, in variable
use_resource=use_resource)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1334, in layer_variable_getter
return _model_variable_getter(getter, *args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1326, in _model_variable_getter
custom_getter=getter, use_resource=use_resource)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 262, in model_variable
use_resource=use_resource)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 217, in variable
use_resource=use_resource)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variable_scope.py", line 341, in _true_getter
use_resource=use_resource)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variable_scope.py", line 714, in _get_single_variable
validate_shape=validate_shape)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variables.py", line 197, in __init__
expected_shape=expected_shape)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variables.py", line 316, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1338, in identity
result = _op_def_lib.apply_op("Identity", input=input, name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value MobilenetV1/Conv2d_3_depthwise/BatchNorm/beta
[[Node: MobilenetV1/Conv2d_3_depthwise/BatchNorm/beta/read = Identity[T=DT_FLOAT, _class=["loc:#MobilenetV1/Conv2d_3_depthwise/BatchNorm/beta"], _device="/job:localhost/replica:0/task:0/cpu:0"](MobilenetV1/Conv2d_3_depthwise/BatchNorm/beta)]]