ResNet model in Tensorflow Federated - tensorflow

I tried to customize the model in "Image classification" tutorial in Tensorflow Federated. (It originally used a sequential model)
I use Keras ResNet50 but when it began to train, there is always an error "Incompatible shapes"
Here are my codes:
NUM_CLIENTS = 4
NUM_EPOCHS = 10
BATCH_SIZE = 2
SHUFFLE_BUFFER = 5
def create_compiled_keras_model():
model = tf.keras.applications.resnet.ResNet50(include_top=False, weights='imagenet',
input_tensor=tf.keras.layers.Input(shape=(100,
300, 3)), pooling=None)
model.compile(
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
optimizer=tf.keras.optimizers.SGD(learning_rate=0.02),
metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
return model
def model_fn():
keras_model = create_compiled_keras_model()
return tff.learning.from_compiled_keras_model(keras_model, sample_batch)
iterative_process = tff.learning.build_federated_averaging_process(model_fn)
Error information:
enter image description here
I feel that the shape is incompatible because the epoch and clients information were somehow missing. Would be very thankful if someone could give me a hint.
Updates:
The Assertion error happened during tff.learning.build_federated_averaging_process
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-164-dac26193d9d8> in <module>()
----> 1 iterative_process = tff.learning.build_federated_averaging_process(model_fn)
2
3 # iterative_process = build_federated_averaging_process(model_fn)
13 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_federated/python/learning/federated_averaging.py in build_federated_averaging_process(model_fn, server_optimizer_fn, client_weight_fn, stateful_delta_aggregate_fn, stateful_model_broadcast_fn)
165 return optimizer_utils.build_model_delta_optimizer_process(
166 model_fn, client_fed_avg, server_optimizer_fn,
--> 167 stateful_delta_aggregate_fn, stateful_model_broadcast_fn)
/usr/local/lib/python3.6/dist-packages/tensorflow_federated/python/learning/framework/optimizer_utils.py in build_model_delta_optimizer_process(model_fn, model_to_client_delta_fn, server_optimizer_fn, stateful_delta_aggregate_fn, stateful_model_broadcast_fn)
349 # still need this.
350 with tf.Graph().as_default():
--> 351 dummy_model_for_metadata = model_utils.enhance(model_fn())
352
353 # ===========================================================================
<ipython-input-159-b2763ace8e5b> in model_fn()
1 def model_fn():
2 keras_model = model
----> 3 return tff.learning.from_compiled_keras_model(keras_model, sample_batch)
/usr/local/lib/python3.6/dist-packages/tensorflow_federated/python/learning/keras_utils.py in from_compiled_keras_model(keras_model, dummy_batch)
211 # Model.test_on_batch() once before asking for metrics.
212 if isinstance(dummy_tensors, collections.Mapping):
--> 213 keras_model.test_on_batch(**dummy_tensors)
214 else:
215 keras_model.test_on_batch(*dummy_tensors)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in test_on_batch(self, x, y, sample_weight, reset_metrics)
1007 sample_weight=sample_weight,
1008 reset_metrics=reset_metrics,
-> 1009 standalone=True)
1010 outputs = (
1011 outputs['total_loss'] + outputs['output_losses'] + outputs['metrics'])
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in test_on_batch(model, x, y, sample_weight, reset_metrics, standalone)
503 y,
504 sample_weights=sample_weights,
--> 505 output_loss_metrics=model._output_loss_metrics)
506
507 if reset_metrics:
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
568 xla_context.Exit()
569 else:
--> 570 result = self._call(*args, **kwds)
571
572 if tracing_count == self._get_tracing_count():
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds)
606 # In this case we have not created variables on the first call. So we can
607 # run the first trace but we should fail if variables are created.
--> 608 results = self._stateful_fn(*args, **kwds)
609 if self._created_variables:
610 raise ValueError("Creating variables on a non-first call to a function"
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/function.py in __call__(self, *args, **kwargs)
2407 """Calls a graph function specialized to the inputs."""
2408 with self._lock:
-> 2409 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
2410 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2411
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/function.py in _maybe_define_function(self, args, kwargs)
2765
2766 self._function_cache.missed.add(call_context_key)
-> 2767 graph_function = self._create_graph_function(args, kwargs)
2768 self._function_cache.primary[cache_key] = graph_function
2769 return graph_function, args, kwargs
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2655 arg_names=arg_names,
2656 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2657 capture_by_value=self._capture_by_value),
2658 self._function_attributes,
2659 # Tell the ConcreteFunction to clean up its graph once it goes out of
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
979 _, original_func = tf_decorator.unwrap(python_func)
980
--> 981 func_outputs = python_func(*func_args, **func_kwargs)
982
983 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/def_function.py in wrapped_fn(*args, **kwds)
437 # __wrapped__ allows AutoGraph to swap in a converted function. We give
438 # the function a weak reference to itself to avoid a reference cycle.
--> 439 return weak_wrapped_fn().__wrapped__(*args, **kwds)
440 weak_wrapped_fn = weakref.ref(wrapped_fn)
441
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/func_graph.py in wrapper(*args, **kwargs)
966 except Exception as e: # pylint:disable=broad-except
967 if hasattr(e, "ag_error_metadata"):
--> 968 raise e.ag_error_metadata.to_exception(e)
969 else:
970 raise
AssertionError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_eager.py:345 test_on_batch *
with backend.eager_learning_phase_scope(0):
/usr/lib/python3.6/contextlib.py:81 __enter__
return next(self.gen)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/backend.py:425 eager_learning_phase_scope
assert ops.executing_eagerly_outside_functions()
AssertionError:

Ah, I believe this issue is coming from mismatched expectations on sample_batch. TFF passes sample_batch to Keras, which calls a forward pass with this sample batch to initialize various attributes of the keras model. sample_batch should be either a sample from the literal data you are going to be feeding the model as on the server side, or a batch of fake data which matches the shape and type of the data you will be passing in.
An example of the former can be found here (this uses tf.data.Dataset), and there are several examples of the latter in test code, like here.
From what I see of the definition of the model, likely the x element of your sample_batch should be an ndarray of shape [2, 100, 300, 3] (where 2 is for the batch size, but technically this can be any nonzero dimension), and the y element should also match the expected y structure in the data you are using.
I hope this helps, just ping back if there are any problems!
One thing to note, that may be helpful in thinking about TFF--TFF is building a syntax tree representing the distributed computation you are defining via build_federated_averaging_process. This error actually occurs during construction of this object. TFF must trace the computation you pass it in order to know what structure to generate, and this is what is raising here. Actual training of the model happens when you call next on the returned IterativeProcess.

I have same problem:
if I execute this line
state, metrics = iterative_process.next(state, federated_train_data)
print('round 1, metrics={}'.format(metrics))
I find this error
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: Default MaxPoolingOp only supports NHWC on device type CPU
[[{{node StatefulPartitionedCall/StatefulPartitionedCall/sequential/vgg16/block1_pool/MaxPool}}]]
[[subcomputation/StatefulPartitionedCall_1/ReduceDataset]]
[[subcomputation/StatefulPartitionedCall_1/ReduceDataset/_140]]
(1) Invalid argument: Default MaxPoolingOp only supports NHWC on device type CPU
[[{{node StatefulPartitionedCall/StatefulPartitionedCall/sequential/vgg16/block1_pool/MaxPool}}]]
[[subcomputation/StatefulPartitionedCall_1/ReduceDataset]]
0 successful operations.
0 derived errors ignored.
knowin that I employe VGG16
have you any idea on this type of error

Related

How to fix TypeError: x and y must have the same dtype, got tf.int32 != tf.float32 with tensorflow using PyGad?

I have defined a new layer to be put as an input layer using Keras. The code is:
class layer(tensorflow.keras.layers.Layer):
def __init__(self):
super(layer, self).__init__()
H_init = tf.random_normal_initializer()
self.H = tf.Variable(
initial_value=H_init(shape=(1,), dtype="float32"),
trainable=True,
)
b_init = tf.zeros_initializer()
self.b = tf.Variable(
initial_value=b_init(shape=(1,), dtype="float32"), trainable=True
)
n_init = tf.zeros_initializer()
self.n = tf.Variable(
initial_value=n_init(shape=(1,), dtype="float32"), trainable=True
)
def call(self, z):
return self.H**2 / (1+(1/self.b)**(2/self.n)) *(1+((1+z)/self.b)**(2/self.n))
I intend to put this layer as input in my generation callback function, which is
`def callback_generation(ga_instance):
print("Generation"= {generation}".format(generation=ga_instance.generations_completed))
print("Fitness =
{fitness}".format(fitness=ga_instance.best_solution()[1]))
inputs = tensorflow.keras.Input(shape=(1,), name="inputs")
targets = tensorflow.keras.Input(shape=(1,), name="targets")
logits = tensorflow.keras.layers.Dense(15)(inputs)
predictions = layer(name="predictions")(logits, targets)
model = keras.Model(inputs=[inputs, targets],
outputs=predictions)
data = {
"inputs": z,
"targets": H_z,
}
model = tensorflow.keras.Sequential(inputs=input_layer,
outputs=output_layer)
weights_vector =
tensorflow.pygad.kerasga.model_weights_as_vector(model=model)
keras_ga = pygad.kerasga.KerasGA(model=model,
num_solutions=12)
model.summary()`
This gives me an error reporting that the tf__call() method takes 2 positional arguments but 3 were given.
This is the detailed error traceback:
TypeError Traceback (most
recent call last)
<ipython-input-101-e09c7d4f3228> in <module>
20 targets = tensorflow.keras.Input(shape=(1,),
name="targets")
21 logits = tensorflow.keras.layers.Dense(15)(inputs)
---> 22 predictions = layer(name="predictions")(logits,
targets)
23
24 model = keras.Model(inputs=[inputs, targets],
outputs=predictions)
~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in
__call__(self, *args, **kwargs)
950 if _in_functional_construction_mode(self, inputs,
args, kwargs, input_list):
951 return self._functional_construction_call(inputs,
args, kwargs,
--> 952
input_list)
953
954 # Maintains info about the `Layer.call` stack.
~/anaconda3/lib/python3.7/site-
packages/tensorflow/python/keras/engine/base_layer.py in
_functional_construction_call(self, inputs, args, kwargs,
input_list)
1089 # Check input assumptions set after layer
building, e.g. input shape.
1090 outputs = self._keras_tensor_symbolic_call(
-> 1091 inputs, input_masks, args, kwargs)
1092
1093 if outputs is None:
~/anaconda3/lib/python3.7/site-
packages/tensorflow/python/keras/engine/base_layer.py in
_keras_tensor_symbolic_call(self, inputs, input_masks, args,
kwargs)
820 return
nest.map_structure(keras_tensor.KerasTensor, output_signature)
821 else:
--> 822 return self._infer_output_signature(inputs, args,
kwargs, input_masks)
823
824 def _infer_output_signature(self, inputs, args,
kwargs, input_masks):
~/anaconda3/lib/python3.7/site-
packages/tensorflow/python/keras/engine/base_layer.py in
_infer_output_signature(self, inputs, args, kwargs,
input_masks)
861 # TODO(kaftan): do we maybe_build here, or
have we already done it?
862 self._maybe_build(inputs)
--> 863 outputs = call_fn(inputs, *args, **kwargs)
864
865 self._handle_activity_regularization(inputs,
outputs)
~/anaconda3/lib/python3.7/site-
packages/tensorflow/python/autograph/impl/api.py in
wrapper(*args, **kwargs)
668 except Exception as e: # pylint:disable=broad-
except
669 if hasattr(e, 'ag_error_metadata'):
--> 670 raise e.ag_error_metadata.to_exception(e)
671 else:
672 raise
TypeError: in user code:
TypeError: tf__call() takes 2 positional arguments but 3 were given

How to deal with tf.saved_model.save(model, filepath) Error?

I use TFRS build a custom hybrid recommender; the model was trained and is able to make prediction well. But when I save it:
model = ...
filepath = 'saved_model/model0'
tf.saved_model.save(model, filepath)
error:
--------------------------------------------------------------------------- FailedPreconditionError Traceback (most recent call last) /tmp/ipykernel_2678112/2773535809.py in <module>
5 #!mkdir -p saved_model
6 filepath = 'saved_model/model0'
----> 7 tf.saved_model.save(model, filepath)
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/saved_model/save.py in save(obj, export_dir, signatures, options) 1278 # pylint: enable=line-too-long 1279 metrics.IncrementWriteApi(_SAVE_V2_LABEL)
-> 1280 save_and_return_nodes(obj, export_dir, signatures, options) 1281 metrics.IncrementWrite(write_version="2") 1282
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/saved_model/save.py in save_and_return_nodes(obj, export_dir, signatures, options, experimental_skip_checkpoint) 1313 1314 _, exported_graph, object_saver, asset_info, saved_nodes, node_paths = (
-> 1315 _build_meta_graph(obj, signatures, options, meta_graph_def)) 1316 saved_model.saved_model_schema_version = ( 1317 constants.SAVED_MODEL_SCHEMA_VERSION)
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/saved_model/save.py in _build_meta_graph(obj, signatures, options, meta_graph_def) 1485 1486 with save_context.save_context(options):
-> 1487 return _build_meta_graph_impl(obj, signatures, options, meta_graph_def)
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/saved_model/save.py in _build_meta_graph_impl(obj, signatures, options, meta_graph_def) 1434 1435 # Use _SaveableView to provide a frozen listing of properties and functions.
-> 1436 saveable_view = _SaveableView(checkpoint_graph_view, options, 1437 wrapped_functions) 1438 object_saver = util.TrackableSaver(checkpoint_graph_view)
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/saved_model/save.py in __init__(self, checkpoint_view, options, wrapped_functions)
200 # Run through the nodes in the object graph first for side effects of
201 # creating variables.
--> 202 self._trace_all_concrete_functions()
203
204 (self._trackable_objects, self.node_paths, self._node_ids,
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/saved_model/save.py in _trace_all_concrete_functions(self)
304 for function in self.checkpoint_view.list_functions(obj).values():
305 if isinstance(function, def_function.Function):
--> 306 function._list_all_concrete_functions_for_serialization() # pylint: disable=protected-access
307
308 #property
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py in _list_all_concrete_functions_for_serialization(self) 1194 A list of instances of `ConcreteFunction`. 1195 """
-> 1196 concrete_functions = self._list_all_concrete_functions() 1197 seen_signatures = [] 1198 for concrete_function in concrete_functions:
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py in _list_all_concrete_functions(self) 1176 """Returns all concrete functions.""" 1177 if self.input_signature is not None:
-> 1178 self.get_concrete_function() 1179 concrete_functions = [] 1180 # pylint: disable=protected-access
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py in get_concrete_function(self, *args, **kwargs) 1257 def get_concrete_function(self, *args, **kwargs): 1258 # Implements GenericFunction.get_concrete_function.
-> 1259 concrete = self._get_concrete_function_garbage_collected(*args, **kwargs) 1260 concrete._garbage_collector.release() # pylint: disable=protected-access 1261 return concrete
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py in _get_concrete_function_garbage_collected(self, *args, **kwargs) 1237 if self._stateful_fn is None: 1238 initializers
= []
-> 1239 self._initialize(args, kwargs, add_initializers_to=initializers) 1240 self._initialize_uninitialized_variables(initializers) 1241
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
778 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
779 self._concrete_stateful_fn = (
--> 780 self._stateful_fn._get_concrete_function_internal_garbage_collected(
# pylint: disable=protected-access
781 *args, **kwds))
782
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args,
**kwargs) 3155 args, kwargs = None, None 3156 with self._lock:
-> 3157 graph_function, _ = self._maybe_define_function(args, kwargs) 3158 return graph_function 3159
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs) 3555 3556 self._function_cache.missed.add(call_context_key)
-> 3557 graph_function = self._create_graph_function(args, kwargs) 3558 self._function_cache.primary[cache_key] = graph_function 3559
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes) 3390 arg_names = base_arg_names + missing_arg_names 3391 graph_function = ConcreteFunction(
-> 3392 func_graph_module.func_graph_from_py_func( 3393 self._name, 3394 self._python_function,
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses) 1141
_, original_func = tf_decorator.unwrap(python_func) 1142
-> 1143 func_outputs = python_func(*func_args, **func_kwargs) 1144 1145 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
670 # the function a weak reference to itself to avoid a reference cycle.
671 with OptionalXlaContext(compile_with_xla):
--> 672 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
673 return out
674
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/training/tracking/tracking.py in _creator()
279 #def_function.function(input_signature=[], autograph=False)
280 def _creator():
--> 281 resource = self._create_resource()
282 return resource
283
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/data/ops/dataset_ops.py in <lambda>()
241 # to have a weak reference to the Dataset to avoid creating
242 # reference cycles and making work for the garbage collector.
--> 243 lambda: weak_self._trace_variant_creation()()), # pylint: disable=unnecessary-lambda,protected-access
244 name="_variant_tracker")
245 self._graph_attr = ops.get_default_graph()
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/data/ops/dataset_ops.py in _trace_variant_creation(self)
363 # pylint: disable=protected-access
364 graph_def = graph_pb2.GraphDef().FromString(
--> 365 self._as_serialized_graph(external_state_policy=options_lib
366 .ExternalStatePolicy.FAIL).numpy())
367 output_node_names = []
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
550 'in a future version' if date is None else ('after %s' % date),
551 instructions)
--> 552 return func(*args, **kwargs)
553
554 doc = _add_deprecated_arg_notice_to_docstring(
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/data/ops/dataset_ops.py in _as_serialized_graph(self, allow_stateful, strip_device_assignment, external_state_policy)
302 if external_state_policy:
303 policy = external_state_policy.value
--> 304 return gen_dataset_ops.dataset_to_graph_v2(
305 self._variant_tensor,
306 external_state_policy=policy,
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/ops/gen_dataset_ops.py in dataset_to_graph_v2(input_dataset, external_state_policy, strip_device_assignment, name) 1158 return _result 1159 except _core._NotOkStatusException as e:
-> 1160 _ops.raise_from_not_ok_status(e, name) 1161 except _core._FallbackException: 1162 pass
/napi/k_working_dir/ds_test/shared-space/venv/lib/python3.9/site-packages/tensorflow/python/framework/ops.py in raise_from_not_ok_status(e, name) 7105 def raise_from_not_ok_status(e, name): 7106 e.message += (" name: " + name if name is not None else "")
-> 7107 raise core._status_to_exception(e) from None # pylint: disable=protected-access 7108 7109
FailedPreconditionError: Failed to serialize the input pipeline graph: ResourceGather is stateful. [Op:DatasetToGraphV2]
Solution:
# define create_model()
def create_model():
"""instantiate a model and compile it"""
model = MyTFModel(...)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
return model
# instantiate & compile your model by custom function create_model()
model = create_model()
# set model checkpoint to record the weight during & after fitting
model_checkpoint=tf.keras.callbacks.ModelCheckpoint('DeepCrossMultitaskModel{epoch:02d}',period=2,save_weights_only=True)
# fit the model & save weights during & after the fitting using model_checkpoint
history = model.fit(
cached_train,
validation_data=cached_test,
validation_freq=5,
epochs=epochs,
callbacks=[model_checkpoint],
verbose=0)
#!mkdir -p saved_model
filepath = 'saved_model/model_weight'
model.save_weights(filepath=filepath, save_format='tf')
# newly instantiate & compile a model
new_model = create_model()
# load the weights back to the new model
new_model.load_weights(filepath)
#double check: expect to produce the same thing as model(dict(test_df.head(3)))
new_model(dict(test_df.head(3)))
References & Credits
The cause of the error, check out this post: Can't save custom subclassed model
Solution: https://androidkt.com/how-to-save-and-load-model-weights-in-keras/
I had the same problem in my multitask model (TFRS) and following this solution posted here resolved the issue.
The metrics used in the retrieval task is the issue.
After training and evaluating the model I run this code to save it:
model.retrieval_task = tfrs.tasks.Retrieval() # Removes the metrics.
model.compile()
model.save("./models/my_rec_model")
and when I need to perform inference, I use this:
model = tf.keras.models.load_model("./models/my_rec_model")

tf.data input pipeline, dynamic shaped tensors and slicing: NotImplementedError: Cannot convert a symbolic Tensor (args_0:0) to a numpy array

I am trying to write an efficient data input using tensorflow and tf.data.
I want to replicate the functionality of PIL.Image.Image.crop, where one can pass negative bounding box values such that the crop is expanded with zeros.
For example, if I call PIL.Image.Image.crop([-10, 0, img_height, img_width]), the image has 10 additional rows at the beginning filled with zeros.
As far as I understood, using python functions in the tf.data input pipeline can slow down the code significantly, therefore I try to write everything using tensorflow functions. I also want to use prefetching, batching, shuffling etc. which is already provided by the API.
My plan to implement the PIL crop using tensorflow functions is to preallocate a (dynamically shaped) zero tensor and assign the cropped values using slicing.
This is the error I ran into: NotImplementedError: Cannot convert a symbolic Tensor (args_2:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported
Here is minimal code to replicate the issue:
import tensorflow as tf
ds2 = tf.data.Dataset.from_tensor_slices(np.arange(10))
def preproc(number):
o = tf.zeros((number,number))
# ...
ds2.map(preproc)
Question 1: How can I solve that issue?
Question 2: I am coming from a PyTorch background and I am confused about the complexity of the whole tf.data pipeline. Why am I restricted to tensorflow functions in order to use all the nice features?
For reference, here is my complete code so far.
class MyPreprocessing:
def __call__(self, *data):
# Complete code omitted for simplicity.
# This is called somewhere:
self._crop(...)
def _crop(self, img, center, body_size, res):
tl = tf.cast(center - body_size/2, dtype=tf.int32)
br = tf.cast(center + body_size/2, dtype=tf.int32)
height, width = tf.shape(img)[0], tf.shape(img)[1]
crop_tl = tf.stack([
tf.cond(tl[0] < 0, lambda: tf.constant(0, dtype=tf.int32), lambda: tl[0]),
tf.cond(tl[1] < 0, lambda: tf.constant(0, dtype=tf.int32), lambda: tl[1])])
crop_br = tf.stack([
tf.cond(br[0] > height, lambda: height, lambda: br[0]),
tf.cond(br[1] > width, lambda: width, lambda: br[1])])
crop = tf.image.crop_to_bounding_box(
img,
crop_tl[0],
crop_tl[1],
crop_br[0] - crop_tl[1],
crop_br[1] - crop_tl[1])
new_tl = crop_tl - tl
new_br = crop_br - tl
# Error:
new_img = tf.zeros((body_size, body_size, tf.constant(3)), dtype=tf.float32)
new_img[new_tl[0]:new_br[0], new_tl[1]:new_br[1]].assign(crop)
return tf.image.resize(new_img, (res, res))
Edit: Full Stacktrace
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-2-84d20c31b9c9> in <module>
8 # ...
9
---> 10 ds2.map(preproc)
~/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in map(self, map_func, num_parallel_calls, deterministic)
1923 warnings.warn("The `deterministic` argument has no effect unless the "
1924 "`num_parallel_calls` argument is specified.")
-> 1925 return MapDataset(self, map_func, preserve_cardinality=True)
1926 else:
1927 return ParallelMapDataset(
~/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in __init__(self, input_dataset, map_func, use_inter_op_parallelism, preserve_cardinality, use_legacy_function)
4481 self._use_inter_op_parallelism = use_inter_op_parallelism
4482 self._preserve_cardinality = preserve_cardinality
-> 4483 self._map_func = StructuredFunctionWrapper(
4484 map_func,
4485 self._transformation_name(),
~/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in __init__(self, func, transformation_name, dataset, input_classes, input_shapes, input_types, input_structure, add_to_graph, use_legacy_function, defun_kwargs)
3710 resource_tracker = tracking.ResourceTracker()
3711 with tracking.resource_tracker_scope(resource_tracker):
-> 3712 self._function = fn_factory()
3713 # There is no graph to add in eager mode.
3714 add_to_graph &= not context.executing_eagerly()
~/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py in get_concrete_function(self, *args, **kwargs)
3132 or `tf.Tensor` or `tf.TensorSpec`.
3133 """
-> 3134 graph_function = self._get_concrete_function_garbage_collected(
3135 *args, **kwargs)
3136 graph_function._garbage_collector.release() # pylint: disable=protected-access
~/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_garbage_collected(self, *args, **kwargs)
3098 args, kwargs = None, None
3099 with self._lock:
-> 3100 graph_function, _ = self._maybe_define_function(args, kwargs)
3101 seen_names = set()
3102 captured = object_identity.ObjectIdentitySet(
~/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3442
3443 self._function_cache.missed.add(call_context_key)
-> 3444 graph_function = self._create_graph_function(args, kwargs)
3445 self._function_cache.primary[cache_key] = graph_function
3446
~/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3277 arg_names = base_arg_names + missing_arg_names
3278 graph_function = ConcreteFunction(
-> 3279 func_graph_module.func_graph_from_py_func(
3280 self._name,
3281 self._python_function,
~/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
997 _, original_func = tf_decorator.unwrap(python_func)
998
--> 999 func_outputs = python_func(*func_args, **func_kwargs)
1000
1001 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in wrapped_fn(*args)
3685 attributes=defun_kwargs)
3686 def wrapped_fn(*args): # pylint: disable=missing-docstring
-> 3687 ret = wrapper_helper(*args)
3688 ret = structure.to_tensor_list(self._output_structure, ret)
3689 return [ops.convert_to_tensor(t) for t in ret]
~/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in wrapper_helper(*args)
3615 if not _should_unpack(nested_args):
3616 nested_args = (nested_args,)
-> 3617 ret = autograph.tf_convert(self._func, ag_ctx)(*nested_args)
3618 if _should_pack(ret):
3619 ret = tuple(ret)
~/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
693 except Exception as e: # pylint:disable=broad-except
694 if hasattr(e, 'ag_error_metadata'):
--> 695 raise e.ag_error_metadata.to_exception(e)
696 else:
697 raise
NotImplementedError: in user code:
<ipython-input-2-84d20c31b9c9>:7 preproc *
o = tf.zeros((number,number))
/home/benjs/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206 wrapper **
return target(*args, **kwargs)
/home/benjs/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:2911 wrapped
tensor = fun(*args, **kwargs)
/home/benjs/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:2960 zeros
output = _constant_if_small(zero, shape, dtype, name)
/home/benjs/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:2896 _constant_if_small
if np.prod(shape) < 1000:
<__array_function__ internals>:5 prod
/home/benjs/Documents/projects/hpe/venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3030 prod
return _wrapreduction(a, np.multiply, 'prod', axis, dtype, out,
/home/benjs/Documents/projects/hpe/venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:87 _wrapreduction
return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
/home/benjs/Documents/projects/hpe/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:867 __array__
raise NotImplementedError(
NotImplementedError: Cannot convert a symbolic Tensor (args_0:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported

Problem with Logits using Densenet121 in Tensorflow 2.4

I was trying to reproach the examples Transfer learning and fine-tuning adapting for my problem My colab with GPU. When I use a softmax in the last Dense Layers, this error not ocorrer. But with 'from_logits=True', this error ocorrer, my imagens are jpg and they are divided in folders:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-85-7ea61d5df8ec> in <module>()
----> 1 loss0, accuracy0, auc0, precision0, recall0 = model.evaluate(val_ds)
5 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in evaluate(self, x, y, batch_size, verbose, sample_weight, steps, callbacks, max_queue_size, workers, use_multiprocessing, return_dict)
1387 with trace.Trace('test', step_num=step, _r=1):
1388 callbacks.on_test_batch_begin(step)
-> 1389 tmp_logs = self.test_function(iterator)
1390 if data_handler.should_sync:
1391 context.async_wait()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
826 tracing_count = self.experimental_get_tracing_count()
827 with trace.Trace(self._name) as tm:
--> 828 result = self._call(*args, **kwds)
829 compiler = "xla" if self._experimental_compile else "nonXla"
830 new_tracing_count = self.experimental_get_tracing_count()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
893 # If we did not create any variables the trace we have is good enough.
894 return self._concrete_stateful_fn._call_flat(
--> 895 filtered_flat_args, self._concrete_stateful_fn.captured_inputs) # pylint: disable=protected-access
896
897 def fn_with_cond(inner_args, inner_kwds, inner_filtered_flat_args):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1917 # No tape is watching; skip to running the function.
1918 return self._build_call_outputs(self._inference_function.call(
-> 1919 ctx, args, cancellation_manager=cancellation_manager))
1920 forward_backward = self._select_forward_and_backward_functions(
1921 args,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in call(self, ctx, args, cancellation_manager)
558 inputs=args,
559 attrs=attrs,
--> 560 ctx=ctx)
561 else:
562 outputs = execute.execute_with_cancellation(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: assertion failed: [predictions must be >= 0] [Condition x >= y did not hold element-wise:] [x (Dense121/dense_1/BiasAdd:0) = ] [[0.853173912 1.97515857 0.608713508...]...] [y (Cast_4/x:0) = ] [0]
[[{{node assert_greater_equal/Assert/AssertGuard/else/_1/assert_greater_equal/Assert/AssertGuard/Assert}}]]
(1) Invalid argument: assertion failed: [predictions must be >= 0] [Condition x >= y did not hold element-wise:] [x (Dense121/dense_1/BiasAdd:0) = ] [[0.853173912 1.97515857 0.608713508...]...] [y (Cast_4/x:0) = ] [0]
[[{{node assert_greater_equal/Assert/AssertGuard/else/_1/assert_greater_equal/Assert/AssertGuard/Assert}}]]
[[assert_greater_equal_2/Assert/AssertGuard/branch_executed/_65/_167]]
0 successful operations.
0 derived errors ignored. [Op:__inference_test_function_61870]
Function call stack:
test_function -> test_function
I tried several things to solve and nothing

Fail to run trainer.train() with huggingface transformer

I am trying to set up a TensorFlow fine-tune framework for a question-answering project. Using hugging-face/transformer as the prototype, but cannot run through the trainer.
The experiment is conducted at Databricks, the pre-trained model loaded is base-bert, train and dev sets are downloaded from hugging-face examples SQUAD 2.0 https://github.com/huggingface/transformers/tree/master/examples/question-answering
The error log complains about the unexpected keyword argument 'is_impossible', which is a SQUAD 2 data format feature.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = TFAutoModelForQuestionAnswering.from_pretrained('bert-base-uncased')
processor = SquadV2Processor()
train_examples = processor.get_train_examples(data_dir, train_file_name)
eval_examples = processor.get_dev_examples(data_dir, dev_file_name)
train_dataset = (squad_convert_examples_to_features(
examples=train_examples,
tokenizer=tokenizer,
max_seq_length=max_seq_length,
doc_stride=doc_stride,
max_query_length=max_query_length,
is_training=True,
return_dataset="tf"
))
eval_dataset = (squad_convert_examples_to_features(
examples=eval_examples,
tokenizer=tokenizer,
max_seq_length=max_seq_length,
doc_stride=doc_stride,
max_query_length=max_query_length,
is_training=False,
return_dataset="tf"
))
training_args = (TFTrainingArguments(
output_dir=output_dir,
num_train_epochs=2,
do_train=True,
per_device_train_batch_size = 8,
per_device_eval_batch_size = 16,
logging_steps=10,
learning_rate=3e-5))
trainer = TFTrainer(model=model, args=training_args, train_dataset=train_dataset, eval_dataset=eval_dataset)
trainer.train()
Error Log as below:
TypeError Traceback (most recent call
last) in
----> 1 trainer.train()
/databricks/python/lib/python3.7/site-packages/transformers/trainer_tf.py
in train(self)
410 if self.args.past_index >= 0:
411 self._past = None
--> 412 for step, training_loss in enumerate(self._training_steps(train_ds, optimizer)):
413 self.global_step = iterations.numpy()
414 self.epoch_logging = epoch_iter - 1 + (step + 1) / steps_per_epoch
/databricks/python/lib/python3.7/site-packages/transformers/trainer_tf.py
in _training_steps(self, ds, optimizer)
457 Returns a generator over training steps (i.e. parameters update).
458 """
--> 459 for i, loss in enumerate(self._accumulate_next_gradients(ds)):
460 if i % self.args.gradient_accumulation_steps == 0:
461 self._apply_gradients(optimizer)
/databricks/python/lib/python3.7/site-packages/transformers/trainer_tf.py
in _accumulate_next_gradients(self, ds)
490 while True:
491 try:
--> 492 yield _accumulate_next()
493 except tf.errors.OutOfRangeError:
494 break
/local_disk0/pythonVirtualEnvDirs/virtualEnv-f56565e5-e45b-447a-b7df-50daf9109495/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py
in call(self, *args, **kwds)
566 xla_context.Exit()
567 else:
--> 568 result = self._call(*args, **kwds)
569
570 if tracing_count == self._get_tracing_count():
/local_disk0/pythonVirtualEnvDirs/virtualEnv-f56565e5-e45b-447a-b7df-50daf9109495/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py
in _call(self, *args, **kwds)
613 # This is the first call of call, so we have to initialize.
614 initializers = []
--> 615 self._initialize(args, kwds, add_initializers_to=initializers)
616 finally:
617 # At this point we know that the initialization is complete (or less
/local_disk0/pythonVirtualEnvDirs/virtualEnv-f56565e5-e45b-447a-b7df-50daf9109495/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py
in _initialize(self, args, kwds, add_initializers_to)
495 self._concrete_stateful_fn = (
496 self._stateful_fn._get_concrete_function_internal_garbage_collected(
pylint: disable=protected-access
--> 497 *args, **kwds))
498
499 def invalid_creator_scope(*unused_args, **unused_kwds):
/local_disk0/pythonVirtualEnvDirs/virtualEnv-f56565e5-e45b-447a-b7df-50daf9109495/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py
in _get_concrete_function_internal_garbage_collected(self, *args,
**kwargs)
2387 args, kwargs = None, None
2388 with self._lock:
-> 2389 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2390 return graph_function
2391
/local_disk0/pythonVirtualEnvDirs/virtualEnv-f56565e5-e45b-447a-b7df-50daf9109495/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py
in _maybe_define_function(self, args, kwargs)
2701
2702 self._function_cache.missed.add(call_context_key)
-> 2703 graph_function = self._create_graph_function(args, kwargs)
2704 self._function_cache.primary[cache_key] = graph_function
2705 return graph_function, args, kwargs
/local_disk0/pythonVirtualEnvDirs/virtualEnv-f56565e5-e45b-447a-b7df-50daf9109495/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py
in _create_graph_function(self, args, kwargs,
override_flat_arg_shapes)
2591 arg_names=arg_names,
2592 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2593 capture_by_value=self._capture_by_value),
2594 self._function_attributes,
2595 # Tell the ConcreteFunction to clean up its graph once it goes out of
/local_disk0/pythonVirtualEnvDirs/virtualEnv-f56565e5-e45b-447a-b7df-50daf9109495/lib/python3.7/site-packages/tensorflow_core/python/framework/func_graph.py
in func_graph_from_py_func(name, python_func, args, kwargs, signature,
func_graph, autograph, autograph_options, add_control_dependencies,
arg_names, op_return_value, collections, capture_by_value,
override_flat_arg_shapes)
976 converted_func)
977
--> 978 func_outputs = python_func(*func_args, **func_kwargs)
979
980 # invariant: func_outputs contains only Tensors, CompositeTensors,
/local_disk0/pythonVirtualEnvDirs/virtualEnv-f56565e5-e45b-447a-b7df-50daf9109495/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py
in wrapped_fn(*args, **kwds)
437 # wrapped allows AutoGraph to swap in a converted function. We give
438 # the function a weak reference to itself to avoid a reference cycle.
--> 439 return weak_wrapped_fn().wrapped(*args, **kwds)
440 weak_wrapped_fn = weakref.ref(wrapped_fn)
441
/local_disk0/pythonVirtualEnvDirs/virtualEnv-f56565e5-e45b-447a-b7df-50daf9109495/lib/python3.7/site-packages/tensorflow_core/python/framework/func_graph.py
in wrapper(*args, **kwargs)
966 except Exception as e: # pylint:disable=broad-except
967 if hasattr(e, "ag_error_metadata"):
--> 968 raise e.ag_error_metadata.to_exception(e)
969 else:
970 raise
TypeError: in converted code:
/databricks/python/lib/python3.7/site-packages/transformers/trainer_tf.py:488
_accumulate_next *
return self._accumulate_gradients(per_replica_features, per_replica_labels)
/databricks/python/lib/python3.7/site-packages/transformers/trainer_tf.py:498
_accumulate_gradients *
per_replica_loss = self.args.strategy.experimental_run_v2(
/local_disk0/pythonVirtualEnvDirs/virtualEnv-f56565e5-e45b-447a-b7df-50daf9109495/lib/python3.7/site-packages/tensorflow_core/python/distribute/one_device_strategy.py:180 experimental_run_v2
return super(OneDeviceStrategy, self).experimental_run_v2(fn, args, kwargs)
/databricks/python/lib/python3.7/site-packages/transformers/trainer_tf.py:511
_forward *
per_example_loss, _ = self._run_model(features, labels, True)
/databricks/python/lib/python3.7/site-packages/transformers/trainer_tf.py:532
_run_model *
outputs = self.model(features, training=training, **labels)[:2]
/local_disk0/pythonVirtualEnvDirs/virtualEnv-f56565e5-e45b-447a-b7df-50daf9109495/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py:778
call
outputs = call_fn(cast_inputs, *args, **kwargs)
TypeError: tf__call() got an unexpected keyword argument 'is_impossible'