As always, TensorFlow is misbehaving in ways I can't make sense of. Can someone please be kind enough to help me out with this? I am able to run the checkpointing tutorial as given on the tutorial page (how much of a mess can saving a model be? leave it to TensorFlow to make a mountain out of a molehill), but the moment I make a small modification here or there, the whole contraption comes crumbling down.
As you can see, I am running the build method, yet I am getting an error that I must run the build method with an input shape. In the tutorial there is no build method at all, and the one layer, self.l1, is built in __init__ itself, which they themselves advise against in several other places.
import tensorflow as tf

class Net(tf.keras.Model):
    """A simple linear model."""

    def __init__(self):
        super(Net, self).__init__()
        # self.l1 = tf.keras.layers.Dense(5)

    def build(self, input_shape):
        self.l1 = tf.keras.layers.Dense(5)
        self.dummy = tf.Variable(trainable=True,
                                 initial_value=tf.keras.initializers.glorot_normal()(
                                     shape=input_shape, dtype=tf.float32))
        print('built layers')

    def call(self, x):
        return self.l1(x)

net = Net()
net.build([1,])
net.save_weights('easy_checkpoint')
The output and traceback I am getting is:
built layers
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-31-3b54dc506ffd> in <module>
1 net = Net()
2 net.build([1,])
----> 3 net.save_weights('easy_checkpoint')
~/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/network.py in save_weights(self, filepath, overwrite, save_format)
1111 ValueError: For invalid/unknown format arguments.
1112 """
-> 1113 self._assert_weights_created()
1114 filepath_is_h5 = _is_hdf5_filepath(filepath)
1115 if save_format is None:
~/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/network.py in _assert_weights_created(self)
1560 'Weights are created when the Model is first called on '
1561 'inputs or `build()` is called with an `input_shape`.' %
-> 1562 self.name)
1563
1564 def _graph_network_add_loss(self, symbolic_loss):
ValueError: Weights for model net_10 have not yet been created. Weights are created when the Model is first called on inputs or `build()` is called with an `input_shape`.
Edit: Here is my hunch: the problem with my code is that build does not actually execute the build of self.l1; it only creates it. Things do work out fine if I create self.l1 in __init__ and call super().build(input_shape) as the first line of Net's build. That much makes sense, but the code fails again if I replace super().build(input_shape) with self.l1.build(input_shape). Also, the code below shows that all the variables are actually there, so I am lost again. Any help is much appreciated.
tf.random.set_seed(42)

class Net1(tf.keras.Model):
    """A simple linear model."""

    def __init__(self):
        super(Net1, self).__init__()
        self.l1 = tf.keras.layers.Dense(5)

    def build(self, input_shape):
        super().build(input_shape)
        self.dummy = tf.Variable(trainable=True,
                                 initial_value=tf.keras.initializers.glorot_normal()(
                                     shape=(1,), dtype=tf.float32))
        print(self.variables)

    def call(self, x):
        return self.l1(x)

net = Net1()
net.build((10, 1))
print('*' * 50)
print(net.variables)
output:
[<tf.Variable 'dense_56/kernel:0' shape=(1, 5) dtype=float32, numpy=
array([[ 0.3291242 , -0.11798644, -0.294235 , -0.07103491, -0.9326792 ]],
dtype=float32)>, <tf.Variable 'dense_56/bias:0' shape=(5,) dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>, <tf.Variable 'Variable:0' shape=(1,) dtype=float32, numpy=array([0.09575049], dtype=float32)>]
**************************************************
[<tf.Variable 'dense_56/kernel:0' shape=(1, 5) dtype=float32, numpy=
array([[ 0.3291242 , -0.11798644, -0.294235 , -0.07103491, -0.9326792 ]],
dtype=float32)>, <tf.Variable 'dense_56/bias:0' shape=(5,) dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>, <tf.Variable 'Variable:0' shape=(1,) dtype=float32, numpy=array([0.09575049], dtype=float32)>]
whereas,
tf.random.set_seed(42)

class Net1(tf.keras.Model):
    """A simple linear model."""

    def __init__(self):
        super(Net1, self).__init__()
        self.l1 = tf.keras.layers.Dense(5)

    def build(self, input_shape):
        self.l1.build(input_shape)
        self.dummy = tf.Variable(trainable=True,
                                 initial_value=tf.keras.initializers.glorot_normal()(
                                     shape=(1,), dtype=tf.float32))
        print('variables', self.l1.variables, self.dummy)

    def call(self, x):
        return self.l1(x)

net = Net1()
net.build((10, 1))
print(net.variables)
output:
variables [<tf.Variable 'kernel:0' shape=(1, 5) dtype=float32, numpy=
array([[ 0.3291242 , -0.11798644, -0.294235 , -0.07103491, -0.9326792 ]],
dtype=float32)>, <tf.Variable 'bias:0' shape=(5,) dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>] <tf.Variable 'Variable:0' shape=(1,) dtype=float32, numpy=array([0.09575049], dtype=float32)>
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-77-35561efcdc2f> in <module>
15 net = Net1()
16 net.build((10,1))
---> 17 print(net.variables)
~/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in variables(self)
1965 A list of variables.
1966 """
-> 1967 return self.weights
1968
1969 @property
~/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/network.py in weights(self)
498 A list of variables.
499 """
--> 500 return self._dedup_weights(self._undeduplicated_weights)
501
502 @property
~/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/network.py in _undeduplicated_weights(self)
503 def _undeduplicated_weights(self):
504 """Returns the undeduplicated list of all layer variables/weights."""
--> 505 self._assert_weights_created()
506 weights = []
507 for layer in self._layers:
~/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/network.py in _assert_weights_created(self)
1560 'Weights are created when the Model is first called on '
1561 'inputs or `build()` is called with an `input_shape`.' %
-> 1562 self.name)
1563
1564 def _graph_network_add_loss(self, symbolic_loss):
ValueError: Weights for model net1_40 have not yet been created. Weights are created when the Model is first called on inputs or `build()` is called with an `input_shape`.
TL;DR: This is not a problem with the save_weights method. In order to build a subclassed model, you need to run the subclassed model on a real input. I only added two lines to the end of your code, as shown below.
# net.build(input_shape=[1,])  # not needed: when you call the model with real input, the `build` method will be executed
x_train = tf.random.normal(shape=(100, 1), dtype=tf.float32)
output = net.predict(x_train)
Please check below for more details.
import tensorflow as tf

class Net(tf.keras.Model):
    """A simple linear model."""

    def __init__(self):
        super(Net, self).__init__()
        # self.l1 = tf.keras.layers.Dense(5)

    def build(self, input_shape):
        self.l1 = tf.keras.layers.Dense(5)
        self.dummy = tf.Variable(trainable=True,
                                 initial_value=tf.keras.initializers.glorot_normal()(
                                     shape=(1,), dtype=tf.float32))
        print('built layers')

    def call(self, x):
        return self.l1(x)

net = Net()
# net.build(input_shape=[1,])  # not needed: when you call the model with real input, the `build` method will be executed
x_train = tf.random.normal(shape=(100, 1), dtype=tf.float32)
output = net.predict(x_train)
net.save_weights('easy_checkpoint')
A subclassed model is a piece of Python code (a call method). There is no graph of layers here. We cannot know how the layers are connected to each other (because that is defined in the body of call, not as an explicit data structure), so we cannot infer input/output shapes. You can try printing model.summary() after instantiating the subclassed model; it will throw the same error you reported.
In contrast to subclassed models, you can do all these things (printing the summary, input/output shapes) with a Functional or Sequential model, because those models are static graphs of layers.
With that simple modification, your code is working as expected. I can print the weights, shapes etc., and can save weights also.
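For illustration (my own minimal sketch, not part of the tutorial), here is the same kind of one-layer model written with the Functional API; because the graph of layers is explicit, summary() and save_weights work before any data has been passed through the model:
import tensorflow as tf

# The layer graph is declared explicitly, so shapes are known statically.
inputs = tf.keras.Input(shape=(1,))
outputs = tf.keras.layers.Dense(5)(inputs)
functional_net = tf.keras.Model(inputs=inputs, outputs=outputs)

functional_net.summary()                        # works immediately
functional_net.save_weights('easy_checkpoint')  # weights already exist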
Related
I'm trying to get the batch size inside the call() function of a TF2 model.
However, I cannot get it, because all the methods I know return None or a Tensor instead of a dimension tuple.
Here is a short example:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Model

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()

    def call(self, x):
        print(len(x))
        print(x.shape)
        print(tf.size(x))
        print(np.shape(x))
        print(x.get_shape())
        print(x.get_shape().as_list())
        print(tf.rank(x))
        print(tf.shape(x))
        print(tf.shape(x)[0])
        print(tf.shape(x)[1])
        return tf.random.uniform((2, 10))

m = MyModel()
m.compile(optimizer="Adam", loss="sparse_categorical_crossentropy", metrics=['accuracy'])
m.fit(np.array([[1, 2, 3, 4], [5, 6, 7, 8]]), np.array([0, 1]), epochs=1)
The output is:
Tensor("my_model_26/strided_slice:0", shape=(), dtype=int32)
(None, 4)
Tensor("my_model_26/Size:0", shape=(), dtype=int32)
(None, 4)
(None, 4)
[None, 4]
Tensor("my_model_26/Rank:0", shape=(), dtype=int32)
Tensor("my_model_26/Shape_2:0", shape=(2,), dtype=int32)
Tensor("my_model_26/strided_slice_1:0", shape=(), dtype=int32)
Tensor("my_model_26/strided_slice_2:0", shape=(), dtype=int32)
1/1 [==============================] - 0s 1ms/step - loss: 3.1796 - accuracy: 0.0000e+00
I fed a (2, 4) NumPy array as input and a (2,) array as the target to the model in this example.
But as you can see, I cannot get the batch size in the call() function.
The reason I need it is that I have to iterate over tensors by batch size, which is dynamic in my real model.
For example, if the dataset size is 10 and the batch size is 3, then the size of the last batch is 1. So I have to know the batch size dynamically.
Can anyone help me?
TensorFlow 2.3.3
CUDA 10.2
Python 3.6.9
It's because you're using TensorFlow (that's unavoidable, since Keras is now part of TensorFlow), and by using TensorFlow you need to be aware of how the dynamic graph is "compiled" into a static graph.
In short, your call method is (under the hood) decorated with the @tf.function decorator.
This decorator:
Traces the Python function execution
Converts the Python operations into TensorFlow operations (e.g. if a > b becomes tf.cond(tf.greater(a, b), something, something_else))
Creates a tf.Graph (the static graph)
Executes the static graph just created.
All your print calls are executed during the first step (the Python execution tracing); that's why, even if you train your model, you see the output only once.
That said, to get the runtime (dynamic) shape of a tensor, you must use tf.shape(x); the batch size is just batch_size = tf.shape(x)[0].
Please note that if you want to see the shape at runtime, you can't use print; you must use tf.print.
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()

    def call(self, x):
        shape = tf.shape(x)
        batch_size = shape[0]
        tf.print(shape, batch_size)
        return tf.random.uniform((2, 10))

m = MyModel()
m.compile(
    optimizer="Adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)
m.fit(np.array([[1, 2, 3, 4], [5, 6, 7, 8]]), np.array([0, 1]), epochs=1)
More information about static and dynamic shapes: https://pgaleone.eu/tensorflow/2018/07/28/understanding-tensorflow-tensors-shape-static-dynamic/
More info about the tf.function behavior: https://pgaleone.eu/tensorflow/tf.function/2019/03/21/dissecting-tf-function-part-1/
Note: I wrote these articles.
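To see the trace-once behavior in isolation, here is a small standalone sketch (my own addition, not from the articles above): the Python print fires only while the function is being traced, while tf.print fires on every execution.
import tensorflow as tf

@tf.function
def double(x):
    print('tracing')                               # Python print: runs only during tracing
    tf.print('running, shape =', tf.shape(x))      # tf.print: runs on every call
    return x * 2

double(tf.ones((2, 4)))   # prints 'tracing' and 'running, shape = [2 4]'
double(tf.ones((2, 4)))   # same input signature, no retrace: prints only 'running, shape = [2 4]'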
If you want to see the actual data and shapes, you can turn eager execution on with run_eagerly=True, but it is not a good solution, since it makes training slow.
Set it like this:
m.compile(optimizer="Adam", loss="sparse_categorical_crossentropy",
          metrics=['accuracy'], run_eagerly=True)
Then the output will be:
(2, 4)
tf.Tensor(8, shape=(), dtype=int32)
(2, 4)
(2, 4)
[2, 4]
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor([2 4], shape=(2,), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
I am trying to multiply each channel in a tensor by a scalar value:
import tensorflow as tf
t = tf.ones([2,3,3,4])
w = tf.constant([1,2,3,4], dtype=tf.float32)
tf.multiply(t,w)
yields
<tf.Tensor: shape=(2, 3, 3, 4), dtype=float32, numpy=
array([[[[1., 2., 3., 4.],
[1., 2., 3., 4.],
[1., 2., 3., 4.]],
...
which is correct.
Now I am trying to wrap that operation inside a keras.layers.Layer, where w is a learnable parameter. I also want to plot my model using tf.keras.utils.plot_model(m). I encounter several problems.
Method 1
from tensorflow.keras import Model, Input

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.w = self.add_weight(shape=(256,), trainable=True)

    def call(self, x):
        return x * self.w
I plot this model using
mm = MyModel()
x = Input(shape=(64, 64, 256), batch_size=10, name='Input')
m = Model(inputs=[x], outputs=mm.call(x))
tf.keras.utils.plot_model(m)
Problem: I encountered the following warning:
WARNING:tensorflow:
The following Variables were used a Lambda layer's call (tf.math.multiply_2), but
are not present in its tracked objects:
<tf.Variable 'Variable:0' shape=(256,) dtype=float32>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
Question: Can I safely ignore the warning, and will the weights still be learned? If yes, how can I suppress this warning?
Method 2
As suggested in the warning, I wrap the multiplication in its own subclassed layer:
from tensorflow.keras.layers import Layer

class MyMultiply(Layer):
    def __init__(self):
        super(MyMultiply, self).__init__()

    def call(self, x):
        return tf.multiply(x[0], x[1])

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.w = self.add_weight(shape=(256,), trainable=True)
        self.mul = MyMultiply()

    def call(self, x):
        return self.mul([x, self.w])
Problem: This works until the model is plotted. Then I encounter the following error: AttributeError: 'ResourceVariable' object has no attribute '_keras_history'
Traceback:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-18-e4cc5cc97726> in <module>()
21 x = Input(shape=(64, 64, 256), batch_size=10, name='Input')
22 m = Model(inputs=[x], outputs=mm.call(x))
---> 23 tf.keras.utils.plot_model(m)
---------------------------------------------------------------------------
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/node.py in <lambda>(t)
259 if self.is_input:
260 return []
--> 261 inbound_layers = nest.map_structure(lambda t: t._keras_history.layer,
262 self.call_args[0])
263 return inbound_layers
AttributeError: 'ResourceVariable' object has no attribute '_keras_history'
Question: How do I resolve that error? Is this a bug (I submitted an issue to the tf github repo, however it was deleted immediately)?
Method 3
I try to use keras.layers.Multiply instead:
from tensorflow.keras.layers import Multiply

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.w = self.add_weight(shape=(256,), trainable=True)
        self.mul = Multiply()

    def call(self, x):
        return self.mul([x, self.w])
Problem: ValueError: Can not merge tensors with different batch sizes. Got tensors with shapes : [(10, 64, 64, 256), (256,)]
To my understanding, the ValueError occurs because the internal _Merge layer checks for equal batch sizes. The Multiply layer, however, implements the multiplication with broadcasting (which should work!):
# from tensorflow/python/keras/layers/merge.py, lines 316-320
def _merge_function(self, inputs):
    output = inputs[0]
    for i in range(1, len(inputs)):
        output = output * inputs[i]
    return output
I could use tf.broadcast_to and so on; however, to my understanding this would materialize the tensor and occupy more memory, which I am trying to avoid.
Question: Is there another way to make keras.layers.Multiply work, so that ultimately the model plotting works?
You can avoid the warning in Method 1 by creating a Keras Layer instead of a Model.
import tensorflow as tf

class MyLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(MyLayer, self).__init__()
        self.w = self.add_weight(name='multiply_weight', shape=(256,), trainable=True)

    def call(self, x):
        return tf.multiply(x, self.w)

mul_layer = MyLayer()
x = tf.keras.Input(shape=(64, 64, 256), batch_size=10, name='Input')
output = mul_layer(x)
m = tf.keras.Model(inputs=[x], outputs=output)
tf.keras.utils.plot_model(m)
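As a small variation on that answer (my own sketch, not part of it), the hard-coded 256 could be avoided by creating the variable in build(), so the weight size is taken from the last dimension of whatever input the layer receives:
import tensorflow as tf

class MyLayer(tf.keras.layers.Layer):
    def build(self, input_shape):
        # input_shape is e.g. (batch, 64, 64, channels); size the weight by the channel dimension.
        self.w = self.add_weight(name='multiply_weight',
                                 shape=(input_shape[-1],), trainable=True)

    def call(self, x):
        return tf.multiply(x, self.w)

x = tf.keras.Input(shape=(64, 64, 256), batch_size=10, name='Input')
m = tf.keras.Model(inputs=[x], outputs=MyLayer()(x))
tf.keras.utils.plot_model(m)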
I'm trying to redefine Keras's binary_crossentropy loss function so that I can customize it, but it's not giving me the same results as the existing one.
I'm using TF 1.13.1 with Keras 2.2.4.
I went through Keras's GitHub code. My understanding is that the loss in model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) is defined in losses.py, using the binary_crossentropy defined in tensorflow_backend.py.
I ran dummy data and a model to test it. Here are my findings:
The custom loss function outputs the same results as Keras's one
Using the custom loss in a Keras model gives different accuracy results
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)

import tensorflow as tf
from keras import losses
import keras.backend as K
import keras.backend.tensorflow_backend as tfb
from keras.layers import Dense
from keras import Sequential


# Dummy check of loss output
def binary_crossentropy_custom(y_true, y_pred):
    return K.mean(binary_crossentropy_custom_tf(y_true, y_pred), axis=-1)


def binary_crossentropy_custom_tf(target, output, from_logits=False):
    """Binary crossentropy between an output tensor and a target tensor.

    # Arguments
        target: A tensor with the same shape as `output`.
        output: A tensor.
        from_logits: Whether `output` is expected to be a logits tensor.
            By default, we consider that `output`
            encodes a probability distribution.

    # Returns
        A tensor.
    """
    # Note: tf.nn.sigmoid_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        # transform back to logits
        _epsilon = tfb._to_tensor(tfb.epsilon(), output.dtype.base_dtype)
        output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
        output = tf.log(output / (1 - output))
    return tf.nn.sigmoid_cross_entropy_with_logits(labels=target,
                                                   logits=output)


logits = tf.constant([[-3., -2.11, -1.22],
                      [-0.33, 0.55, 1.44],
                      [2.33, 3.22, 4.11]])
labels = tf.constant([[1., 1., 1.],
                      [1., 1., 0.],
                      [0., 0., 0.]])

custom_sigmoid_cross_entropy_with_logits = binary_crossentropy_custom(labels, logits)
keras_binary_crossentropy = losses.binary_crossentropy(y_true=labels, y_pred=logits)

with tf.Session() as sess:
    print('CUSTOM sigmoid_cross_entropy_with_logits: ',
          sess.run(custom_sigmoid_cross_entropy_with_logits), '\n')
    print('KERAS keras_binary_crossentropy: ',
          sess.run(keras_binary_crossentropy), '\n')
#CUSTOM sigmoid_cross_entropy_with_logits: [16.118095 10.886106 15.942386]
#KERAS keras_binary_crossentropy: [16.118095 10.886106 15.942386]
# Dummy check of model accuracy
X_train = tf.random.uniform((3, 5), minval=0, maxval=1, dtype=tf.dtypes.float32)
labels = tf.constant([[1., 0., 0.],
                      [0., 0., 1.],
                      [1., 0., 0.]])

model = Sequential()
# First hidden layer
model.add(Dense(5, activation='relu', kernel_initializer='random_normal', input_dim=5))
# Output layer
model.add(Dense(3, activation='sigmoid', kernel_initializer='random_normal'))

# I ran model.fit for each model.compile below 10 times using the same X_train
# and report the range of accuracy measurements:
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])        # 0.748 < acc < 0.779
# model.compile(optimizer='adam', loss=losses.binary_crossentropy, metrics=['accuracy'])   # 0.761 < acc < 0.778
model.compile(optimizer='adam', loss=binary_crossentropy_custom, metrics=['accuracy'])     # 0.617 < acc < 0.663

history = model.fit(X_train, labels, steps_per_epoch=100, epochs=1)
I'd expect the custom loss function to give similar model accuracy, but it does not. Any ideas? Thanks!
Keras automatically selects which accuracy implementation to use according to the loss, and this selection doesn't work if you use a custom loss. But in this case you can just explicitly use the right accuracy, which is binary_accuracy:
model.compile(optimizer='adam', loss=binary_crossentropy_custom, metrics=['binary_accuracy'])
This took me more than a day, and it was quite frustrating. I suspect this might be a bug in TensorFlow 1.13.1 (a stable release).
In summary, I created a custom model in the Model Subclassing style that contains only one custom layer. After initialization, I dumped its trainable weights to a file and restored them using the save_weights and load_weights functions. The trainable weights before saving and after loading were different.
I also ran the same test on TensorFlow 2.0.0a0, and that version did not show this behavior.
My custom layer:
import tensorflow as tf

class EncodingLayer(tf.keras.layers.Layer):
    def __init__(self, out_size):
        super().__init__()
        self.rnn_layer = tf.keras.layers.GRU(out_size, return_sequences=True,
                                             return_state=True,
                                             recurrent_initializer='glorot_uniform')

    def call(self, X, **kwargs):
        output, state = self.rnn_layer(X)
        return output, state
This is the main part:
class EncodingModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.encoder_layer = EncodingLayer(out_size=1)

    def infer(self, inputs):
        output, state = self.encoder_layer(inputs)
        return output

if __name__ == '__main__':
    # Comment out the line below when running in TF 2.0
    tf.enable_eager_execution()

    # shape == (2, 3, 2)
    inputs = tf.convert_to_tensor([
        [[1., 2.], [2., 3.], [4., 4.]],
        [[1., 2.], [2., 3.], [4., 4.]],
    ])

    model = EncodingModel()
    # Just for building the graph
    model.infer(inputs)
    print('Before saving model: ', model.trainable_weights[0].numpy().mean())
    model.save_weights('weight')

    new_model = EncodingModel()
    new_model.infer(inputs)
    new_model.load_weights('weight')
    print('Loaded model: ', new_model.trainable_weights[0].numpy().mean())
The result when running in TF 1.13.1:
Before saving model: 0.28864467
Loaded model: 0.117300846
The result when running in TF 2.0.0a0:
Before saving model: -0.06922924
Loaded model: -0.06922924
Although the result suggests it might be a bug, I was not so sure. Since the code is very basic, a bug like that should have been discovered easily. I did a lot of searching but found no one mentioning it. So I'd guess there's something I have misunderstood :)
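As a side note (my own addition), the comparison above only looks at the mean of the first trainable weight; a more thorough check would compare every weight tensor of the two models, for example:
import numpy as np

# Compare each trainable weight of the original and the restored model.
for w_old, w_new in zip(model.trainable_weights, new_model.trainable_weights):
    print(w_old.name, 'restored correctly:', np.allclose(w_old.numpy(), w_new.numpy()))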
I have created a custom layer (called GraphGather) in Keras, yet the output tensor prints as:
Tensor("graph_gather/Tanh:0", shape=(?, ?), dtype=float32)
For some reason the shape is being returned as (?,?), which is causing the next dense layer to raise the following error:
ValueError: The last dimension of the inputs to Dense should be defined. Found None.
The GraphGather layer code is as follows:
class GraphGather(tf.keras.layers.Layer):
    def __init__(self, batch_size, num_mols_in_batch, activation_fn=None, **kwargs):
        self.batch_size = batch_size
        self.num_mols_in_batch = num_mols_in_batch
        self.activation_fn = activation_fn
        super(GraphGather, self).__init__(**kwargs)

    def build(self, input_shape):
        super(GraphGather, self).build(input_shape)

    def call(self, x, **kwargs):
        # some operations (most of call omitted)
        out_tensor = result_of_operations()  # this line is pseudo code
        if self.activation_fn is not None:
            out_tensor = self.activation_fn(out_tensor)
        return out_tensor

    def compute_output_shape(self, input_shape):
        return (self.num_mols_in_batch, 2 * input_shape[0][-1])
I have also tried hardcoding compute_output_shape to be:
def compute_output_shape(self, input_shape):
    return (64, 150)
Yet the output tensor when printed is still
Tensor("graph_gather/Tanh:0", shape=(?, ?), dtype=float32)
which causes the ValueError written above.
System information
Have written custom code
OS Platform and Distribution: Linux Ubuntu 16.04
TensorFlow version: 1.5.0
Python version: 3.5.5
I had the same problem. My workaround was to add the following lines to the call method:
input_shape = tf.shape(x)
and then:
return tf.reshape(out_tensor, self.compute_output_shape(input_shape))
I haven't run into any problems with it yet.
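To make the idea concrete, here is a self-contained sketch of the pattern (my own toy example written against a recent TF 2.x API, not the original GraphGather code): whatever the layer computes, reshaping the result with the dynamic batch size and a statically known feature size gives downstream Dense layers the defined last dimension they need.
import tensorflow as tf

class ToyGather(tf.keras.layers.Layer):
    def call(self, x, **kwargs):
        out_tensor = tf.reduce_sum(x, axis=1)   # stand-in for the omitted gather operations
        batch = tf.shape(x)[0]                  # dynamic batch size
        features = x.shape[-1]                  # statically known feature size
        return tf.reshape(out_tensor, [batch, features])

inputs = tf.keras.Input(shape=(7, 150))
gathered = ToyGather()(inputs)
outputs = tf.keras.layers.Dense(10)(gathered)   # no "last dimension ... should be defined" error
model = tf.keras.Model(inputs, outputs)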
If Johnny's answer doesn't work, another way I found to get around this is to follow the advice here: https://github.com/tensorflow/tensorflow/issues/38296#issuecomment-623698709
which is to call the set_shape method on the output of your layer.
E.g.
l = GraphGather(...)
y = l(x)
y.set_shape(l.compute_output_shape(x.shape))
This only works if you are using the functional API.