I started making a sequential network using tensorflow for food classification.
When I created the simplest model I ran into the following issue: `model.predict(images[99])` raised this error:
Input 0 of layer "dense_2" is incompatible with the layer: expected axis -1 of input shape to have value 4096, but received input with shape (32, 64).
It happened even though
`images[99].shape` is `(64, 64)`.
`images` is the data, where every element is an image with one channel:
`images.shape` is `(10099, 64, 64)`.
Model:
`
# Minimal fully-connected classifier: each 64x64 single-channel image is
# flattened to 4096 values, followed by one hidden layer and a 101-way softmax.
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(64, 64)),
    keras.layers.Dense(4096, activation=tf.nn.relu),
    keras.layers.Dense(101, activation=tf.nn.softmax),
])
model.compile(
    optimizer='adam',
    # NOTE(review): MSE against a softmax output presumably expects one-hot
    # `categories_tr` -- confirm the label format.
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=['accuracy'],
)
model.fit(images_tr, categories_tr, epochs=2)
`
It also looks absurd to me, because when I try
`model.predict(np.zeros((64, 64)))`
I get the same issue
Also when I do evaluation model.evaluate(images) it works perfectly fine.
I have tried to change version of tensorflow from 2.9.0 to 2.2.2, that didn't help.
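That is because `predict` treats the first axis of its input as the batch axis: a single (64, 64) image is read as 64 samples of shape (64,), which Keras groups into batches of 32, hence the received shape (32, 64). Add a batch dimension (for example `model.predict(images[99][np.newaxis, ...])`), or do something like the following to create a flexible layer whose shape follows your conditions.
Sample: you can calculate the input shape for the target layer as shown below.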
import tensorflow as tf
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Range parameters: 3, 6, ..., 12288, i.e. 4096 values.
start, limit, delta = 3, 12291, 3

# Create DATA: the same 4096-value ramp twice, plus one 3-value label per sample.
values = tf.cast(tf.range(start, limit, delta), dtype=tf.int64).numpy()
sample = tf.constant([values, values], shape=(2, 4096, 1))
label = tf.constant(
    [
        [0.2, 0.8, 0.8],
        [0.0, 0.0, 0.8],
    ],
    dtype=tf.float32,
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Class / Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class MyDenseLayer(tf.keras.layers.Layer):
    """Bias-free dense layer whose kernel is sized lazily from its input.

    Args:
        num_outputs: Size of the last axis of the layer's output.
    """

    def __init__(self, num_outputs):
        super().__init__()
        self.num_outputs = num_outputs

    def build(self, input_shape):
        """Create the kernel once the size of the input's last axis is known."""
        # The name is passed by keyword: newer Keras releases take `shape` as
        # the first positional parameter of `add_weight`, so a positional
        # "kernel" would be interpreted as the shape.
        self.kernel = self.add_weight(
            name="kernel",
            shape=[int(input_shape[-1]), self.num_outputs],
        )

    def call(self, inputs):
        """Return `inputs @ kernel` (no bias, no activation)."""
        return tf.matmul(inputs, self.kernel)
# The flattened sample length is folded into rows of 64 values each.
rows = int(sample.shape[-2] / 64)
input_layer = tf.keras.layers.InputLayer(input_shape=(rows, 64, 1))
# Custom dense head with 3 outputs (one per label value).
layer_01 = MyDenseLayer(3)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Stack: input -> flatten -> 36-unit ReLU layer -> custom 3-output dense layer.
model = tf.keras.models.Sequential()
model.add(input_layer)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(36, activation='relu'))
model.add(layer_01)
model.summary()
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Two (features, target) pairs: features reshaped to (1, 64, 64) per element,
# targets to (3, 1) per element.
features = tf.constant(tf.cast(sample, dtype=tf.int64), shape=(2, 1, 64, 64), dtype=tf.int64)
targets = tf.constant(label, shape=(2, 3, 1), dtype=tf.float32)
dataset = tf.data.Dataset.from_tensor_slices((features, targets))
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Nadam configuration, collected in one place and expanded into the constructor.
nadam_options = {
    'learning_rate': 0.00001,
    'beta_1': 0.9,
    'beta_2': 0.999,
    'epsilon': 1e-07,
    'name': 'Nadam',
}
optimizer = tf.keras.optimizers.Nadam(**nadam_options)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Binary cross-entropy on probabilities (not logits), without label smoothing.
bce_options = {
    'from_logits': False,
    'label_smoothing': 0.0,
    'axis': -1,
    'reduction': tf.keras.losses.Reduction.AUTO,
    'name': 'binary_crossentropy',
}
lossfn = tf.keras.losses.BinaryCrossentropy(**bce_options)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Attach the optimizer and loss defined above and track accuracy.
model.compile(
    optimizer=optimizer,
    loss=lossfn,
    metrics=['accuracy'],
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# `dataset` is a tf.data.Dataset, so `batch_size` is not passed: Keras
# documents that argument as not applicable to dataset inputs, because
# batching is a property of the dataset itself.
history = model.fit(dataset, epochs=5)

# Predict on the second sample, reshaped to a batch holding one image.
sample_image = tf.constant(sample[1, :, :], shape=(1, int(sample.shape[-2] / 64), 64, 1))
predictions = model.predict(sample_image)
print(predictions)
Output: 3 dots controls rotor communication wireless.
Epoch 1/10000
2/2 [==============================] - 1s 4ms/step - loss: 10.8326 - accuracy: 0.0000e+00
Epoch 2/10000
2/2 [==============================] - 0s 5ms/step - loss: 10.8326 - accuracy: 0.0000e+00
[[ 0.0, 1.0, 0.8 ]]
Output: Application for motors rotors or communication wireless.
Related
Hello, I need to build an ANN using the binary_alpha_digits dataset from tensorflow_datasets, but I am unable to pass in the training data: the model expects a 'flatten_input' tensor, while I am passing a dictionary with the keys ['image', 'label']. How do I solve this problem? I appreciate any help, thanks.
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
# NOTE(review): `tfds` is used here but this snippet never imports it
# (`import tensorflow_datasets as tfds` is missing).
# Without `as_supervised=True`, each dataset element is a dict with 'image'
# and 'label' keys rather than an (image, label) pair.
train_ds, test_ds = tfds.load('BinaryAlphaDigits',
split=['train[:60%]', 'train[60%:]'])
model = tf.keras.Sequential()
# NOTE(review): input_shape has no channel axis and assumes 28x28 images --
# confirm against the actual image shape of the dataset.
model.add(layers.Flatten(input_shape=(28, 28)))
model.add(layers.Dense(10, activation=tf.nn.relu))
model.add(layers.Dense(10, activation=tf.nn.relu))
# NOTE(review): 10 output units; verify this matches the number of classes.
model.add(layers.Dense(10, activation=tf.nn.softmax))
model.compile(optimizer= tf.optimizers.Adam(),
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
epochs = 10
# The dataset is passed as loaded: no mapping to (image, label) and no batching.
model.fit(train_ds, epochs=epochs)
Since you feed images into the model, the input shape must be defined as (height, width, channels), which describes the image dimensions and color mode; secondly, you should preprocess the dataset before fitting the model on it.
Also note that the number of units in the output layer is not set correctly for this dataset, because it has more than 10 labels: the dataset contains 36 classes (the digits '0'-'9' and the letters 'A'-'Z', with 39 examples of each), so the last layer needs at least 36 units.
Here is code that works for you, with a preprocessing function for images and labels. Note that the images in the dataset have shape (20, 16, 1), so you can either resize them to (28, 28, 1) or feed the model the images at their original size.
After preprocessing, the images are grouped into batches (mini-batches), and the training set is shuffled to avoid high variance on the test set; that is what the operations below are for.
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
import tensorflow_datasets as tfds
# Split the dataset's 'train' split 60% / 40% into training and test sets.
train_ds, test_ds = tfds.load('BinaryAlphaDigits', split=['train[:60%]', 'train[60%:]'])
def preprocess(data):
    """Turn one dataset element (a dict) into an ``(image, label)`` pair.

    The value under ``'image'`` is resized to 28x28; the value under
    ``'label'`` is passed through unchanged.
    """
    resized = tf.image.resize(data['image'], (28, 28))
    return resized, data['label']
# Training pipeline: preprocess, shuffle with a 1024-element buffer, batch by 32.
train_ds = train_ds.map(preprocess).shuffle(1024).batch(batch_size=32)
# Test pipeline: same preprocessing and batching, no shuffling.
test_ds = test_ds.map(preprocess).batch(batch_size=32)
# binary_alpha_digits has 36 classes ('0'-'9' and 'A'-'Z'); 39 is the number
# of examples per class, not the number of labels. The softmax layer therefore
# gets 36 units.
model = tf.keras.Sequential([
    layers.Flatten(input_shape=(28, 28, 1)),
    layers.Dense(10, activation=tf.nn.relu),
    layers.Dense(10, activation=tf.nn.relu),
    layers.Dense(36, activation=tf.nn.softmax),
])
# Labels are integer class ids, hence the sparse cross-entropy loss.
model.compile(
    optimizer=tf.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
epochs = 10
model.fit(train_ds, epochs=epochs)
tfds.load by default gives a dictionary with image and label as the keys.
# Load with the default settings and look at the structure of one batch.
train_ds, test_ds = tfds.load(
    'BinaryAlphaDigits',
    split=['train[:60%]', 'train[60%:]'],
)
train_ds = train_ds.shuffle(1024).batch(4)
for batch in train_ds.take(1):
    # Each element is a dict, so fields are accessed by key.
    print(type(batch))
    print(batch['image'].shape, batch['label'])
>>>
<class 'dict'>
(4, 20, 16, 1) tf.Tensor([ 6 32 6 12], shape=(4,), dtype=int64)
There is an option called `as_supervised` that makes `tfds.load` return `(image, label)` tuples instead of dictionaries; check the `tfds.load` documentation.
If you use that option and the proper input and output sizes, your model works:
# Load as (image, label) tuples and look at the structure of one batch.
train_ds, test_ds = tfds.load(
    'BinaryAlphaDigits',
    split=['train[:60%]', 'train[60%:]'],
    as_supervised=True,
)
train_ds = train_ds.shuffle(1024).batch(4)
for batch in train_ds.take(1):
    # Each element is now a tuple, so fields are accessed by position.
    print(type(batch))
    print(batch[0].shape, batch[1])
>>>
<class 'tuple'>
(4, 20, 16, 1) tf.Tensor([13 13 22 31], shape=(4,), dtype=int64)
# 36-way classifier on flattened 20x16 single-channel images.
model = tf.keras.Sequential([
    layers.Flatten(input_shape=(20, 16, 1)),
    layers.Dense(10, activation=tf.nn.relu),
    layers.Dense(10, activation=tf.nn.relu),
    layers.Dense(36, activation=tf.nn.softmax),
])
model.compile(
    optimizer=tf.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
epochs = 10
model.fit(train_ds, epochs=epochs)
>>>
Epoch 1/10
211/211 [==============================] - 1s 3ms/step - loss: 3.5428 - accuracy: 0.0629
Epoch 2/10
211/211 [==============================] - 0s 2ms/step - loss: 3.2828 - accuracy: 0.1105
I would like to know
(1) how often the call() method of tf.keras.losses.Loss
and the update_state() method of tf.keras.metrics.Metric gets called during a training:
are they called per each instance (observation)?
or called per each batch?
(2) the dimension of y_true and y_pred passed to those methods:
are their dimension (batch_size x output_dimension)
or (1 x output_dimension)
The following code snippet comes from
https://www.tensorflow.org/guide/keras/train_and_evaluate
For experiment I insert print(y_true.shape, y_pred.shape) in update_state() and I find that it is only printed once in the first epoch. From the print, it looks like y_true and y_pred have the dimension of
(1 x output_dimension) in this particular example but is it always the case?
So, additionally
(3) I would like to know why it is printed only once and only in the first epoch.
(4) I can't print the value of y_true or y_pred. How can I?
Epoch 1/3
(None, 1) (None, 10)
(None, 1) (None, 10)
782/782 [==============================] - 3s 4ms/step - loss: 0.5666 - categorical_true_positives: 22080.8940
Epoch 2/3
782/782 [==============================] - 3s 4ms/step - loss: 0.1680 - categorical_true_positives: 23877.1162
Epoch 3/3
782/782 [==============================] - 3s 4ms/step - loss: 0.1190 - categorical_true_positives: 24198.2733
<tensorflow.python.keras.callbacks.History at 0x1fb132cde80>
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Preprocess the data (these are NumPy arrays): flatten each image to 784
# values scaled by 1/255, and store the labels as float32.
x_train = x_train.reshape(60000, 784).astype("float32") / 255
x_test = x_test.reshape(10000, 784).astype("float32") / 255
y_train, y_test = y_train.astype("float32"), y_test.astype("float32")

# Reserve the last 10,000 training samples for validation
x_train, x_val = x_train[:-10000], x_train[-10000:]
y_train, y_val = y_train[:-10000], y_train[-10000:]
# Functional-API MLP: 784 inputs -> 64 ReLU -> 64 ReLU -> 10-way softmax.
inputs = keras.Input(shape=(784,), name="digits")
hidden = layers.Dense(64, activation="relu", name="dense_1")(inputs)
hidden = layers.Dense(64, activation="relu", name="dense_2")(hidden)
outputs = layers.Dense(10, activation="softmax", name="predictions")(hidden)
model = keras.Model(inputs=inputs, outputs=outputs)
class CategoricalTruePositives(keras.metrics.Metric):
    """Running count of samples whose arg-max prediction equals the label.

    Args:
        name: Name under which the metric is reported.
        **kwargs: Forwarded to ``keras.metrics.Metric``.
    """

    def __init__(self, name="categorical_true_positives", **kwargs):
        super().__init__(name=name, **kwargs)
        self.true_positives = self.add_weight(name="ctp", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add the (optionally weighted) number of correct predictions.

        Args:
            y_true: Integer-valued class labels for the batch.
            y_pred: Per-class scores; the predicted class is the arg-max
                over axis 1.
            sample_weight: Optional per-sample weights.
        """
        print(y_true.shape, y_pred.shape)  # For experiment
        # Labels, predictions and weights are all flattened to 1-D so the
        # comparison is element-wise whether the labels arrive as (batch,)
        # or (batch, 1); mixing the two would silently broadcast to
        # (batch, batch).
        labels = tf.reshape(tf.cast(y_true, "int32"), [-1])
        predicted = tf.cast(tf.argmax(y_pred, axis=1), "int32")
        values = tf.cast(tf.equal(labels, predicted), "float32")
        if sample_weight is not None:
            weights = tf.reshape(tf.cast(sample_weight, "float32"), [-1])
            values = tf.multiply(values, weights)
        self.true_positives.assign_add(tf.reduce_sum(values))

    def result(self):
        """Return the accumulated count."""
        return self.true_positives

    def reset_state(self):
        """Reset the count to zero (Keras does this at the start of each epoch)."""
        self.true_positives.assign(0.0)

    # Older Keras releases call `reset_states`; keep that name as an alias.
    reset_states = reset_state
# Train with RMSprop and sparse cross-entropy, reporting the custom metric.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-3)
model.compile(
    optimizer=rmsprop,
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[CategoricalTruePositives()],
)
model.fit(x_train, y_train, batch_size=64, epochs=3)
(1) how often the call() method of tf.keras.losses.Loss and the update_state() method of tf.keras.metrics.Metric gets called during a training:
Both the call() method of tf.keras.losses.Loss and the update_state() method of tf.keras.metrics.Metric are called once per batch (i.e. once per training step), not once per sample.
(2) the dimension of y_true and y_pred passed to those methods:
The dimensions of y_true is same as what you pass in y_train. The only change is, the first dimension of y_train will be no_of samples and in the case of y_true it will be batch_size. In your case it is (64, 1) where 64 is batch_size.
The dimensions of y_pred is the shape of output of the model. In your case it is (64, 10) because you have 10 dense units in final layer.
(3) I would like to know why it is printed only once and only in the first epoch.
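The print statement is not executed on every step because, by default, Keras compiles the training step into a TensorFlow graph: Python's print runs only while the function is being traced (which is also why the batch dimension is shown as None), not each time the graph is executed. Use tf.print to print from inside the graph, or add run_eagerly = True in the model.compile step if you want to execute the TensorFlow code in eager mode.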
(4) I can't print the value of y_true or y_pred. How can I?
Run the code in eager mode.
Code:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Preprocess the data (these are NumPy arrays)
x_train = x_train.reshape(60000, 784).astype("float32") / 255
x_test = x_test.reshape(10000, 784).astype("float32") / 255
y_train = y_train.astype("float32")
y_test = y_test.astype("float32")

# Hold out the final 10,000 training samples as a validation set
x_val, y_val = x_train[-10000:], y_train[-10000:]
x_train, y_train = x_train[:-10000], y_train[:-10000]
# Two 64-unit ReLU layers followed by a 10-way softmax over 784 input features.
inputs = keras.Input(shape=(784,), name="digits")
first_hidden = layers.Dense(64, activation="relu", name="dense_1")(inputs)
second_hidden = layers.Dense(64, activation="relu", name="dense_2")(first_hidden)
outputs = layers.Dense(10, activation="softmax", name="predictions")(second_hidden)
model = keras.Model(inputs=inputs, outputs=outputs)
class CategoricalTruePositives(keras.metrics.Metric):
    """Running count of samples whose arg-max prediction equals the label.

    Args:
        name: Name under which the metric is reported.
        **kwargs: Forwarded to ``keras.metrics.Metric``.
    """

    def __init__(self, name="categorical_true_positives", **kwargs):
        super().__init__(name=name, **kwargs)
        self.true_positives = self.add_weight(name="ctp", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add the (optionally weighted) number of correct predictions.

        Args:
            y_true: Integer-valued class labels for the batch.
            y_pred: Per-class scores; the predicted class is the arg-max
                over axis 1.
            sample_weight: Optional per-sample weights.
        """
        print('update_state', y_true.shape, y_pred.shape)  # For experiment
        # Labels, predictions and weights are all flattened to 1-D so the
        # comparison is element-wise whether the labels arrive as (batch,)
        # or (batch, 1); mixing the two would silently broadcast to
        # (batch, batch).
        labels = tf.reshape(tf.cast(y_true, "int32"), [-1])
        predicted = tf.cast(tf.argmax(y_pred, axis=1), "int32")
        values = tf.cast(tf.equal(labels, predicted), "float32")
        if sample_weight is not None:
            weights = tf.reshape(tf.cast(sample_weight, "float32"), [-1])
            values = tf.multiply(values, weights)
        self.true_positives.assign_add(tf.reduce_sum(values))

    def result(self):
        """Return the accumulated count."""
        return self.true_positives

    def reset_state(self):
        """Reset the count to zero (Keras does this at the start of each epoch)."""
        self.true_positives.assign(0.0)

    # Older Keras releases call `reset_states`; keep that name as an alias.
    reset_states = reset_state
class CustomCallback(tf.keras.callbacks.Callback):
    """Print a marker at the start of each epoch and around each training batch."""

    def on_epoch_begin(self, epoch, logs=None):
        print("Start epoch {} of training".format(epoch))

    def on_train_batch_begin(self, batch, logs=None):
        # `logs` is not inspected: it defaults to None, so calling `.keys()`
        # on it could raise, and the result was never used.
        print("...Training: start of batch {}".format(batch))

    def on_train_batch_end(self, batch, logs=None):
        print("...Training: end of batch {}".format(batch))
# run_eagerly=True makes Keras execute the training step as plain Python, so
# the print() inside update_state runs on every batch.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-3)
model.compile(
    optimizer=rmsprop,
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[CategoricalTruePositives()],
    run_eagerly=True,
)
# verbose=0 silences the progress bar so only the callback output is shown.
model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=3,
    verbose=0,
    callbacks=[CustomCallback()],
)
Output:
Start epoch 0 of training
...Training: start of batch 0
update_state (64, 1) (64, 10)
...Training: end of batch 0
...Training: start of batch 1
update_state (64, 1) (64, 10)
...Training: end of batch 1
...Training: start of batch 2
update_state (64, 1) (64, 10)
...Training: end of batch 2
...Training: start of batch 3
update_state (64, 1) (64, 10)
...Training: end of batch 3
...Training: start of batch 4
update_state (64, 1) (64, 10)
...Training: end of batch 4
...Training: start of batch 5
update_state (64, 1) (64, 10)
...Training: end of batch 5
The above example should make the answer to your question clear.
I am trying to build a simple autoencoder model for image reconstruction using the MNIST dataset.
Now if I run this model, it presents me with accuracy.
'60000/60000 [==============================] - 5s 83us/sample - loss: 0.0373 - accuracy: 0.2034 - val_loss: 0.0368 - val_accuracy: 0.217
'.
but I am not sure how its calculated given that the prediction results itself are an image.
I dug deep till function "sparse_categorical_accuracy" but was not able to reach any conclusion about the formula for accuracy.
latent_dim = 64


class Autoencoder(Model):
    """Dense autoencoder.

    The encoder flattens the input and maps it to ``latent_dim`` ReLU units;
    the decoder maps that code to 784 sigmoid units reshaped to (28, 28).
    """

    def __init__(self, latent_dim):
        super().__init__()
        self.latent_dim = latent_dim
        encoder_layers = [
            layers.Flatten(),
            layers.Dense(latent_dim, activation='relu'),
        ]
        self.encoder = tf.keras.Sequential(encoder_layers)
        decoder_layers = [
            layers.Dense(784, activation='sigmoid'),
            layers.Reshape((28, 28)),
        ]
        self.decoder = tf.keras.Sequential(decoder_layers)

    def call(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.decoder(self.encoder(x))
# Build the autoencoder with the latent size defined above.
autoencoder = Autoencoder(latent_dim)
# NOTE(review): 'accuracy' is a classification metric. Keras resolves the
# string to binary / categorical / sparse-categorical accuracy from the shapes
# of the targets and the model output; for (28, 28) image targets this is
# presumably categorical accuracy (arg-max over the last axis, compared row by
# row) -- confirm. Either way it does not measure reconstruction quality.
autoencoder.compile(optimizer='adam', loss=losses.MeanSquaredError(),
metrics=['accuracy'])
# The input doubles as the target: the model is trained to reproduce x_train.
autoencoder.fit(x_train, x_train,
epochs=10,
shuffle=True,
validation_data=(x_test, x_test))
Here is my code:
dataset: mnist;
model: resnet
based on tensorflow 1.15.0
During ResNet model training, I noticed the following behavior in my experiment: the training loss decreases and the training accuracy increases to around 0.99, but during testing the accuracy on the test data is 0.06, which is very different from the training accuracy.
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
# Print NumPy arrays in full instead of summarising long ones.
np.set_printoptions(threshold=np.inf)
# Fix the NumPy and TensorFlow (graph-level) random seeds.
np.random.seed(2020)
tf.set_random_seed(2020)
class ModelVGG11(object):
    """ResNet-style image classifier on the TF 1.x graph/session API.

    Despite its name, the network built in ``__def_model`` is a small residual
    network (stem convolution, four residual blocks, global average pooling
    and a dense layer), not VGG-11.

    Args:
        data_train: ``tf.data.Dataset`` whose elements are dicts with
            ``'image'`` and ``'label'`` entries; used for training.
        data_test: Dataset with the same structure; used for evaluation.
        session: ``tf.Session`` in which the graph is run.
    """

    def __init__(self, data_train, data_test, session):
        self.data_train = data_train
        self.data_test = data_test
        self.sess = session

        # Fixed hyper-parameters.
        self.num_epochs = 10
        self.num_class = 10
        self.batch_size = 256
        self.learning_rate = 0.01

        # Graph construction. The order matters: each step uses tensors
        # created by the previous ones.
        self.__def_placeholders()
        self.__initial_dataset()
        self.__def_model()
        self.__def_loss()
        self.__def_optimizer()
        self.__def_metrics()

    def __def_placeholders(self):
        """Create the feedable inputs and the global step counter."""
        self.inputs = tf.placeholder(tf.float32, shape=(None, 224, 224, 1), name='model_input')
        self.label = tf.placeholder(tf.int64, shape=(None, ), name='model_label')
        self.is_training = tf.placeholder(tf.bool, shape=(), name='is_training')
        # String handle selecting which iterator `next_batch` reads from.
        self.handle = tf.placeholder(tf.string, shape=())
        self.global_step = tf.Variable(0, trainable=False)

    def __initial_dataset(self):
        """Build both input pipelines and one feedable iterator shared by them."""

        def _resize_example(data):
            # Pixel values are only cast to float32; they are not rescaled.
            image = tf.cast(data['image'], tf.float32)
            image = tf.image.resize_image_with_pad(image, 224, 224)
            return image, data['label']

        self.data_train = (
            self.data_train.map(_resize_example)
            .shuffle(buffer_size=1000)
            .batch(self.batch_size)
            .prefetch(1)
        )
        self.data_test = (
            self.data_test.map(_resize_example)
            .shuffle(buffer_size=1000)
            .batch(self.batch_size)
            .prefetch(1)
        )
        self.iterator_train = self.data_train.make_initializable_iterator()
        self.iterator_test = self.data_test.make_initializable_iterator()
        self.handle_train = self.iterator_train.string_handle()
        self.handle_test = self.iterator_test.string_handle()
        iterator = tf.data.Iterator.from_string_handle(
            self.handle, self.data_train.output_types, self.data_train.output_shapes)
        self.next_batch = iterator.get_next()

    def __res_block(self, inputs, out_channels, conv1x1=False, strides=1, name=None):
        """Residual block: conv-BN-ReLU-conv-BN plus a shortcut, then ReLU.

        Args:
            inputs: Input feature map.
            out_channels: Number of filters of both 3x3 convolutions.
            conv1x1: If true, the shortcut goes through a 1x1 convolution (with
                the same strides) so that its shape matches the main branch.
            strides: Strides of the first convolution (and of the shortcut).
            name: Variable scope for the block.

        Returns:
            The block's output feature map.
        """
        with tf.variable_scope(name):
            outputs = tf.layers.Conv2D(out_channels, kernel_size=3, strides=strides, padding='same', activation=None, kernel_initializer=tf.glorot_normal_initializer)(inputs)
            outputs = tf.layers.BatchNormalization()(outputs, training=self.is_training)
            outputs = tf.nn.relu(outputs)
            outputs = tf.layers.Conv2D(out_channels, kernel_size=3, padding='same', activation=None, kernel_initializer=tf.glorot_normal_initializer)(outputs)
            outputs = tf.layers.BatchNormalization()(outputs, training=self.is_training)
            # change input channels equal to out_channels
            if conv1x1:
                inputs = tf.layers.Conv2D(out_channels, kernel_size=1, strides=strides)(inputs)
            return tf.nn.relu(outputs + inputs)

    def __def_model(self):
        """Build the network and store its logits in ``self.outputs``."""
        with tf.variable_scope('resnet'):
            # [batch, 224, 224, 1]
            inputs = tf.layers.Conv2D(filters=64, kernel_size=7, strides=2, padding='same', activation=None, kernel_initializer=tf.glorot_normal_initializer)(self.inputs)  # [batch, 112, 112, 64]
            inputs = tf.layers.BatchNormalization()(inputs, training=self.is_training)
            inputs = tf.nn.relu(inputs)
            inputs = tf.layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(inputs)  # [batch, 56, 56, 64]
            inputs = self.__res_block(inputs, 64, False, 1, 'res_bolock_1')  # [batch, 56, 56, 64]
            inputs = self.__res_block(inputs, 128, True, 2, 'res_bolock_2')  # [batch, 28, 28, 128]
            inputs = self.__res_block(inputs, 256, True, 2, 'res_bolock_3')  # [batch, 14, 14, 256]
            inputs = self.__res_block(inputs, 512, True, 2, 'res_bolock_4')  # [batch, 7, 7, 512]
            # Global average pooling over the two spatial axes.
            inputs = tf.reduce_mean(inputs, axis=[1, 2])
            outputs = tf.layers.Dense(units=10, activation=None)(inputs)
            self.outputs = outputs

    def __def_loss(self):
        """Define the mean softmax cross-entropy loss over the batch."""
        y_true = tf.cast(tf.one_hot(self.label, self.num_class), tf.float32)
        loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=self.outputs)
        self.loss = tf.reduce_mean(loss)
        tf.summary.scalar('loss', self.loss)

    def __def_optimizer(self):
        """Define the training op, including the batch-norm statistics updates."""
        # Batch normalisation keeps its moving mean/variance up to date through
        # ops placed in the UPDATE_OPS collection. They are not dependencies of
        # the loss, so they must be attached to the train op explicitly;
        # otherwise inference (is_training=False) uses the initial statistics
        # and accuracy collapses even though training accuracy is high.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(
                self.loss, global_step=self.global_step)

    def __def_metrics(self):
        """Define the accuracy of the current batch."""
        # Softmax is monotonic, so the arg-max of the logits is the predicted class.
        y_pred_label = tf.argmax(self.outputs, 1)
        self.acc = tf.reduce_mean(tf.cast(tf.equal(y_pred_label, self.label), tf.float32))
        tf.summary.scalar('acc', self.acc)

    def train_and_evaluate(self):
        """Train for ``num_epochs`` epochs, evaluating on the test set after each.

        After every epoch, prints the mean minibatch training loss/accuracy and
        the accuracy over the whole test set.
        """
        self.sess.run(tf.global_variables_initializer())
        self.sess.run(tf.local_variables_initializer())
        handle_train, handle_test = self.sess.run([self.handle_train, self.handle_test])

        for i in range(self.num_epochs):
            # --- training pass ---
            epoch_train_metrics = []
            self.sess.run(self.iterator_train.initializer)
            while True:
                try:
                    batch_x, batch_y = self.sess.run(self.next_batch, feed_dict={self.handle: handle_train})
                except tf.errors.OutOfRangeError:
                    break
                loss, acc, _ = self.sess.run(
                    [self.loss, self.acc, self.optimizer],
                    feed_dict={self.inputs: batch_x, self.label: batch_y, self.is_training: True})
                epoch_train_metrics.append((loss, acc))

            # --- evaluation pass ---
            # Accuracy is accumulated over every test batch, weighted by batch
            # size; reporting only the last (possibly very small) batch gives a
            # noisy, misleading number.
            correct = 0.0
            seen = 0
            self.sess.run(self.iterator_test.initializer)
            while True:
                try:
                    batch_x_test, batch_y_test = self.sess.run(self.next_batch, feed_dict={self.handle: handle_test})
                except tf.errors.OutOfRangeError:
                    break
                batch_acc = self.sess.run(
                    self.acc,
                    feed_dict={self.inputs: batch_x_test, self.label: batch_y_test, self.is_training: False})
                correct += batch_acc * len(batch_y_test)
                seen += len(batch_y_test)
            test_acc = correct / seen if seen else float('nan')

            print('epoch {} train minibatch loss and acc: {}, test acc: {}'.format(
                i + 1, np.mean(epoch_train_metrics, axis=0), test_acc))
if __name__ == "__main__":
    # Load MNIST, then build and run the model inside a single session.
    splits = tfds.load('mnist')
    train_split, test_split = splits['train'], splits['test']
    with tf.Session() as sess:
        ModelVGG11(train_split, test_split, sess).train_and_evaluate()
And here is the result:
epoch 1 train minibatch loss and acc: [0.45032835 0.8764905 ], test minibatch acc: 0.0
epoch 2 train minibatch loss and acc: [0.06525008 0.9811669 ], test minibatch acc: 0.0
epoch 3 train minibatch loss and acc: [0.04049642 0.9874501 ], test minibatch acc: 0.125
epoch 4 train minibatch loss and acc: [0.02956291 0.99075246], test minibatch acc: 0.0
epoch 5 train minibatch loss and acc: [0.02403079 0.99252546], test minibatch acc: 0.0625
epoch 6 train minibatch loss and acc: [0.02128655 0.9933344 ], test minibatch acc: 0.0625
epoch 7 train minibatch loss and acc: [0.01614667 0.9947141 ], test minibatch acc: 0.0625
epoch 8 train minibatch loss and acc: [0.01534516 0.99461436], test minibatch acc: 0.0625
epoch 9 train minibatch loss and acc: [0.01119067 0.9964262 ], test minibatch acc: 0.125
epoch 10 train minibatch loss and acc: [0.0108306 0.9965314], test minibatch acc: 0.0625
I just want to know why this happen? Is there any error in my code?
I am trying to freeze some of the weights of a layer by setting them to a specific value in Keras. How can I achieve this without moving weights to CPU ?
I checked similar questions such as modify layer weights in keras and modify layer parameters in keras
The answers suggest using `get_weights()` and `set_weights()`; however, those functions move the weights between the GPU and the CPU.
I created a custom Lambda layer and modified `model.trainable_weights` inside that layer; however, the weights are not updated.
I used the TensorFlow advanced tutorial and just added a custom Lambda layer that multiplies the weights by zero.
Colab notebook with same code
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Lambda
from tensorflow.keras import Model
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
# Add a channels dimension. The division above yields float64, so the arrays
# are cast to float32 to match the dtype of the Keras layers.
x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")

# Batched datasets consumed by the training loop further down, which iterates
# over `train_ds` and `test_ds`.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
def antirectifier(x):
    """Zero every trainable weight of the global ``model``; return ``x`` unchanged.

    The variables are overwritten with ``Variable.assign``. Storing a new
    tensor in the list returned by ``model.trainable_weights`` only rebinds a
    slot of that list and leaves the variables themselves untouched. Using
    ``assign`` also avoids the ``get_weights()`` / ``set_weights()`` round
    trip through NumPy arrays.
    """
    for weight in model.trainable_weights:
        weight.assign(tf.zeros_like(weight))
    return x
class MyModel(Model):
    """Conv -> flatten -> dense(128) -> masking Lambda -> dense(10, softmax)."""

    def __init__(self):
        super().__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10, activation='softmax')
        # Lambda layer whose function is run on every forward pass.
        self.mask = Lambda(antirectifier)

    def call(self, x):
        """Run the forward pass and return the class probabilities."""
        features = self.d1(self.flatten(self.conv1(x)))
        return self.d2(self.mask(features))
# Model, loss and optimizer used by the custom training loop.
model = MyModel()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Running metrics, accumulated over an epoch: one loss/accuracy pair for
# training and one for testing.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='test_accuracy')
# tf.function (this line is only a comment, so the step is not wrapped)
def train_step(images, labels):
    """Run one optimisation step on a batch and update the training metrics."""
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = loss_object(labels, predictions)

    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    train_loss(loss)
    train_accuracy(labels, predictions)
# tf.function (this line is only a comment, so the step is not wrapped)
def test_step(images, labels):
    """Evaluate one batch and update the test metrics (no weight update)."""
    predictions = model(images)
    batch_loss = loss_object(labels, predictions)

    test_loss(batch_loss)
    test_accuracy(labels, predictions)
EPOCHS = 5

for epoch in range(EPOCHS):
    # One full pass over the training data, then one over the test data.
    for images, labels in train_ds:
        train_step(images, labels)

    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    summary = template.format(
        epoch + 1,
        train_loss.result(),
        train_accuracy.result() * 100,
        test_loss.result(),
        test_accuracy.result() * 100,
    )
    print(summary)

    # Reset the metrics for the next epoch
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_states()
Since the weights are multiplied by zero, the accuracy should be low. However, the weights are not changed.