TensorFlow Keras: "no gradients provided" error when using MSE - tensorflow

I am trying the code from the TensorFlow guide "Writing a training loop from scratch" with a few changes of my own. I changed the loss function from SparseCategoricalCrossentropy to MeanSquaredError, and I also changed the model architecture by adding a new Lambda layer for the loss calculation. However, I get a ValueError saying that no gradients are provided for any variable. Is there any way to make the code run with MSE?
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

inputs = keras.Input(shape=(784,), name="digits")
x1 = layers.Dense(64, activation="relu")(inputs)
x2 = layers.Dense(64, activation="relu")(x1)
outputs = layers.Dense(10, name="predictions")(x2)
final_outputs = layers.Lambda(lambda x: tf.math.argmax(x, axis=-1))(outputs)
model = keras.Model(inputs=inputs, outputs=final_outputs)

# Instantiate an optimizer.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
# Instantiate a loss function.
loss_fn = keras.losses.MeanSquaredError()

# Prepare the training dataset.
batch_size = 64
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = np.reshape(x_train, (-1, 784))
x_test = np.reshape(x_test, (-1, 784))

# Reserve 10,000 samples for validation.
x_val = x_train[-10000:]
y_val = y_train[-10000:]
x_train = x_train[:-10000]
y_train = y_train[:-10000]

# Prepare the training dataset.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

# Prepare the validation dataset.
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(batch_size)

epochs = 2
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)
            loss_value = loss_fn(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

The argmax op is not differentiable, so no gradient can flow back through that Lambda layer. To use integer labels with an MSE loss, make sure your labels y_train and y_val are integer class ids (argmax converts them if they are one-hot encoded):
y_train = np.argmax(y_train, axis=-1)
y_val = np.argmax(y_val, axis=-1)
and adjust the output layer so it produces a single value to regress against the integer label:
outputs = layers.Dense(1, name="predictions")(x2)
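Putting the answer together with the original loop, a minimal sketch of a working MSE version (assuming the integer MNIST labels from keras.datasets.mnist.load_data() are used directly as regression targets, and the non-differentiable argmax Lambda is dropped) could look like this:

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Model without the argmax Lambda; a single linear output unit is regressed
# against the integer class label.
inputs = keras.Input(shape=(784,), name="digits")
x1 = layers.Dense(64, activation="relu")(inputs)
x2 = layers.Dense(64, activation="relu")(x1)
outputs = layers.Dense(1, name="predictions")(x2)
model = keras.Model(inputs=inputs, outputs=outputs)

optimizer = keras.optimizers.SGD(learning_rate=1e-3)
loss_fn = keras.losses.MeanSquaredError()

(x_train, y_train), _ = keras.datasets.mnist.load_data()
x_train = np.reshape(x_train, (-1, 784)).astype("float32") / 255.0
y_train = y_train.astype("float32").reshape(-1, 1)  # integer ids as float targets

train_dataset = (tf.data.Dataset.from_tensor_slices((x_train, y_train))
                 .shuffle(buffer_size=1024).batch(64))

for epoch in range(2):
    for x_batch, y_batch in train_dataset:
        with tf.GradientTape() as tape:
            preds = model(x_batch, training=True)
            loss_value = loss_fn(y_batch, preds)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))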

Related

How to visualize the network graph in tensorflow 1.15 with Eager mode using tensorboard?

Hi, I want to visualize a neural network in eager mode in TF 1.15 (I cannot switch to 2.0.0). The implementation is based on the low-level API of TensorFlow 1.15, and I want to use TensorBoard to visualize it.
I wrote log-tracing code but get this error:
WARNING:tensorflow:
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
* https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
* https://github.com/tensorflow/addons
* https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.
Traceback (most recent call last):
File "/home/frank/PycharmProjects/reconstruction_NN/my_test.py", line 78, in <module>
tf.contrib.summary.trace_on(graph=True, profiler=True)
AttributeError: module 'tensorflow.contrib.summary.summary' has no attribute 'trace_on'
Environment information (required)
tensorboard 1.15.0
tensorflow-estimator 1.15.1
tensorflow-gpu 1.15.0
Ubuntu 16.04
Issue description
Code:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Dropout
from tensorflow.keras import Model

tf.compat.v1.enable_eager_execution()
print(tf.__version__)
print(tf.executing_eagerly())

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

batch_size = 32
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(batch_size)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.dropout = Dropout(0.5)
        self.d2 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.flatten(x)
        x = self.d1(x)
        x = self.dropout(x)
        return self.d2(x)

model = MyModel()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

#tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

#tf.function
def test_step(images, labels):
    predictions = model(images)
    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

EPOCHS = 5

from datetime import *
stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = 'logs/func/%s' % stamp
writer = tf.contrib.summary.create_file_writer(logdir)
tf.summary.trace_on(graph=True, profiler=True)

for epoch in range(EPOCHS):
    for images, labels in train_ds:
        train_step(images, labels)
    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)
    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch + 1, train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(),
                          test_accuracy.result() * 100))

with writer.as_default():
    tf.summary.trace_export(
        name="my_func_trace",
        step=0,
        profiler_outdir=logdir)
The warning appears because you are accessing the tf.contrib namespace, which is deprecated. The documentation specifies that you should create the writer as
writer = tf.summary.create_file_writer(logdir)
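For TF 1.15 with eager execution enabled, a minimal sketch of the same tracing flow without tf.contrib, assuming the TF2-style summary API is exposed under tf.compat.v2 in this release, might look like this:

import tensorflow as tf
from datetime import datetime

tf.compat.v1.enable_eager_execution()

stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = 'logs/func/%s' % stamp

# TF2-style summary writer, assumed to be available as tf.compat.v2.summary in 1.15.
writer = tf.compat.v2.summary.create_file_writer(logdir)
tf.compat.v2.summary.trace_on(graph=True, profiler=True)

# ... run the training steps here; graph tracing only records something
# if the step functions are wrapped with tf.function ...

with writer.as_default():
    tf.compat.v2.summary.trace_export(
        name="my_func_trace",
        step=0,
        profiler_outdir=logdir)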

Why does it complain 'images' variable not available?

I am running the TF2 tutorial and copied the code exactly into a .py file, then ran it in PyCharm, but got this error message:
Testing started at 12:50 AM ...
/home/martin/nlp/my-env/tf/bin/python /home/martin/.local/share/JetBrains/Toolbox/apps/PyCharm-C/ch-0/193.5233.109/plugins/python-ce/helpers/pycharm/_jb_pytest_runner.py --path /home/martin/tf2-tutorial/cnn_mnist.py
Launching pytest with arguments /home/martin/tf2-tutorial/cnn_mnist.py in /home/martin/tf2-tutorial
============================= test session starts ==============================
platform linux -- Python 3.7.1, pytest-5.3.1, py-1.8.0, pluggy-0.13.1 -- /home/martin/nlp/my-env/tf/bin/python
cachedir: .pytest_cache
rootdir: /home/martin/tf2-tutorial
collecting ... collected 1 item
cnn_mnist.py::test2_step ERROR [100%]
test setup failed
file /home/martin/tf2-tutorial/cnn_mnist.py, line 60
#tf.function
def test_step(images, labels):
E fixture 'images' not found
> available fixtures: cache, capfd, capfdbinary, caplog, capsys, capsysbinary, doctest_namespace, monkeypatch, pytestconfig, record_property, record_testsuite_property, record_xml_attribute, recwarn, tmp_path, tmp_path_factory, tmpdir, tmpdir_factory
> use 'pytest --fixtures [testpath]' for help on them.
Why does it think this is a pytest program? And why does it issue this error message? The tutorial should work "as is".
The code copied from the tutorial is below (an exact copy):
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Add a channels dimension
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

# Create an instance of the model
model = MyModel()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

#tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

#tf.function
def test_step(images, labels):
    predictions = model(images)
    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

EPOCHS = 5

for epoch in range(EPOCHS):
    for images, labels in train_ds:
        train_step(images, labels)
    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch+1,
                          train_loss.result(),
                          train_accuracy.result()*100,
                          test_loss.result(),
                          test_accuracy.result()*100))

    # Reset the metrics for the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()
Could this be a PyCharm environment issue? It has been working fine until now.
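What the output shows is the file being collected as a test module: PyCharm launched it through its pytest runner (_jb_pytest_runner.py), pytest picks up any function whose name starts with test_, and it then tries to supply that function's parameters as fixtures, hence "fixture 'images' not found". As a sketch of two ways around this (the renamed function below is a hypothetical example), either avoid the test_ prefix or run the file with a plain Python run configuration:

# Option 1: rename the function so pytest does not collect it as a test.
def eval_step(images, labels):
    predictions = model(images)
    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

# Option 2: keep the name, but use a regular "Python" run configuration in
# PyCharm instead of the pytest runner, or run it from a shell:
#   python cnn_mnist.py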

Inconsistent results when running the same neural network in TensorFlow vs Keras

I created two identical neural networks for the MNIST dataset, one using TensorFlow and one using Keras. After 10 epochs, the Keras model reaches over 96% accuracy, while the TensorFlow model reaches only about 70%.
I have tested the code below on other datasets as well, and the TensorFlow version achieved much lower accuracy on all of them in a like-for-like parameter comparison.
Keras Code:
import warnings
warnings.filterwarnings('ignore')
import keras
from keras.datasets import mnist
# Loading MNIST
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Converting the y-value column to an array of classes (one-hot encoding)
from keras.utils import np_utils
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
# Changing the shape of input images and normalizing
x_train = x_train.reshape((60000, 784))
x_train = x_train.astype('float32') / 255
x_test = x_test.reshape((10000, 784))
x_test = x_test.astype('float32') / 255
# Making the neural network
from keras.models import Sequential
from keras.layers import Dense, Activation
model = Sequential()
model.add(Dense(30, input_dim=784, kernel_initializer='normal', activation='relu'))
model.add(Dense(30, kernel_initializer='normal', activation='relu'))
model.add(Dense(10, kernel_initializer='normal', activation='softmax'))
from keras.optimizers import Adam
optimizer = Adam()
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])
# Training and showing the results
model.fit(x_train, y_train, epochs=10, batch_size=200, validation_data=(x_test, y_test), verbose=1)
TensorFlow Code:
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
#Loading MNIST
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Epochs parameters
epochs = 10
batch_size = 200
# Neural network parameters
n_input = 784
n_hidden_1 = 30
n_hidden_2 = 30
n_classes = 10
# Placeholders x, y
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
# Creating the first layer
w1 = tf.Variable(tf.random_normal([n_input, n_hidden_1]))
b1 = tf.Variable(tf.random_normal([n_hidden_1]))
layer_1 = tf.nn.relu(tf.add(tf.matmul(x,w1),b1))
# Creating the second layer
w2 = tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2]))
b2 = tf.Variable(tf.random_normal([n_hidden_2]))
layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1,w2),b2))
# Creating the output layer
w_out = tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
bias_out = tf.Variable(tf.random_normal([n_classes]))
output = tf.add(tf.matmul(layer_2, w_out), bias_out)
# Loss function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = output, labels = y))
# Optimizer
optimizer = tf.train.AdamOptimizer().minimize(cost)
# Making predictions
predictions = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(predictions, tf.float32))
# Initializing the variables
init = tf.global_variables_initializer()
# Opening the session
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(epochs):
        avg_cost = 0.0
        total_batches = int(mnist.train.num_examples / batch_size)
        # Loop through all batch iterations
        for i in range(total_batches):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Fit training
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            # Computing the average cost of a complete epoch
            avg_cost += sess.run(cost, feed_dict={x: batch_x, y: batch_y}) / total_batches
        # Running accuracy (with test data) on each epoch
        accuracy_test = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels})
        # Showing results after each epoch
        print("Epoch: ", "{},".format((epoch + 1)), "Average cost = ", "{:.3f}".format(avg_cost))
        print("Accuracy Test = ", "{:.3f}".format(accuracy_test))
    print("Training completed!")
    print("Model Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
Could anyone help me understand what may be causing this divergence?
Normalization drastically improves a model's accuracy.
In the Keras code you normalize the data, but I can't find any normalization in the TensorFlow code; or perhaps the MNIST data in TensorFlow is already normalized at the source?
Please verify that the MNIST data in the TensorFlow code is also normalized.
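As a quick check against the TensorFlow code above (a sketch, not a change to the model itself), inspect the value range returned by input_data.read_data_sets; if the maximum is already 1.0 the data is normalized at the source, otherwise rescale the batches the same way the Keras code does:

from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Values in [0.0, 1.0] mean the data is already normalized.
print(mnist.train.images.min(), mnist.train.images.max())

# If the values turn out to be in [0, 255], rescale before feeding:
# batch_x = batch_x / 255.0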

How to modify weights of Keras layers?

I am trying to freeze some of the weights of a layer by setting them to a specific value in Keras. How can I achieve this without moving the weights to the CPU?
I checked similar questions such as "modify layer weights in keras" and "modify layer parameters in keras".
The answers suggest using get_weights() and set_weights(); however, those functions move the weights between CPU and GPU.
I created a custom Lambda layer and modified model.trainable_weights inside that layer, but the weights are not updated.
I used the TF advanced tutorial and just added a custom Lambda layer that multiplies the weights by zero.
Colab notebook with the same code:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Lambda
from tensorflow.keras import Model

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Add a channels dimension
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

# Datasets (as in the tutorial; needed for the training loop below)
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

def antirectifier(x):
    for i, w in enumerate(model.trainable_weights):
        model.trainable_weights[i] = tf.multiply(w, 0)
    return x

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10, activation='softmax')
        self.mask = Lambda(antirectifier)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        x = self.mask(x)
        return self.d2(x)

# Create an instance of the model
model = MyModel()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

#tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

#tf.function
def test_step(images, labels):
    predictions = model(images)
    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

EPOCHS = 5

for epoch in range(EPOCHS):
    for images, labels in train_ds:
        train_step(images, labels)
    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch+1,
                          train_loss.result(),
                          train_accuracy.result()*100,
                          test_loss.result(),
                          test_accuracy.result()*100))

    # Reset the metrics for the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()
Since the weights are multiplied by zero, the accuracy should drop. However, the weights are not changed.
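A note on why the Lambda approach has no visible effect: assigning tf.multiply(w, 0) to model.trainable_weights[i] only replaces an element of the Python list returned by that property; it never writes into the underlying tf.Variable. A minimal sketch of an in-place alternative (the helper below is hypothetical), assuming the goal is to zero out specific variables on whatever device they live on, is to call assign() on the variables themselves, which avoids the get_weights()/set_weights() round trip through NumPy:

def zero_out_layer(layer):
    # Overwrite each trainable variable of the layer in place, on its own device.
    for var in layer.trainable_variables:
        var.assign(tf.zeros_like(var))

# Example usage once the model has been built (e.g. after one forward pass):
# zero_out_layer(model.d1)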

Keras model failed to learn anything after changing to use tf.data api

I was trying to convert a simple Keras model to use the tf.data API for data loading, but somehow the accuracy stays at about 10% for the whole 10 epochs.
In comparison, the original code without the tf.data API easily reaches about 98% accuracy. Did I do anything wrong?
The version using the tf.data API:
import math
import tensorflow as tf
import numpy as np

batch_size = 32

def load_data():
    mnist = tf.keras.datasets.mnist
    (train_data, train_label), (validation_data, validation_label) = mnist.load_data()
    train_data, validation_data = train_data / 255.0, validation_data / 255.0
    train_label = train_label.astype(np.float32)
    return train_data, train_label

def build_model():
    class MyModel(tf.keras.Model):
        def __init__(self):
            super(MyModel, self).__init__(name='my_model')
            self.flatten = tf.keras.layers.Flatten()
            self.dense_1 = tf.keras.layers.Dense(512, activation=tf.nn.relu)
            self.dropout = tf.keras.layers.Dropout(0.2)
            self.dense_2 = tf.keras.layers.Dense(10, activation=tf.nn.softmax)

        def call(self, inputs):
            x = self.flatten(inputs)
            x = self.dense_1(x)
            x = self.dropout(x)
            y = self.dense_2(x)
            return y

    model = MyModel()
    model.compile(optimizer=tf.train.AdamOptimizer(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

train_data, train_label = load_data()
train_sample_count = len(train_data)

train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_label))
train_dataset = train_dataset.batch(batch_size)
train_dataset = train_dataset.repeat()

model = build_model()
model.fit(
    train_dataset,
    epochs=10,
    steps_per_epoch=math.ceil(train_sample_count/batch_size)
)
The version without using the tf.data API:
# load_data and build_model are exactly the same as in the tf.data API version
train_data, train_label = load_data()
model = build_model()
model.fit(
    train_data,
    train_label,
    epochs=10
)
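One concrete difference between the two versions (a sketch of a possible factor, not a confirmed diagnosis): model.fit with NumPy arrays shuffles the training data every epoch by default, while the tf.data pipeline above only batches and repeats. Adding an explicit shuffle step would look like this:

train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_label))
train_dataset = train_dataset.shuffle(buffer_size=train_sample_count)  # added shuffle step
train_dataset = train_dataset.batch(batch_size)
train_dataset = train_dataset.repeat()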