keras custom loss got wrong output when using Bayesian layer - tensorflow

A Keras custom loss function reports the wrong value:
When I use a Bayesian layer (tensorflow_probability.layers.DenseFlipout) together with my custom loss function, the reported loss is wrong. But if I replace the Bayesian layer with a traditional tf.keras.layers.Dense layer, the output is correct. Can anybody help me?
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
train, valid, test = mnist_data.read_data_sets('~/code/Python')
num_classes = 10
from tensorflow import keras
import tensorflow_probability as tfp
model = keras.Sequential()
#model.add(keras.layers.Dense(10, activation = 'softmax', input_shape=(784,)))
model.add(tfp.layers.DenseFlipout(10, activation = 'softmax', input_shape=(784,)))
sgd = keras.optimizers.SGD(lr=.1, momentum=0.9, nesterov=True)
def my_loss(y_true, y_pred):
    """Return the mean squared error between `y_true` and `y_pred`.

    The squared difference is averaged over every element with
    `tf.reduce_mean`.

    NOTE(review): when the model contains `tfp.layers.DenseFlipout`, Keras
    presumably adds the layer's KL-divergence regularization terms
    (`model.losses`) to the loss it reports, so the logged value would be
    larger than this function's output alone -- confirm against the
    TensorFlow Probability documentation.
    """
    return tf.reduce_mean((y_true - y_pred) ** 2)
model.compile(loss=my_loss, optimizer=sgd, metrics=['accuracy'])
x_train, y_train = train.images, train.labels
x_test, y_test = test.images, test.labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model.fit(x_train, y_train,
batch_size=128,
epochs=10,
validation_data=(x_test, y_test),
shuffle=True)

Related

Why are the weights of my QAT tf_model are floats and not 8-bit Integers?

I performed a simple Quantization Aware Training with Tensorflow on MNIST as follows:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
# Load MNIST dataset
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Normalize the input image so that each pixel value is between 0 to 1.
train_images = train_images / 255.0
test_images = test_images / 255.0
# Define the model architecture.
model = keras.Sequential([
keras.layers.InputLayer(input_shape=(28, 28)),
keras.layers.Reshape(target_shape=(28, 28, 1)),
keras.layers.Conv2D(filters=12, kernel_size=(3, 3)),
keras.layers.Activation('relu'),
keras.layers.MaxPooling2D(pool_size=(2, 2)),
keras.layers.Flatten(),
keras.layers.Dense(10)
])
# Train the digit classification model
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
model.fit(
train_images,
train_labels,
epochs=5,
validation_split=0.1,
)
import tensorflow_model_optimization as tfmot
# Wrap the already-trained model for quantization-aware training (QAT).
quantize_model = tfmot.quantization.keras.quantize_model
# q_aware stands for quantization aware.
# NOTE(review): QAT presumably only simulates quantization during training, so
# the Keras weights stay float32 until the model is converted (e.g. with the
# TFLite converter) -- confirm against the tensorflow_model_optimization docs.
q_aware_model = quantize_model(model)
# `quantize_model` requires a recompile.
q_aware_model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
# Fine-tune on the first 1000 training samples only.
train_images_subset = train_images[0:1000] # out of 60000
train_labels_subset = train_labels[0:1000]
q_aware_model.fit(train_images_subset, train_labels_subset,
batch_size=500, epochs=5, validation_split=0.1)
However, when I inspect the weights of the quantized model using, for instance, q_aware_model.get_weights()[5], I get an array of type float32. I expected 8-bit integers; what am I doing wrong?

When should we use the tf.function decorator?

I'm trying to boost the performance of a simple 2NN. Here is the code:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.datasets import mnist
from tensorflow import keras
import tensorflow as tf
# load Mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data(path='mnist.npz')
X_train = X_train.reshape(60000, 784).astype('float32') / 255
X_test = X_test.reshape(10000, 784).astype('float32') / 255
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
# configure the model
model = Sequential()
model.add(Dense(200, activation='relu', input_shape=(784,)))
model.add(Dense(200, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.1), metrics=['accuracy'])
# train and evaluate the model
model.fit(X_train, y_train, batch_size=128, epochs=20, verbose=1, validation_data=(X_test, y_test))
model.evaluate(X_test, y_test)
Now, I wonder whether there is a case for using the @tf.function decorator here, and if it is needed, how?
Your code only uses built-in functions and classes, so there is no need to use the @tf.function decorator. @tf.function is basically used to convert a normal function into a TensorFlow graph, as mentioned here. Since you are only using the built-in modules and functions, they are already treated as a graph by the TF compiler.

Tensorboard graph messy (readvariableop_resource nodes) using tf.summary.trace_export

The code below builds two TensorBoard graphs for the same model. The Keras API builds a nice, simple graph, whereas tf.summary.trace_export() adds, for each variable defined in the graph, a node in the external scope with the suffix "readvariableop_resource", which makes the graph really messy as the number of parameters increases.
(In the example below we have 2 dense layers, each with 2 variables (kernel and bias), for a total of 4 variables (4 nodes).)
from datetime import datetime
import tensorflow as tf
from tensorflow import keras
# Define the model.
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=(28, 28)),
keras.layers.Dense(32, activation='relu'),
keras.layers.Dropout(0.2),
keras.layers.Dense(10, activation='softmax')
])
model.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
(train_images, train_labels), _ = keras.datasets.mnist.load_data()
train_images = train_images / 255.0
# Define the Keras TensorBoard callback.
logdir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
# Train the model.
model.fit(
train_images,
train_labels,
batch_size=64,
epochs=1,
callbacks=[tensorboard_callback])
@tf.function
def traceme(x):
    """Call `model` on `x` from inside a `tf.function`.

    Wrapping the call in `tf.function` lets it be recorded as a graph while
    `tf.summary.trace_on(graph=True)` is active.
    """
    return model(x)
# Write the traced graph to its own timestamped log directory.
logdir = "logs/fit1/" + datetime.now().strftime("%Y%m%d-%H%M%S")
writer = tf.summary.create_file_writer(logdir)
# Start recording the graph before the traced call is made.
tf.summary.trace_on(graph=True)
# Forward pass
traceme(tf.zeros((1, 28, 28, 1)))
with writer.as_default():
    # Export the recorded trace through `writer`.
    tf.summary.trace_export(name="model_trace", step=0)

How to freeze weights in certain layer with Keras?

I am trying to freeze the weights of certain layer in a prediction model with Keras and mnist dataset, but it does not work. The code is like:
from keras.layers import Dense, Flatten
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.datasets import mnist
from keras.losses import categorical_crossentropy
import numpy as np
def load_data():
    """Load MNIST, rescale the images, and one-hot encode the labels.

    Returns:
        A tuple `(x_train, y_train, x_test, y_test)`. The images are cast to
        float32 and divided by 255; the labels are passed through
        `to_categorical` with 10 classes.
    """
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
    y_train = to_categorical(y_train, num_classes=10)
    y_test = to_categorical(y_test, num_classes=10)
    return x_train, y_train, x_test, y_test
def run():
    """Build, train, and evaluate a dense MNIST classifier.

    The model flattens 28x28 inputs and stacks three named `Dense` layers
    ('dense1', 'dense2', 'dense3'). It is trained for 5 epochs and then
    evaluated on the test split; the evaluation result is printed.

    Returns:
        The trained `Sequential` model.
    """
    x_train, y_train, x_test, y_test = load_data()
    model = Sequential([Flatten(input_shape=(28, 28)),
                        Dense(300, name='dense1', activation='relu'),
                        Dense(100, name='dense2', activation='relu'),
                        Dense(10, name='dense3', activation='softmax')])
    model.trainable = True
    model.compile(optimizer='Adam',
                  metrics=['accuracy'],
                  loss=categorical_crossentropy)
    print(model.summary())
    model.fit(x_train, y_train, epochs=5, verbose=2)
    print(model.evaluate(x_test, y_test))
    return model
def freeze(model):
    """Mark layer 'dense1' non-trainable, retrain, and report if its kernel changed.

    Args:
        model: a compiled model containing a layer named 'dense1'.

    NOTE(review): the model is not recompiled after `trainable` is set to
    False. Keras presumably only honours a changed `trainable` flag after
    `model.compile(...)` is called again, which would explain the weights
    still changing -- confirm against the Keras documentation.
    """
    x_train, y_train, x_test, y_test = load_data()
    name = 'dense1'
    weightsAndBias = model.get_layer(name=name).get_weights()
    # freeze the weights of this layer
    model.get_layer(name=name).trainable = False
    # record the weights before retrain
    weights_before = weightsAndBias[0]
    # retrain
    model.fit(x_train, y_train, verbose=2, epochs=1)
    weights_after = model.get_layer(name=name).get_weights()[0]
    if (weights_before == weights_after).all():
        print('the weights did not change!!!')
    else:
        print('the weights changed!!!!')
if __name__ == '__main__':
    # Train the model first, then test whether freezing 'dense1' holds.
    model = run()
    freeze(model)
The program outputs 'the weights changed!!!!'.
I do not understand why the weights of the layer named 'dense1' changes after setting model.get_layer(name=name).trainable = False.
You can do it by using:
# Build a two-layer model: the first layer is frozen, the second is trainable.
model=Sequential()
layer=Dense(64,init='glorot_uniform',input_shape=(784,))
# Set `trainable` before the layer is added and before compile() is called.
layer.trainable=False
model.add(layer)
layer2=Dense(784, activation='sigmoid',init='glorot_uniform')
layer2.trainable=True
model.add(layer2)
# NOTE(review): 'relu' is normally an activation name, not a loss --
# presumably a placeholder. `sgd` is not defined in this snippet. Confirm
# both before running.
model.compile(loss='relu', optimizer=sgd,metrics = ['mae'])
You need to compile the graph after setting 'trainable'.
more info here
Let me keep my layers frozen up to the 5th layer; the rest I will keep trainable.
Here is simpler and more efficient code:
# Freeze the first five layers; leave every later layer trainable.
for layer in model.layers[:5]:
    layer.trainable = False
for layer in model.layers[5:]:
    layer.trainable = True

student-teacher model in keras

I'm converting the student-teacher model at the URL below to a Keras one.
https://github.com/chengshengchan/model_compression/blob/master/teacher-student.py
How can I give input to two model(student, teacher) and get one output from only student in keras?
I'll set teacher's all tensors with trainable=false, and loss function as difference between student and teacher's output like below :
tf_loss = tf.nn.l2_loss(teacher - student)/batch_size
As far as I know, model.fit accepts input for only one model. But in this case, I need to feed the input to both the teacher and the student model.
Thank in advance!
Below is very simple student-teacher model in keras.
I hope it might be helpful to someone like me.
Good job!
import keras
from keras.datasets import mnist
from keras.layers import Input, Embedding, LSTM, Dense, Lambda
from keras.models import Model
import numpy as np
from keras.utils import np_utils
from keras.layers.core import Dense, Dropout, Activation
nb_classes = 10
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)
from keras.models import Sequential
from keras.layers import Dense, Merge
from keras.optimizers import SGD, Adam, RMSprop
batch_size = 128
nb_classes = 10
nb_epoch = 3
teacher = Sequential()
teacher.add(Dense(10, input_shape=(784,)))
teacher.add(Dense(10))
teacher.add(Activation('softmax'))
teacher.summary()
teacher.compile(loss='categorical_crossentropy',
optimizer=RMSprop(),
metrics=['accuracy'])
history = teacher.fit(X_train, Y_train,
batch_size=batch_size, nb_epoch=nb_epoch,
verbose=1, validation_data=(X_test, Y_test))
score = teacher.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
# Mark every teacher layer as non-trainable.
for layer in teacher.layers:
    layer.trainable = False
Y_train = np.zeros((60000, 10))
student = Sequential()
student.add(Dense(10, input_dim=784))
student.add(Activation('softmax'))
student.compile(loss='mean_squared_error', optimizer='Adam', metrics=['accuracy'])
from keras.layers import *
def negativeActivation(x):
    """Return the negation of `x` (i.e. `-x`)."""
    return -x
# Negate the student's output so that adding it to the teacher's output
# yields (teacher - student).
negativeRight = Activation(negativeActivation)(student.output)
diff = Add()([teacher.output, negativeRight])
# Two-input model whose single output is the teacher/student difference.
model = Model(inputs=[teacher.input, student.input], outputs=[diff])
model.compile(loss='mean_squared_error', optimizer='Adam', metrics=['acc'])
model.summary(line_length=150)
# Y_train was replaced by zeros earlier in this script, so the MSE target
# for the difference is zero.
model.fit([X_train, X_train], [Y_train], batch_size=128, nb_epoch=5)
# Use the function-call form of print, matching the earlier print(...) calls.
print(student.evaluate(X_test, Y_test))
The only implementation I have seen in Keras involves building 2 separate functions which either widen or deepen weight layers from the teacher model as initial weights for the student model.
I am not sure if it is precisely that Hinton et al. (2015) distillation to be honest, but it is teacher-student.
https://github.com/fchollet/keras/issues/3491