I am trying to freeze the weights of certain layer in a prediction model with Keras and mnist dataset, but it does not work. The code is like:
from keras.layers import Dense, Flatten
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.datasets import mnist
from keras.losses import categorical_crossentropy
import numpy as np
def load_data():
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)
return x_train, y_train, x_test, y_test
def run():
x_train, y_train, x_test, y_test = load_data()
model = Sequential([Flatten(input_shape=(28, 28)),
Dense(300, name='dense1', activation='relu'),
Dense(100, name='dense2', activation='relu'),
Dense(10, name='dense3', activation='softmax')])
model.trainable = True
model.compile(optimizer='Adam',
metrics=['accuracy'],
loss=categorical_crossentropy)
print(model.summary())
model.fit(x_train, y_train, epochs=5, verbose=2)
print(model.evaluate(x_test, y_test))
return model
def freeze(model):
x_train, y_train, x_test, y_test = load_data()
name = 'dense1'
weightsAndBias = model.get_layer(name=name).get_weights()
# freeze the weights of this layer
model.get_layer(name=name).trainable = False
# record the weights before retrain
weights_before = weightsAndBias[0]
# retrain
model.fit(x_train, y_train, verbose=2, epochs=1)
weights_after = model.get_layer(name=name).get_weights()[0]
if (weights_before == weights_after).all():
print('the weights did not change!!!')
else:
print('the weights changed!!!!')
if __name__ == '__main__':
model = run()
freeze(model)
The program outputs 'the weights changed!!!!'.
I do not understand why the weights of the layer named 'dense1' changes after setting model.get_layer(name=name).trainable = False.
You can do it by using:
model=Sequential()
layer=Dense(64,init='glorot_uniform',input_shape=(784,))
layer.trainable=False
model.add(layer)
layer2=Dense(784, activation='sigmoid',init='glorot_uniform')
layer2.trainable=True
model.add(layer2)
model.compile(loss='relu', optimizer=sgd,metrics = ['mae'])
You need to compile the graph after setting 'trainable'.
more info here
let me keep my layers freezed upto 5th layer, rest i will keep trainable
Here is more simple & more efficient code
for layer in model.layers[:5]:
layer.trainable=False
for layer in model.layers[5:]:
layer.trainable=True
Related
I have so far been unable to resolve this bug. I am running my code and getting floating point exception with no stack trace. I tried to debug and step through my code and it dies after going through the call function twice. Here is the full code:
import os
import sys
from typing import Counter
import tensorflow as tf
import numpy as np
from tensorflow.keras import models, layers, callbacks
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
from matplotlib import pyplot as plt
# Change this to the location of the database directories
DB_DIR = os.path.dirname(os.path.realpath(__file__))
# Import databases
sys.path.insert(1, DB_DIR)
from db_utils import get_imdb_dataset, get_speech_dataset, get_single_digit_dataset
def Secure_Voice_Channel(func):
"""Define Secure_Voice_Channel decorator."""
def execute_func(*args, **kwargs):
print('Established Secure Connection.')
returned_value = func(*args, **kwargs)
print("Ended Secure Connection.")
return returned_value
return execute_func
#Secure_Voice_Channel
class generic_vns_function(tf.keras.Model):
def __init__(self, input_shape, layers, layer_units):
super().__init__()
self.convolutions = []
# Dynamically create Convolutional layers and MaxPools
for layer in range(len(layers)):
self.convolutions.append(tf.keras.layers.Conv2D(layer, 3, padding="same", input_shape=input_shape, activation="relu"))
# Add MaxPooling layer
self.convolutions.append(tf.keras.layers.MaxPooling2D((2,2)))
# Flatten
self.flatten = tf.keras.layers.Flatten()
# Dense layer
self.dense1 = tf.keras.layers.Dense(1024, activation="relu")
def call(self, input):
x = input
for layer in self.convolutions:
x = layer(x)
x = self.flatten(x)
x = self.dense1(x)
return x
def train_model(model, epochs, batch_size, X_train, y_train, X_test, y_test):
"""Generic Deep Learning Model training function."""
cb = [callbacks.EarlyStopping(monitor='val_loss', patience=3)]
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs,
batch_size=batch_size, verbose=1, callbacks=cb)
scores = model.evaluate(X_test, y_test, verbose=2)
print("Baseline Error: %.2f%%" % (100-scores[1]*100))
return model
def choose_dataset(dataset_type):
"""Select dataset based on string variable."""
if dataset_type == "nlp":
return get_imdb_dataset(dir=DB_DIR)
elif dataset_type == "computer_vision":
(X_train, y_train), (X_test, y_test) = mnist.load_data()
elif dataset_type == "speech_recognition":
# (X_train, y_train), (X_test, y_test), (_, _) = get_speech_dataset()
(X_train, y_train), (X_test, y_test), (_, _) = get_single_digit_dataset(0)
else:
raise ValueError("Couldn't find dataset.")
(X_train, X_test) = normalize_dataset(dataset_type, X_train, X_test)
(X_train, y_train), (X_test, y_test) = reshape_dataset(X_train, y_train, X_test, y_test)
return (X_train, y_train), (X_test, y_test)
def normalize_dataset(string, X_train, X_test):
"""Normalize speech recognition and computer vision datasets."""
if string == "computer vision":
X_train = X_train / 255
X_test = X_test / 255
else:
mean = np.mean(X_train)
std = np.std(X_train)
X_train = (X_train-std)/mean
X_test = (X_test-std)/mean
return (X_train, X_test)
def reshape_dataset(X_train, y_train, X_test, y_test):
"""Reshape Computer Vision and Speech datasets."""
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
return (X_train, y_train), (X_test, y_test)
def create_LSTM(cnn_model, input_shape, num_classes):
input_layer = layers.Input(shape=input_shape)
distributed_cnn = tf.keras.layers.TimeDistributed(cnn_model)(input_layer)
x, state_a, state_b = layers.LSTM(distributed_cnn)
output = layers.Dense(num_classes, activation="softmax")(x)
model = models.Model(inputs=input_layer, outputs=output)
opt = Adam()
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
return model
def main():
# Hyperparameters
layers = [64, 32]
layer_units = 1000
epochs = 10
batch_size = 200
lr = 0.001
filter = 64
kernal_size = 3
window = 3
dataset = "speech_recognition"
# Import Datasets
(X_train, y_train), (X_test, y_test) = choose_dataset(dataset)
num_classes = y_train.shape[1]
# Reshape both the train and test dataset
X_train = X_train.reshape(X_train.shape[0], window, int(X_train.shape[1]/window), X_train.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], window, int(X_test.shape[1]/window), X_test.shape[2], 1)
# Generate CNN model
cnn_model = generic_vns_function(X_train.shape[1:], layers, layer_units)
print("Created generic CNN model")
# Create the LSTM CNN with time distributed layer
model = create_LSTM(cnn_model, X_train.shape[1:], num_classes)
print("Created LST model")
trained_model = train_model(model, epochs, batch_size, X_train, y_train, X_test, y_test)
save_format="tf"
# # Save model to h5 file
trained_model.save(save_format, 'models/model_%s_a3.h5' % dataset)
return None
The error occurs after looping through the call function twice while creating the distributed_cnn = tf.keras.layers.TimeDistributed(cnn_model)(input_layer). Any ideas as to why? This is baffling to me.
To run the code, you also need to create the database via this file: https://gist.github.com/logankilpatrick/e621cf31f620524591a24cd9d4cf30f3
I'm trying to boost the performance of a simple 2NN. Here is the code:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.datasets import mnist
from tensorflow import keras
import tensorflow as tf
# load Mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data(path='mnist.npz')
X_train = X_train.reshape(60000, 784).astype('float32') / 255
X_test = X_test.reshape(10000, 784).astype('float32') / 255
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
# configure the model
model = Sequential()
model.add(Dense(200, activation='relu', input_shape=(784,)))
model.add(Dense(200, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.1), metrics=['accuracy'])
# train and evaluate the model
model.fit(X_train, y_train, batch_size=128, epochs=20, verbose=1, validation_data=(X_test, y_test))
model.evaluate(X_test, y_test)
Now, I wounder either there is a case to use #tf.function decorator or not, and if it's needed, how?
Your code only used builtin functions and classes so there is no need to use a #tf.function decorator. #tf.function is basically used to convert a normal function into a TensorFlow Graph as mentioned here. Since you are only using the builtin modules and functions, they are already treated as a graph by the TF compiler.
custom-loss-function of keras got wrong output:
When I use a Bayesian layer (tensorflow_probability.layers.DenseFlipout), and use my custom loss function, I got a wrong output loss. But if I replace Bayesian layer by a traditional tf.keras.layers.Dense layer, the output is correct. Can anybody help me ?
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
train, valid, test = mnist_data.read_data_sets('~/code/Python')
num_classes = 10
from tensorflow import keras
import tensorflow_probability as tfp
model = keras.Sequential()
#model.add(keras.layers.Dense(10, activation = 'softmax', input_shape=(784,)))
model.add(tfp.layers.DenseFlipout(10, activation = 'softmax', input_shape=(784,)))
sgd = keras.optimizers.SGD(lr=.1, momentum=0.9, nesterov=True)
def my_loss(y_true,y_pred):
return tf.reduce_mean((y_true-y_pred)**2)
model.compile(loss=my_loss, optimizer=sgd, metrics=['accuracy'])
x_train, y_train = train.images, train.labels
x_test, y_test = test.images, test.labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model.fit(x_train, y_train,
batch_size=128,
epochs=10,
validation_data=(x_test, y_test),
shuffle=True)
I changed the getting started example of Tensorflow as following:
import tensorflow as tf
from sklearn.metrics import roc_auc_score
import numpy as np
import commons as cm
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(512, activation=tf.nn.tanh),
# tf.keras.layers.Dense(512, activation=tf.nn.tanh),
# tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation=tf.nn.tanh)
])
model.compile(optimizer='adam',
loss='mean_squared_error',
# loss = 'sparse_categorical_crossentropy',
metrics=['accuracy'])
history = cm.Histories()
h= model.fit(x_train, y_train, epochs=50, callbacks=[history])
print("history:", history.losses)
cm.plot_history(h)
# cm.plot(history.losses, history.aucs)
test_predictions = model.predict(x_test)
# Compute confusion matrix
pred = np.argmax(test_predictions,axis=1)
pred2 = model.predict_classes(x_test)
confusion = confusion_matrix(y_test, pred)
cm.draw_confusion(confusion,range(10))
With its default parameters:
relu activation at hidden layers,
softmax at the output layer and
sparse_categorical_crossentropy as loss function,
it works fine and the prediction for all digits are above 99%
However with my parameters: tanh activation function and mean_squared_error loss function it just predict 0 for all test samples:
I wonder what is the problem? The accuracy rate is increasing for each epoch and it reaches 99% and loss is about 20
You need to use the proper loss function for your data. Here you have a categorical output, so you need to use sparse_categorical_crossentropy, but also set from_logits without any activation for the last layer.
If you need to use tanh as your output, then you can use MSE with a one-hot encoded version of your labels + rescaling.
I'm converting student-teacher model in below url to keras one.
https://github.com/chengshengchan/model_compression/blob/master/teacher-student.py
How can I give input to two model(student, teacher) and get one output from only student in keras?
I'll set teacher's all tensors with trainable=false, and loss function as difference between student and teacher's output like below :
tf_loss = tf.nn.l2_loss(teacher - student)/batch_size
As I know, it is possible to give input to only one model when defining model.fit. But in this cases, I should it to both of teacher and student model.
Thank in advance!
Below is very simple student-teacher model in keras.
I hope it might be helpful to someone like me.
Good job!
import keras
from keras.datasets import mnist
from keras.layers import Input, Embedding, LSTM, Dense, Lambda
from keras.models import Model
import numpy as np
from keras.utils import np_utils
from keras.layers.core import Dense, Dropout, Activation
nb_classes = 10
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)
from keras.models import Sequential
from keras.layers import Dense, Merge
from keras.optimizers import SGD, Adam, RMSprop
batch_size = 128
nb_classes = 10
nb_epoch = 3
teacher = Sequential()
teacher.add(Dense(10, input_shape=(784,)))
teacher.add(Dense(10))
teacher.add(Activation('softmax'))
teacher.summary()
teacher.compile(loss='categorical_crossentropy',
optimizer=RMSprop(),
metrics=['accuracy'])
history = teacher.fit(X_train, Y_train,
batch_size=batch_size, nb_epoch=nb_epoch,
verbose=1, validation_data=(X_test, Y_test))
score = teacher.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
for i in range(len(teacher.layers)):
setattr(teacher.layers[i], 'trainable', False)
Y_train = np.zeros((60000, 10))
student = Sequential()
student.add(Dense(10, input_dim=784))
student.add(Activation('softmax'))
student.compile(loss='mean_squared_error', optimizer='Adam', metrics=['accuracy'])
from keras.layers import *
def negativeActivation(x):
return -x
negativeRight = Activation(negativeActivation)(student.output)
diff = Add()([teacher.output,negativeRight])
model = Model(inputs=[teacher.input, student.input], outputs=[diff])
model.compile(loss='mean_squared_error', optimizer='Adam', metrics=['acc'])
model.summary(line_length=150)
model.fit([X_train, X_train], [Y_train], batch_size=128, nb_epoch=5)
print student.evaluate(X_test, Y_test)
The only implementation I have seen in Keras involves building 2 separate functions which either widen or deepen weight layers from the teacher model as initial weights for the student model.
I am not sure if it is precisely that Hinton et al. (2015) distillation to be honest, but it is teacher-student.
https://github.com/fchollet/keras/issues/3491