When should we use the tf.function decorator? - tensorflow

I'm trying to boost the performance of a simple 2NN. Here is the code:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.datasets import mnist
from tensorflow import keras
import tensorflow as tf
# Load MNIST, flatten each 28x28 image into a 784-vector and divide by 255.
(X_train, y_train), (X_test, y_test) = mnist.load_data(path='mnist.npz')
X_train = X_train.reshape(60000, 784).astype('float32') / 255
X_test = X_test.reshape(10000, 784).astype('float32') / 255
# One-hot encode the 10 classes, as required by categorical_crossentropy.
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

# Configure the model: two hidden ReLU layers of 200 units and a softmax output.
model = Sequential()
model.add(Dense(200, activation='relu', input_shape=(784,)))
model.add(Dense(200, activation='relu'))
model.add(Dense(10, activation='softmax'))
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(learning_rate=0.1),
    metrics=['accuracy'],
)

# Train and evaluate the model, using the test split for validation.
model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=20,
    verbose=1,
    validation_data=(X_test, y_test),
)
model.evaluate(X_test, y_test)
Now, I wonder whether there is a case for using the @tf.function decorator here, and if it is needed, how should I use it?

Your code only uses built-in functions and classes, so there is no need to use the @tf.function decorator. @tf.function is basically used to convert a normal Python function into a TensorFlow graph, as mentioned here. Since you are only using the built-in modules and functions, they are already treated as a graph by the TF compiler.

Related

how to replace keras embedding with pre-trained word embedding to CNN

I am currently studying how CNNs can be used in text classification and found some code on stack overflow that had worked with the use of a keras embedding layer.
I ran the code with the Keras embedding, but now I want to test what would happen with a pre-trained embedding. I have downloaded the word2vec API from gensim but don't know how to adapt the code from there.
My question is how can I replace the keras embedding layer with a pre-trained embedding like the word2vec model or Glove?
Here is the code:
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, Convolution1D, Flatten, Dropout
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.callbacks import TensorBoard
# Load IMDB, keeping only the `top_words` most frequent words.
top_words = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

# Pad/truncate every review to one common length.
max_review_length = 1600
X_train, X_test = (
    sequence.pad_sequences(reviews, maxlen=max_review_length)
    for reviews in (X_train, X_test)
)

# Embedding learned from scratch by Keras, followed by the convolutional
# stack (3x conv, flatten, 2x dense with dropout in between).
embedding_dim = 300
model = Sequential([
    Embedding(top_words, embedding_dim, input_length=max_review_length),
    Convolution1D(64, 3, padding='same'),
    Convolution1D(32, 3, padding='same'),
    Convolution1D(16, 3, padding='same'),
    Flatten(),
    Dropout(0.2),
    Dense(180, activation='sigmoid'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])

# Log the graph and training curves to TensorBoard.
tensorboard_cb = TensorBoard(log_dir='./logs', write_graph=True)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    y_train,
    epochs=3,
    callbacks=[tensorboard_cb],
    batch_size=64,
)

# Evaluate on the test set; evaluate() returns [loss, accuracy].
_, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (test_accuracy*100))
This reads the text file containing the weights, stores the words and their weights in a dictionary, then maps them into a new matrix using the vocabulary of your fit tokenizer.
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, Convolution1D, Flatten, Dropout
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.callbacks import TensorBoard
from tensorflow import keras
import itertools
import numpy as np
# Using keras to load the dataset with the top_words
top_words = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
word_index = keras.datasets.imdb.get_word_index()
# imdb.load_data() shifts every word id by `index_from` (default 3) so that
# 0, 1 and 2 can stand for padding, start-of-sequence and out-of-vocabulary.
# get_word_index() returns the UNSHIFTED ids, so the same offset has to be
# applied here, otherwise each vector ends up on the wrong embedding row.
index_from = 3
embedding_vecor_length = 300  # same as the embeds to be loaded below

# Read the GloVe text file into {word (bytes): vector}. The file is read as
# bytes, so the keys are bytes as well.
embeddings_dictionary = {}
with open('./embeds/glove.6B.300d.txt', 'rb') as glove_file:
    for line in glove_file:
        records = line.split()  # separates each line by white space
        if not records:
            continue  # tolerate blank lines instead of raising IndexError
        # the first element is the word, the rest are the weights
        embeddings_dictionary[records[0]] = np.asarray(
            records[1:], dtype='float32')

# Rows with no pre-trained vector (including the reserved ids) stay zero.
embeddings_matrix = np.zeros((top_words, embedding_vecor_length))
# mapping to a new matrix, using only the words in your tokenizer's vocabulary
for word, index in word_index.items():
    row = index + index_from
    if row >= top_words:
        # load_data(num_words=top_words) never emits this id
        continue
    # the weights of the individual words in your vocabulary
    embedding_vector = embeddings_dictionary.get(word.encode('utf-8'))
    if embedding_vector is not None:
        embeddings_matrix[row] = embedding_vector

# Pad the sequence to the same length
max_review_length = 1600
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

# Embedding layer initialised from the pre-trained matrix (still trainable)
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length,
                    input_length=max_review_length, name="embeddinglayer",
                    weights=[embeddings_matrix], trainable=True))
# Convolutional model (3x conv, flatten, 2x dense)
model.add(Convolution1D(64, 3, padding='same'))
model.add(Convolution1D(32, 3, padding='same'))
model.add(Convolution1D(16, 3, padding='same'))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(180, activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

# Log to tensorboard
tensorBoardCallback = TensorBoard(log_dir='./logs', write_graph=True)
model.compile(loss='binary_crossentropy',
              optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=3, callbacks=[
    tensorBoardCallback], batch_size=64)

# Evaluation on the test set
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Group convolution in keras

I created a simple neural network to understand how group convolutions can reduce the number of parameters. But when I use the groups parameter in the second convolution layer, I get an unimplemented error. When the groups parameter is not used, everything works fine. Why does using the groups parameter throw an unimplemented error? Does that mean group convolution is not available in the Keras API?
import tensorflow
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D,Reshape,MaxPooling2D
from tensorflow.keras.utils import to_categorical
import numpy as np
num_classes = 10

# Synthetic data: 100 random 28x28x1 inputs divided by 255, and one integer
# class label per cell of a 7x7 grid (two 2x2 poolings reduce 28 -> 7).
images = np.random.randint(low=0, high=255, size=(100, 28, 28, 1)).astype('float32') / 255
labels = np.random.randint(low=0, high=10, size=(100, 7, 7))

# 80/20 train/test split, then one-hot encode the labels.
X_train, X_test = images[:80], images[80:]
Y_train = to_categorical(labels[:80], num_classes)
Y_test = to_categorical(labels[80:], num_classes)

# Create the model
model = Sequential([
    Conv2D(8, kernel_size=(3, 3), input_shape=(28, 28, 1), padding='same'),
    # Grouped convolution: this is the layer that triggers the reported error.
    Conv2D(8, kernel_size=(3, 3), groups=4, input_shape=(28, 28, 1), padding='same'),
    # model.add(Dense(10, input_shape=input_shape, activation='relu'))
    Dense(50, activation='relu'),
    Dense(num_classes, activation='softmax'),
    MaxPooling2D(),
    MaxPooling2D(),
    # model.add(Reshape(target_shape=(10,)))
])
model.summary()

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    Y_train,
    epochs=10,
    batch_size=250,
    verbose=1,
    validation_split=0.2,
)
# model.save_weights("model.h5")
# # Test the model after training
# test_results = model.evaluate(X_test, Y_test, verbose=1)
# print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]}%')
Error
UnimplementedError: Fused conv implementation does not support grouped convolutions for now.
[[node sequential_38/conv2d_37/BiasAdd (defined at <ipython-input-42-e7c1c931a421>:50) ]] [Op:__inference_train_function_8596]
Function call stack:
train_function
Here is the colab file for your code. According to the doc
A positive integer specifying the number of groups in which the input is split along the channel axis. Each group is convolved separately with filters / groups filters. The output is the concatenation of all the groups results along the channel axis. Input channels and filters must both be divisible by groups.
In your code, I found nothing that conflicts with that, so it should work. If it still fails, the issue may lie somewhere else.

How to freeze weights in certain layer with Keras?

I am trying to freeze the weights of a certain layer in a prediction model with Keras and the MNIST dataset, but it does not work. The code looks like this:
from keras.layers import Dense, Flatten
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.datasets import mnist
from keras.losses import categorical_crossentropy
import numpy as np
def load_data():
    """Load MNIST and return (x_train, y_train, x_test, y_test).

    Images are cast to float32 and divided by 255; labels are one-hot
    encoded over 10 classes.
    """
    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()
    x_train = train_images.astype('float32') / 255
    x_test = test_images.astype('float32') / 255
    y_train = to_categorical(train_labels, num_classes=10)
    y_test = to_categorical(test_labels, num_classes=10)
    return x_train, y_train, x_test, y_test
def run():
    """Build, train (5 epochs) and evaluate a 3-layer dense MNIST classifier.

    Returns the trained model so that it can be retrained later.
    """
    x_train, y_train, x_test, y_test = load_data()

    # Named layers so that individual ones can be looked up afterwards.
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28)))
    model.add(Dense(300, name='dense1', activation='relu'))
    model.add(Dense(100, name='dense2', activation='relu'))
    model.add(Dense(10, name='dense3', activation='softmax'))
    model.trainable = True

    model.compile(
        optimizer='Adam',
        metrics=['accuracy'],
        loss=categorical_crossentropy,
    )
    print(model.summary())

    model.fit(x_train, y_train, epochs=5, verbose=2)
    test_metrics = model.evaluate(x_test, y_test)
    print(test_metrics)
    return model
def freeze(model):
    """Mark layer 'dense1' as non-trainable, retrain for one epoch, and
    report whether its kernel weights changed."""
    x_train, y_train, x_test, y_test = load_data()

    target_layer = model.get_layer(name='dense1')
    # record the kernel (first entry of [kernel, bias]) before retraining
    weights_before = target_layer.get_weights()[0]

    # freeze the weights of this layer
    # NOTE(review): the model is not recompiled after this flag is flipped;
    # per the answer below, compile() must be called again for it to apply.
    target_layer.trainable = False

    # retrain
    model.fit(x_train, y_train, verbose=2, epochs=1)
    weights_after = target_layer.get_weights()[0]

    unchanged = (weights_before == weights_after).all()
    if unchanged:
        print('the weights did not change!!!')
    else:
        print('the weights changed!!!!')
if __name__ == '__main__':
    # Train the classifier first, then run the freeze experiment on it.
    trained_model = run()
    freeze(trained_model)
The program outputs 'the weights changed!!!!'.
I do not understand why the weights of the layer named 'dense1' change after setting model.get_layer(name=name).trainable = False.
You can do it by using:
# Set `trainable` on each layer BEFORE compiling; the flag is only taken
# into account when the model is compiled.
model = Sequential()
# Keras 2 spells the initializer argument `kernel_initializer` (was `init`).
layer = Dense(64, kernel_initializer='glorot_uniform', input_shape=(784,))
layer.trainable = False  # frozen
model.add(layer)
layer2 = Dense(784, activation='sigmoid', kernel_initializer='glorot_uniform')
layer2.trainable = True  # still updated during training
model.add(layer2)
# 'relu' is an activation, not a loss; use a real regression loss that is
# consistent with the 'mae' metric.
# NOTE(review): `sgd` is assumed to be an optimizer instance defined by the
# surrounding code - confirm.
model.compile(loss='mean_squared_error', optimizer=sgd, metrics=['mae'])
You need to compile the graph after setting 'trainable'.
more info here
Suppose I want to keep the first five layers frozen and leave the rest trainable.
Here is simpler and more efficient code:
# Freeze the first five layers; every later layer stays trainable.
for position, layer in enumerate(model.layers):
    layer.trainable = position >= 5

keras custom loss got wrong output when using Bayesian layer

custom-loss-function of keras got wrong output:
When I use a Bayesian layer (tensorflow_probability.layers.DenseFlipout), and use my custom loss function, I got a wrong output loss. But if I replace Bayesian layer by a traditional tf.keras.layers.Dense layer, the output is correct. Can anybody help me ?
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow import keras
from tensorflow.examples.tutorials.mnist import input_data as mnist_data

num_classes = 10


def my_loss(y_true, y_pred):
    """Mean squared error over all elements of the batch."""
    squared_error = (y_true - y_pred) ** 2
    return tf.reduce_mean(squared_error)


# Data: MNIST via the TF tutorial reader, labels one-hot encoded.
train, valid, test = mnist_data.read_data_sets('~/code/Python')
x_train = train.images
x_test = test.images
y_train = keras.utils.to_categorical(train.labels, num_classes)
y_test = keras.utils.to_categorical(test.labels, num_classes)

# Single-layer softmax classifier built from a Bayesian (Flipout) dense layer.
# NOTE(review): DenseFlipout presumably registers extra KL-divergence terms
# with the model, so the reported loss may be my_loss plus those terms -
# confirm against the TensorFlow Probability docs.
model = keras.Sequential()
#model.add(keras.layers.Dense(10, activation = 'softmax', input_shape=(784,)))
model.add(tfp.layers.DenseFlipout(10, activation='softmax', input_shape=(784,)))

sgd = keras.optimizers.SGD(lr=.1, momentum=0.9, nesterov=True)
model.compile(loss=my_loss, optimizer=sgd, metrics=['accuracy'])

model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=10,
    validation_data=(x_test, y_test),
    shuffle=True,
)

student-teacher model in keras

I'm converting the student-teacher model at the URL below to a Keras one.
https://github.com/chengshengchan/model_compression/blob/master/teacher-student.py
How can I give the input to two models (student and teacher) and get a single output from only the student in Keras?
I'll set all of the teacher's tensors to trainable=False, and define the loss function as the difference between the student's and the teacher's outputs, like below:
tf_loss = tf.nn.l2_loss(teacher - student)/batch_size
As far as I know, it is only possible to give input to one model when calling model.fit. But in this case, I need to feed it to both the teacher and the student models.
Thanks in advance!
Below is very simple student-teacher model in keras.
I hope it might be helpful to someone like me.
Good job!
import keras
from keras.datasets import mnist
from keras.layers import Input, Embedding, LSTM, Dense, Lambda
from keras.models import Model
import numpy as np
from keras.utils import np_utils
from keras.layers.core import Dense, Dropout, Activation
nb_classes = 10

# Load MNIST, flatten each image to 784 features, cast to float32 and
# divide by 255.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784).astype('float32') / 255
X_test = X_test.reshape(10000, 784).astype('float32') / 255
for split, label in ((X_train, 'train samples'), (X_test, 'test samples')):
    print(split.shape[0], label)

# convert class vectors to binary class matrices
Y_train, Y_test = (
    np_utils.to_categorical(class_ids, nb_classes)
    for class_ids in (y_train, y_test)
)
from keras.models import Sequential
from keras.layers import Dense, Merge
from keras.optimizers import SGD, Adam, RMSprop
batch_size = 128
nb_classes = 10
nb_epoch = 3

# Teacher: two dense layers followed by softmax, trained on the real labels.
teacher = Sequential()
teacher.add(Dense(10, input_shape=(784,)))
teacher.add(Dense(10))
teacher.add(Activation('softmax'))
teacher.summary()
teacher.compile(loss='categorical_crossentropy',
                optimizer=RMSprop(),
                metrics=['accuracy'])
# Keras 2 names this argument `epochs` (it was `nb_epoch` in Keras 1).
history = teacher.fit(X_train, Y_train,
                      batch_size=batch_size, epochs=nb_epoch,
                      verbose=1, validation_data=(X_test, Y_test))
score = teacher.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

# Freeze the teacher so that only the student is updated once both are
# combined into (and compiled as) a single model.
for teacher_layer in teacher.layers:
    teacher_layer.trainable = False

# The combined model outputs (teacher - student), so its target is all zeros.
Y_train = np.zeros((60000, 10))

# Student: a single dense layer followed by softmax.
student = Sequential()
student.add(Dense(10, input_dim=784))
student.add(Activation('softmax'))
student.compile(loss='mean_squared_error', optimizer='Adam', metrics=['accuracy'])
from keras.layers import *
def negativeActivation(x):
    """Return the element-wise negation of x."""
    return -x


# Build diff = teacher.output + (-student.output), i.e. teacher - student.
negativeRight = Activation(negativeActivation)(student.output)
diff = Add()([teacher.output, negativeRight])

# Both sub-models receive an input; the single output is their difference,
# which the MSE loss pushes towards the target passed to fit().
model = Model(inputs=[teacher.input, student.input], outputs=[diff])
model.compile(loss='mean_squared_error', optimizer='Adam', metrics=['acc'])
model.summary(line_length=150)
# Keras 2 names this argument `epochs` (it was `nb_epoch` in Keras 1).
model.fit([X_train, X_train], [Y_train], batch_size=128, epochs=5)

# print() as a function: the Python 2 statement form is a syntax error in
# Python 3 and is inconsistent with the other print() calls in this script.
print(student.evaluate(X_test, Y_test))
The only implementation I have seen in Keras involves building 2 separate functions which either widen or deepen weight layers from the teacher model as initial weights for the student model.
To be honest, I am not sure whether it is precisely the distillation of Hinton et al. (2015), but it is teacher-student.
https://github.com/fchollet/keras/issues/3491