I have implemented my own version of MobileNet in TensorFlow and would like to verify it by comparing it against the official tensorflow_hub version.
I can get something working easily, like so:
# TF-Hub handle of the MobileNet v1 feature-vector module.
URL = "https://tfhub.dev/google/imagenet/mobilenet_v1_100_224/feature_vector/4"
# Stack: hub feature extractor -> dropout -> one output unit per class name.
model = tf.keras.Sequential([
    # NOTE(review): the positional `True` is presumably KerasLayer's `trainable` flag — confirm.
    hub.KerasLayer(URL, True, input_shape=(IMG_SIZE, IMG_SIZE, 3)),
    Layers.Dropout(0.2),
    Layers.Dense(len(class_names))
])
But this model is already trained.
I've tried calling initializers, but tensorflow_hub.KerasLayers don't have them.
I've also tried resetting all of the weights with glorot_uniform() and RandomUniform(), but then the model does not learn at all (and neither does mine, when I do the same randomization of weights).
Can you reinitialize a pre-trained model?
A lot of searching only provided methods for resetting layers that you already have, or restoring them back to what they were when you started with the model.
This is probably far from complete, but maybe someone can build off it!
import tensorflow as tf
import tensorflow.keras.backend as K
def untrain_layer(layer):
    """Overwrite every weight of ``layer`` with a freshly initialised value.

    Variables are classified by name: ``beta`` and ``moving_mean`` are reset
    to zeros, ``gamma`` and ``moving_variance`` to ones.  Every other
    variable is redrawn with ``glorot_uniform``.

    Args:
        layer: object exposing ``weights`` (variables with ``name`` and
            ``shape``) and ``set_weights``.
    """
    # Name fragment -> initializer factory; checked in order, first match wins.
    factories_by_fragment = (
        ("beta", tf.keras.initializers.zeros),
        ("gamma", tf.keras.initializers.ones),
        ("moving_mean", tf.keras.initializers.zeros),
        ("moving_variance", tf.keras.initializers.ones),
    )
    new_weights = []
    for w in layer.weights:
        print(w.name)
        make_initializer = next(
            (factory for fragment, factory in factories_by_fragment
             if fragment in w.name),
            tf.keras.initializers.glorot_uniform,
        )
        # Build a fresh initializer per variable, call it with the shape to
        # get a tensor, and only then evaluate that tensor to an array.
        new_weights.append(K.eval(make_initializer()(w.shape)))
    layer.set_weights(new_weights)
Use:
import tensorflow_hub as hub
# Build the hub layer, then overwrite its weights in place.
feature_extractor = hub.KerasLayer(URL, True, input_shape=(IMG_SIZE, IMG_SIZE, 3))
# The helper defined above is named `untrain_layer`.
untrain_layer(feature_extractor)
Related
I would like to clear the memory / network after every time I am done with the training. I used the alternatives proposed online, but it seems like they are not working if I am correctly interpreting my results. I use tf.compat.v1.reset_default_graph() and tf.keras.backend.clear_session() since they are mostly recommended online.
import numpy as np
import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.keras import backend as K
# Bounds of the interval the random training data is scaled into.
upper_limit = 2
lower_limit = -2
# 100 samples x 5 features, uniform in [lower_limit, upper_limit).
training_input= np.random.random ([100,5])*(upper_limit - lower_limit) + lower_limit
# 100 targets; the extra factor of 10 widens their range relative to the inputs.
training_output = np.random.random ([100,1]) *10*(upper_limit - lower_limit) + lower_limit
# Small regression network: 5 inputs -> 12 ReLU units -> 1 output.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(5,)),
    tf.keras.layers.Dense(12, activation='relu'),
    tf.keras.layers.Dense(1)
])
model.compile(loss="mse",optimizer = tf.keras.optimizers.Adam(learning_rate=0.01))
for layer in model.layers:
    print("layer weights before fitting: ",layer.get_weights(),"\n") # weights
# batch_size equals the number of samples, so each epoch is a single batch.
model.fit(training_input, training_output, epochs=5, batch_size=100,verbose=0)
for layer in model.layers:
    print("layer weights after fitting: ",layer.get_weights(),"\n") # weights
print("\n")
# Attempted cleanup; `model` is still referenced afterwards and its weights are printed again below.
tf.compat.v1.reset_default_graph()
tf.keras.backend.clear_session()
print("after clear","\n")
for layer in model.layers:
    print(layer.get_weights(),"\n") # weights
When I print the layer weights after attempting to clear the network, I get the same weight values as before cleaning the session.
I think what you are looking for is a way to reset the weights of your model, and that is not really related to the session or the graph (with some exceptions).
Resetting the weights is currently a debated topic. You can find how to do it in most cases here, but as you can see, nobody is currently planning to implement this function.
For easy access, I post the current proposal below:
def reset_weights(model):
    """Reset the weights of ``model`` in place by re-running layer initializers.

    For every layer, each attribute whose name contains ``"initializer"`` is
    paired with the variable of the matching name (``kernel_initializer`` ->
    ``kernel``; ``recurrent_initializer`` -> ``recurrent_kernel``) and that
    variable is re-assigned from the initializer.  Nested models are handled
    recursively.

    Initializers whose variable is missing or ``None`` (for example a bias
    initializer on a layer that has no bias, or a layer that has not been
    built yet) are skipped instead of raising.

    Args:
        model: object exposing ``layers``.
    """
    for layer in model.layers:
        if isinstance(layer, tf.keras.Model):  # a model used as a layer
            reset_weights(layer)  # apply function recursively
            continue

        # Layers with a `cell` attribute keep their initializers on the cell.
        init_container = layer.cell if hasattr(layer, 'cell') else layer

        for key, initializer in init_container.__dict__.items():
            if "initializer" not in key:  # not an initializer attribute
                continue

            # Find the corresponding variable, like the kernel or the bias.
            if key == 'recurrent_initializer':  # name does not follow the pattern
                var_name = 'recurrent_kernel'
            else:
                var_name = key.replace("_initializer", "")
            var = getattr(init_container, var_name, None)
            if var is None:
                # Nothing to reset for this initializer.
                continue

            var.assign(initializer(var.shape, var.dtype))
Remember that if you do not define a seed, the weights will be different each time you call reset.
I have the following code, and I am getting an error when I try to add my own classifier.
import keras
from keras import layers,Model
from keras.layers import Input,GlobalAveragePooling2D,Flatten,Dense
# NOTE(review): the backbone is built with `tf.keras`, while `layers` and `keras.Model` below come from the standalone `keras` imports above.
MobileNetV2_model= tf.keras.applications.MobileNetV2(input_shape=None, alpha=1.0, include_top=False,
    weights='imagenet')
#MobileNetV2_model.summary()
# New head: global average pooling followed by a 2-unit sigmoid layer.
x= MobileNetV2_model.output
x = layers.GlobalAveragePooling2D()(x)
final_output=layers.Dense(2, activation='sigmoid')(x)
# NOTE(review): `MobileNetV2` is not defined in this snippet; the backbone above is named `MobileNetV2_model`.
model = keras.Model(inputs=MobileNetV2.input, outputs = final_output)
model.compile(optimizer="adam", loss='BinaryCrossentropy', metrics=['accuracy'],loss_weights=0.1)
Error
TypeError: Cannot convert a symbolic Keras input/output to a numpy array. This error may indicate that
you're trying to pass a symbolic value to a NumPy call, which is not supported. Or, you may be trying
to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model.
You should never mix keras and tf.keras. You can refer to the working code shown below:
import tensorflow as tf
from tensorflow.keras import layers, Model
# ImageNet-weighted MobileNetV2 backbone for 224x224 RGB input, without its top classifier.
MobileNetV2_model= tf.keras.applications.MobileNetV2(input_shape=(224,224,3), alpha=1.0, include_top=False, weights='imagenet')
#MobileNetV2_model.summary()
# New head: global average pooling followed by a 2-unit sigmoid layer.
x= MobileNetV2_model.output
x = layers.GlobalAveragePooling2D()(x)
final_output=layers.Dense(2, activation='sigmoid')(x)
# Functional model from the backbone's input to the new head; everything here comes from tf.keras.
model = Model(inputs=MobileNetV2_model.input, outputs = final_output)
model.compile(optimizer="adam", loss='BinaryCrossentropy', metrics=['accuracy'],loss_weights=0.1)
I made a script in tensorflow 2.x but I had to downconvert it to tensorflow 1.x (tested in 1.14 and 1.15). However, the tf1 version performs very differently (10% accuracy lower on the test set). See also the plot for train and validation performance (diagram is attached below).
Looking at the operations needed for the migration from tf1 to tf2, it seems that only the Adam learning rate may be a problem, but I'm defining it explicitly (see: tensorflow migration).
I've reproduced the same behavior both locally (on GPU and CPU) and on Colab. The Keras used was the one built into TensorFlow (tf.keras). I've used the following functions (for train, validation and test alike), using sparse categorical labels (integers):
# Generator configuration: optional horizontal flip, no preprocessing function, no validation split.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    horizontal_flip=horizontal_flip,
    #rescale=None, #not needed for resnet50
    preprocessing_function=None,
    validation_split=None)
# Stream shuffled batches from train_dir with 'sparse' class labels.
train_dataset = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=image_size,
    class_mode='sparse',
    batch_size=batch_size,
    shuffle=True)
And the model is a simple resnet50 with a new layer on top:
# Input shape is the image size plus three channels.
IMG_SHAPE = img_size+(3,)
# The model accepts uint8 images and casts them to float32 itself.
inputs = Input(shape=IMG_SHAPE, name='image_input',dtype = tf.uint8)
x = tf.cast(inputs, tf.float32)
# not working in this version of keras. inserted in imageGenerator
x = preprocess_input_resnet50(x)
# ImageNet-weighted ResNet50 backbone without its top classifier or pooling.
base_model = tf.keras.applications.ResNet50(
    include_top=False,
    input_shape = IMG_SHAPE,
    pooling=None,
    weights='imagenet')
# Freeze the pretrained weights
base_model.trainable = False
x=base_model(x)
# Rebuild top
x = GlobalAveragePooling2D(data_format='channels_last',name="avg_pool")(x)
top_dropout_rate = 0.2
x = Dropout(top_dropout_rate, name="top_dropout")(x)
outputs = Dense(num_classes,activation="softmax", name="pred_out")(x)
model = Model(inputs=inputs, outputs=outputs,name="ResNet50_comp")
# The learning rate is passed explicitly rather than left at the optimizer default.
optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
# Sparse loss to match the integer labels.
model.compile(optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy'])
And then I'm calling the fit function:
# Train from the generators with explicit step counts per epoch and for validation.
history = model.fit_generator(train_dataset,
    steps_per_epoch=n_train_batches,
    validation_data=validation_dataset,
    validation_steps=n_val_batches,
    epochs=initial_epochs,
    verbose=1,
    callbacks=[stopping])
I've reproduced the same behavior for example with the following full script (applied to my dataset and changed to adam and removed intermediate final dense layer):
deep learning sandbox
The easiest way to replicate this behavior was to take the same script in a tf2 environment and enable or disable the following line. However, I've also tested on tf1 environments (1.14 and 1.15):
tf.compat.v1.disable_v2_behavior()
Sadly I cannot provide the dataset.
Update 26/11/2020
For full reproducibility I've obtained a similar behaviour by means of the food101 (101 categories) dataset enabling tf1 behaviour with 'tf.compat.v1.disable_v2_behavior()'. The following is the script executed with tensorflow-gpu 2.2.0:
#%% ref https://medium.com/deeplearningsandbox/how-to-use-transfer-learning-and-fine-tuning-in-keras-and-tensorflow-to-build-an-image-recognition-94b0b02444f2
import os
import sys
import glob
import argparse
import matplotlib.pyplot as plt
import tensorflow as tf
# enable and disable this to obtain tf1 behaviour
tf.compat.v1.disable_v2_behavior()
from tensorflow.keras import __version__
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
# since i'm using resnet50 weights from imagenet, i'm using food101 for
# similar but different categorization tasks
# pip install tensorflow-datasets if tensorflow_dataset not found
import tensorflow_datasets as tfds
# Load the food101 train/validation splits together with the dataset metadata.
(train_ds,validation_ds),info= tfds.load('food101', split=['train','validation'], shuffle_files=True, with_info=True)
assert isinstance(train_ds, tf.data.Dataset)
print(train_ds)
#%%
# Experiment settings: image width/height, number of epochs, batch size.
IM_WIDTH, IM_HEIGHT = 224, 224
NB_EPOCHS = 10
BAT_SIZE = 32
def get_nb_files(directory):
    """Count the files stored in the sub-directories of ``directory``.

    Every sub-directory, at any depth, is globbed with ``*`` and only
    regular files are counted, so a nested directory is never counted as a
    file.  Files sitting directly in ``directory`` are not counted, and names
    that ``glob`` skips for ``*`` (dot-files) are not counted either.

    Args:
        directory: path of the root folder to search.

    Returns:
        The number of files found, or 0 when ``directory`` does not exist.
    """
    if not os.path.exists(directory):
        return 0
    cnt = 0
    for r, dirs, _files in os.walk(directory):
        for dr in dirs:
            entries = glob.glob(os.path.join(r, dr, "*"))
            # Keep only regular files; sub-directories are visited by os.walk.
            cnt += sum(1 for entry in entries if os.path.isfile(entry))
    return cnt
def setup_to_transfer_learn(model, base_model):
    """Mark every layer of ``base_model`` as non-trainable, then compile ``model``.

    ``model`` is compiled with the rmsprop optimizer, sparse categorical
    cross-entropy loss and the accuracy metric.
    """
    compile_options = {
        "optimizer": 'rmsprop',
        "loss": 'sparse_categorical_crossentropy',
        "metrics": ['accuracy'],
    }
    for frozen_layer in base_model.layers:
        frozen_layer.trainable = False
    model.compile(**compile_options)
def add_new_last_layer(base_model, nb_classes):
    """Attach a pooled softmax classification head to ``base_model``.

    Args:
        base_model: keras model excluding top
        nb_classes: number of output classes
    Returns:
        A new keras model mapping ``base_model.input`` to the softmax output.
    """
    # Global average pooling over the base output, then the new softmax layer.
    pooled = GlobalAveragePooling2D()(base_model.output)
    softmax_head = Dense(nb_classes, activation='softmax')
    return Model(inputs=base_model.input, outputs=softmax_head(pooled))
def train(nb_epoch, batch_size):
    """Train a frozen ImageNet ResNet50 with a new softmax head on the tfds splits.

    Reads the module-level ``train_ds``, ``validation_ds`` and ``info``.

    Args:
        nb_epoch: number of epochs passed to ``model.fit``.
        batch_size: batch size used to batch both datasets and to derive the
            number of training/validation steps per epoch.

    Returns:
        The value returned by ``model.fit``.
    """
    nb_train_samples = info.splits['train'].num_examples
    nb_classes = info.features['label'].num_classes
    nb_val_samples = info.splits['validation'].num_examples

    def preprocess(features):
        """Resize, randomly flip and ResNet50-preprocess one example."""
        image = tf.image.resize(features['image'], [224, 224])
        # data augmentation
        image = tf.image.random_flip_left_right(image)
        image = preprocess_input(image)
        # Integer labels, matching the sparse categorical loss.
        return image, features['label']

    # NOTE: the same preprocess (including the random flip) is applied to the
    # validation split, as in the original script.
    train_generator = train_ds.map(preprocess).batch(batch_size).repeat()
    validation_generator = validation_ds.map(preprocess).batch(batch_size).repeat()

    # setup model
    base_model = ResNet50(weights='imagenet', include_top=False)  # exclude final FC layer
    model = add_new_last_layer(base_model, nb_classes)

    # transfer learning
    setup_to_transfer_learn(model, base_model)

    # The datasets repeat indefinitely, so step counts must be given; derive
    # them from the same batch_size that was used to batch the data.
    history = model.fit(
        train_generator,
        epochs=nb_epoch,
        steps_per_epoch=nb_train_samples // batch_size,
        validation_data=validation_generator,
        validation_steps=nb_val_samples // batch_size)
    return history
#execute
# Run training with the module-level epoch count and batch size.
history = train(nb_epoch=NB_EPOCHS, batch_size=BAT_SIZE)
And the performance on food101 dataset:
update 27/11/2020
It's possible to see the discrepancy also in the way smaller oxford_flowers102 dataset:
(train_ds,validation_ds,test_ds),info= tfds.load('oxford_flowers102', split=['train','validation','test'], shuffle_files=True, with_info=True)
NB: the above plot shows confidence bands obtained by running the same training multiple times and evaluating the mean and std, to check for the effects of random weight initialization and data augmentation.
Moreover I've tried some hyperparameter tuning on tf2 resulting in the following picture:
changing optimizer (adam and rmsprop)
not applying horizontal flipping augmentation
deactivating keras resnet50 preprocess_input
Thanks in advance for every suggestion. Here are the accuracy and validation performance on tf1 and tf2 on my dataset:
Update 14/12/2020
I'm sharing the Colab for reproducibility on oxford_flowers at the click of a button:
colab script
I came across something similar, when doing the opposite migration (from TF1+Keras to TF2).
Running this code below:
# using TF2
import numpy as np
from tensorflow.keras.applications.resnet50 import ResNet50
# Headless ResNet50 with average pooling.
fe = ResNet50(include_top=False, pooling="avg")
# Features for a single all-ones 224x224x3 input, flattened to 1-D.
out = fe.predict(np.ones((1,224,224,3))).flatten()
# Sum of the features, used as a quick check of the output.
sum(out)
>>> 212.3205274187726
# using TF1+Keras
import numpy as np
from keras.applications.resnet50 import ResNet50
# Same construction and input as above, but with the standalone keras package.
fe = ResNet50(include_top=False, pooling="avg")
out = fe.predict(np.ones((1,224,224,3))).flatten()
sum(out)
>>> 187.23898954353717
you can see the same model from the same library on different versions does not return the same value (using sum as a quick check-up). I found the answer to this mysterious behavior in this other SO answer: ResNet model in keras and tf.keras give different output for the same image
Another recommendation I'd give you is, try using pooling from inside applications.resnet50.ResNet50 class, instead of the additional layer in your function, for simplicity, and to remove possible problem-generators :)
I am using Anaconda Navigator (Jupyter, to be precise).
import tensorflow as tf
from tensorflow import keras
print(tf.__version__)
>>> 1.14.0
This is my model
def create_model():
    """Return the uncompiled classifier: 129 inputs -> 86 -> 142 -> 4 (softmax).

    Both hidden layers use ReLU with an L2 kernel regularizer of 0.0001 and
    are each followed by dropout of 0.2.
    """
    layer_stack = [
        keras.layers.Dense(
            86,
            activation='relu',
            kernel_regularizer=keras.regularizers.l2(0.0001),
            input_shape=(129,),
        ),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(
            142,
            activation='relu',
            kernel_regularizer=keras.regularizers.l2(0.0001),
        ),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(4, activation='softmax'),
    ]
    return tf.keras.Sequential(layer_stack)
# Build the network defined by create_model().
model = create_model()
# Display the model's architecture
model.summary()
After training, predicting with and evaluating my model, I decided to save it using
model.save('/Users/Jennifer/myproject/my_model.h5')
I checked the directory and confirmed that the .h5 file was there. Then I decided to load it using
new_model1 = tf.keras.models.load_model('/Users/Jennifer/myproject/my_model.h5')
I got an Error
ValueError: Unknown entries in loss dictionary: ['class_name', 'config']. Only expected following keys: ['dense_17']
Please help me. What should I do? I have almost spent the whole day trying to solve this issue. Thanks
Here is a bit of a work around that just loads the weights:
#!/usr/bin/env python3
from tensorflow import keras
import os
def create_model():
    """Return the uncompiled classifier: 129 inputs -> 86 -> 142 -> 4 (softmax).

    Both hidden layers use ReLU with an L2 kernel regularizer of 0.0001 and
    are each followed by dropout of 0.2.
    """
    first_hidden = keras.layers.Dense(
        86,
        activation='relu',
        kernel_regularizer=keras.regularizers.l2(0.0001),
        input_shape=(129,),
    )
    first_dropout = keras.layers.Dropout(0.2)
    second_hidden = keras.layers.Dense(
        142,
        activation='relu',
        kernel_regularizer=keras.regularizers.l2(0.0001),
    )
    second_dropout = keras.layers.Dropout(0.2)
    output_layer = keras.layers.Dense(4, activation='softmax')
    return keras.Sequential(
        [first_hidden, first_dropout, second_hidden, second_dropout, output_layer]
    )
# The architecture is needed on both paths, so build it once.
model = create_model()
if os.path.exists("junk.h5"):
    # Restore only the weights into the freshly built architecture instead of
    # deserialising the whole saved model.
    model.load_weights("junk.h5")
else:
    model.compile(
        optimizer=keras.optimizers.Adam(0.0001),
        # create_model() already ends in a softmax layer, so the loss is fed
        # probabilities, not logits.
        loss=keras.losses.CategoricalCrossentropy(from_logits=False),
        metrics=['accuracy'],
    )
    model.save("junk.h5")
Another workaround would be to save the model without the optimizer
model.save("junk.h5", include_optimizer=False)
It looks like the loss function you're using creates a dictionary that has invalid keys. This sounds like a bug in keras/tensorflow. That is probably why the Colab one worked: it was using a newer version.
This isn't really a question that's code-specific, but I haven't been able to find any answers or resources.
I'm currently trying to teach myself some "pure" TensorFlow rather than just using Keras, and I felt that it would be very helpful if there were some sources where they have TensorFlow code and the equivalent Keras code side-by-side for comparison.
Unfortunately, most of the results I find on the Internet talk about performance-wise differences or have very simple comparison examples (e.g. "and so this is why Keras is much simpler to use"). I'm not so much interested in those details as much as I am in the code itself.
Does anybody know if there are any resources out there that could help with this?
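Here you have two models, one in TensorFlow and one in Keras, that roughly correspond to each other (same layer sizes; note that the hidden activations and the loss differ between the two snippets):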
import tensorflow as tf
import numpy as np
import pandas as pd
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
Tensorflow
# Graph inputs: features and targets, fed at run time.
X = tf.placeholder(dtype=tf.float64)
Y = tf.placeholder(dtype=tf.float64)
num_hidden=128
# Build a hidden layer
W_hidden = tf.Variable(np.random.randn(784, num_hidden))
b_hidden = tf.Variable(np.random.randn(num_hidden))
p_hidden = tf.nn.sigmoid( tf.add(tf.matmul(X, W_hidden), b_hidden) )
# Build another hidden layer
W_hidden2 = tf.Variable(np.random.randn(num_hidden, num_hidden))
b_hidden2 = tf.Variable(np.random.randn(num_hidden))
p_hidden2 = tf.nn.sigmoid( tf.add(tf.matmul(p_hidden, W_hidden2), b_hidden2) )
# Build the output layer
W_output = tf.Variable(np.random.randn(num_hidden, 10))
b_output = tf.Variable(np.random.randn(10))
p_output = tf.nn.softmax( tf.add(tf.matmul(p_hidden2, W_output), b_output) )
# Mean squared error between the softmax output and the targets.
loss = tf.reduce_mean(tf.losses.mean_squared_error(
    labels=Y,predictions=p_output))
# NOTE: this "accuracy" is 1 - sqrt(loss), not a classification accuracy.
accuracy=1-tf.sqrt(loss)
minimization_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)
# The whole training set is fed on every step: flattened images and dummy-encoded labels.
feed_dict = {
    X: x_train.reshape(-1,784),
    Y: pd.get_dummies(y_train)
}
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for step in range(10000):
        # Loss and accuracy are evaluated before the update of the same step.
        J_value = session.run(loss, feed_dict)
        acc = session.run(accuracy, feed_dict)
        if step % 100 == 0:
            print("Step:", step, " Loss:", J_value," Accuracy:", acc)
        session.run(minimization_op, feed_dict)
    # Predictions for the test images, computed while the session is still open.
    pred00 = session.run([p_output], feed_dict={X: x_test.reshape(-1,784)})
Keras
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from keras.models import Model
# Short alias for the layers namespace.
l = tf.keras.layers
# 784 inputs -> two 128-unit ReLU layers -> 10-way softmax.
model = tf.keras.Sequential([
    l.Flatten(input_shape=(784,)),
    l.Dense(128, activation='relu'),
    l.Dense(128, activation='relu'),
    l.Dense(10, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
model.summary()
# `epochs` is the supported keyword for the number of epochs; `nb_epoch` is a
# legacy name.
model.fit(x_train.reshape(-1,784),pd.get_dummies(y_train),epochs=15,batch_size=128,verbose=1)
You can take a look at this toy example, but it may be too simple.