How to transfer weights from baseline model to federated model? - tensorflow

def create_keras_model():
    """Build the small CNN binary classifier and restore its saved weights.

    The network stacks three Conv2D/MaxPooling2D stages (16, 32 and 64
    filters), flattens, applies an L2-regularised 512-unit Dense layer
    followed by dropout, and ends in a single sigmoid unit. Weights are then
    read from the path passed to ``load_weights``.

    Returns:
        The Sequential model after ``load_weights`` has been called on it.
    """
    stack = []
    # Three conv/pool stages with a doubling filter count.
    for filters in (16, 32, 64):
        stack.append(Conv2D(filters, 3, padding='same', activation='relu'))
        stack.append(MaxPooling2D())
    # Classifier head.
    stack.extend([
        Flatten(),
        Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])
    network = Sequential(stack)
    network.load_weights('/content/drive/My Drive/localmodel/weights')
    return network
I tried something like this in Colab, but I get Errno 21 (Is a directory).
Then I tried another method as shown below,
# At this point create_keras_model() no longer loads weights; it just returns a Sequential model.
tff_model = create_keras_model()
# Attempted transfer of the weights held by model_with_weights into tff_model.
tff.learning.assign_weights_to_keras_model(tff_model, model_with_weights)
Just like assign_weights_to_keras_model() transfers weights from tff_model to keras model, I want to transfer weights from keras model to tff_model. How can this be done?

Here, model_with_weights must be a TFF value representing the weights of a model, for example:
def model_fn():
    """Return a TFF model wrapping a freshly built Keras model."""
    return tff.learning.from_keras_model(create_keras_model())
# Build the federated averaging process from the model factory (remaining arguments elided).
fed_avg = tff.learning.build_federated_averaging_process(model_fn, ...)
# Create the initial server state.
state = fed_avg.initialize()
# Run one round of the process; the data argument is elided.
state = fed_avg.next(state, ...)
...
# Copy the weights held in state.model into a Keras model.
# NOTE(review): keras_model is only a local inside model_fn; presumably a separately built Keras model is meant here — confirm.
tff.learning.assign_weights_to_keras_model(keras_model, state.model)

I just got to know how this can be done.
The idea is to use:
tff.learning.state_with_new_model_weights(state, trainable_weights_numpy, non_trainable_weights_numpy)
Documentation here
where trainable weights are taken from baseline model and converted to numpy format.
# Snapshot every trainable variable of the baseline model as a NumPy array.
trainable_weights = [variable.numpy() for variable in baseline_model.trainable_weights]
This is particularly useful when the server has part of the data and the client has similar data. Maybe this could also be used for transfer learning.

Related

Merge two sequential models on Keras for hybrid model

I want to combine two sequential models for a hybrid model (with Keras 2.6.0). The first model is a succession of dense layer of a set of 4 parameters, and the second is a succession of 2D convolution of an image ((32,32)). The goal is to predict a curve of 128 points.
My actual model:
def get_model_v2(params_shape, img_shape):
    """Attempt to build the hybrid (parameters + image) regression model.

    A dense branch and a convolutional branch are created as two separate
    Sequential models and handed directly to ``concatenate``; a third
    Sequential model with a 128-value output is then stacked on the result
    and compiled with Adam / MSE.

    Args:
        params_shape: Shape of the parameter vector. Not used by this version.
        img_shape: Shape of the image input.

    Returns:
        The compiled head model.
    """
    # Dense branch for the parameters (no input shape is declared).
    dense_branch = models.Sequential([
        layers.Dense(512, kernel_regularizer=regularizers.l2(0.001), activation='relu', name='Dense_n1'),
        layers.Dense(512, kernel_regularizer=regularizers.l2(0.001), activation='relu', name='Dense_n2'),
        layers.Dense(256, name='Output'),
    ])

    # Convolutional branch for the image.
    conv_branch = models.Sequential([
        layers.Input(img_shape, name='InputLayer2'),
        layers.Conv2D(64, kernel_size=4, strides=2, padding="same"),
        layers.LeakyReLU(alpha=0.2),
        layers.Conv2D(16, kernel_size=4, strides=2, padding="same"),
        layers.LeakyReLU(alpha=0.2),
        layers.Flatten(),
    ])

    # NOTE(review): concatenate receives the two Sequential models themselves
    # rather than tensors; this is the call reported to raise the TypeError.
    merged = tf.keras.layers.concatenate([dense_branch, conv_branch])

    # Regression head stacked on the merged result.
    head = models.Sequential()
    head.add(layers.Input(merged, name='InputLayer3'))
    head.add(layers.Dense(256, kernel_regularizer=regularizers.l2(0.001), activation='relu', name='Dense_n1'))
    head.add(layers.Dense(128, name='Output'))
    head.compile(optimizer='adam', loss='mse', metrics=['mae', 'mse'])
    return head
model = get_model_v2 ( (4,), (32, 32, 1) )
My problem is that when I have to combine the two models, I don't know what to use. With this "concatenate" example I get an error like: TypeError: 'NoneType' object is not subscriptable. I understand the problem, but I can't find another solution...
Few issues here,
You are not using params_shape for your params_model (which comes out with an undefined shape).
As you understood, you can't concatenate models with a concatenation layer
The final model needs to go through the Functional API
You got a bunch of layers with same name - you cannot have the same name for two layers in the same model
import tensorflow.keras.layers as layers
import tensorflow.keras.models as models
import tensorflow.keras.regularizers as regularizers
import tensorflow as tf
def get_model_v2(params_shape, img_shape):
    """Build and compile the hybrid (parameters + image) regression model.

    Two Sequential branches are built, their output tensors are concatenated,
    and a two-layer dense head producing 128 values is attached through the
    Functional API. The model summary is printed before compiling with
    Adam / MSE.

    Args:
        params_shape: Input shape of the parameter vector, passed as
            ``input_shape`` to the first Dense layer.
        img_shape: Input shape of the image, passed to ``layers.Input``.

    Returns:
        The compiled functional model taking both branch inputs.
    """
    # Dense branch; the first layer declares the parameter input shape.
    dense_branch = models.Sequential([
        layers.Dense(512, kernel_regularizer=regularizers.l2(0.001), activation='relu', name='Dense_n1', input_shape=params_shape),
        layers.Dense(512, kernel_regularizer=regularizers.l2(0.001), activation='relu', name='Dense_n2'),
        layers.Dense(256, name='Output'),
    ])

    # Convolutional branch for the image.
    conv_branch = models.Sequential([
        layers.Input(img_shape, name='InputLayer2'),
        layers.Conv2D(64, kernel_size=4, strides=2, padding="same"),
        layers.LeakyReLU(alpha=0.2),
        layers.Conv2D(16, kernel_size=4, strides=2, padding="same"),
        layers.LeakyReLU(alpha=0.2),
        layers.Flatten(),
    ])

    # Join the branches on their output tensors, not on the models themselves.
    merged = tf.keras.layers.concatenate([dense_branch.outputs[0], conv_branch.outputs[0]])

    # Dense head; layer names differ from the branch layers so they stay unique.
    hidden = layers.Dense(256, kernel_regularizer=regularizers.l2(0.001), activation='relu', name='Dense_n3')(merged)
    prediction = layers.Dense(128, name='Output_final')(hidden)

    hybrid = models.Model(inputs=[dense_branch.inputs, conv_branch.inputs], outputs=prediction)
    hybrid.summary()
    hybrid.compile(optimizer='adam', loss='mse', metrics=['mae', 'mse'])
    return hybrid
model = get_model_v2 ( (4,), (32, 32, 1) )

How do I change the decoder feature type in Tensorflow Datasets?

I am trying to do some augmentations to tensorflow image datasets such as cifar and others from here:
https://www.tensorflow.org/datasets/catalog/
Now I have a mapping function that actually another user from SO helped me with that works with my own dataset:
def map_data(inputs):
    """Augment and scale one dataset element and one-hot encode its label.

    Args:
        inputs: Mapping with an 'image' entry and a 'label' entry.

    Returns:
        A ``(features, labels)`` pair where ``features`` is a dict holding the
        augmented image divided by 255.0 under 'image_input' and the one-hot
        labels under 'label', and ``labels`` is the same one-hot tensor.
    """
    # aug_fn runs through tf.numpy_function and is declared to return float32.
    augmented = tf.numpy_function(func=aug_fn, inp=[inputs['image']], Tout=tf.float32)
    scaled = augmented / 255.0
    one_hot = tf.one_hot(inputs['label'], num_classes)
    return {'image_input': scaled, 'label': one_hot}, one_hot
now when iterating over the dataset I get this error:
ValueError: Missing data for input "image_input". You passed a data dictionary with keys ['image', 'label']. Expected the following keys: ['image_input', 'label']
which makes sense because the decoder returns uint8 type.
But I couldn't find any info or examples in the docs on how to change that.
Can I somehow just access the decoder Object's attributes?
I tried like in the API https://www.tensorflow.org/datasets/api_docs/python/tfds/decode/Decoder?hl=cs
but it doesn't work.
thank you very much in advance!
If it is augmentation you want, you should use these layers at the top of your model
# Augmentation pipeline built from Keras preprocessing layers:
# random horizontal flip, random rotation (factor 0.2) and random zoom (factor 0.1).
data_augmentation = tf.keras.Sequential([
tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal'),
tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
tf.keras.layers.experimental.preprocessing.RandomZoom(0.1),
])
For the scaling, you use this line
# Rescaling layer that multiplies its input by 1/255.
rescale = tf.keras.layers.experimental.preprocessing.Rescaling(1. / 255)
This should work smoothly in your model. Here is an example of a model with augmentation and rescaling
# Example classifier with rescaling and augmentation built into the model itself.
model = Sequential([
# Preprocessing: scale by 1/255 (this layer also declares the 256x256x3 input shape), then augment.
layers.experimental.preprocessing.Rescaling(1. / 255, input_shape=(256, 256, 3)),
layers.experimental.preprocessing.RandomFlip('horizontal'),
layers.experimental.preprocessing.RandomRotation(0.2),
layers.experimental.preprocessing.RandomZoom(0.1),
# Three conv/pool stages with 16, 32 and 64 filters.
layers.Conv2D(16, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(32, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(64, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
# Classifier head; the final Dense layer has no activation.
layers.Flatten(),
layers.Dense(128, activation='relu'),
layers.Dense(num_classes)
])

Tensorflow NIH Chest X-ray CNN validation accuracy not improving even with regularization

I’ve been working on a CNN that takes in a 224x224 grayscale xray image and outputs either 0 or 1 based on whether it detects an abnormality.
This is the dataset I am using. I split the dataset into two with 106496 images for training and the remaining 5624 for validation. Since they’re both from the same dataset, they should both come from the same distribution.
I tried training the model I described above using the pretrained InceptionV3 and VGG19 architectures without success. I then tried making my own model similar to the VGG19 architecture.
I simplified the model as much as possible so that the training accuracy was above 90% and added various regularizers such as dropout and l2. I also tried different hyperparameters and image augmentation but the validation accuracy wouldn’t exceed 70% after 5-10 epochs. The validation loss doesn't seem to drop at all either.
Here are my accuracy vs epoch and loss vs epoch curves (pink is train, green is validation):
And here is my code:
def create_model(settings):
    """
    Build and compile the VGG-style binary classifier.

    The network takes (224, 224, 1) input and runs five convolutional blocks
    (1x64, 1x96, 2x192, 3x384 and 3x512 filters), each closed by 2x2 max
    pooling, followed by global average pooling, dropout (0.3) and a single
    sigmoid unit. It is compiled with SGD configured from
    settings["lr_init"] and settings["momentum"], binary cross-entropy loss,
    and accuracy / precision / recall / AUC metrics. The model summary is
    printed before the model is returned.
    """
    def conv(filters, name):
        # Every convolution in the network shares these hyperparameters.
        return layers.Conv2D(filters, (3, 3), activation='relu', padding='same', kernel_initializer='he_uniform', use_bias=True, name=name)

    def pool(name):
        # 2x2 max pooling with stride 2 closes each block.
        return layers.MaxPool2D((2, 2), strides=(2, 2), name=name)

    stack = [
        layers.Input((224, 224, 1)),
        # block 1
        conv(64, 'block1_conv'),
        pool('block1_pool'),
        # block 2
        conv(96, 'block2_conv'),
        pool('block2_pool'),
        # block 3
        conv(192, 'block3_conv1'),
        conv(192, 'block3_conv2'),
        pool('block3_pool'),
        # block 4
        conv(384, 'block4_conv1'),
        conv(384, 'block4_conv2'),
        conv(384, 'block4_conv3'),
        pool('block4_pool'),
        # block 5
        conv(512, 'block5_conv1'),
        conv(512, 'block5_conv2'),
        conv(512, 'block5_conv3'),
        pool('block5_pool'),
        # classification head
        layers.GlobalAveragePooling2D(name='fc_pool'),
        layers.Dropout(0.3, name='fc_dropout'),
        layers.Dense(1, activation='sigmoid', name='fc_output'),
    ]

    network = tf.keras.models.Sequential()
    for layer in stack:
        network.add(layer)

    # compile model
    sgd = optimizers.SGD(
        learning_rate=settings["lr_init"],
        momentum=settings["momentum"],
    )
    tracked_metrics = [
        'accuracy',
        metrics.Precision(),
        metrics.Recall(),
        metrics.AUC(),
    ]
    network.compile(sgd, loss='binary_crossentropy', metrics=tracked_metrics)
    network.summary()
    return network
def configure_callbacks(settings):
    """
    Assemble the Keras callbacks used during training.

    Returns a list of three callbacks, in this order: TensorBoard logging to
    a timestamped directory under logs/fit/, ReduceLROnPlateau monitoring
    val_loss (configured from settings["lr_factor"], settings["lr_patience"]
    and settings["lr_min"]), and ModelCheckpoint writing weights-only
    checkpoints under saves/<modelname>/ when val_accuracy is the best so far.
    """
    # tensorboard: one log directory per run, keyed by start time
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    board = callbacks.TensorBoard(log_dir="logs/fit/" + timestamp, histogram_freq=1)

    # learning rate reduction on plateau
    plateau = callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=settings["lr_factor"],
        patience=settings["lr_patience"],
        min_lr=settings["lr_min"],
        verbose=1,
    )

    # save model: {epoch:03d} is left in the path for Keras to fill in
    checkpoint_path = "saves/" + settings["modelname"] + "/cp-{epoch:03d}"
    checkpoint = callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_accuracy',
        mode='max',
        save_weights_only=True,
        save_best_only=True,
        verbose=1,
    )

    return [board, plateau, checkpoint]
def get_data(settings):
    """
    Create the training and validation image iterators.

    Reads dataset/y_train_binary.csv, then builds two ``flow_from_dataframe``
    iterators over dataset/images from one shared ImageDataGenerator: the
    first NTRAIN rows for training and the remaining rows for validation.
    Images are loaded as 224x224 grayscale with the raw "Abnormal" column as
    the target and settings["batchsize"] as the batch size.

    Returns a ``(traindata, testdata)`` tuple.
    """
    frame = pd.read_csv("dataset/y_train_binary.csv")
    label_columns = [
        "Abnormal"
    ]
    augmenter = ImageDataGenerator(
        rescale=1./255.,
        rotation_range=5,
        brightness_range=(0.9, 1.1),
        zoom_range=(1, 1.1),
    )

    # Options shared by both iterators.
    # NOTE(review): the validation iterator uses the same augmenting generator
    # and shuffle=True as training — confirm this is intended.
    flow_options = dict(
        directory="dataset/images",
        x_col="Image Index",
        y_col=label_columns,
        color_mode='grayscale',
        batch_size=settings["batchsize"],
        class_mode="raw",
        target_size=(224,224),
        shuffle=True,
    )

    # 94.983% for training (106496 = 64*6656)
    traindata = augmenter.flow_from_dataframe(dataframe=frame[:NTRAIN], **flow_options)
    # 5.017% for testing (5624)
    testdata = augmenter.flow_from_dataframe(dataframe=frame[NTRAIN:], **flow_options)
    return (traindata, testdata)
def newtrain(settings):
    """
    Build a new model and train it for settings["epoch"] epochs.

    The model, callbacks and data iterators come from create_model,
    configure_callbacks and get_data. After fitting, the final weights are
    saved under saves/<modelname>/ with the epoch count in the file name.
    """
    network = create_model(settings)
    callback_list = configure_callbacks(settings)
    train_iter, val_iter = get_data(settings)

    # train
    network.fit(
        x=train_iter,
        epochs=settings["epoch"],
        validation_data=val_iter,
        callbacks=callback_list,
        verbose=1,
    )

    final_path = f"saves/{settings['modelname']}/cp-{settings['epoch']:03d}"
    network.save_weights(final_path)
I’m running out of ideas and it takes half a day to train 50 epochs so I would appreciate if anyone knows how I can solve this issue. Thanks.
If you do some EDA on the NIH Chest X-rays dataset, you may also see that there is a significant class imbalance among the 14 classes. From your model definition, I assume that you put normal images on one side and abnormal ones (13 cases) on the other. First of all, if this is true, I would say it's better to classify all cases - all of them are important in clinical practice.
Shift to 14 cases classification
You're using your own design model, but you should first start with the pre-trained model. It's better and next you can gradually integrate your own idea.
Use a pretrained model, e.g. DenseNet, EfficientNet, NFNet, etc.
In your data generator, you use shuffle=True for the test set, which is wrong, rather it should be False.
testdata = datagen.flow_from_dataframe(
....
target_size=(224,224),
shuffle=False
For better control of your input pipeline, IMO, you should write your own custom data generator and experiment with advanced augmentation to prevent overfitting stuff.

TensorFlow model gives wrong output after conversion

My goal is to develop an app that does image classification. The Keras model seems to work well, but after I convert it to TensorFlow Lite, the output is completely wrong.
These are the layers of the model
# Image classifier: rescaling, three conv/pool stages, then a dense head.
model = Sequential([
# Scales inputs by 1/255 inside the model and declares the input shape.
layers.experimental.preprocessing.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
layers.Conv2D(16, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(32, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(64, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(128, activation='relu'),
# Final layer has no activation, so the model emits one unnormalised score per class.
layers.Dense(num_classes)
])
This is how I save and then convert the model.
# Persist the trained Keras model to an .h5 file.
model.save('/content/drive/My Drive/my_model.h5')
TF_LITE_MODEL_NAME = "tf_lite_model.tflite"
# Convert the in-memory Keras model to a TFLite model.
tf_lite_converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = tf_lite_converter.convert()
tflite_model_name = TF_LITE_MODEL_NAME
# Write through a context manager so the file handle is flushed and closed
# deterministically instead of being left to garbage collection.
with open(tflite_model_name, "wb") as tflite_file:
    tflite_file.write(tflite_model)
Output is like this:
[[ 1.4364377 -0.02920453 -0.149581 -0.7537567 ]]
Why does this problem occur?
Edit (Code that gives random input to the model)
# Load the TFLite model and allocate its tensors.
interpreter = tf.lite.Interpreter(model_path="/content/tf_lite_model.tflite")
interpreter.allocate_tensors()
# Get the input and output tensor descriptions.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Test the model on random float32 input matching the model's input shape.
# NOTE(review): random_sample yields values in [0, 1); if the converted model still
# contains the Rescaling(1./255) layer, 0-255 pixel values are presumably expected — confirm.
input_shape = input_details[0]['shape']
input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()
# The function `get_tensor()` returns a copy of the tensor data.
# Use `tensor()` in order to get a pointer to the tensor.
# NOTE(review): the printed values are presumably raw scores (no softmax), since the
# Keras model's final Dense layer has no activation — confirm.
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data)

Merging tensors based on a key

I am dealing with a problem in which the network design requires merging the output of one part of the network with a tabular input (a second input) based on a key, and then training the network further on the merged data. It appears that there is no way to merge two tensors based on a key. Hence I thought of converting the tensor to a NumPy array, then to a pandas DataFrame, and then merging. The merged data would be converted back to a tensor and used further in the network. Below is the code for it:
def build_convnet(shape=(112, 112, 1)):
    """Build the per-frame convolutional feature extractor.

    Four stages of two 3x3 'same' ReLU convolutions (64, 128, 256 and 512
    filters), each followed by batch normalisation. The first three stages
    are separated by max pooling; the last one is reduced with global max
    pooling instead of a flatten.

    Args:
        shape: Input shape passed to the first convolution.

    Returns:
        The uncompiled ``keras.Sequential`` model.
    """
    from keras.layers import Conv2D, BatchNormalization, MaxPool2D, GlobalMaxPool2D
    momentum = .9
    convnet = keras.Sequential()

    # Stage 1: the first convolution declares the input shape.
    convnet.add(Conv2D(64, (3,3), input_shape=shape,
                       padding='same', activation='relu'))
    convnet.add(Conv2D(64, (3,3), padding='same', activation='relu'))
    convnet.add(BatchNormalization(momentum=momentum))

    # Stages 2-4: downsample, then two convolutions and batch norm.
    for filters in (128, 256, 512):
        convnet.add(MaxPool2D())
        convnet.add(Conv2D(filters, (3,3), padding='same', activation='relu'))
        convnet.add(Conv2D(filters, (3,3), padding='same', activation='relu'))
        convnet.add(BatchNormalization(momentum=momentum))

    # Collapse the spatial dimensions.
    convnet.add(GlobalMaxPool2D())
    return convnet
def action_model(shape=(3, 112, 112, 1)):
    """Build the time-distributed CNN + GRU model.

    The convnet from ``build_convnet`` is built for ``shape[1:]`` and applied
    to every step of the leading axis via ``TimeDistributed``. A GRU(64)
    follows, then a dense stack (1024, 512, 128 units, each followed by
    dropout 0.5, then 64 units) and a final 4-unit ReLU layer.

    Args:
        shape: Input shape of the model; ``shape[1:]`` is the convnet input
            shape.

    Returns:
        The uncompiled ``keras.Sequential`` model.
    """
    from keras.layers import TimeDistributed, GRU, Dense, Dropout, Concatenate
    # Convnet applied to each step; its input shape drops the leading axis.
    frame_encoder = build_convnet(shape[1:])

    network = keras.Sequential()
    network.add(TimeDistributed(frame_encoder, input_shape=shape))
    # here, you can also use GRU or LSTM
    network.add(GRU(64))

    # Decision network: dense layers with dropout in between.
    for units in (1024, 512, 128):
        network.add(Dense(units, activation='relu'))
        network.add(Dropout(.5))
    network.add(Dense(64, activation='relu'))
    network.add(Dense(4, activation='relu'))
    return network
# Create the tabular input placeholder and the CNN+GRU model.
tab_dt = keras.Input(shape=(trainX.shape[1],))
cnn_gru = action_model(X_train.shape[1:])
# Attempt to convert the tensors to NumPy arrays and merge them with the tabular data on a key (Patient).
# NOTE(review): this .eval() call is the line reported to raise the ValueError.
cnn_gru_np = cnn_gru.output.eval()
# NOTE(review): pandas spells the class `DataFrame` and takes `columns=`, so this line (and the
# matching one below) would presumably fail even if eval() succeeded — confirm.
cnn_gru_pd = pd.Dataframe(cnn_gru_np, names = ["V1", "V2", "V3", "V4"])
cnn_gru_pd["Patient"] = train_p
tab_dt_np = tab_dt.eval()
tab_dt_pd = pd.Dataframe(tab_dt_np, names = ["Weeks", "Percent", "Age", "Sex_Male", "SmokingStatus_Ex-smoker", "SmokingStatus_Never smoked"])
tab_dt_pd["Patient"] = train_p.numpy()
# Left-join the CNN+GRU features onto the tabular rows, then drop the key column.
combinedInput_pd = pd.merge(tab_dt_pd, cnn_gru_pd, on = ["Patient"], how = "left")
combinedInput_pd.drop(["Patient"], axis = 1, inplace = True)
# Convert the merged table back to a tensor.
combinedInput_np = np.array(combinedInput_pd)
combinedInput = tf.convert_to_tensor(combinedInput_np)
# Begin the regression head.
x = Dense(8, activation="relu")(combinedInput)
x = Dense(1, activation="relu")(x)
model = Model(inputs=[tab_dt, cnn_gru.input], outputs=x)
I am getting the below error for eval function in the line "cnn_gru_np = cnn_gru.output.eval()"
ValueError: Cannot evaluate tensor using `eval()`: No default session is registered. Use `with sess.as_default()` or pass an explicit session to `eval(session=sess)`
Please help with suggesting what is going wrong here.
The reason you're getting a ValueError is that the output of a keras model isn't an eager tensor, and thus does not support eval like that.
Just try
# Minimal reproduction: compare the type of a Keras model output with that of an eager tensor.
some_model = keras.Sequential([keras.layers.Dense(10, input_shape=(5,))])
print(type(some_model.output))
print(type(tf.zeros((2,))))
some_model.output.eval()
# Results of the two prints and the eval() call, in order:
# <class 'tensorflow.python.framework.ops.Tensor'>
# <class 'tensorflow.python.framework.ops.EagerTensor'>
# ValueError
However, there is a bigger problem with your approach: there is no connected computation graph from your model's inputs to your model's outputs, because none of the pandas operations are TensorFlow ops. That is, even if you were able to use eager tensors, you still wouldn't be able to train your model with automatic differentiation.
You're going to have to specify your entire model in tf I'm afraid.
Maybe you could do the data processing before giving it as input to the model? Then you only need split concat ops to put everything together?