Average weights in keras models - tensorflow

How can I average the weights of Keras models when I train several models with the same architecture but different initialisations?
Right now my code looks something like this:
# Light augmentation: small rotations and shifts of up to 2 pixels on 28x28 images.
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=2.0 / 28,
    height_shift_range=2.0 / 28,
)
epochs = 40
batch_size = 128
lr = 1.234e-3
optimizer = Adam(lr=lr)

main_input = Input(shape=(28, 28, 1), name='main_input')


def build_sub_model(inputs):
    """Return the softmax output tensor of one newly created CNN branch.

    Every call instantiates new layers, so each branch owns its own weights
    while reading from the same ``inputs`` tensor.
    """
    x = inputs
    # Three conv stages; only the first two are followed by max-pooling.
    for filters, pool in ((32, True), (64, True), (64, False)):
        x = Conv2D(filters, kernel_size=(3, 3), strides=1)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        if pool:
            x = MaxPool2D(pool_size=2)(x)
    x = Flatten()(x)
    # Two dense stages with their respective dropout rates.
    for units, drop_rate in ((1024, 0.1), (256, 0.4)):
        x = Dense(units)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Dropout(drop_rate)(x)
    return Dense(10, activation='softmax')(x)


sub_models = [build_sub_model(main_input) for _ in range(5)]
# Element-wise average of the five softmax outputs (outputs, not weights).
main_output = keras.layers.average(sub_models)
model = Model(inputs=[main_input], outputs=[main_output])
model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
              optimizer=optimizer)
model.summary()  # summary() prints by itself; wrapping it in print() adds "None"
plot_model(model, to_file='model.png')

filepath = "weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
tensorboard = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)
callbacks = [checkpoint, tensorboard]

model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),
                    # ceil division: an integer number of steps covering every sample
                    steps_per_epoch=-(-len(X_train) // batch_size),
                    epochs=epochs,
                    callbacks=callbacks,
                    verbose=1,
                    validation_data=(X_test, y_test))
So right now I only average the outputs of the last layer, but I want to average the weights of all layers after training each model separately.
Thanks!

So let's assume that models is a collection of your models. First - collect all weights:
weights = [model.get_weights() for model in models]
Now - create a new averaged weights:
# `weights` holds one get_weights() list per model; zip(*weights) groups the
# corresponding weight tensor of every model together.
new_weights = []
for weights_list_tuple in zip(*weights):
    row_means = []
    # Walk the tensors along their first axis in lockstep and average each
    # slice across the models.
    for weights_ in zip(*weights_list_tuple):
        row_means.append(numpy.array(weights_).mean(axis=0))
    new_weights.append(row_means)
And what is left is to set these weights in a new model:
new_model.set_weights(new_weights)
Of course - averaging weights might be a bad idea, but in case you try - you should follow this approach.

I can't comment on the accepted answer, but to make it work on tensorflow 2.0 with tf.keras I had to make the list in the loop into a numpy array:
new_weights = []
for weights_list_tuple in zip(*weights):
    # Each `w` is the same first-axis slice of this weight tensor taken from
    # every model; average it across the models.
    averaged_slices = [np.array(w).mean(axis=0) for w in zip(*weights_list_tuple)]
    # Stack the slices back into one array before handing it to set_weights().
    new_weights.append(np.array(averaged_slices))
If different input models need to be weighted differently, np.array(w).mean(axis=0) needs to be replaced with np.average(np.array(w),axis=0, weights=relative_weights) where relative_weights is an array with a weight factor for each model.

Related

What is wrong with my neural networks prediction code? All predictions are returning the same class name for every image

Here is my training code:
def train():
    """Build the binary image classifier, fit it and save its weights.

    Reads ``train_dir``, ``validation_dir``, ``train_steps`` and ``val_steps``
    from the enclosing scope and writes the trained weights to
    "trained_weights.h5".
    """
    # Model: three conv/max-pool stages followed by a dense head with a
    # single sigmoid output unit.
    inputs = layers.Input(shape=(150, 150, 3))
    features = inputs
    for n_filters in (16, 32, 64):
        features = layers.Conv2D(n_filters, 3, activation='relu')(features)
        features = layers.MaxPooling2D(2)(features)
    features = layers.Flatten()(features)
    features = layers.Dense(512, activation='relu')(features)
    prediction = layers.Dense(1, activation='sigmoid')(features)

    model = Model(inputs, prediction)
    model.compile(loss='binary_crossentropy',
                  optimizer=RMSprop(lr=0.001),
                  metrics=['acc'])

    # Both generators rescale pixel values by 1/255.
    train_datagen = ImageDataGenerator(rescale=1./255)
    val_datagen = ImageDataGenerator(rescale=1./255)

    # Shared flow settings: 150x150 images, batches of 20, and binary labels
    # to match the binary_crossentropy loss.
    bs = 20
    flow_options = dict(target_size=(150, 150), batch_size=bs, class_mode='binary')
    train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
    validation_generator = val_datagen.flow_from_directory(validation_dir, **flow_options)

    history = model.fit(
        train_generator,
        steps_per_epoch=train_steps,
        epochs=4,
        validation_data=validation_generator,
        validation_steps=val_steps,
        verbose=1)
    model.save_weights("trained_weights.h5")
Here is my prediction code:
def evaluate(imgpath):
    """Load the saved weights and predict on the image at ``imgpath``.

    Exits the process with status -1 when ``imgpath`` is not an existing file.
    Prints and returns the argmax over the last axis of the model output.
    """
    if not os.path.isfile(imgpath):
        print("No such file: {}".format(imgpath))
        sys.exit(-1)

    # Rebuild the same architecture used for training so the saved weights fit.
    inputs = layers.Input(shape=(150, 150, 3))
    features = inputs
    for n_filters in (16, 32, 64):
        features = layers.Conv2D(n_filters, 3, activation='relu')(features)
        features = layers.MaxPooling2D(2)(features)
    features = layers.Flatten()(features)
    features = layers.Dense(512, activation='relu')(features)
    prediction = layers.Dense(1, activation='sigmoid')(features)

    model = Model(inputs, prediction)
    model.compile(loss='binary_crossentropy',
                  optimizer=RMSprop(lr=0.001),
                  metrics=['acc'])
    model.load_weights("trained_weights.h5")

    # Load the image as a 150x150 RGB array and add a leading batch axis.
    img = image.load_img(path=imgpath,grayscale=False,target_size=(150,150),color_mode='rgb')
    test_img = np.expand_dims(image.img_to_array(img), axis=0)

    y_prob = model.predict(test_img)
    classname = y_prob.argmax(axis=-1)
    print("Class: ",classname)
    return classname
I have a feeling that the error is somewhere in the last 5-6 lines of the evaluate function, where I am loading the image. The problem is that whenever I run the evaluate function for any image, my output is [0]. Even though the training went well, as seen in the image below.
enter image description here
Am I making some silly mistake somewhere?
Since you have a single neuron as the top layer, each prediction is a single value, so calling argmax on it will always return 0. What you need to do instead is apply a threshold to the predicted probability, for example:
# y_prob is the array returned by model.predict() for one image; with a single
# sigmoid output unit it holds exactly one probability at [0][0].
klass = 1 if y_prob[0][0] >= .5 else 0
Also as pointed out by Dr. Snoopy you should rescale your image by 1/255.

Default visualisation keras progress bar neural net

I come to you because I have a visualisation problem in my Jupyter-lab.
When I run a neural network in a Jupyter-lab cell, the progress bar works, but its text is printed again on every iteration. The cell:
# Fix the NumPy random seed.
seed = 1337
np.random.seed(seed)

loss_ = "categorical_crossentropy"
optimizer_ = "rmsprop"

sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)

# Three Conv1D/MaxPooling1D stages with pool sizes 5, 5 and 3.
x = embedded_sequences
for pool_size in (5, 5, 3):
    x = Conv1D(128, 5, activation=activation_1, padding="same")(x)
    x = MaxPooling1D(pool_size)(x)
x = Flatten()(x)
x = Dense(128, activation=activation_1)(x)
# One output unit per distinct value in df.y.
preds = Dense(len(set(df.y)), activation=activation_2)(x)

model = Model(sequence_input, preds)
model.compile(loss=loss_,
              optimizer=optimizer_,
              metrics=[metrics.categorical_accuracy])

model.fit(x_train, y_train, validation_data=(x_val, y_val),
          epochs=25, batch_size=128, callbacks=[early_stopping_monitor, checkpoint])
The result that I obtain is like this :
I have updated the different packages to have the latest version in my anaconda environment.
How can I fix the visualisation of the progress bar ?
Best,
Chris

TensorFlow Keras Optimise prediction

I'm using TensorFlow and Keras to predict handwritten digits. For training I'm using the MNIST dataset.
The accuracy is about 98.8% after training, but at test time the model sometimes confuses 4 with 9 and 7 with 3. I have already optimized the input image with OpenCV (noise removal, rescaling, thresholding, etc.).
What should I do next to improve the prediction accuracy?
My plan is to add more samples and to resize the sample images from 28x28 to 56x56.
Will this affect accuracy?
This my model for training:
epoc=15, batch size=64
def build_model():
    """Return a compiled Sequential CNN with a 10-way softmax output.

    The input shape is read from ``input_shape`` in the enclosing scope.
    """
    # Settings shared by all three convolutional layers.
    conv_options = dict(kernel_size=(3, 3), activation='relu', padding='same')
    stack = [
        # Convolutional feature extractor: conv -> pool, three times.
        Conv2D(filters=32, input_shape=input_shape, **conv_options),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(filters=64, **conv_options),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(filters=64, **conv_options),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        # Dense head and softmax output layer.
        Dense(128, activation='relu'),
        Dense(10, activation='softmax'),
    ]
    model = Sequential(stack)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model
You can try to add regularization:
def conv2d_bn(x,
              units,
              kernel_size=(3, 3),
              activation='relu',
              dropout=.5):
    """Apply Dropout -> Conv2D -> BatchNormalization -> Activation to ``x``.

    Args:
        x: input tensor.
        units: number of convolution filters.
        kernel_size: convolution kernel size.
        activation: activation applied after batch normalization.
        dropout: dropout rate applied to ``x`` before the convolution.

    Returns:
        The output tensor of the activation layer.
    """
    # Keras layers are constructed first and then called on a tensor.
    y = Dropout(dropout)(x)
    # No bias here: the convolution output goes straight into BatchNormalization.
    y = Conv2D(units, kernel_size=kernel_size, use_bias=False)(y)
    y = BatchNormalization()(y)
    y = Activation(activation)(y)
    return y
def build_model(..., dropout=.5):
x = Input(shape=[...])
y = conv2d_bn(x, 32)
y = MaxPooling2D(y)
...
y = Dropout(dropout)(y)
y = Dense(10, activation='softmax')
model = Model(x, y)
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
return model
You can tweak the class weights to force the model to pay more attention to classes 3, 4, 7 and 9 during training:
# The keyword is `class_weight` (singular); it maps class index -> loss weight.
model.fit(..., class_weight={0: 1, 1: 1, 2: 1, 3: 2, 4: 2, 5: 1, 6: 1, 7: 2, 8: 1, 9: 2})
If you have some time to burn, you can also try to grid or random-search the models hyperparameters. Something in the lines:
def build(conv_layers, dense_layers, dense_units, activation, dropout):
    """Build and compile a CNN whose depth and width are hyperparameters.

    Args:
        conv_layers: number of conv2d_bn stages.
        dense_layers: number of hidden Dense layers before the output layer.
        dense_units: units in each hidden Dense layer.
        activation: activation used in the conv stages and hidden Dense layers.
        dropout: dropout rate used throughout.

    Returns:
        The compiled model, as a scikit-learn wrapper's build function must.
    """
    y = x = Input(shape=[...])
    kernels = 32
    kernel_size = (2, 2)
    for i in range(conv_layers):
        # conv2d_bn takes (x, units, kernel_size, activation, dropout).
        y = conv2d_bn(y, kernels, kernel_size, activation, dropout)
        if i % 2 == 0:  # or 3 or 4.
            y = MaxPooling2D()(y)
            # Widen the network and the kernel after each down-sampling step.
            kernels *= 2
            kernel_size = tuple(k+1 for k in kernel_size)
    y = GlobalAveragePooling2D()(y)
    for i in range(dense_layers):
        y = Dropout(dropout)(y)
        # Without a non-linearity, stacked Dense layers collapse into one
        # linear map.
        y = Dense(dense_units, activation=activation)(y)
    y = Dense(10, activation='softmax')(y)
    model = Model(x, y)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model
model = KerasClassifier(build_model,
epochs=epochs,
validation_split=validation_split,
verbose=0,
...)
params = dict(conv_layers=[2, 3, 4],
dense_layers=[0, 1],
activation=['relu', 'selu'],
dropout=[.2, .3, .5],
callbacks=[callbacks.EarlyStopping(patience=10,
restore_best_weights=True)])
grid = GridSearchCV(model, params,
scoring='balanced_accuracy_score',
verbose=2,
n_jobs=1)
Now, combining hyperparams searching with the NumpyArrayIterator is a little tricky, because the latter assumes we have all training samples (and targets) at hand before the training steps. It's still doable, though:
g = ImageDataGenerator(...)
cv = StratifiedKFold(n_splits=3)
results = dict(params=[], valid_score=[])
for params in ParameterGrid(params):
fold_scores = []
for t, v in cv.split(train_data, train_labels):
train = g.flow(train_data[t], train_labels[t], subset='training')
nn_valid = g.flow(train_data[t], train_labels[t], subset='validation')
fold_valid = g.flow(train_data[v], train_labels[v])
nn = build_model(**params)
nn.fit_generator(train, validation_data=nn_valid, ...)
probabilities = nn.predict_generator(fold_valid, steps=...)
p = np.argmax(probabilities, axis=1)
fold_scores += [metrics.accuracy_score(valid.classes_, p)]
results['params'] += [params]
results['valid_score'] += [fold_scores]
best_ix = np.argmax(np.mean(results['valid_score'], axis=1))
best_params = results['params'][best_ix]
nn = build_model(**best_params)
nn.fit_generator(...)

Getting an AssertionError when using keras

When I run this code, I get the following error and don't know why:
AssertionError Traceback (most recent call last)
I try again and again, here is my code:
def ConvNet(embeddings, max_sequence_length, num_words, embedding_dim, labels_index, trainable=False, extra_conv=True):
    """Build and compile a 1D-convolutional multi-label text classifier.

    Args:
        embeddings: weight matrix used to initialise the Embedding layer.
        max_sequence_length: length of each integer-encoded input sequence.
        num_words: vocabulary size of the Embedding layer.
        embedding_dim: dimensionality of the embedding vectors.
        labels_index: number of output units (one sigmoid per label).
        trainable: whether the embedding weights are updated during training.
        extra_conv: if truthy, use the concatenated multi-filter branch
            (kernel sizes 3, 4 and 5); otherwise use a single convolution
            with kernel size 3.

    Returns:
        The compiled Keras model (its summary is printed as a side effect).
    """
    embedding_layer = Embedding(num_words,
                                embedding_dim,
                                weights=[embeddings],
                                input_length=max_sequence_length,
                                trainable=trainable)
    sequence_input = Input(shape=(max_sequence_length,), dtype='int32')
    embedded_sequences = embedding_layer(sequence_input)

    if extra_conv:
        # Multi-filter branch after Yoon Kim (https://arxiv.org/abs/1408.5882):
        # parallel convolutions with different kernel sizes, concatenated
        # along the sequence axis.
        convs = []
        for filter_size in [3, 4, 5]:
            l_conv = Conv1D(filters=128, kernel_size=filter_size, activation='relu')(embedded_sequences)
            l_pool = MaxPooling1D(pool_size=3)(l_conv)
            convs.append(l_pool)
        features = Concatenate(axis=1)(convs)
    else:
        # Single convolution + max-pooling branch.
        conv = Conv1D(filters=128, kernel_size=3, activation='relu')(embedded_sequences)
        features = MaxPooling1D(pool_size=3)(conv)

    x = Dropout(0.5)(features)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    # One independent sigmoid per label: each label is its own binary (1/0)
    # decision, squashed between 0 and 1.
    preds = Dense(labels_index, activation='sigmoid')(x)

    model = Model(sequence_input, preds)
    # Independent sigmoid outputs pair with binary_crossentropy;
    # categorical_crossentropy assumes one mutually exclusive class per sample.
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])
    model.summary()
    return model


model = ConvNet(train_embedding_weights, MAX_SEQUENCE_LENGTH, len(train_word_index)+1, EMBEDDING_DIM,
                len(list(label_names)), False)
And I get AssertionError.

training output is not in the valid range by using CNN with LSTM

I use keras with tf as the backend.
The goal of the simulation is attempting to use geo-spatial time series dataset to build a classifier. The target Y is labeled on -1, 0, 1 and 2, where -1 indicates the measured data at that grid point, 0 is meaning the data at good quality, 1 is middle quality and 2 is the worst.
Right now, i have two inputs. I have some atmospheric surface variables, such as wind, wind speed, and rain as one input. And , oceanic surface variables, such as sea surface temperature, and sea surface salinity as second input. The dimensions of all the datasets should be in, for example, (n_samples, n_timesteps, n_variables, n_xpoints: longitude, n_ypoints: latitude). The target dataset is in 3D dimensions like this: (n_samples, n_xpoints: longitude, n_ypoints: latitude).
In addition, all of the input variables are normalized by their value range. For example, the sea surface current velocity is normalized from (-2, 2) [m/s] to the range (-1, 1), and the surface wind speed is normalized from (-20, 20) [m/s] to the range (-1, 1).
The model configuration is designed as described below.
def cnn():
    """Return an uncompiled Sequential CNN that ends in a 128-unit Dense layer."""
    # Settings shared by both convolution layers.
    conv_kwargs = dict(activation='relu',
                       data_format='channels_first',
                       kernel_initializer='he_normal')
    # Settings shared by the three pooling layers.
    pool_kwargs = dict(pool_size=(2, 2), strides=(2, 2))

    stack = [
        Conv2D(64, (3, 3), name='conv1', **conv_kwargs),
        MaxPooling2D(**pool_kwargs),
        BatchNormalization(),
        Conv2D(32, (3, 3), name='conv2', **conv_kwargs),
        MaxPooling2D(**pool_kwargs),
        Dropout(0.2),
        BatchNormalization(),
        Activation('relu'),
        MaxPooling2D(**pool_kwargs),
        Flatten(),
        Dense(128, activation='relu'),
    ]
    model = Sequential()
    for layer in stack:
        model.add(layer)
    return model
def cnn2lstm(Input_shape, premo, name):
    """Build one input branch: per-timestep ``premo`` followed by LSTM/Dense layers.

    Args:
        Input_shape: shape of one sample (without the batch axis).
        premo: model applied to every timestep through TimeDistributed.
        name: prefix for the names of the two LSTM layers.

    Returns:
        A tuple ``(input_tensor, output_tensor)`` for the branch.
    """
    branch_in = Input(shape=Input_shape, dtype='float32')

    seq = TimeDistributed(premo)(branch_in)
    seq = LSTM(256, return_sequences=True, name=name+'_lstm1')(seq)
    seq = TimeDistributed(Dense(4096, activation='relu'))(seq)
    seq = TimeDistributed(Dropout(0.3))(seq)
    seq = LSTM(256, return_sequences = True, name=name+'_lstm2')(seq)
    seq = Dense(101, activation='sigmoid')(seq)
    branch_out = Dropout(0.3)(seq)
    return branch_in, branch_out
# One CNN+LSTM branch per input dataset.
atm_in, atm = cnn2lstm(Train_atm.shape[1:], cnn(), 'atm')
ocn_in, ocn = cnn2lstm(Train_ocn.shape[1:], cnn(), 'ocn')

# Merge the two branches along axis 1 and reduce them to a single output.
x = keras.layers.concatenate([atm, ocn], axis=1)
for lstm_units in (150, 200):
    x = LSTM(lstm_units, return_sequences=True)(x)
    x = Dropout(0.2)(x)
x = LSTM(500)(x)
for dense_units, dense_activation in ((1001, 'relu'),
                                      (2001, 'relu'),
                                      (2501, 'tanh'),
                                      (2701, 'relu'),
                                      (3355, 'softmax')):
    x = Dense(dense_units, activation=dense_activation)(x)
# 3355 = 61 * 55: reshape the flat softmax vector into the 2-D target grid.
x = Reshape((61, 55), input_shape=(3355,))(x)

# NOTE(review): bio_in / Train_bio are not defined in this snippet;
# presumably they come from a third branch built elsewhere -- confirm.
model2 = Model(inputs=[atm_in, ocn_in, bio_in], outputs=x)
plot_model(model2, show_shapes = True, to_file='model_way4_2.png')
model2.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

# Keep only the weights with the best validation accuracy.
filepath = 'ways4_model02_best.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

train_inputs = [Train_atm, Train_ocn, Train_bio]
hist = model2.fit(train_inputs, Train_Y,
                  epochs=150, batch_size=3, validation_split=0.1,
                  shuffle=True, callbacks=callbacks_list, verbose=0)
scores = model2.evaluate(train_inputs, Train_Y)
print("MODEL 2 %s: %.2f%%" % (model2.metrics_names[1], scores[1]*100))
The evaluation score here is mostly around 83% or higher. But the output of model2.predict is not in the valid range of my target dataset. Instead, the model outputs values between 0 and 1, with a pattern similar to the one the target dataset shows.
Could anyone tell any big issue I have in my DL algorithm?