How to average weights in Keras models, when I train few models with the same architecture with different initialisations?
Now my code looks something like this?
datagen = ImageDataGenerator(rotation_range=15,
width_shift_range=2.0/28,
height_shift_range=2.0/28
)
epochs = 40
lr = (1.234e-3)
optimizer = Adam(lr=lr)
main_input = Input(shape= (28,28,1), name='main_input')
sub_models = []
for i in range(5):
x = Conv2D(32, kernel_size=(3,3), strides=1)(main_input)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPool2D(pool_size=2)(x)
x = Conv2D(64, kernel_size=(3,3), strides=1)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPool2D(pool_size=2)(x)
x = Conv2D(64, kernel_size=(3,3), strides=1)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Flatten()(x)
x = Dense(1024)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.1)(x)
x = Dense(256)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.4)(x)
x = Dense(10, activation='softmax')(x)
sub_models.append(x)
x = keras.layers.average(sub_models)
main_output = keras.layers.average(sub_models)
model = Model(inputs=[main_input], outputs=[main_output])
model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
optimizer=optimizer)
print(model.summary())
plot_model(model, to_file='model.png')
filepath="weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
tensorboard = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)
callbacks = [checkpoint, tensorboard]
model.fit_generator(datagen.flow(X_train, y_train, batch_size=128),
steps_per_epoch=len(X_train) / 128,
epochs=epochs,
callbacks=callbacks,
verbose=1,
validation_data=(X_test, y_test))
So now I average only last layer, but I want to average weights in all layers after training each one separately.
Thanks!
So let's assume that models is a collection of your models. First - collect all weights:
weights = [model.get_weights() for model in models]
Now - create a new averaged weights:
new_weights = list()
for weights_list_tuple in zip(*weights):
new_weights.append(
[numpy.array(weights_).mean(axis=0)\
for weights_ in zip(*weights_list_tuple)])
And what is left is to set these weights in a new model:
new_model.set_weights(new_weights)
Of course - averaging weights might be a bad idea, but in case you try - you should follow this approach.
I can't comment on the accepted answer, but to make it work on tensorflow 2.0 with tf.keras I had to make the list in the loop into a numpy array:
new_weights = list()
for weights_list_tuple in zip(*weights):
new_weights.append(
np.array([np.array(w).mean(axis=0) for w in zip(*weights_list_tuple)])
)
If different input models need to be weighted differently, np.array(w).mean(axis=0) needs to be replaced with np.average(np.array(w),axis=0, weights=relative_weights) where relative_weights is an array with a weight factor for each model.
Related
Here is my training code:
def train():
#START
img_input = layers.Input(shape=(150, 150, 3))
x = layers.Conv2D(16, 3, activation='relu')(img_input)
x = layers.MaxPooling2D(2)(x)
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.MaxPooling2D(2)(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.MaxPooling2D(2)(x)
x = layers.Flatten()(x)
x = layers.Dense(512, activation='relu')(x)
output = layers.Dense(1, activation='sigmoid')(x)
model = Model(img_input, output)
model.compile(loss='binary_crossentropy',
optimizer=RMSprop(lr=0.001),
metrics=['acc'])
#END
# All images will be rescaled by 1./255
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
bs = 20
# Flow training images in batches of 20 using train_datagen generator
train_generator = train_datagen.flow_from_directory(
train_dir, # This is the source directory for training images
target_size=(150, 150), # All images will be resized to 150x150
batch_size=bs,
# Since we use binary_crossentropy loss, we need binary labels
class_mode='binary')
# Flow validation images in batches of 20 using val_datagen generator
validation_generator = val_datagen.flow_from_directory(
validation_dir,
target_size=(150, 150),
batch_size=bs,
class_mode='binary')
history = model.fit(
train_generator,
steps_per_epoch=train_steps,
epochs=4,
validation_data=validation_generator,
validation_steps=val_steps,
verbose=1)
model.save_weights("trained_weights.h5")
Here is my prediction code:
def evaluate(imgpath):
if not os.path.isfile(imgpath):
print("No such file: {}".format(imgpath))
sys.exit(-1)
# START
img_input = layers.Input(shape=(150, 150, 3))
x = layers.Conv2D(16, 3, activation='relu')(img_input)
x = layers.MaxPooling2D(2)(x)
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.MaxPooling2D(2)(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.MaxPooling2D(2)(x)
x = layers.Flatten()(x)
x = layers.Dense(512, activation='relu')(x)
output = layers.Dense(1, activation='sigmoid')(x)
model = Model(img_input, output)
model.compile(loss='binary_crossentropy',
optimizer=RMSprop(lr=0.001),
metrics=['acc'])
# END
model.load_weights("trained_weights.h5")
img = image.load_img(path=imgpath,grayscale=False,target_size=(150,150),color_mode='rgb')
img_arr = image.img_to_array(img)
test_img = np.expand_dims(img_arr, axis=0)
y_prob = model.predict(test_img)
classname = y_prob.argmax(axis=-1)
print("Class: ",classname)
return classname
I have a feeling that the error is somewhere in the last 5-6 lines of the evaluate function, where I am loading the image. The problem is that whenever I run the evaluate function for any image, my output is [0]. Even though the training went well, as seen in the image below.
enter image description here
Am I making some silly mistake somewhere?
since you have a single neuron as the top layer, when you do predictions you will get a single prediction. Since you have a single prediction using argmax will always return 0. What you need to do is to set a threshold value for the prediction for example
if yprob>=.5:
klass=1
else:
klass=0
Also as pointed out by Dr. Snoopy you should rescale your image by 1/255.
I come to you because I have a visualisation problem in my Jupyter-lab.
When I run a neural network in a cell of the jupyter-lab the progress bar works but, the text print every iteration. The cell :
seed = 1337
np.random.seed(seed)
loss_ = "categorical_crossentropy"
optimizer_ = "rmsprop"
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
x = Conv1D(128, 5, activation=activation_1, padding="same")(embedded_sequences)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation=activation_1, padding="same")(x)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation=activation_1, padding="same")(x)
x = MaxPooling1D(3)(x) # global max pooling
x = Flatten()(x)
x = Dense(128, activation=activation_1)(x)
preds = Dense(len(set(df.y)), activation=activation_2)(x)
model = Model(sequence_input, preds)
model.compile(loss=loss_,
optimizer=optimizer_,
metrics=[metrics.categorical_accuracy])
# happy learning!
model.fit(x_train, y_train, validation_data=(x_val, y_val),
epochs=25, batch_size=128, callbacks=[early_stopping_monitor, checkpoint])
The result that I obtain is like this :
I have updated the different packages to have the latest version in my anaconda environment.
How can I fix the visualisation of the progress bar ?
Best,
Chris
I'm Using tensorflow and keras to predict handwrting digits. For training I'm using nmist dataset.
the accuracy is about 98.8% after training. but sometimes in test its confuse between 4 and 9 , 7 and 3, i'm alerady optimize the image input with opencv, like remove noise, rescale, threshold etc.
What should i do next to improved this prdiction accuracy?
My plan is add more sample, and resize the sample image from 28x28 to 56x56.
Will this affect accuracy?
This my model for training:
epoc=15, batch size=64
def build_model():
model = Sequential()
# add Convolutional layers
model.add(Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
# Densely connected layers
model.add(Dense(128, activation='relu'))
# output layer
model.add(Dense(10, activation='softmax'))
# compile with adam optimizer & categorical_crossentropy loss function
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
return model
You can try to add regularization:
def conv2d_bn(x,
units,
kernel_size=(3, 3),
activation='relu',
dropout=.5):
y = Dropout(x)
y = Conv2D(units, kernel_size=kernel_size, use_bias=False)(y)
y = BatchNormalization(y)
y = Activation(activation)(y)
return y
def build_model(..., dropout=.5):
x = Input(shape=[...])
y = conv2d_bn(x, 32)
y = MaxPooling2D(y)
...
y = Dropout(dropout)(y)
y = Dense(10, activation='softmax')
model = Model(x, y)
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
return model
You can tweak the class weights to force the model to pay more attention to classes 3, 4, 7 and 9 during training:
model.fit(..., class_weights={0: 1, 1: 1, 2:1, 3:2, 4:2, 5:1, 6:1, 7:2, 8:1, 9:2})
If you have some time to burn, you can also try to grid or random-search the models hyperparameters. Something in the lines:
def build(conv_layers, dense_layers, dense_units, activation, dropout):
y = x = Input(shape=[...])
kernels = 32
kernel_size = (2, 2)
for i in range(conv_layers):
y = conv2d_bn(y, kernel_size, kernels, activation, dropout)
if i % 2 == 0: # or 3 or 4.
y = MaxPooling2D(y)
kernels *= 2
kernel_size = tuple(k+1 for k in kernel_size)
y = GlobalAveragePooling2D()(y)
for i in range(dense_layers):
y = Dropout(dropout)(y)
y = Dense(dense_units)(y)
y = Dense(10, activation='softmax')(y)
model = KerasClassifier(build_model,
epochs=epochs,
validation_split=validation_split,
verbose=0,
...)
params = dict(conv_layers=[2, 3, 4],
dense_layers=[0, 1],
activation=['relu', 'selu'],
dropout=[.2, .3, .5],
callbacks=[callbacks.EarlyStopping(patience=10,
restore_best_weights=True)])
grid = GridSearchCV(model, params,
scoring='balanced_accuracy_score',
verbose=2,
n_jobs=1)
Now, combining hyperparams searching with the NumpyArrayIterator is a little tricky, because the latter assumes we have all training samples (and targets) at hand before the training steps. It's still doable, though:
g = ImageDataGenerator(...)
cv = StratifiedKFold(n_splits=3)
results = dict(params=[], valid_score=[])
for params in ParameterGrid(params):
fold_scores = []
for t, v in cv.split(train_data, train_labels):
train = g.flow(train_data[t], train_labels[t], subset='training')
nn_valid = g.flow(train_data[t], train_labels[t], subset='validation')
fold_valid = g.flow(train_data[v], train_labels[v])
nn = build_model(**params)
nn.fit_generator(train, validation_data=nn_valid, ...)
probabilities = nn.predict_generator(fold_valid, steps=...)
p = np.argmax(probabilities, axis=1)
fold_scores += [metrics.accuracy_score(valid.classes_, p)]
results['params'] += [params]
results['valid_score'] += [fold_scores]
best_ix = np.argmax(np.mean(results['valid_score'], axis=1))
best_params = results['params'][best_ix]
nn = build_model(**best_params)
nn.fit_generator(...)
When I run this code, I get the following error and don`t know why?
AssertionError Traceback (most recent call last)
I try again and again, here is my code:
def ConvNet(embeddings, max_sequence_length, num_words, embedding_dim, labels_index, trainable=False, extra_conv=True):
embedding_layer = Embedding(num_words,
embedding_dim,
weights=[embeddings],
input_length=max_sequence_length,
trainable=trainable)
sequence_input = Input(shape=(max_sequence_length,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
# Yoon Kim model (https://arxiv.org/abs/1408.5882)
convs = []
filter_sizes = [3,4,5]
for filter_size in filter_sizes:
l_conv = Conv1D(filters=128, kernel_size=filter_size, activation='relu')(embedded_sequences)
l_pool = MaxPooling1D(pool_size=3)(l_conv)
convs.append(l_pool)
l_merge = Concatenate(axis=1)(convs)
# add a 1D convnet with global maxpooling, instead of Yoon Kim model
conv = Conv1D(filters=128, kernel_size=3, activation='relu')(embedded_sequences)
pool = MaxPooling1D(pool_size=3)(conv)
x = (l_merge)
if extra_conv==True:
x = Dropout(0.5)(l_merge)
else:
# Original Yoon Kim model
x = Dropout(0.5)(pool)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
# Finally, we feed the output into a Sigmoid layer.
# The reason why sigmoid is used is because we are trying to achieve a binary classification(1,0)
# for each of the 6 labels, and the sigmoid function will squash the output between the bounds of 0 and 1.
preds = Dense(labels_index, activation='sigmoid')(x)
model = Model(sequence_input, preds)
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['acc'])
model.summary()
return model
model = ConvNet(train_embedding_weights, MAX_SEQUENCE_LENGTH, len(train_word_index)+1, EMBEDDING_DIM,
len(list(label_names)), False)
And I get AssertionError.
I use keras with tf as the backend.
The goal of the simulation is attempting to use geo-spatial time series dataset to build a classifier. The target Y is labeled on -1, 0, 1 and 2, where -1 indicates the measured data at that grid point, 0 is meaning the data at good quality, 1 is middle quality and 2 is the worst.
Right now, i have two inputs. I have some atmospheric surface variables, such as wind, wind speed, and rain as one input. And , oceanic surface variables, such as sea surface temperature, and sea surface salinity as second input. The dimensions of all the datasets should be in, for example, (n_samples, n_timesteps, n_variables, n_xpoints: longitude, n_ypoints: latitude). The target dataset is in 3D dimensions like this: (n_samples, n_xpoints: longitude, n_ypoints: latitude).
In addition, all of the input variables are normalized by their value range. For example, the sea surface current velocity is normalized in the rage of (-1,1) from (-2, 2) [m/s], and the surface wind speed is normalized in the rage of (-1,1) from (-20,20) [m/s].
The model configuration is designed as described below.
def cnn():
model = Sequential()
model.add( Conv2D(64, (3,3), activation='relu',
data_format='channels_first', kernel_initializer='he_normal',
name='conv1') )
model.add( MaxPooling2D(pool_size=(2, 2), strides = (2,2)))
model.add( BatchNormalization() )
model.add( Conv2D(32, (3,3), activation='relu',
kernel_initializer='he_normal', data_format='channels_first',
name='conv2') )
model.add( MaxPooling2D(pool_size=(2, 2), strides = (2,2)))
model.add( Dropout(0.2) )
model.add( BatchNormalization() )
model.add( Activation('relu') )
model.add( MaxPooling2D(pool_size=(2, 2), strides = (2,2)))
model.add( Flatten() )
model.add( Dense(128,activation='relu') )
return model
def cnn2lstm(Input_shape, premo, name):
branch_in = Input(shape=Input_shape, dtype='float32')
model = TimeDistributed(premo)(branch_in)
model = LSTM(256, return_sequences=True, name=name+'_lstm1')(model)
model = TimeDistributed(Dense(4096, activation='relu'))(model)
model = TimeDistributed(Dropout(0.3))(model)
model = LSTM(256, return_sequences = True, name=name+'_lstm2')(model)
model = Dense(101, activation='sigmoid')(model)
model = Dropout(0.3)(model)
return branch_in, model
atm_in, atm = cnn2lstm(Train_atm.shape[1:], cnn(),'atm')
ocn_in, ocn = cnn2lstm(Train_ocn.shape[1:], cnn(),'ocn')
#--- two inputs into one output
x = keras.layers.concatenate([atm,ocn],axis=1)
x = LSTM(150,return_sequences=True)(x)
x = Dropout(0.2)(x)
x = LSTM(200,return_sequences=True)(x)
x = Dropout(0.2)(x)
x = LSTM(500)(x)
x = Dense(1001,activation='relu')(x)
x = Dense(2001,activation='relu')(x)
x = Dense(2501,activation='tanh')(x)
x = Dense(2701,activation='relu')(x)
x = Dense(3355,activation='softmax')(x)
x = Reshape((61,55),input_shape=(3355,))(x)
model2 = Model(inputs=[atm_in, ocn_in, bio_in], outputs=x)
plot_model(model2, show_shapes = True, to_file='model_way4_2.png')
model2.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
filepath='ways4_model02_best.hdf5'
checkpoint = ModelCheckpoint(filepath,monitor='val_acc', verbose=1,save_best_only=True,mode='max')
callbacks_list = [checkpoint]
hist = model2.fit([Train_atm, Train_ocn, Train_bio], Train_Y,
epochs=150, batch_size=3, validation_split=0.1,
shuffle=True, callbacks=callbacks_list, verbose=0)
scores = model2.evaluate([Train_atm, Train_ocn, Train_bio], Train_Y)
print("MODEL 2 %s: %.2f%%" % (model2.metrics_names[1], scores[1]*100))
The evaluation scores here is mostly like 83% or higher. But the value of the output from model2.predict doesn't give me the valid range like my target dataset. In contrary, the model output give me the value from 0 to 1 (0,1) with a similar pattern as the target dataset shows.
Could anyone tell any big issue I have in my DL algorithm?