I have an RNN model that has been trained on a Dataset:
# Take the first `train_size` samples, then batch them and repeat the dataset
# indefinitely.
train_features = data_x[:train_size]
train_labels = data_y[:train_size]
train = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
train = train.batch(batch_size).repeat()
model:
# Two stacked recurrent layers (GRU, then LSTM) followed by a dense head that
# ends in a softmax.
model = tf.keras.Sequential()
# batch_size is passed together with input_shape on the first layer.
model.add(tf.keras.layers.GRU(units=lstm_num_units,
                              return_sequences=True,
                              kernel_initializer='random_uniform',
                              recurrent_initializer='random_uniform',
                              bias_initializer='random_uniform',
                              batch_size=batch_size,
                              input_shape=[seq_len, num_features]))
model.add(tf.keras.layers.LSTM(units=lstm_num_units,
                               batch_size=batch_size,
                               return_sequences=True,
                               input_shape=[seq_len, num_features]))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=dence_units))
model.add(tf.keras.layers.Dropout(drop_flat))
model.add(tf.keras.layers.Dense(units=out_units))
model.add(tf.keras.layers.Softmax())
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=tf.train.RMSPropOptimizer(opt),
              metrics=['accuracy'])
# `callbacks` is a keyword argument of fit(), so the call is closed only after
# it (previously the parenthesis was closed one argument too early).
model.fit(train, epochs=EPOCHS,
          steps_per_epoch=repeat_size_train,
          validation_data=validate,
          validation_steps=repeat_size_validate,
          verbose=1,
          shuffle=True,
          callbacks=[tensorboard, cp_callback])
I need to run prediction on a single input of length seq_len, but it looks like my input has to have the full batch size:
# Random integers in [0, 98), generated as [batch_size, seq_len] and given a
# trailing feature axis of size 1 before being passed to predict().
ar = np.random.randint(98, size=[batch_size, seq_len]).reshape(
    [batch_size, seq_len, 1])
prediction = model.m.predict(ar)
Is there a way to make it work on a single input of shape [1, seq_len, 1]?
Yes, simply rebuild the model without a batch size in the first layer.
Copy the weights of the old model.
# Copy the trained weights from the original model into the rebuilt one.
newModel.set_weights(oldModel.get_weights())
A fixed batch size only matters for stateful=True models, where it keeps the states consistent between batches.
Even then, the batch size does not change the math, so the copied weights remain valid.
Related
I am training a deepfake image detection model using TensorFlow, but the validation accuracy is stuck at 67%. I have tried different optimizers, but it does not improve and only hovers around the same score.
Here are my steps for creating the model.
Importing data from the image folder
Create an ImageDataGenerator object to do some augmentation.
# Generator that multiplies pixel values by 1/255, enables horizontal flips,
# and reserves 20% of the data as the validation subset.
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    horizontal_flip=True,
)
Creating the model
image dimension: 299, 299, 3
# (height, width, channels), shared by the Input layer and the backbone.
input_shape = (
    image_dimensions['height'],
    image_dimensions['width'],
    image_dimensions['channels'],
)
input_layer = Input(shape=input_shape)
# ImageNet-pretrained EfficientNetB5 without its classification top; frozen so
# only the layers added below are trainable.
# NOTE(review): Keras EfficientNet models document an expected input pixel
# range of [0, 255] (rescaling is built in) — confirm this matches how the
# images are preprocessed before they reach the model.
base_model = keras.applications.EfficientNetB5(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape,
)
base_model.trainable = False
x = base_model(input_layer, training=False)
# Add pooling layer or flatten layer
y = GlobalAveragePooling2D()(x)
y = Dense(512, activation='relu')(y)
y = Dropout(0.4)(y)
y = Dense(256)(y)
# Add final dense layer
output_layer = Dense(1, activation='sigmoid')(y)
model = Model(inputs=input_layer, outputs=output_layer)
Training
efficientNet = EfficientNet(learning_rate=0.001)
efficientNet.summary()
# Both iterators read the same arrays; `subset` picks the training or the
# validation side of the generator's split.
train_flow = datagen.flow(X_train, y_train, batch_size=64, subset='training')
val_flow = datagen.flow(X_train, y_train, batch_size=64, subset='validation')
history = efficientNet.fit(train_flow, epochs=10, validation_data=val_flow)
Result
Here is the result of the model training
Is there any way I can fix this problem?
I'm trying to train a model that I loaded: I froze its layers and then added 3 new layers that I want to train. In the model.fit stage I'm getting InvalidArgumentError: required broadcastable shapes [Op:Sub]
This is the code I'm using
# Load Saved Model and freeze layers
file_path = r'F:\ku.ac.ae\Intelligent Robotic Manufacturing - Documents\codes\Visuotactile sensor\contact_est\final\m3_130x173_512x16_DATASET_3'
loaded_model = tf.keras.models.load_model(file_path)
tf.keras.backend.set_epsilon(1)

# Reuse every layer of the loaded model except its last three, and mark the
# reused layers as non-trainable.
model = tf.keras.models.Sequential(loaded_model.layers[:-3])
for layer in model.layers:
    layer.trainable = False
    #print(layer, layer.trainable)

# Add Layers
for new_layer in (
    tfl.Flatten(),
    tfl.Dense(64),
    tfl.Dense(66, activation='softmax'),
):
    model.add(new_layer)

# Print each layer together with its trainable flag.
for layer in model.layers:
    print(layer, layer.trainable)

model.compile(
    loss='mean_absolute_percentage_error',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    metrics=['mean_absolute_error'],
    #metrics=['accuracy'],
    run_eagerly=True,
)
file_name = 'freezed_m3_130x173_512x32_dataset3'
and then I run this
# Fit on the training arrays with shuffling, using (x_valid, y_valid) as the
# validation data.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=512,
    shuffle=True,
    validation_data=(x_valid, y_valid),
    #callbacks = callbacks_list,
)
I'm getting the error InvalidArgumentError: required broadcastable shapes [Op:Sub]
Any idea about this? Note that x_train and y_train have exactly the shapes the loaded model expects; in fact, they are the training dataset that was used to train the loaded model. I just want to experiment with the last layer.
Thanks
I have a simple Keras sequential model.
I have N categories and I have to predict which category the next point will fall into, based on the previous one.
The weird thing is that when I remove the softmax activation function from the output layer, the performance is better (lower loss and higher sparse_categorical_accuracy).
As the loss I'm using sparse_categorical_crossentropy with from_logits=True.
Is there any reason for that? Shouldn't it be the opposite?
Thank you in advance for any suggestion!
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Build an Embedding -> stateful GRU -> Dense sequential model.

    Args:
        vocab_size: Input size of the embedding and width of the output layer.
        embedding_dim: Output size of the embedding layer.
        rnn_units: Number of units in the GRU layer.
        batch_size: Fixed batch dimension of the model input.

    Returns:
        The (uncompiled) ``tf.keras.Sequential`` model.
    """
    embedding = tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=[batch_size, None],
    )
    recurrent = tf.keras.layers.GRU(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    )
    # The output layer applies softmax, so the model emits normalized
    # probabilities rather than raw logits.
    classifier = tf.keras.layers.Dense(vocab_size, activation='softmax')
    return tf.keras.Sequential([embedding, recurrent, classifier])
# Instantiate the network from the module-level hyperparameters.
model = build_model(vocab_size, embedding_dim, rnn_units, BATCH_SIZE)
def loss(labels, logits):
    """Return the sparse categorical cross-entropy of ``logits`` vs ``labels``.

    The second argument is passed with ``from_logits=True``, i.e. it is
    treated as unnormalized scores.
    """
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy
    return crossentropy(labels, logits, from_logits=True)
EPOCHS = 5
# Compile with the custom loss defined above, then train.
model.compile(
    optimizer='adam',
    loss=loss,
    metrics=['sparse_categorical_accuracy'],
)
history = model.fit(train_set, validation_data=val_set, epochs=EPOCHS)
In a nutshell, when you are using the option from_logits = True, you are telling the loss function that your neural network output is not normalized. Since you are using softmax activation in your last layer, your outputs are indeed normalized, so you have two options:
Remove the softmax activation as you have already tried. Keep in mind that, after this, your output probabilities won't be normalized.
Use from_logits = False.
Tensorflow Version = 2.0.0
# Unit count passed to both LSTM layers below.
num_units = 128
# Hand-built bidirectional block: one LSTM reads `inp` as given, a second LSTM
# reads a reversed copy, and the two outputs are concatenated on the last axis.
inp = Input(shape = (50,36))
fw = layers.LSTM(num_units, return_sequences=True, activity_regularizer=regularizers.l2(0.001))(inp)
# NOTE(review): a Keras input tensor carries a leading batch axis, so axes=0
# reverses the order of samples in the batch rather than the time steps —
# confirm which axis is intended.
bw = layers.LSTM(num_units, return_sequences=True, activity_regularizer=regularizers.l2(0.001))(K.reverse(inp, axes=0))
out = layers.Concatenate(axis=-1)([fw, bw])
brnn_model = Model(inputs=inp, outputs=out)
# Full model: masking -> the block above -> 10-way softmax layer.
model = tf.keras.Sequential()
model.add(layers.Masking(input_shape=(50,36)))
model.add(brnn_model)
model.add(layers.Dense(10, activation='softmax', activity_regularizer=regularizers.l2(0.001)))
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
model.summary()
model.fit(x_train, y_train, batch_size=32, epochs=14, shuffle=True, validation_data=(x_test, y_test))
With the above code I get the same results as when I don't use a bidirectional RNN and just use an LSTM layer instead. In short, the above code fails to act as a bidirectional network; it gives the same result as unidirectional LSTM layers.
If I use the pre-built Bidirectional wrapper, I get the expected result.
# Variant that uses Keras' built-in Bidirectional wrapper around one LSTM
# definition, followed by dropout and a 10-way softmax layer.
model = tf.keras.Sequential()
model.add(layers.Masking(input_shape=(50,36)))
model.add(layers.Bidirectional(layers.LSTM(num_units, return_sequences=True, activity_regularizer=regularizers.l2(0.001))))
model.add(layers.Dropout(0.3, noise_shape=None, seed=None))
model.add(layers.Dense(10, activation='softmax', activity_regularizer=regularizers.l2(0.001)))
# compile() is its own statement (it was fused onto the end of the previous
# line, which is not valid Python).
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
model.summary()
model.fit(x_train, y_train, batch_size=32, epochs=14, shuffle=True, validation_data=(x_test, y_test))
I have already checked that replacing
bw = layers.LSTM(num_units, return_sequences=True, activity_regularizer=regularizers.l2(0.001))(K.reverse(inp, axes=0))
with
bw = layers.LSTM(num_units, return_sequences=True, activity_regularizer=regularizers.l2(0.001))(inp)
has absolutely no effect on the results.
I don't understand how this is even possible.
You seem to be reversing the inputs along the batch dimension, and that won't have the effect you want. Instead of
K.reverse(inp, axes=0)
Try
K.reverse(inp, axes=1)
This is because the input to the LSTM is a 3D tensor laid out as [batch, time, input]. Reversing the input sequence therefore means reversing along the time dimension, which is axis 1.
Solution:
Change axes=0 to axes=1 as pointed out by thushv89.
Concatenate the reverse of bw with fw, because the bw layer's output for the last time step sits at index 0. (This also corrects the go_backwards=True case.)
Corrected code:
# Forward direction: LSTM over the sequence as given.
fw = layers.LSTM(
    num_units,
    return_sequences=True,
    activity_regularizer=regularizers.l2(0.001),
)(inp)
# Backward direction: a second LSTM fed the input reversed along axis 1.
backward_lstm = layers.LSTM(
    num_units,
    return_sequences=True,
    activity_regularizer=regularizers.l2(0.001),
)
bw = backward_lstm(K.reverse(inp, axes=1))
# Reverse the backward outputs along axis 1 again before concatenating them
# with the forward outputs on the last axis.
bw_realigned = K.reverse(bw, axes=1)
out = layers.Concatenate(axis=-1)([fw, bw_realigned])
I have an image classification problem and I want to use Keras pretrained models for this task.
When I use a model like this
# Frozen TF-Hub MobileNetV2 feature-vector layer, declared with an output
# shape of [1280].
feature_extractor = hub.KerasLayer(
    "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4",
    output_shape=[1280],
    trainable=False,
)
# Classification head: dropout followed by a softmax over `num_classes`.
model = tf.keras.Sequential([
    feature_extractor,
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])
model.build([None, image_size[0], image_size[1], 3])
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['acc'],
)
I easily get ~90% accuracy and very low loss on a balanced dataset. However, if I use keras.applications like this:
# ImageNet-pretrained MobileNetV2 without its classification top, frozen.
base_model = tf.keras.applications.mobilenet_v2.MobileNetV2(
    input_shape=input_img_size,
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False
# NOTE(review): `model` is used on the next line before this snippet assigns
# it; presumably the head should start from `base_model.output` (likely after
# a pooling layer, since include_top=False yields a spatial feature map) —
# confirm against the full script.
model = tf.keras.layers.Dropout(0.5)(model)
model = tf.keras.layers.Dense(num_classes, activation='softmax')(model)
model = tf.keras.models.Model(inputs=base_model.input, outputs=model)
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['acc'])
and use the proper tf.keras.applications.mobilenet_v2.preprocess_input function in the data generator (leaving everything else the same), it is stuck at around 60% validation accuracy and 80% training accuracy.
What is the difference between these approaches? Why is one superior to the other?
The data generator:
# Augmenting generator: applies `preprocessing_function` plus the geometric
# transforms and flips configured below.
augmentation_options = dict(
    preprocessing_function=preprocessing_function,
    # Geometric transforms.
    rotation_range=10,
    zoom_range=0.3,
    shear_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    # Flips.
    horizontal_flip=True,
    vertical_flip=True,
)
datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)
Training:
# Callbacks passed to training, in this order.
training_callbacks = [
    checkpoint,
    learning_rate_reduction,
    csv_logger,
    tensorboard_callback,
]
history = model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=nb_epochs,
    validation_data=valid_generator,
    validation_steps=val_steps_per_epoch,
    callbacks=training_callbacks,
    verbose=1,
)
I believe you are training two different 'models'. In your TensorFlow Hub example, you used MobileNet's feature vector. A feature vector, as I understand it, is not the same as a model: it is a 1-D tensor of a certain length, probably the output of the last layer before the output of the MobileNet model. This is different from the tf.keras example, where you are invoking the full MobileNet model.