I'm trying to train a neural network made with the Keras Functional API with one of the default TFDS Datasets, but I keep getting dataset related errors.
The idea is doing a model for object detection, but for the first draft I was trying to do just plain image classification (img, label). The input would be (256x256x3) images. The input layer is as follows:
img_inputs = keras.Input(shape=[256, 256, 3], name='image')
Then I'm trying to use the voc2007 dataset as available in TFDS (a very old and light version to make it faster)
(train_ds, test_ds), ds_info = tfds.load(
'voc/2007',
split=['train', 'test'],
data_dir="/content/drive/My Drive",
with_info=True)
then preprocessing the data as follows:
def resize_and_normalize_img(example):
"""Normalizes images: `uint8` -> `float32`."""
example['image'] = tf.image.resize(example['image'], [256, 256])
example['image'] = tf.cast(example['image'], tf.float32) / 255.
return example
def reduce_for_classification(example):
for key in ['image/filename', 'labels_no_difficult', 'objects']:
example.pop(key)
return example
train_ds_class = train_ds.map(reduce_for_classification, num_parallel_calls=tf.data.AUTOTUNE)
train_ds_class = train_ds_class.map(resize_and_normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
train_ds_class = train_ds_class.cache()
train_ds_class = train_ds_class.shuffle(ds_info.splits['train'].num_examples)
train_ds_class = train_ds_class.batch(64)
train_ds_class = train_ds_class.prefetch(tf.data.AUTOTUNE)
test_ds_class = test_ds.map(reduce_for_classification, num_parallel_calls=tf.data.AUTOTUNE)
test_ds_class = test_ds_class.map(resize_and_normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
test_ds_class = test_ds_class.batch(64)
test_ds_class = test_ds_class.cache()
test_ds_class = test_ds_class.prefetch(tf.data.AUTOTUNE)
And then fitting the model like:
epochs=8
history = model.fit(
x=train_x, y =trian_y,
validation_data=test_ds_clas,
epochs=epochs
)
And after doing this is when I get an error saying that my model expects an input of shape [None, 256, 256, 3] but it's getting an input of shape [256, 256, 3].
I think it's an issue to do with the label. Before I got problems with the extra keys from the dictionary-like format of the data you get from tfds and tried to remove everything except the label, but now I'm still getting this and don't know how to go forward. I feel like after getting the dataset prepared with tfds it should be ready to be fed to a model, and after looking through the documentation, tutorials and stack overflow I haven't found the answer, I hope someone who comes across this can help.
Update:
To give a bit more of information, this is the model I'm using:
TLDR: Image input 256x256x3, a succession of convolutions and residual blocks, and an ending with average pooling, fully connected layer, and softmax that results in a (None, 1280) tensor. Using sparse categorical cross-entropy as loss and accuracy as metric.
img_inputs = keras.Input(shape=[256, 256, 3], name='image')
# first convolution
conv_first = tf.keras.layers.Conv2D(32, kernel_size=(3, 3), padding='same', name='first_conv')
x = conv_first(img_inputs)
# Second convolution
x = tf.keras.layers.Conv2D(64, kernel_size=(3, 3), strides=2, padding='same', name='second_conv')(x)
# First residual block
res = tf.keras.layers.Conv2D(32, kernel_size=(1, 1), name='res_block1_conv1')(x)
res = tf.keras.layers.Conv2D(64, kernel_size=(3, 3), padding='same', name='res_block1_conv2')(res)
x = x + res
# Convolution after First residual block
x = tf.keras.layers.Conv2D(128, kernel_size=3, strides=2, padding='same', name='first_post_res_conv')(x)
# Second residual Block
for i in range(2):
shortcut = x
res = tf.keras.layers.Conv2D(64, kernel_size=1, name=f'res_block2_conv1_loop{i}')(x)
res = tf.keras.layers.Conv2D(128, kernel_size=3, padding='same', name=f'res_block2_conv2_loop{i}')(res)
x = res + shortcut
# Convolution after Second residual block
x = tf.keras.layers.Conv2D(256, 3, strides=2, padding='same', name='second_post_res_conv')(x)
# Third residual Block
for i in range(8):
shortcut = x
res = tf.keras.layers.Conv2D(128, kernel_size=1, name=f'res_block3_conv1_loop{i}')(x)
res = tf.keras.layers.Conv2D(256, kernel_size=3, padding='same', name=f'res_block3_conv2_loop{i}')(res)
x = res + shortcut
# Convolution after Third residual block
x = tf.keras.layers.Conv2D(512, 3, strides=2, padding='same', name='third_post_res_conv')(x)
# Fourth residual Block
for i in range(8):
shortcut = x
res = tf.keras.layers.Conv2D(256, kernel_size=1, name=f'res_block4_conv1_loop{i}')(x)
res = tf.keras.layers.Conv2D(512, kernel_size=3, padding='same', name=f'res_block4_conv2_loop{i}')(res)
x = res + shortcut
# Convolution after Fourth residual block
x = tf.keras.layers.Conv2D(1024, 3, strides=2, padding='same', name='fourth_post_res_conv')(x)
# Fifth residual Block
for i in range(4):
shortcut = x
res = tf.keras.layers.Conv2D(512, kernel_size=1, name=f'res_block5_conv1_loop{i}')(x)
res = tf.keras.layers.Conv2D(1024, kernel_size=3, padding='same', name=f'res_block5_conv2_loop{i}')(res)
x = res + shortcut
# Global avg pooling
x = tf.keras.layers.GlobalAveragePooling2D(name='average_pooling')(x)
# Fully connected layer
x = tf.keras.layers.Dense(1280, name='fully_connected_layer')(x)
# Softmax
end_result = tf.keras.layers.Softmax(name='softmax')(x)
model = tf.keras.Model(inputs=img_inputs, outputs=end_result, name="darknet53")
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
After trying the solution proposed by AloneTogether I'm getting the following errors (I tried changing the axis in the tf.one_hot() function many times and same result):
Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
logits and labels must have the same first dimension, got logits shape [64,1280] and labels shape [1280]
[[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_20172]
Which seems to be related to the batching, but don't know exactly how to fix it.
The whole issue really seems related to the labels encoding, because when running that line without the tf.reduce_sum() function I get the same but with:
First element had shape [2,20] and element 1 had shape [1,20].
And if I run the same without the one-hot encoding line, I get this error:
´´´
Node: 'IteratorGetNext'
Cannot batch tensors with different shapes in component 1. First element had shape [4] and element 1 had shape [1].
[[{{node IteratorGetNext}}]] [Op:__inference_train_function_18534]
´´´
I think the problem is that each image can belong to multiple classes, so I would recommend one-hot encoding the labels. It should then work. Here is an example:
import tensorflow as tf
import tensorflow_datasets as tfds
def resize_and_normalize_img(example):
"""Normalizes images: `uint8` -> `float32`."""
example['image'] = tf.image.resize(example['image'], [256, 256])
example['image'] = tf.cast(example['image'], tf.float32) / 255.
return example['image'], example['labels']
def reduce_for_classification(example):
for key in ['image/filename', 'labels_no_difficult', 'objects']:
example.pop(key)
return example
(train_ds, test_ds), ds_info = tfds.load('voc/2007', split=['train', 'test'], with_info=True)
train_ds_class = train_ds.map(reduce_for_classification, num_parallel_calls=tf.data.AUTOTUNE)
train_ds_class = train_ds_class.map(resize_and_normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
train_ds_class = train_ds_class.map(lambda x, y: (x, tf.reduce_sum(tf.one_hot(y, 20, axis=-1), axis=0)))
train_ds_class = train_ds_class.cache()
train_ds_class = train_ds_class.shuffle(ds_info.splits['train'].num_examples)
train_ds_class = train_ds_class.batch(64)
train_ds_class = train_ds_class.prefetch(tf.data.AUTOTUNE)
inputs = tf.keras.layers.Input(shape=[256, 256, 3], name='image')
x = tf.keras.layers.Flatten()(inputs)
x = tf.keras.layers.Dense(50, activation='relu')(x)
outputs = tf.keras.layers.Dense(20, activation='sigmoid')(x)
model = tf.keras.Model(inputs, outputs)
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit(train_ds_class, epochs=5)
Epoch 1/5
40/40 [==============================] - 16s 124ms/step - loss: 3.0883
Epoch 2/5
40/40 [==============================] - 5s 115ms/step - loss: 0.9750
Epoch 3/5
40/40 [==============================] - 5s 115ms/step - loss: 0.4578
Epoch 4/5
40/40 [==============================] - 5s 115ms/step - loss: 0.6004
Epoch 5/5
40/40 [==============================] - 5s 115ms/step - loss: 0.3534
<keras.callbacks.History at 0x7f0e59513f50>
Related
I'm trying to get the 'logits' out of my Keras CNN classifier.
I have tried the suggested method here: link.
First I created two models to check the implementation :
create_CNN_MNIST CNN classifier that returns the softmax probabilities.
create_CNN_MNIST_logits CNN with the same layers as in (1) with a little twist in the last layer - changed the activation function to linear to return logits.
Both models were fed with the same Train and Test data of MNIST. Then I applied softmax on the logits, I got a different output from the softmax CNN.
I couldn't find a problem in my code. Maybe you could help advise another method to extract 'logits' from the model?
the code:
def softmax(x):
"""Compute softmax values for each sets of scores in x."""
e_x = np.exp(x - np.max(x))
return e_x / e_x.sum(axis=0)
def create_CNN_MNIST_logits() :
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(10, activation='linear'))
# compile model
opt = SGD(learning_rate=0.01, momentum=0.9)
def my_sparse_categorical_crossentropy(y_true, y_pred):
return keras.losses.categorical_crossentropy(y_true, y_pred, from_logits=True)
model.compile(optimizer=opt, loss=my_sparse_categorical_crossentropy, metrics=['accuracy'])
return model
def create_CNN_MNIST() :
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(10, activation='softmax'))
# compile model
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
return model
# load data
X_train = np.load('./data/X_train.npy')
X_test = np.load('./data/X_test.npy')
y_train = np.load('./data/y_train.npy')
y_test = np.load('./data/y_test.npy')
#create models
model_softmax = create_CNN_MNIST()
model_logits = create_CNN_MNIST_logits()
pixels = 28
channels = 1
num_labels = 10
# Reshaping to format which CNN expects (batch, height, width, channels)
trainX_cnn = X_train.reshape(X_train.shape[0], pixels, pixels, channels).astype('float32')
testX_cnn = X_test.reshape(X_test.shape[0], pixels, pixels, channels).astype('float32')
# Normalize images from 0-255 to 0-1
trainX_cnn /= 255
testX_cnn /= 255
train_y_cnn = utils.to_categorical(y_train, num_labels)
test_y_cnn = utils.to_categorical(y_test, num_labels)
#train the models:
model_logits.fit(trainX_cnn, train_y_cnn, validation_split=0.2, epochs=10,
batch_size=32)
model_softmax.fit(trainX_cnn, train_y_cnn, validation_split=0.2, epochs=10,
batch_size=32)
On the evaluation stage, I'll do softmax on the logits to check if its the same as the regular model:
#predict
y_pred_softmax = model_softmax.predict(testX_cnn)
y_pred_logits = model_logits.predict(testX_cnn)
#apply softmax on the logits to get the same result of regular CNN
y_pred_logits_activated = softmax(y_pred_logits)
Now I get different values in both y_pred_logits_activated and y_pred_softmax that lead to different accuracy on the test set.
Your models are probably being trained differently, make sure to set the seed prior to both fit commands so that they're initialised the same weights and have the same train/val split. Also, is the softmax might be incorrect:
def softmax(x):
"""Compute softmax values for each sets of scores in x."""
e_x = np.exp(x)
return e_x / e_x.sum(axis=1)
This is numerically equivalent to subtracting the max (https://stackoverflow.com/a/34969389/10475762), and the axis should be 1 if your matrix is of shape [batch, outputs].
Each time I run:
y_true = np.argmax(tf.concat([y for x, y in train_ds], axis=0), axis=1)
y_pred = np.argmax(model.predict(train_ds), axis=1)
confusion_matrix(y_true, y_pred)
The result each time is different to my understanding the line:
y_pred = np.argmax(model.predict(train_ds), axis=1) is different each time.
Clarification: I run cell 1 (training) once. And cell 2 (inference) few times.
Why?
THE CODE:
Cell 1 (jupyter)
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, experimental
from tensorflow.keras.layers import MaxPool2D, Flatten, Dense
from tensorflow.keras import Model
from tensorflow.keras.losses import categorical_crossentropy
from sklearn.metrics import accuracy_score
image_size = (100, 100)
batch_size = 32
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
directory,
label_mode='categorical',
validation_split=0.2,
subset="training",
seed=1337,
color_mode="grayscale",
image_size=image_size,
batch_size=batch_size,
)
inputs = Input(shape =(100,100,1))
x = experimental.preprocessing.Rescaling(1./255)(inputs)
x = Conv2D (filters =4, kernel_size =3, padding ='same', activation='relu')(x)
x = Conv2D (filters =4, kernel_size =3, padding ='same', activation='relu')(x)
x = MaxPool2D(pool_size =2, strides =2, padding ='same')(x)
x = Conv2D (filters =8, kernel_size =3, padding ='same', activation='relu')(x)
x = Conv2D (filters =8, kernel_size =3, padding ='same', activation='relu')(x)
x = MaxPool2D(pool_size =2, strides =2, padding ='same')(x)
x = Flatten()(x)
x = Dense(units = 4, activation ='relu')(x)
x = Dense(units = 4, activation ='relu')(x)
output = Dense(units = 5, activation ='softmax')(x)
model = Model (inputs=inputs, outputs =output)
model.compile(
optimizer=tf.keras.optimizers.Adam(1e-3),
loss=categorical_crossentropy,
metrics=["accuracy"])
model.fit(train_ds, epochs=5)
Cell 2:
print (Accuracy:)
y_pred = np.argmax(model.predict(train_ds), axis=1)
print (accuracy_score(y_true, y_pred))
y_pred = np.argmax(model.predict(train_ds), axis=1)
print (accuracy_score(y_true, y_pred))
OUTPUT:
118/118 [==============================] - 7s 57ms/step - loss: 0.1888 - accuracy: 0.9398
Accuracy:
0.593
0.586
Are you sure you do not train the model again every time you run the code? If the parameters of the model are the same the predicted result for the same input should be the same every time.
To my current understanding the reason of an above is the:
tf.keras.preprocessing.image_dataset_from_directory
While instance of it is:
type(train_ds)
tensorflow.python.data.ops.dataset_ops.BatchDataset
Reproduction:
First run:
[x for x, y in train_ds]
Output:
[<tf.Tensor: shape=(32, 100, 100, 1), dtype=float32, numpy= array([[[[157.],
[155.],
[159.],
Second run:
[x for x, y in train_ds]
Output:
[<tf.Tensor: shape=(32, 100, 100, 1), dtype=float32, numpy= array([[[[ 34.],
[ 36.],
[ 39.],
...,
The possible solution
imgs, y_true = [], []
for img, label in train_ds:
imgs.append(img)
y_true.append(label)
imgs = tf.concat(imgs, axis=0)
y_true = np.argmax(tf.concat(y_true, axis=0), axis=1)
y_pred = np.argmax(model.predict(imgs), axis=1)
print (accuracy_score(y_true, y_pred))
y_pred = np.argmax(model.predict(imgs), axis=1)
print (accuracy_score(y_true, y_pred))
OUTPUT
0.944044764
0.944044764
Is there any better solution?
UPDATE 2:
Maybe more appropriate apporach in case of validation dataset (here the train_ds is just for example is to add an argument Shuffle=False)
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
directory,
label_mode='categorical',
validation_split=0.2,
subset="training",
seed=1337,
color_mode="grayscale",
image_size=image_size,
batch_size=batch_size,
Shuffle=False
)
UPDATE 3:
Here it's probably the best option in case if your test images are in a separate folder.
path = 'your path to test folder'
test_generator = ImageDataGenerator().flow_from_directory(
directory=path,
class_mode='categorical',
shuffle=False,
batch_size=32,
target_size=(512, 512)
)
test_generator.reset()
This is better than OPTION 1, since it can work on dataset, which doesn't fits into memory (RAM).
In my model, the output of the hidden layer, namely 'encoded', has two channels (eg. shape: [none, 128, 128, 2]). I hope to add SSIM between these two channels in loss function:
loss = ssim(input, output) + theta*ssim(encoded(channel1), encoded(channel2)).
How could I implement this? The following is the architecture of my model.
def structural_similarity_index(y_true, y_pred):
loss = 1 - tf.image.ssim(y_true, y_pred, max_val=1.0)
return loss
def mymodel():
input_img = Input(shape=(256, 256, 1))
# encoder
x = Conv2D(4, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
encoded = Conv2D(2, (3, 3), activation='relu', padding='same', name='encoder')(x)
# decoder
x = Conv2D(4, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer = 'adadelta', loss = structural_similarity_index)
autoencoder.summary()
return autoencoder
I tried to define a 'loss_warper' function as shown below, but it didn't work. This is how I added this loss function:
autoencoder.add_loss(loss_wrapper(encoded[:,:,:,0],encoded[:,:,:,1])(input_img, decoded))
the 'loss_warper' function:
def loss_wrapper(CH1, CH2):
def structural_similarity_index(y_true, y_pred):
regweight = 0.01
loss = 1 - tf.image.ssim(y_true, y_pred, max_val=1.0)
loss = loss + regweight*(1-tf.image.ssim(CH1, CH2, max_val=1.0))
return loss
return structural_similarity_index
The error message:
File "E:/Autoencoder.py", line 160, in trainprocess
validation_data= (x_validate, x_validate))
...
ValueError: ('Error when checking model target: expected no data, but got:', array([...]...[...]))
Does anyone know how to implement this? Any help is highly appreciated!
I've been experimenting with TensorFlow's higher level APIs recently and got some strange results: when I train a seemingly exact same model with the same hyperparameters using Keras Model API and TensorFlow Estimator API, I get different results (using Keras leads to ~4% higher accuracy).
Here's my code:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization, Activation, Flatten
from tensorflow.keras.initializers import VarianceScaling
from tensorflow.keras.optimizers import Adam
# Load CIFAR-10 dataset and normalize pixel values
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
X_train = np.array(X_train, dtype=np.float32)
y_train = np.array(y_train, dtype=np.int32).reshape(-1)
X_test = np.array(X_test, dtype=np.float32)
y_test = np.array(y_test, dtype=np.int32).reshape(-1)
mean = X_train.mean(axis=(0, 1, 2), keepdims=True)
std = X_train.std(axis=(0, 1, 2), keepdims=True)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std
y_train_one_hot = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test_one_hot = tf.keras.utils.to_categorical(y_test, num_classes=10)
# Define forward pass for a convolutional neural network.
# This function takes a batch of images as input and returns
# unscaled class scores (aka logits) from the last layer
def conv_net(X):
initializer = VarianceScaling(scale=2.0)
X = Conv2D(filters=32, kernel_size=3, padding='valid', activation='relu', kernel_initializer=initializer)(X)
X = BatchNormalization()(X)
X = Conv2D(filters=64, kernel_size=3, padding='valid', activation='relu', kernel_initializer=initializer)(X)
X = BatchNormalization()(X)
X = MaxPooling2D()(X)
X = Conv2D(filters=64, kernel_size=3, padding='valid', activation='relu', kernel_initializer=initializer)(X)
X = BatchNormalization()(X)
X = Conv2D(filters=128, kernel_size=3, padding='valid', activation='relu', kernel_initializer=initializer)(X)
X = BatchNormalization()(X)
X = Conv2D(filters=256, kernel_size=3, padding='valid', activation='relu', kernel_initializer=initializer)(X)
X = BatchNormalization()(X)
X = GlobalAveragePooling2D()(X)
X = Dense(10)(X)
return X
# For training this model I use Adam optimizer with learning_rate=1e-3
# Train the model for 10 epochs using keras.Model API
def keras_model():
inputs = Input(shape=(32,32,3))
scores = conv_net(inputs)
outputs = Activation('softmax')(scores)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer=Adam(lr=3e-3),
loss='categorical_crossentropy',
metrics=['accuracy'])
return model
model1 = keras_model()
model1.fit(X_train, y_train_one_hot, batch_size=128, epochs=10)
results1 = model1.evaluate(X_test, y_test_one_hot)
print(results1)
# The above usually gives 79-82% accuracy
# Now train the same model for 10 epochs using tf.estimator.Estimator API
train_input_fn = tf.estimator.inputs.numpy_input_fn(x={'X': X_train}, y=y_train, \
batch_size=128, num_epochs=10, shuffle=True)
test_input_fn = tf.estimator.inputs.numpy_input_fn(x={'X': X_test}, y=y_test, \
batch_size=128, num_epochs=1, shuffle=False)
def tf_estimator(features, labels, mode, params):
X = features['X']
scores = conv_net(X)
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions={'scores': scores})
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=scores, labels=labels)
metrics = {'accuracy': tf.metrics.accuracy(labels=labels, predictions=tf.argmax(scores, axis=-1))}
optimizer = tf.train.AdamOptimizer(learning_rate=params['lr'], epsilon=params['epsilon'])
step = optimizer.minimize(loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode, loss=tf.reduce_mean(loss), train_op=step, eval_metric_ops=metrics)
model2 = tf.estimator.Estimator(model_fn=tf_estimator, params={'lr': 3e-3, 'epsilon': tf.keras.backend.epsilon()})
model2.train(input_fn=train_input_fn)
results2 = model2.evaluate(input_fn=test_input_fn)
print(results2)
# This usually gives 75-78% accuracy
print('Keras accuracy:', results1[1])
print('Estimator accuracy:', results2['accuracy'])
I've trained both models 30 times, for 10 epochs each time: mean accuracy of the model trained with Keras is 0.8035 and mean accuracy of the model trained with Estimator is 0.7631 (standard deviations are 0.0065 and 0.0072 respectively). Accuracy is significantly higher if I use Keras. My question is why is this happenning? Am I doing something wrong or missing some important parameters? The architecture of the model is the same in both cases and I'm using the same hyperparametrers (I've even set Adam's epsilon to the same value, although it doesn't really affect overall result), but the accuracies are significantly different.
I also wrote training loop using raw TensorFlow and got the same accuracy as with Estimator API (lower than I get with Keras). It made me think that the default value of some parameter in Keras is different from TensorFlow, but they all actually seem to be the same.
I have also tried other architectures and sometimes I got smaller difference in accuracies, but I wasn't able to find any particular layer type that causes the difference. It looks like if I use more shallow network the difference often becomes smaller. Not always, however. For example, the difference in accuracies is even slightly bigger with the following model:
def simple_conv_net(X):
initializer = VarianceScaling(scale=2.0)
X = Conv2D(filters=32, kernel_size=5, strides=2, padding='valid', activation='relu', kernel_initializer=initializer)(X)
X = BatchNormalization()(X)
X = Conv2D(filters=64, kernel_size=3, strides=1, padding='valid', activation='relu', kernel_initializer=initializer)(X)
X = BatchNormalization()(X)
X = Conv2D(filters=64, kernel_size=3, strides=1, padding='valid', activation='relu', kernel_initializer=initializer)(X)
X = BatchNormalization()(X)
X = Flatten()(X)
X = Dense(10)(X)
return X
Again, I've trained it for 10 epochs 30 times using Adam optimizer with 3e-3 learning rate. Mean accuracy with Keras is 0.6561 and mean accuracy with Estimator is 0.6101 (standard deviations are 0.0084 and 0.0111 respectively). What can be causing such a difference?
My immediate problem is that all of the various CNN regression models I've tried always return the same (or very similar) values and I'm trying to figure out why. But I would be open to a wide range of suggestions.
My dataset looks like this:
x: 64x64 greyscale images arranged into a 64 x 64 x n ndarray
y: Values between 0 and 1, each corresponding to an image (think of this as some sort of proportion)
weather: 4 weather readings from the time each image was taken (ambient temperature, humidity, dewpoint, air pressure)
The goal is to use the images and weather data to predict y. Since I'm working with images, I thought a CNN would be appropriate (please let me know if there are other strategies here).
From what I understand, CNNs are most often used for classification tasks--it's rather unusual to use them for regression. But in theory, it shouldn't be too different--I just need to change the loss function to MSE/RMSE and the last activation function to linear (although maybe a sigmoid is more appropriate here since y is between 0 and 1).
The first hurdle I ran into was trying to figure out how to incorporate the weather data, and the natural choice was to incorporate them into the first fully connected layer. I found an example here: How to train mix of image and data in CNN using ImageAugmentation in TFlearn
The second hurdle I ran into was determining an architecture. Normally I would just pick a paper and copy its architecture, but I couldn't find anything on CNN image regression. So I tried a (fairly simple) network with 3 convolutional layers and 2 fully connected layers, then I tried VGGNet and AlexNet architectures from https://github.com/tflearn/tflearn/tree/master/examples
Now the problem I'm having is that all of the models I'm trying output the same value, namely the mean y of the training set. Looking at tensorboard, the loss function flattens out fairly quickly (after around 25 epochs). Do you know what's going on here? While I do understand the basics of what each layer is doing, I have no intuition on what makes a good architecture for a particular dataset or task.
Here is an example. I am using VGGNet from the tflearn examples page:
tf.reset_default_graph()
img_aug = ImageAugmentation()
img_aug.add_random_flip_leftright()
img_aug.add_random_flip_updown()
img_aug.add_random_90degrees_rotation(rotations=[0, 1, 2, 3])
convnet = input_data(shape=[None, size, size, 1],
data_augmentation=img_aug,
name='hive')
weathernet = input_data(shape=[None, 4], name='weather')
convnet = conv_2d(convnet, 64, 3, activation='relu', scope='conv1_1')
convnet = conv_2d(convnet, 64, 3, activation='relu', scope='conv1_2')
convnet = max_pool_2d(convnet, 2, strides=2, name='maxpool1')
convnet = conv_2d(convnet, 128, 3, activation='relu', scope='conv2_1')
convnet = conv_2d(convnet, 128, 3, activation='relu', scope='conv2_2')
convnet = max_pool_2d(convnet, 2, strides=2, name='maxpool2')
convnet = conv_2d(convnet, 256, 3, activation='relu', scope='conv3_1')
convnet = conv_2d(convnet, 256, 3, activation='relu', scope='conv3_2')
convnet = conv_2d(convnet, 256, 3, activation='relu', scope='conv3_3')
convnet = max_pool_2d(convnet, 2, strides=2, name='maxpool3')
convnet = conv_2d(convnet, 512, 3, activation='relu', scope='conv4_1')
convnet = conv_2d(convnet, 512, 3, activation='relu', scope='conv4_2')
convnet = conv_2d(convnet, 512, 3, activation='relu', scope='conv4_3')
convnet = max_pool_2d(convnet, 2, strides=2, name='maxpool4')
convnet = conv_2d(convnet, 512, 3, activation='relu', scope='conv5_1')
convnet = conv_2d(convnet, 512, 3, activation='relu', scope='conv5_2')
convnet = conv_2d(convnet, 512, 3, activation='relu', scope='conv5_3')
convnet = max_pool_2d(convnet, 2, strides=2, name='maxpool5')
convnet = fully_connected(convnet, 4096, activation='relu', scope='fc6')
convnet = merge([convnet, weathernet], 'concat')
convnet = dropout(convnet, .75, name='dropout1')
convnet = fully_connected(convnet, 4096, activation='relu', scope='fc7')
convnet = dropout(convnet, .75, name='dropout2')
convnet = fully_connected(convnet, 1, activation='sigmoid', scope='fc8')
convnet = regression(convnet,
optimizer='adam',
learning_rate=learning_rate,
loss='mean_square',
name='targets')
model = tflearn.DNN(convnet,
tensorboard_dir='log',
tensorboard_verbose=0)
model.fit({
'hive': x_train,
'weather': weather_train
},
{'targets': y_train},
n_epoch=1000,
batch_size=batch_size,
validation_set=({
'hive': x_val,
'weather': weather_val
},
{'targets': y_val}),
show_metric=False,
shuffle=True,
run_id='poop')
To get at what my objects are:
x_train is an ndarray of shape (n, 64, 64, 1)
weather_train is an ndarray of shape (n, 4)
y_train is an ndarray of shape (n, 1)
Overfitting is another concern, but given that the models perform poorly on the training set, I think I can worry about that later.
To address your concern regarding the same predicted value for all instances in your test set. You have a couple options here that don't involve changing the structure of your conv net:
You can rescale your target variable using sklearn StandardScaler() (which standardizes features by removing the mean and scaling to unit variance)
Scale pixel data; generally performance increases with scaled pixel data, as a rule of thumb always divide pixel data by 255.0 (shown at end of post)
You can play around with learning rate and the error function (the reason that the CNN is outputting the same value for all predictions is because that is what it has determined is the point of minimum error)
Next. If you are trying to perform regression ensure that your final fully connected layer uses a linear activation function instead of sigmoid. A linear activation function takes inputs to the neuron multiplied by the neuron weight and creates an output proportional to the input.
convnet = fully_connected(convnet, 1, activation='linear', scope='fc8')
Lastly. I have recently implemented ResNet50 for regression tasks in Keras. Here is the construction of that network, this version does not permit loading of pretrained weights and it must receive images of shape (224, 224, 3).
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D, MaxPooling2D, DepthwiseConv2D
from keras.layers.core import Activation, Dropout, Dense
from keras.layers import Flatten, Input, Add, ZeroPadding2D, GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.models import Model
from keras import backend
def block1(x, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None):
"""
A residual block
:param x: input tensor
:param filters: integer, filters of the bottleneck layer
:param kernel_size: kernel size of bottleneck
:param stride: stride of first layer
:param conv_shortcut: use convolution shortcut if true, otherwise identity shortcut
:param name: string, block label
:return: Output tensor of the residual block
"""
# bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1
bn_axis = -1
if conv_shortcut is True:
shortcut = Conv2D(4 * filters, 1, strides=stride, name=name+'_0_conv')(x)
shortcut = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name+'_0_bn')(shortcut)
else:
shortcut = x
x = Conv2D(filters, 1, strides=stride, name=name+'_1_conv')(x)
x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name+'_1_bn')(x)
x = Activation('relu', name=name+'_1_relu')(x)
x = Conv2D(filters, kernel_size, padding='SAME', name=name+'_2_conv')(x)
x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name+'_2_bn')(x)
x = Activation('relu', name=name+'_2_relu')(x)
x = Conv2D(4 * filters, 1, name=name+'_3_conv')(x)
x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name+'_3_bn')(x)
x = Add(name=name+'_add')([shortcut, x])
x = Activation('relu', name=name+'_out')(x)
return x
def stack1(x, filters, blocks, stride1=2, name=None):
"""
a set of stacked residual blocks
:param x: input tensor
:param filters: int, filters fof the bottleneck layer in the block
:param blocks: int, blocks in the stacked blocks,
:param stride1: stride of the first layer in the first block
:param name: stack label
:return: output tensor for the stacked blocks
"""
x = block1(x, filters, stride=stride1, name=name+'_block1')
for i in range(2, blocks+1):
x = block1(x, filters, conv_shortcut=False, name=name+'_block'+str(i))
return x
def resnet(height, width, depth, stack_fn, use_bias=False, nodes=256):
"""
:param height: height of image, int
:param width: image width, int
:param depth: bn_axis or depth, int
:param stack_fn: function that stacks residual blocks
:param nodes: width of nodes included in top layer of CNN, int
:return: a Keras model instance
"""
input_shape = (height, width, depth)
img_input = Input(shape=input_shape)
x = ZeroPadding2D(padding=((3, 3), (3, 3)), name='conv1_pad')(img_input)
x = Conv2D(64, 7, strides=2, use_bias=use_bias, name='conv1_conv')(x)
x = ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x)
x = MaxPooling2D(3, strides=2, name='pool1_pool')(x)
x = stack_fn(x)
# top layer
x = GlobalAveragePooling2D(name='avg_pool')(x)
x = Dense(nodes, activation='relu')(x)
# perform regression
x = Dense(1, activation='linear')(x)
model = Model(img_input, x)
return model
def resnet50(height, width, depth, nodes):
def stack_fn(x):
x = stack1(x, 64, 3, stride1=1, name='conv2')
x = stack1(x, 128, 4, name='conv3')
x = stack1(x, 256, 6, name='conv4')
x = stack1(x, 512, 3, name='conv5')
return x
return resnet(height, width, depth, stack_fn, nodes=nodes)
Which can be implemented using some x_train, x_test, y_train, y_test data (where x_train/test is image data and y_train,y_test data are numeric values on the interval [0, 1].
scaler = MinMaxScaler()
images = load_images(df=target, path=PATH_features, resize_shape=(224, 224), quadruple=True)
images = images / 255.0 # scale pixel data to [0, 1]
images = images.astype(np.float32)
imshape = images.shape
target = target[Target]
target = quadruple_target(target, target=Target)
x_train, x_test, y_train, y_test = train_test_split(images, target, test_size=0.3, random_state=101)
y_train = scaler.fit_transform(y_train)
y_test = scaler.transform(y_test)
model = resnet50(imshape[1], imshape[2], imshape[3], nodes=256)
opt = Adam(lr=1e-5, decay=1e-5 / 200)
model.compile(loss=lossFN, optimizer=opt)
history = model.fit(x_train, y_train, validation_data=(x_test, y_test), verbose=1, epochs=200)
pred = model.predict(x_test)