Training Multi Class Image classification model - tensorflow

I am trying to create a model using TensorFlow and Python. I load the data from a folder on my PC.
The Folder Structure
An Example from the data
Almost all data are the same size [237 items,223 items,495 items,387 items,301 items]
That's how I load my data:
# Class name -> integer label.
# NOTE(review): the keys are presumably the sub-directory names inside 'Dataset';
# confirm that 'Basic T-shrit' matches the folder name exactly.
lables = {'Basic T-shrit':0, 'Bikini Bottom':1, 'Cargo Pants':2, 'Jeans':3, 'Oversize T-Shirt':4}
#Data
# Every pixel value is multiplied by 1/256 when an image is loaded.
train_datagen = ImageDataGenerator(rescale=1/256)
train_generator = train_datagen.flow_from_directory(
'Dataset', # This is the source directory for training images
target_size=(256, 256), # All images will be resized to 256 x 256
batch_size=batch_size,
# Specify the classes explicitly
classes = lables,
# Since we use categorical_crossentropy loss, we need categorical labels
class_mode='categorical')
That's a model I tried:
#Model
# Small CNN classifier: four 3x3 convolution stages (max-pooling after the
# first three), then a 64-unit dense layer with dropout and a softmax output
# with one unit per category.
model = Sequential()
model.add(Conv2D(32, (3, 3), strides=1, activation='relu', input_shape=(image_width, image_height, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Conv2D's leading positional arguments are (filters, kernel_size, strides).
# The previous `Conv2D(32,3,3, ...)` spelling therefore meant "3x3 kernel with
# stride 3", which shrank the feature maps to almost nothing after a few
# layers. Pass the 3x3 kernel as a tuple and keep the default stride of 1.
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(categorys_size, activation='softmax'))
model.summary()
# One-hot labels from the generator pair with categorical cross-entropy.
model.compile(loss='categorical_crossentropy',
              optimizer="adam",
              metrics=['acc'])
Then I start the learning process:
# Model.fit accepts Python generators directly; fit_generator is deprecated in
# TensorFlow 2.x.
# NOTE(review): validation_data is the *training* generator, so the reported
# validation metrics only show how well the training images are fitted. Pass a
# held-out generator here to measure generalisation.
model.fit(train_generator,
          steps_per_epoch=epoch_steps,
          epochs=Epoch,
          validation_data=train_generator)
But it is not working well: the model treats oversize shirts and normal shirts as the same class, and classifies any kind of pants as jeans.
Model Train result
Then I tested this model:
# CNN with four conv -> pool -> batch-norm -> dropout stages followed by two
# L2-regularised dense layers and a softmax over the known labels.
#
# BatchNormalization previously used axis=1. The input pipeline yields
# channels-last batches (batch, height, width, channels), so axis=1 normalised
# over image rows instead of feature channels. The default axis (-1) is the
# channel axis for this layout.
model = tf.keras.models.Sequential([
    keras.layers.Conv2D(32, kernel_size=(5, 5), activation=tf.keras.activations.relu, input_shape=IMAGE_SHAPE),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.22),

    keras.layers.Conv2D(32, kernel_size=(5, 5), activation=tf.keras.activations.relu),
    keras.layers.AveragePooling2D(pool_size=(2, 2)),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.25),

    keras.layers.Conv2D(32, kernel_size=(4, 4), activation=tf.keras.activations.relu),
    keras.layers.AveragePooling2D(pool_size=(2, 2)),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.15),

    keras.layers.Conv2D(32, kernel_size=(3, 3), activation=tf.keras.activations.relu),
    keras.layers.AveragePooling2D(pool_size=(2, 2)),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.15),

    keras.layers.Flatten(),
    keras.layers.Dense(256, activation=tf.keras.activations.relu, kernel_regularizer=keras.regularizers.l2(0.001)),
    #keras.layers.Dropout(0.25),
    keras.layers.Dense(64, activation=tf.keras.activations.relu, kernel_regularizer=keras.regularizers.l2(0.001)),
    #keras.layers.Dropout(0.1),
    keras.layers.Dense(len(lables), activation=tf.keras.activations.softmax),
])
# Sparse categorical cross-entropy expects integer class labels (not one-hot).
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=['accuracy'])
Then I start learning:
#Start Learning
# Checkpoint file-name template; the `{epoch:04d}` field is presumably filled
# in by the callback with the zero-padded epoch number.
checkpoint_path="/chk/cp-{epoch:04d}.ckpt"
# Checkpoint callback that stores weights only and logs each save.
# NOTE(review): `period=10` presumably means "save every 10 epochs"; recent
# Keras releases deprecate it in favour of `save_freq` - confirm against the
# installed version.
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
save_weights_only=True,
verbose=1,
period=10)
# Train on `train` for `Epoch` epochs, using `val` as validation data.
model.fit(train,
epochs=Epoch,callbacks = [cp_callback],
validation_data=val,verbose=1)
And that's how I load the data
# Load the images under 'Dataset'; labels are inferred from the directory
# structure and every image is resized to 192x192.
data = tf.keras.utils.image_dataset_from_directory('Dataset', labels='inferred', image_size=(192, 192))

# Preview the first four images of one batch, titled with their labels.
data_iterator = data.as_numpy_iterator()
batch = next(data_iterator)
fig, ax = plt.subplots(ncols=4, figsize=(20, 20))
for idx, img in enumerate(batch[0][:4]):
    ax[idx].imshow(img.astype(int))
    ax[idx].title.set_text(batch[1][idx])

#Scale Data
# Pixel intensities are in [0, 255], so divide by 255 to map them to [0, 1].
# (The previous divisor, 192, was the image side length, not the pixel range.)
data = data.map(lambda x, y: (x / 255, y))
next(data.as_numpy_iterator())

# Split the batches 70 % / 20 % / 10 % into train / validation / test.
# NOTE(review): if the dataset reshuffles on every iteration (presumably the
# default for image_dataset_from_directory), take/skip select different
# batches each epoch and the three splits overlap over time - confirm, and
# load with shuffle=False or a fixed split if so.
train_size = int(len(data)*.7)
val_size = int(len(data)*.2)
test_size = int(len(data)*.1)
print(train_size)
train = data.take(train_size)
val = data.skip(train_size).take(val_size)
test = data.skip(train_size+val_size).take(test_size)
What am I doing wrong? Is the data I collected good enough for what I am trying to do? Am I missing something?
Thanks

Related

How to get the filenames of all categories (TP, TN, FP, FN) of a Confusion Matrix in Keras/TensorFlow?

I am working with image data, and I am trying to find the list of files that fall into TP, TN (true positives, true negatives) and so on. The purpose is to check (visually) whether the files are being identified properly by the model. Currently I am using a sequential image classification model in Google Colab. Following is my code.
## Model
# Binary classifier for 128x128 single-channel images: three conv/max-pool
# stages, then an 8-unit dense layer and a single sigmoid output unit.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(8, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
from keras.preprocessing.image import ImageDataGenerator
# Training images are rescaled by 1/255 and augmented (shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
# Validation and test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)
# All three generators read 128x128 grayscale images with binary labels.
train_generator = train_datagen.flow_from_directory('./train', target_size=(128, 128), batch_size=batch_size, color_mode = 'grayscale', class_mode='binary')
validation_generator = test_datagen.flow_from_directory('./validation', target_size=(128, 128), batch_size=batch_size, color_mode = 'grayscale', class_mode='binary')
# The test generator uses batch_size=1 and shuffle=False, so its samples are
# produced in a fixed order.
test_generator = test_datagen.flow_from_directory('./test', target_size=(128, 128), batch_size=1,
color_mode = 'grayscale', class_mode='binary', shuffle=False)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train for 50 epochs; `h` holds the object returned by fit.
h = model.fit(train_generator, epochs = 50, validation_data=validation_generator)
from sklearn.metrics import confusion_matrix
# Ground-truth class indices as reported by the test generator.
y_true = test_generator.classes
true_classes = test_generator.classes
class_labels = list(test_generator.class_indices.keys())
# The original call used `yy_pred`, which was never defined. Derive hard class
# predictions from the model's single sigmoid output by thresholding at 0.5.
# NOTE(review): this relies on the generator yielding samples in the same
# order as `classes` (i.e. it was created with shuffle=False) - confirm.
y_pred = (model.predict(test_generator) > 0.5).astype(int).ravel()
confusion_matrix(y_true, y_pred)
Output of confusion Matrix-
array([[22, 10],
[9, 50]])
I am trying to get the names of the image files that fall into each cell, e.g. the True Positives (22 images), the True Negatives (50 images), and so on. I am not sure whether I can get such a list directly or whether I have to regenerate the predictions.
The function below processes the test_generator and produces a classification report and a confusion matrix, as well as a list of the filenames that were misclassified.
You can process the classification report to get the metrics you desire
def predictor(test_gen):
    """Evaluate the module-level ``model`` on ``test_gen`` and report its errors.

    Prints an accuracy summary (via the ``print_in_color`` helper, which must
    be defined elsewhere), plots a confusion matrix when there are at most 30
    classes, and prints a classification report.

    Args:
        test_gen: a directory/dataframe iterator exposing ``labels``,
            ``class_indices`` and ``filenames``.
            NOTE(review): predictions are matched to labels by position, so
            the generator presumably must be created with ``shuffle=False``.

    Returns:
        A tuple ``(errors, tests, error_list, error_pred_list, f1score)``:
        the number of misclassified samples, the number of predictions, the
        file names of the misclassified samples, the class name predicted for
        each of them, and the weighted F1 score as a percentage.
    """
    y_true = test_gen.labels
    classes = list(test_gen.class_indices.keys())
    class_count = len(classes)

    preds = np.asarray(model.predict(test_gen, verbose=1))
    tests = len(preds)

    if preds.ndim > 1 and preds.shape[-1] > 1:
        # One score per class (e.g. softmax): take the best-scoring class.
        pred_indices = np.argmax(preds, axis=-1)
    else:
        # Single score per sample (e.g. one sigmoid unit): argmax would always
        # return 0 here, so threshold at 0.5 instead.
        pred_indices = (preds.reshape(tests) > 0.5).astype(int)
    y_pred = list(pred_indices)

    # Positions where the predicted class differs from the true class.
    wrong = [i for i, p in enumerate(pred_indices) if p != test_gen.labels[i]]
    errors = len(wrong)
    error_list = [test_gen.filenames[i] for i in wrong]
    error_pred_list = [classes[pred_indices[i]] for i in wrong]

    # Guard the division so an empty generator does not raise ZeroDivisionError.
    acc = (1 - errors / tests) * 100 if tests else 0.0
    msg = f'there were {errors} errors in {tests} tests for an accuracy of {acc:6.2f}'
    print_in_color(msg, (0,255,255), (100,100,100)) # cyan foreground

    ypred = np.array(y_pred)
    ytrue = np.array(y_true)
    f1score = f1_score(ytrue, ypred, average='weighted') * 100

    if class_count <= 30:
        cm = confusion_matrix(ytrue, ypred)
        # plot the confusion matrix
        plt.figure(figsize=(12, 8))
        sns.heatmap(cm, annot=True, vmin=0, fmt='g', cmap='Blues', cbar=False)
        # +.5 centres each tick label on its heatmap cell.
        plt.xticks(np.arange(class_count)+.5, classes, rotation=90)
        plt.yticks(np.arange(class_count)+.5, classes, rotation=0)
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        plt.title("Confusion Matrix")
        plt.show()

    clr = classification_report(y_true, y_pred, target_names=classes, digits=4) # create classification report
    print("Classification Report:\n----------------------\n", clr)
    return errors, tests, error_list, error_pred_list, f1score
errors, tests, error_list, error_pred_list, f1score =predictor(test_gen)

Operation type in full integer quantization method in TensorFlowLite

I want to apply Post-Training Quantization (Full integer) using TensorFlow model optimization package on a pre-trained model (LeNet5).
https://www.tensorflow.org/model_optimization/guide/quantization/post_training
# LeNet-5 style network for 28x28 inputs: the input is reshaped to add a
# channel axis, followed by two tanh convolution + average-pooling stages and
# three dense layers ending in a 10-way softmax.
# The model name is passed through the public `name` argument rather than by
# assigning the private `_name` attribute after construction.
model = Sequential(name='LeNet5')
model.add(tf.keras.layers.InputLayer(input_shape=(28, 28)))
model.add(tf.keras.layers.Reshape(target_shape=(28, 28, 1)))
model.add(
    Conv2D(6, kernel_size=(5, 5), strides=(1, 1), activation='tanh', padding='same'))
model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
model.add(Conv2D(16, kernel_size=(5, 5), strides=(1, 1), activation='tanh', padding='valid'))
model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
model.add(Flatten())
model.add(Dense(120, activation='tanh'))
model.add(Dense(84, activation='tanh'))
model.add(Dense(10, activation='softmax'))
and using this code I have applied Full Integer Post-Training Quantization:
# Calibration data: MNIST training images scaled to [0, 1], served one image
# per batch.
mnist_train, _ = tf.keras.datasets.mnist.load_data()
images = tf.cast(mnist_train[0], tf.float32) / 255.0
mnist_ds = tf.data.Dataset.from_tensor_slices(images).batch(1)


def representative_data_gen():
    """Yield the first 100 single-image batches of ``mnist_ds``, each wrapped in a list."""
    for input_value in mnist_ds.take(100):
        yield [input_value]


# NOTE(review): `converter` is not created in this snippet; it must already
# exist (presumably a tf.lite.TFLiteConverter built from the model).
# Optimize.DEFAULT replaces the deprecated OPTIMIZE_FOR_SIZE alias.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
converter.allow_custom_ops = True
# Restrict the converter to int8 builtin kernels, which is the documented way
# to request full-integer quantization.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
full_integer_quantization_model = converter.convert()
# Write through a context manager so the file is flushed and closed.
with open("tflite_model.tflite", "wb") as tflite_file:
    tflite_file.write(full_integer_quantization_model)
It works fine in terms of accuracy, but when I print the data type of each layer (operations such as conv, activation, bias), I see that some of the operations are in int32 instead of int8.
I don't know why?
How does TFLite decide to do some ops in int32 and some in int8?
Is it possible to control this feature (is it an option) in TFLite and perform all operations as int8?
Have you taken a look at https://www.tensorflow.org/lite/performance/quantization_spec ?
According to that specification, bias values are 32 bits wide (int32), which is why not every tensor is int8.

CNN for automated car in GTA poor performance

Recently I have been trying to build an automated car in gta v using a CNN model. I started out by collecting about 30k images from the game by driving around while capturing the scene and the key that was pressed at the current time. I also made sure to keep the dataset balanced by limiting the amount of data for each label to be equal.
An example of a random image in the dataset: IMAGE.
The labels are the basic driving inputs -LABELS.
Using this dataset on various models the accuracy never went above 50-60% on the validation test (test accuracy is even lower). Trying to fix this issue I tried cropping the images from the dataset to only include the center of the image which contains the road and drop the outlying data (scenery, buildings etc..). Also tried using RGB pictures as data instead of greyscale, also tested out collecting data from a specific location and testing it in the same place, different model architectures, different parameters and still no luck.
All the models were tested out in-game by constantly capturing the image of the road in-game and using it as an input to the model, then the output of the model would be the input for the game. All models seem to behave in the same general way which is basically outputting the same label – mostly ‘WA’, until it crashes into a wall.
I would love to get some tips on what I may be doing wrong or on what I can do to improve performance and let me know if you need any more information regarding this project to help out.
Thanks in advance.
TWO OF THE MODELS I TRIED:
# AlexNet-style network: five convolution layers with three max-pooling
# stages, two 4096-unit dense layers with dropout, and a 7-way output.
model = Sequential()
model.add(Conv2D(filters=96, kernel_size=11, strides=4, activation='relu', input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS), padding='same'))
model.add(MaxPooling2D(pool_size=(3, 3), strides=2))
model.add(Conv2D(filters=256, kernel_size=5, activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(3, 3), strides=2))
model.add(Conv2D(filters=384, kernel_size=3, activation='relu', padding='same'))
model.add(Conv2D(filters=384, kernel_size=3, activation='relu', padding='same'))
model.add(Conv2D(filters=256, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(3, 3), strides=2))
# Flatten BEFORE the dense layers. Previously Flatten came after them, so each
# Dense(4096) was applied independently at every spatial position of the
# feature map instead of to the whole flattened feature vector.
model.add(Flatten())
model.add(Dense(4096, activation='tanh'))
model.add(Dropout(0.5))
model.add(Dense(4096, activation='tanh'))
model.add(Dropout(0.5))
# The model is trained with categorical cross-entropy on one-hot labels, so
# the output must be a softmax distribution over the 7 classes (was sigmoid).
model.add(Dense(7, activation='softmax'))
# Smaller variant: four 'same'-padded convolutions with three 3x3 max-pooling
# stages, then a 7-way output layer on the flattened features.
model = Sequential()
model.add(Conv2D(filters=12, kernel_size=11, activation='relu', input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS), padding='same'))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Conv2D(filters=256, kernel_size=5, activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Conv2D(filters=384, kernel_size=3, activation='relu', padding='same'))
model.add(Conv2D(filters=256, kernel_size=5, activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Flatten())
# Trained with categorical cross-entropy on one-hot labels, so the output must
# be a softmax distribution over the 7 classes (was sigmoid).
model.add(Dense(7, activation='softmax'))
The code:
# Index every file in "dataset". The class label is read from the file name:
# it is the second dot-separated field (index 1) of the name.
filenames = os.listdir("dataset")
labels = [name.split('.')[1] for name in filenames]
df = pd.DataFrame({'filename': filenames, 'category': labels})
# Build the network and show its layer summary.
model = model1()
print(model.summary())
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
# Random 60 % / 40 % split of the file table into training and validation rows.
# NOTE(review): if the frames were captured consecutively while driving,
# neighbouring frames presumably land in both splits, which would inflate the
# validation accuracy - confirm how the data was recorded.
train_df, validate_df = train_test_split(df, test_size=0.40, random_state=42)
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)
# Row counts, used below to derive the number of steps per epoch.
total_train = train_df.shape[0]
total_validate = validate_df.shape[0]
batch_size=32
# Both generators only rescale pixel values by 1/255; no augmentation is applied.
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_dataframe(
train_df,
"dataset",
x_col='filename',
y_col='category',
target_size=IMAGE_SIZE,
class_mode='categorical',
batch_size=batch_size
)
validation_datagen = ImageDataGenerator( rescale=1./255,)
validation_generator = validation_datagen.flow_from_dataframe(
validate_df,
"dataset",
x_col='filename',
y_col='category',
target_size=IMAGE_SIZE,
class_mode='categorical',
batch_size=batch_size
)
epochs = 25
# Timestamped TensorBoard log directory so each run gets its own folder.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
# Steps are the whole number of full batches in each split.
history = model.fit(
train_generator,
epochs=epochs,
validation_data=validation_generator,
validation_steps=total_validate // batch_size,
steps_per_epoch=total_train // batch_size, shuffle=True, callbacks=[tensorboard_callback])
model.save("model.h5")

Tensor name has no shape information ERROR when predicting with tensorflow serving

I'm using tensorflow serving to serve a savedmodel. I have two signatures: 1st outputting keras model.output and the 2nd outputting post processing of model.output. When I try a predict call of the 2nd signature on tensorflow serving it is giving me an error { "error": "Tensor name: prediction has no shape information " }
this is the code to build the savedmodel
# NOTE(review): shape1/shape2 are not referenced in the visible code; the
# resize below hard-codes (92,92).
shape1 = 92
shape2 = 92
# L2 regularisation factor and optimizer learning rate used by the model below.
reg=0.000001
learning_rate=0.001
# Create a session, register it with the Keras backend, and set the learning
# phase to 0 (presumably inference mode - confirm).
sess = tf.Session()
K.set_session(sess)
K._LEARNING_PHASE = tf.constant(0)
K.set_learning_phase(0)
#preprocessing
# String input placeholder with one unknown-length dimension.
x_input = tf.placeholder(tf.string, name='x_input', shape=[None])
# Reshape to a scalar string, so exactly one image is handled per call.
reshaped = tf.reshape(x_input, shape=[])
image = tf.image.decode_jpeg(reshaped, channels=3)
# Add a leading batch dimension of size 1.
image2 = tf.expand_dims(image,0)
resized = tf.image.resize_images(image2, (92,92))
# Normalise: subtract 116.0, then divide by 66.0.
meaned = tf.math.subtract(resized, tf.constant(116.0))
normalized = tf.math.divide(meaned, tf.constant(66.0))
#keras model
# Keras model fed directly from the `normalized` preprocessing tensor.
model = tf.keras.Sequential()
model.add(InputLayer(input_tensor=normalized))

# Four convolution stages. Each stage has two 3x3 'same' convolutions with L2
# regularisation, followed by max-pooling. Every stage except the first puts
# a dropout layer after each convolution.
for stage_filters, stage_dropout in ((32, None), (64, 0.1), (128, 0.2), (256, 0.3)):
    for _ in range(2):
        model.add(Conv2D(stage_filters, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(reg)))
        if stage_dropout is not None:
            model.add(Dropout(stage_dropout))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: two regularised 256-unit dense layers with 0.5 dropout,
# then a single unit passed through a sigmoid.
model.add(Flatten())
for _ in range(2):
    model.add(Dense(256, activation='relu', kernel_regularizer=l2(reg)))
    model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer=tf.train.RMSPropOptimizer(learning_rate=learning_rate),
              metrics=['accuracy'])
#post processing to output label
# Take the single score at index (0, 0) of the model output and map it to a
# scalar string label.
pred = tf.gather_nd(model.output, (0,0))
label = tf.cond(pred > 0.5, lambda: tf.constant('Dog', shape=[]), lambda: tf.constant('Cat', shape=[]))

export_path = './saved_models/1'

# Initialise all variables first, THEN restore the trained weights. Loading
# the checkpoint before running the initializers is pointless because they
# overwrite the restored values, so the earlier duplicate load was removed.
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess.run(init_op)
model.load_weights(r'./checkpoints/4.ckpt')

# Remove a previous export, since the builder needs a fresh directory.
# shutil.rmtree is used instead of shelling out to `rm -r` so this also works
# on platforms without that command.
import shutil
if os.path.isdir(export_path):
    print('\nAlready saved a model, cleaning up\n')
    shutil.rmtree(export_path)

#first signature(this works)
x_info = tf.saved_model.utils.build_tensor_info(x_input)
y_info = tf.saved_model.utils.build_tensor_info(model.output)
sigmoid_signature = build_signature_def(inputs={"image": x_info}, outputs={"prediction":y_info}, method_name='tensorflow/serving/predict')

#2nd signature(this doesn't work)
x_info = tf.saved_model.utils.build_tensor_info(x_input)
y_info = tf.saved_model.utils.build_tensor_info(label)
label_signature = build_signature_def(inputs={"image": x_info}, outputs={"prediction":y_info}, method_name='tensorflow/serving/predict')

builder = tf.saved_model.builder.SavedModelBuilder(export_path)
# NOTE(review): legacy_init_op is created but never passed to the builder.
legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
builder.add_meta_graph_and_variables(sess=sess,
                                     tags=["serve"],
                                     signature_def_map={'sigmoid': sigmoid_signature, 'label': label_signature})
builder.save()
this is code to call tf serving
# NOTE(review): `imgs` must already be defined before this line; the resulting
# list is not used in the request below.
imgs = ['./Dog/' + img for img in imgs]

# Read the raw JPEG bytes; the context manager closes the file promptly.
with open('./Dog/3.jpg', 'rb') as image_file:
    img = image_file.read()
# Send the bytes base64-encoded under the "b64" key.
img = base64.b64encode(img).decode('utf-8')

# Row-format request ("instances") against the "label" signature.
data = json.dumps(
    {"signature_name": "label",
     "instances": [
         {'image': {'b64': img}}
     ]
     }
)
json_response = requests.post('http://localhost:8501/v1/models/pet:predict', data=data)
print(json_response.text)
Instead of getting a response of {"predictions": "Dog"}, i am getting an error { "error": "Tensor name: prediction has no shape information " }
I managed to fix this. I used tf.reshape on what i wanted to output and passed that into the signature builder.
#post processing to output label
pred = tf.gather_nd(model.output, (0,0))
label = tf.cond(pred > 0.5, lambda: tf.constant('Dog', shape=[]), lambda: tf.constant('Cat', shape=[]))
# Give the scalar label an explicit 0th dimension of size 1 so the exported
# tensor has shape information. `[None]` is not a valid target shape for
# tf.reshape, so the concrete shape [1] is used.
label_reshaped = tf.reshape(label, [1])
...
#2nd signature (works now that the reshaped label is used)
x_info = tf.saved_model.utils.build_tensor_info(x_input)
y_info = tf.saved_model.utils.build_tensor_info(label_reshaped)
label_signature = build_signature_def(inputs={"image": x_info}, outputs={"prediction":y_info}, method_name='tensorflow/serving/predict')
Reading the tensorflow serving documentation, you'll see that there are two ways to specify input tensors in your request, the row format (using instances like your example), and the column format (using inputs).
Since the row format requires that all inputs and outputs have the same 0th dimension, if you did not export the model with explicit output shape, you cannot use the row format.
Therefore, in your case (without having to re-export the model with explicit reshaping, like the other answer has provided), you can send this payload instead
# Column-format request body: a single "inputs" object rather than an
# "instances" list, addressed to the "label" signature.
data = json.dumps(
{
"signature_name": "label",
"inputs": {'image': {'b64': img}}
}
)
On the other hand, keep in mind that if you do want to send multiple b64 encoded images, your best bet would be to use the row format with multiple instances (such as if you want to run batch predict on multiple images).

how to convert Tensor objects into numpy array?

I have built and trained a CNN, and I want to get the weights of the first dense layer as a numpy array. After training, I loaded the model using this code:
# Read the saved architecture (JSON text) and rebuild the model from it.
f = Path("model_structure.json")
model_structure = f.read_text()
model_wieghts = model_from_json(model_structure)
# Load the trained weights from the HDF5 file into the rebuilt model.
model_wieghts.load_weights("model_weights.h5")
in order to get the wieghts of the first dense layer i used :
wieghts_tf = model_wieghts.layers[9].output
wieghts_tf has this value:
<tf.Tensor 'dense_1/Relu:0' shape=(?, 496) dtype=float32>
The question is: I want to convert wieghts_tf from a tensor to a numpy array, so I created a session and used the eval() function, as shown below:
# Create a session and evaluate the tensor with it as the default session.
# NOTE(review): `wieghts_tf` is a layer *output*, which depends on the model's
# input placeholder, so evaluating it without feeding an input fails. The
# layer's weights are presumably available through the layer's get_weights()
# method instead - confirm.
sess = tf.Session()
with sess.as_default() :
vector = wieghts_tf.eval()
but im getting this error
InvalidArgumentError: You must feed a value for placeholder tensor 'conv2d_1_input' with dtype float and shape [?,180,180,3]
how can i solve it ?
here is the code of the CNN model :
#creating nueral network
# Build the network: two conv-conv-pool-dropout stages, then a 496-unit dense
# layer with dropout and a 376-way softmax output.
# Sequential.add() returns None, so the former `conv1_2d = model.add(...)`
# style bindings only ever held None and have been dropped. Layers can be
# reached afterwards through `model.layers`.
model = Sequential()
model.add(Conv2D(180, (3, 3), padding='same', input_shape=(180, 180, 3), activation="relu"))  # 180 is the number of filters
model.add(Conv2D(180, (3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))
model.add(Conv2D(360, (3, 3), padding='same', activation="relu"))
model.add(Conv2D(360, (3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(496, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(376, activation="softmax"))

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

# Train for 40 epochs, holding out 10 % of the training data for validation.
model.fit(
    train_data,
    train_label,
    batch_size=32,
    epochs=40,
    verbose=2,
    validation_split=0.1,
    shuffle=True)

# Save neural network structure
model_structure = model.to_json()
f = Path("model_structure.json")
f.write_text(model_structure)

# Save neural network's trained weights
model.save_weights("model_weights.h5")
Found the solution:
x = np.frombuffer(layer.convolution.weights.float16Value, dtype=np.float16)