How To Visualize A Trained Model With Bounding Boxes For Object Detection - tensorflow

I am trying to plot flower images with both the true label and the predicted label, each with its own bounding box. I am reusing some of the lower layers of a pre-trained Xception model.
I have set the localization output layer to 4 units, as there will be four coordinates for the bounding box:
loc_output = keras.layers.Dense(4)(avg)
For simplicity, I just set the four coordinates for the label as random numbers using tf.random.uniform.
How do I write a function using matplotlib that generates something like this:
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
dataset, info = tfds.load("tf_flowers", as_supervised=True, with_info=True)
test_set_raw, valid_set_raw, train_set_raw = tfds.load(
    "tf_flowers",
    split=["train[:10%]", "train[10%:25%]", "train[25%:]"],
    as_supervised=True)
class_names = info.features["label"].names
n_classes = info.features["label"].num_classes
## Shuffle & Preprocess
def preprocess(image, label):
    resized_image = tf.image.resize(image, [224, 224])
    final_image = keras.applications.xception.preprocess_input(resized_image)
    return final_image, label
batch_size = 32
train_set = train_set_raw.shuffle(1000).repeat()
train_set = train_set.map(preprocess).batch(batch_size).prefetch(1)
valid_set = valid_set_raw.map(preprocess).batch(batch_size).prefetch(1)
test_set = test_set_raw.map(preprocess).batch(batch_size).prefetch(1)
base_model = keras.applications.xception.Xception(weights="imagenet",
                                                  include_top=False)  # Reuse lower layers of pretrained Xception model
avg = keras.layers.GlobalAveragePooling2D()(base_model.output)
class_output = keras.layers.Dense(n_classes, activation="softmax")(avg)
loc_output = keras.layers.Dense(4)(avg) # 4 coordinates for our bounding box
model = keras.models.Model(inputs=base_model.input, outputs=[class_output, loc_output])
# for layer in base_model.layers:
#     layer.trainable = False
optimizer = keras.optimizers.SGD(lr=0.2, momentum=0.9, decay=0.01)
model.compile(loss=["sparse_categorical_crossentropy", "mse"],
              loss_weights=[0.8, 0.2],
              optimizer=optimizer, metrics=["accuracy"])
def add_random_bounding_boxes(images, labels):
    fake_bboxes = tf.random.uniform([tf.shape(images)[0], 4])
    return images, (labels, fake_bboxes)
fake_train_set = train_set.take(5).repeat(2).map(add_random_bounding_boxes)
model.fit(fake_train_set, steps_per_epoch=5, epochs=2)

Here is one way to achieve what you want. Note, however, that the dummy bounding boxes from tf.random.uniform make little sense: with the defaults minval=0 and maxval=1, the dummy coordinates fall within [0, 1), which is not an appropriate range for pixel coordinates. That is why, in the demonstration below, we rescale the coordinates by a scalar value (say, 150); hopefully you get the point.
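As a small illustration of that rescaling step (plain NumPy arithmetic, reproducing the sample output shown at the end of this answer):
import numpy as np
box = np.array([0.75246954, 0.36959255, 0.18266702, 0.7125735])  # drawn from [0, 1)
# rescale by 150, truncate to int, and clip to the 224 x 224 canvas
print((box * 150).astype(np.int32).clip(min=0, max=224))
# [112  55  27 106]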
After training, prepare the test set for inference.
import numpy as np
import matplotlib.pyplot as plt
test_set = test_set_raw.map(preprocess).batch(1).prefetch(1)
test_set = test_set.map(add_random_bounding_boxes)
print(class_names)
['dandelion', 'daisy', 'tulips', 'sunflowers', 'roses']
Display functionalities using matplotlib.
for i, (X, y) in enumerate(test_set.take(1)):
    # true labels
    true_label = y[0].numpy()
    true_bboxs = y[1].numpy()
    # model predictions
    pred_label, pred_boxes = model.predict(X)
    pred_label = np.argmax(pred_label, axis=-1)
    # rescaling
    dummy_true_boxes = (true_bboxs * 150).astype(np.int32).clip(min=0, max=224)
    dummy_predict_boxes = (pred_boxes * 150).astype(np.int32).clip(min=0, max=224)
    # Info printing
    print('GT bbox scores: ', true_bboxs)
    print('PRED bbox scores: ', pred_boxes)
    print('After Rescaling and Clipped True BBOX: ', dummy_true_boxes)
    print('After Rescaling and Clipped Pred BBOX: ', dummy_predict_boxes)
    print('True label : {}, Predicted label {}'.format(class_names[int(true_label)],
                                                       class_names[int(pred_label)]))
    plt.figure(figsize=(10, 10))
    plt.axis("off")
    plt.imshow(X[0])
    ax = plt.gca()
    for tbox, tcls, pbox, pcls in zip(dummy_true_boxes, true_label, dummy_predict_boxes, pred_label):
        # gt and pred labels
        ttext = "GT: {}".format(class_names[tcls])
        ptext = "Pred: {}".format(class_names[pcls])
        # gt and pred coordinates: xmin, ymin, xmax, ymax
        tx1, ty1, tx2, ty2 = tbox
        tw, th = tx2 - tx1, ty2 - ty1  # width = xmax - xmin; height = ymax - ymin
        px1, py1, px2, py2 = pbox
        pw, ph = px2 - px1, py2 - py1
        # draw the ground-truth box (green)
        patch = plt.Rectangle(
            [tx1, ty1], tw, th, fill=False, edgecolor=[0, 1, 0], linewidth=1
        )
        ax.add_patch(patch)
        ax.text(
            tx1,
            ty1,
            ttext,
            bbox={"facecolor": [1, 1, 1], "alpha": 0.5},
            clip_box=ax.clipbox,
            clip_on=True,
        )
        # draw the predicted box (white)
        patch = plt.Rectangle(
            [px1, py1], pw, ph, fill=False, edgecolor=[1, 1, 1], linewidth=1
        )
        ax.add_patch(patch)
        ax.text(
            px1,
            py1,
            ptext,
            bbox={"facecolor": [1, 1, 1], "alpha": 0.5},
            clip_box=ax.clipbox,
            clip_on=True,
        )
    plt.show()
GT bbox scores: [[0.75246954 0.36959255 0.18266702 0.7125735 ]]
PRED bbox scores: [[1.1755341 0.98745024 0.90438926 1.285707 ]]
After Rescaling and Clipped True BBOX: [[112 55 27 106]]
After Rescaling and Clipped Pred BBOX: [[176 148 135 192]]
True label : tulips, Predicted label sunflowers

Related

Tensorflow Custom Dataset - Add metadata as additional input to an image input processed by a CNN

I've got a working CNN model that classifies images from a custom dataset loaded via a csv file. The dataset is shuffled and then split into training, validation, and test sets. Now I want to extend the image input with four extra inputs containing info/metadata about the images.
I've already learnt that I should split my CNN model into two branches, one for the images and one for the extra inputs. My question is: how must I modify my data input so that the model can correctly process both the images and the additional inputs?
I'm very new to creating neural networks in tensorflow. My entire code is basically from this website; however, none of the related topics could solve the problem for my code.
This is my code: (additional metadata are called usages, completions, heights, constructions)
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from keras.callbacks import History
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns
import io
# READ IMAGES, METADATA AND LABELS
df = pd.read_csv('dataset.csv')
df = df.sample(frac=1)
file_paths = df['file_name'].values
labels = df['label'].values
usages = df['usage'].values
completions = df['completion'].values
heights = df['height'].values
constructions = df['construction'].values
# SPLITTING THE DATASET INTO 80 % TRAINING DATA, 10 % VALIDATION DATA, 10 % TEST DATA
dataset_size = len(df.index)
train_size = int(0.8 * dataset_size)
val_size = int(0.1 * dataset_size)
test_size = int(0.1 * dataset_size)
img_height = 350
img_width = 350
batch_size = 16
autotune = tf.data.experimental.AUTOTUNE
# FUNCTION TO READ AND NORMALIZE THE IMAGES
def read_image(image_file, label, usg, com, hei, con):
    image = tf.io.read_file(image_file)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, (img_width, img_height))
    return tf.cast(image, tf.float32) / 255.0, label, \
           tf.cast(usg, tf.float32), tf.cast(com, tf.float32), \
           tf.cast(hei, tf.float32), tf.cast(con, tf.float32)
# FUNCTION FOR DATA AUGMENTATION
def augment(image, label, usg, com, hei, con):
    if tf.random.uniform((), minval=0, maxval=1) < 0.1:
        image = tf.tile(tf.image.rgb_to_grayscale(image), [1, 1, 3])
    image = tf.image.random_brightness(image, max_delta=0.25)
    image = tf.image.random_contrast(image, lower=0.75, upper=1.25)
    image = tf.image.random_saturation(image, lower=0.75, upper=1.25)
    image = tf.image.random_flip_left_right(image)
    return image, label, usg, com, hei, con
# SETUP FOR TRAINING, VALIDATION & TEST DATASET
# (assumed construction, omitted in the original post: build the dataset from
# the csv columns and split it using the sizes computed above)
ds = tf.data.Dataset.from_tensor_slices(
    (file_paths, labels, usages, completions, heights, constructions))
ds_train = ds.take(train_size)
ds_val = ds.skip(train_size).take(val_size)
ds_test = ds.skip(train_size + val_size)
ds_train = ds_train.map(read_image, num_parallel_calls=autotune)
ds_train = ds_train.cache()
ds_train = ds_train.map(augment, num_parallel_calls=autotune)
ds_train = ds_train.batch(batch_size)
ds_train = ds_train.prefetch(autotune)
ds_val = ds_val.map(read_image, num_parallel_calls=autotune)
ds_val = ds_val.batch(batch_size)
ds_val = ds_val.prefetch(autotune)
ds_test = ds_test.map(read_image, num_parallel_calls=autotune)
ds_test = ds_test.batch(batch_size)
ds_test = ds_test.prefetch(autotune)
## HOW TO SPLIT UP THE DATASET FOR THE MODEL FROM HERE? ##
# DEFINING FUNCTIONAL MODEL
input_img = keras.Input(shape=(img_width, img_height, 3))
input_dat = keras.Input(shape=(4,)) # how is this shape supposed to be?
x = layers.Conv2D(16, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(input_img)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
out1 = layers.Flatten()(x)
out2 = layers.Dense(128, activation='relu')(input_dat)
merge = layers.concatenate([out1, out2])
x = layers.Dense(256, activation='relu')(merge)
x = layers.Dropout(0.35)(x)
output = layers.Dense(8, activation='sigmoid')(x)
model = keras.Model(inputs=[input_img, input_dat], outputs=output)
history = History()
no_overfit = keras.callbacks.EarlyStopping(monitor='val_loss',  # stop training when overfitting occurs
                                           min_delta=0.015, patience=1,
                                           verbose=2, mode='auto')
# TRAINING STEP
model.compile(
    optimizer=keras.optimizers.Adam(3e-5),
    loss=[keras.losses.SparseCategoricalCrossentropy()],
    metrics=["accuracy"])
model.fit(ds_train, epochs=30, callbacks=[no_overfit, history],
          verbose=1, validation_data=ds_val)
So far I've only added the extra inputs to the dataset tensor and changed the model structure. How exactly do I split my dataset into input_img and input_dat so that each model branch will receive their proper input?
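One common pattern, shown here as a minimal sketch (an assumption, not from the original post), is to repack each dataset element into the ((image, metadata), label) structure that a two-input functional model expects; Keras then routes the tuple entries to inputs=[input_img, input_dat] in order:
# Sketch: stack the four metadata scalars into one (4,) vector and pair it with the image.
def pack_inputs(image, label, usg, com, hei, con):
    metadata = tf.stack([usg, com, hei, con], axis=-1)  # matches Input(shape=(4,))
    return (image, metadata), label

ds_train = ds_train.map(pack_inputs, num_parallel_calls=autotune)
ds_val = ds_val.map(pack_inputs, num_parallel_calls=autotune)
ds_test = ds_test.map(pack_inputs, num_parallel_calls=autotune)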
Also, I have a custom test step in order to plot a confusion matrix. How is this supposed to be modified? Here is the working code, for just the image input (a sketch of the modified loop follows after this question):
y_true = []
y_pred = []
for x, y in ds_test:
    y_true.append(y)
    predicts = model.predict(x)  # compute model predictions for test step
    y_pred.append(np.argmax(predicts, axis=-1))
true = tf.concat([item for item in y_true], axis=0)
pred = tf.concat([item for item in y_pred], axis=0)
cm = confusion_matrix(true, pred)  # confusion matrix from sklearn
testacc = np.trace(cm) / float(np.sum(cm)) # calculating test accuracy
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
fig, ax = plt.subplots(figsize=(10, 10))
color = sns.light_palette("seagreen", as_cmap=False)
sns.heatmap(cm, annot=True, square=True, cmap=color, fmt=".3f",
            linewidths=0.6, linecolor='k', cbar_kws={"shrink": 0.8})
plt.yticks(rotation=0)
plt.xlabel('\nPredicted Labels', fontsize=18)
plt.ylabel('True Labels\n', fontsize=18)
plt.title('Multiclass Model - Confusion Matrix (Test Step)\n', fontsize=24)
plt.text(10, 1.1, 'Accuracy = {:0.4f}'.format(testacc), fontsize=20)
ax.axhline(y=8, color='k', linewidth=1.5) # depending on amount of classes
ax.axvline(x=8, color='k', linewidth=1.5)
plt.show()
print('\naccuracy: {:0.4f}'.format(testacc))
Any help is greatly appreciated!!
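With the dataset elements repacked as ((image, metadata), label) as sketched above, the custom test step only needs to unpack both inputs. A hedged sketch, not the original code:
y_true = []
y_pred = []
for (x_img, x_dat), y in ds_test:
    y_true.append(y)
    # predict on both inputs, in the same order as inputs=[input_img, input_dat]
    predicts = model.predict([x_img, x_dat])
    y_pred.append(np.argmax(predicts, axis=-1))
The rest of the confusion-matrix code can stay unchanged.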

Plot Confusion Matrix from Roberta Model

I wrote the text classification code with two classes using the Roberta model and now I want to draw the confusion matrix.
How do I go about plotting the confusion matrix based on a Roberta model?
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from transformers import RobertaTokenizer, TFRobertaForSequenceClassification

tokenizer = RobertaTokenizer.from_pretrained('roberta-base', do_lower_case=False)
roberta_model = TFRobertaForSequenceClassification.from_pretrained('roberta-base',num_labels=2)
input_ids=[]
attention_masks=[]
for sent in sentences:
    bert_inp = tokenizer.encode_plus(sent, add_special_tokens=True, max_length=128,
                                     pad_to_max_length=True, return_attention_mask=True)
    input_ids.append(bert_inp['input_ids'])
    attention_masks.append(bert_inp['attention_mask'])
input_ids=np.asarray(input_ids)
attention_masks=np.array(attention_masks)
labels=np.array(labels)
#split
train_inp,val_inp,train_label,val_label,train_mask,val_mask=train_test_split(input_ids,labels,attention_masks,test_size=0.5)
print('Train inp shape {} Val input shape {}\nTrain label shape {} Val label shape {}\nTrain attention mask shape {} Val attention mask shape {}'.format(train_inp.shape,val_inp.shape,train_label.shape,val_label.shape,train_mask.shape,val_mask.shape))
#
log_dir='tensorboard_data/tb_roberta'
model_save_path='/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/callbacks.py'
callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath=model_save_path,save_weights_only=True,monitor='val_loss',mode='min',save_best_only=True),keras.callbacks.TensorBoard(log_dir=log_dir)]
print('\nBert Model',roberta_model.summary())
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5,epsilon=1e-08)
roberta_model.compile(loss=loss,optimizer=optimizer,metrics=[metric])
history=roberta_model.fit([train_inp,train_mask],train_label,batch_size=16,epochs=2,validation_data=([val_inp,val_mask],val_label),callbacks=callbacks)
trained_model = TFRobertaForSequenceClassification.from_pretrained('roberta-base',num_labels=2)
trained_model.compile(loss=loss,optimizer=optimizer, metrics=[metric])
trained_model.load_weights(model_save_path)
preds = trained_model.predict([val_inp,val_mask],batch_size=16)
pred_labels = np.argmax(preds.logits, axis=1)
conf_matrix = confusion_matrix(val_label, pred_labels)  # compare validation labels with their predictions
print('conf_matrix ',conf_matrix)
fig, ax = plt.subplots(figsize=(7.5, 7.5))
ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3)
for i in range(conf_matrix.shape[0]):
    for j in range(conf_matrix.shape[1]):
        ax.text(x=j, y=i, s=conf_matrix[i, j], va='center', ha='center', size='xx-large')
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix(without preprocessing)', fontsize=18)
plt.show()
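Alternatively, scikit-learn ships a ready-made plotting helper for exactly this (a short sketch, assuming scikit-learn >= 0.22):
from sklearn.metrics import ConfusionMatrixDisplay
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=[0, 1])
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix (ConfusionMatrixDisplay)')
plt.show()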

In Pytorch, how to test simple image with my loaded model?

I made an alphabet classification CNN model using PyTorch, and then tried to test it with a single handwritten image that the model has never seen before. I extracted bounding boxes from my handwriting image with OpenCV, but I don't know how to apply them to the model.
(image: my handwriting with bounding boxes drawn)
This is my custom dataset:
class CustomDatasetFromCSV(Dataset):
    def __init__(self, csv_path, height, width, transforms=None):
        """
        Args:
            csv_path (string): path to csv file
            height (int): image height
            width (int): image width
            transforms: pytorch transforms for transforms and tensor conversion
        """
        self.data = pd.read_csv(csv_path)
        self.labels = np.asarray(self.data.iloc[:, 0])
        self.height = height
        self.width = width
        self.transforms = transforms

    def __getitem__(self, index):
        single_image_label = self.labels[index]
        # Read each 784 pixels and reshape the 1D array ([784]) to 2D array ([28,28])
        img_as_np = np.asarray(self.data.iloc[index][1:]).reshape(28, 28).astype('uint8')
        # Convert image from numpy array to PIL image, mode 'L' is for grayscale
        img_as_img = Image.fromarray(img_as_np)
        img_as_img = img_as_img.convert('L')
        # Transform image to tensor
        if self.transforms is not None:
            img_as_tensor = self.transforms(img_as_img)
        # Return image and the label
        return (img_as_tensor, single_image_label)

    def __len__(self):
        return len(self.data.index)
transformations = transforms.Compose([
    transforms.ToTensor()
])
alphabet_from_csv = CustomDatasetFromCSV("/content/drive/My Drive/A_Z Handwritten Data.csv",
                                         28, 28, transformations)
random_seed = 50
data_size = len(alphabet_from_csv)
indices = list(range(data_size))
split = int(np.floor(0.2 * data_size))
if True:
    np.random.seed(random_seed)
    np.random.shuffle(indices)
train_indices, test_indices = indices[split:], indices[:split]
train_dataset = SubsetRandomSampler(train_indices)
test_dataset = SubsetRandomSampler(test_indices)
train_loader = torch.utils.data.DataLoader(dataset=alphabet_from_csv,
                                           batch_size=batch_size,
                                           sampler=train_dataset)
test_loader = torch.utils.data.DataLoader(dataset=alphabet_from_csv,
                                          batch_size=batch_size,
                                          sampler=test_dataset)
This is my model:
class ConvNet3(nn.Module):
    def __init__(self, num_classes=26):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 28, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(28),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(28, 56, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(56),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(56 * 7 * 7, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, 26),
        )

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out
model = ConvNet3(num_classes).to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def train():
    # train phase
    model.train()
    # create a progress bar
    batch_loss_list = []
    progress = ProgressMonitor(length=len(train_dataset))
    for batch, target in train_loader:
        # Move the training data to the GPU
        batch, target = batch.to(device), target.to(device)
        # forward propagation
        output = model(batch)
        # calculate the loss
        loss = loss_func(output, target)
        # clear previous gradient computation
        optimizer.zero_grad()
        # backpropagate to compute gradients
        loss.backward()
        # update model weights
        optimizer.step()
        # update progress bar
        batch_loss_list.append(loss.item())
        progress.update(batch.shape[0], sum(batch_loss_list) / len(batch_loss_list))
def test():
    # test phase
    model.eval()
    correct = 0
    # We don't need gradients for test, so wrap in
    # no_grad to save memory
    with torch.no_grad():
        for batch, target in test_loader:
            # Move the test batch to the GPU
            batch, target = batch.to(device), target.to(device)
            # forward propagation
            output = model(batch)
            # get prediction
            output = torch.argmax(output, 1)
            # accumulate correct number
            correct += (output == target).sum().item()
    # Calculate test accuracy
    acc = 100 * float(correct) / len(test_dataset)
    print('Test accuracy: {}/{} ({:.2f}%)'.format(correct, len(test_dataset), acc))
for epoch in range(num_epochs):
    print("{}'s try".format(int(epoch) + 1))
    train()
    test()
    print("-----------------------------------------------------------------------------")
This is my code to find the bounding boxes in my image:
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
im = cv2.imread('/content/drive/My Drive/my_handwritten.jpg')
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.adaptiveThreshold(blur, 255, 1, 1, 11, 2)
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[1]
rects = []
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    if h < 20: continue
    red = (0, 0, 255)
    cv2.rectangle(im, (x, y), (x + w, y + h), red, 2)
    rects.append((x, y, w, h))
cv2.imwrite('my_handwritten_bounding.png', im)
img_result = []
img_for_class = im.copy()
margin_pixel = 60
for rect in rects:
    # crop [y:y+h, x:x+w] with a margin around the box
    img_result.append(
        img_for_class[rect[1] - margin_pixel: rect[1] + rect[3] + margin_pixel,
                      rect[0] - margin_pixel: rect[0] + rect[2] + margin_pixel])
    # Draw the rectangles
    cv2.rectangle(im, (rect[0], rect[1]),
                  (rect[0] + rect[2], rect[1] + rect[3]), (0, 0, 255), 2)
count = 0
nrows = 4
ncols = 7
plt.figure(figsize=(12, 8))
for n in img_result:
    count += 1
    plt.subplot(nrows, ncols, count)
    plt.imshow(cv2.resize(n, (28, 28)), cmap='Greys', interpolation='nearest')
plt.tight_layout()
plt.show()
You have already written the function test to test your net. The only thing you need to do is create a batch containing a single image, with the same preprocessing as the images in your dataset.
def test_one_image(I, model):
    '''
    I - 28x28 uint8 numpy array
    '''
    # test phase
    model.eval()
    # convert image to a float tensor and add batch and channel dims -> (1, 1, 28, 28)
    batch = torch.tensor(I / 255, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
    # We don't need gradients for test, so wrap in
    # no_grad to save memory
    with torch.no_grad():
        batch = batch.to(device)
        # forward propagation
        output = model(batch)
        # get prediction
        output = torch.argmax(output, 1)
    return output
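For example, applied to one of the boxes cropped earlier (a hypothetical usage sketch: the crop must match the training data, i.e. 28x28 and single-channel, and depending on your photo you may also need to invert foreground and background):
# Hypothetical usage: classify the first cropped box from img_result.
crop = cv2.cvtColor(img_result[0], cv2.COLOR_BGR2GRAY)   # model expects one channel
crop = cv2.resize(crop, (28, 28))                        # model expects 28x28 input
pred = test_one_image(crop, model)
print('Predicted letter:', chr(ord('A') + pred.item()))  # labels 0-25 map to 'A'-'Z'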

How to feed my network with the correct array size in tensorflow

I have the following code, and I am trying to train the network that I built on Belgian traffic signs. Here is the code:
import tensorflow as tf
import os
import skimage.io
import skimage.color
from skimage import transform
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
config=tf.ConfigProto(log_device_placement=True)
#config_soft = tf.ConfigProto(allow_soft_placement =True)
def load_data(data_directory):
    directories = [d for d in os.listdir(data_directory)
                   if os.path.isdir(os.path.join(data_directory, d))]
    labels = []
    images = []
    for d in directories:
        label_directory = os.path.join(data_directory, d)
        file_names = [os.path.join(label_directory, f)
                      for f in os.listdir(label_directory)
                      if f.endswith(".ppm")]
        for f in file_names:
            images.append(skimage.io.imread(f))
            labels.append(int(d))
    return images, labels
Root_Path = "/home/raed/Dropbox/Thesis/Codes/Tensorflow"
training_Directory = os.path.join(Root_Path,"Training")
testing_Directory = os.path.join(Root_Path,"Testing")
images, labels = load_data(training_Directory)
# Convert lists to arrays to facilitate information retrieval
images_array = np.asarray(images)
labels_array = np.asanyarray(labels)
#print some information about the datasets
print(images_array.ndim)
print(images_array.size)
print(labels_array.ndim)
print(labels_array.nbytes)
print(len(labels_array))
# plotting the distribution of different signs
sns.set(palette="deep")
plt.hist(labels,62)
# Selecting couple of images based on their indices
traffic_signs = [300,2250,3650,4000]
for i in range(len(traffic_signs)):
    plt.subplot(1, 4, i + 1)
    plt.imshow(images_array[traffic_signs[i]])
plt.show()
# Fill out the subplots with the random images and add shape, min and max values
for i in range(len(traffic_signs)):
    plt.subplot(1, 4, i + 1)
    plt.imshow(images_array[traffic_signs[i]])
    plt.axis('off')
    plt.show()
    print("Shape:{0},max:{1}, min:{2}".format(images_array[traffic_signs[i]].shape,
                                              images_array[traffic_signs[i]].max(),
                                              images_array[traffic_signs[i]].min()))
# Get unique labels
unique_labels = set(labels_array)
# initialize the figure
plt.figure(figsize=(15,15))
i=1
for label in unique_labels:
    image = images_array[labels.index(label)]
    plt.subplot(8, 8, i)
    plt.axis('off')
    plt.title('label:{0} ({1})'.format(label, labels.count(label)))
    i = i + 1
    plt.imshow(image)
plt.show()
images28 = [transform.resize(image, (28, 28)) for image in images]
images28_array = np.asanyarray(images28)
for i in range(len(traffic_signs)):
    plt.subplot(1, 4, i + 1)
    plt.imshow(images_array[traffic_signs[i]])
    plt.axis('off')
    plt.show()
    print("Shape:{0},max:{1}, min:{2}".format(images28_array[i].shape,
                                              images28_array[i].max(),
                                              images28_array[i].min()))
#convert to grayscale
gray_images = skimage.color.rgb2gray(images28_array)
for i in range(len(traffic_signs)):
    plt.subplot(1, 4, i + 1)
    plt.axis('off')
    plt.imshow(gray_images[traffic_signs[i]], cmap="gray")
    plt.subplots_adjust(wspace=0.5)
# Show the plot
plt.show()
# prepare placeholders
x = tf.placeholder(dtype=tf.float32, shape =[None, 28,28])
y = tf.placeholder(dtype= tf.int32, shape=[None])
#Flatten the input data
images_flat = tf.layers.flatten(x)
#Fully connected layer , Multi-layer Perceptron (MLP)
logits = tf.contrib.layers.fully_connected(images_flat,62, tf.nn.relu)
#Define loss function
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
#define an optimizer (Stochastic Gradient Descent )
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
#convert logits to label indices
correct_prediction = tf.arg_max(logits,1)
#define an accuracy metric
accuracy =tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#########################################
print('######### Main Program #########')
#########################################
print("images_flat: ", images_flat)
print("logits: ", logits)
print("loss: ", loss)
print("Optimizer:",optimizer)
print("predicted_labels: ", correct_prediction)
tf.set_random_seed(1235)
#images28 = np.asanyarray(images28).reshape(-1, 28, 28,1)
# with tf.Session() as training_session:
#     training_session.run(tf.global_variables_initializer())
#     for i in range(201):
#         print('Epoch', i)
#         _, accuracy_value = training_session.run([optimizer, accuracy], feed_dict={x: images28, y: labels})
#         if i % 10 == 0:
#             print("Loss", loss)
#     print('Epochs Done!!')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(201):
        _, loss_value = sess.run([optimizer, loss], feed_dict={x: gray_images, y: labels})
        if i % 10 == 0:
            print("Loss: ", loss_value)
I also did a series of transformations before feeding the network, as follows:
images28 = [transform.resize(image, (28, 28)) for image in images]
images28_array = np.asanyarray(images28)
But on execution I am getting the following error:
ValueError: Cannot feed value of shape (4575, 28, 28, 3) for Tensor 'Placeholder_189:0', which has shape '(?, 28, 28)'
Could you please help me figure out where I am going wrong in training this network? Please refer to the following link for more information:
https://www.datacamp.com/community/tutorials/tensorflow-tutorial
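For reference, the error itself pins down the mismatch: a rank-4 RGB batch of shape (4575, 28, 28, 3) is being fed into the placeholder x, which was declared with shape (?, 28, 28). A minimal sketch of a feed that matches the placeholder, assuming the grayscale conversion shown above is applied first:
# Minimal sketch (assumption): feed the single-channel array, not the RGB one.
import skimage.color
gray28 = skimage.color.rgb2gray(np.asanyarray(images28))
print(gray28.shape)  # (4575, 28, 28) -- matches the (?, 28, 28) placeholder
# then inside the session: sess.run([optimizer, loss], feed_dict={x: gray28, y: labels})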

Why does my squared loss becomes negative in TensorFlow?

I've run into a really strange problem: my squared loss becomes negative. Here's my code.
#!/usr/bin/python
# -*- coding:utf8 -*-
from __future__ import print_function
from models.vgg16 import VGG16_fixed
from keras.backend.tensorflow_backend import set_session
from scipy.misc import imsave
from models.generative_model_v2 import gen_model_v2
from scripts.image_process import *
from scripts.utils_func import *
from tensorflow.python import debug as tf_debug
import tensorflow as tf
from keras import backend as K  # K is used below for the learning phase, shapes, etc.
import os
import time
# configure gpu usage
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5
set_session(tf.Session(config=config)) # pass gpu setting to Keras
# set learning phase, or batch norm won't work
K.set_learning_phase(1)
# dataset setting
width, height = 256, 256
coco_img_path = '../../dataset/coco/images/train2014/'
sl_img_path = './images/style/'
# a trade-off coefficient between content loss and style loss, which is multiplied with style loss
alpha = 1
# create placeholders for input images
if K.image_data_format() == 'channels_last':
    content_img_shape = [width, height, 3]
    style_img_shape = [width, height, 3]
else:
    content_img_shape = [3, width, height]
    style_img_shape = [3, width, height]
with tf.name_scope('input'):
    content_img = tf.placeholder(dtype='float32',
                                 shape=(None, content_img_shape[0], content_img_shape[1], content_img_shape[2]),
                                 name='content_img')
    style_img = tf.placeholder(dtype='float32',
                               shape=(None, style_img_shape[0], style_img_shape[1], style_img_shape[2]),
                               name='style_img')
# load model
main_model, outputs = gen_model_v2(input_content_tensor=content_img, input_style_tensor=style_img)
concact_input = K.concatenate([content_img,
                               outputs,
                               style_img], axis=0)
vgg16_model = VGG16_fixed(input_tensor=concact_input,
                          weights='imagenet', include_top=False)
# get the symbolic outputs of each "key" layer (we gave them unique names).
vgg16_outputs_dict = dict([(layer.name, layer.output) for layer in vgg16_model.layers])
# get relevant layers
content_feature_layers = 'block3_conv3'
style_feature_layers = ['block1_conv2', 'block2_conv2',
'block3_conv3', 'block4_conv3']
# content loss
ct_loss = K.variable(0.)
layer_features = vgg16_outputs_dict[content_feature_layers]
content_img_features = layer_features[0, :, :, :]
outputs_img_features = layer_features[1, :, :, :]
ct_loss += content_loss(content_img_features, outputs_img_features)
# style loss
sl_loss_temp = K.variable(0.)
for layer_name in style_feature_layers:
    layer_features = vgg16_outputs_dict[layer_name]
    outputs_img_features = layer_features[1, :, :, :]
    style_img_features = layer_features[2, :, :, :]
    sl = style_loss(style_img_features, outputs_img_features)
    sl_loss_temp += (alpha / len(style_feature_layers)) * sl
sl_loss = sl_loss_temp
# combine loss
loss = ct_loss + sl_loss
# write in summary
tf.summary.scalar('content_loss', ct_loss)
tf.summary.scalar("style_loss", sl_loss)
tf.summary.scalar("loss", loss)
# optimization
train_op = tf.train.AdamOptimizer(learning_rate=0.001,
                                  beta1=0.9,
                                  beta2=0.999,
                                  epsilon=1e-08).minimize(loss)
with tf.Session(config=config) as sess:
    # Merge all the summaries and write them out to /tmp/mnist_logs (by default)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('./logs/gen_model_v2',
                                         sess.graph)
    # initialize all variables
    tf.global_variables_initializer().run()
    # get training images
    ct_img_name = [x for x in os.listdir(coco_img_path) if x.endswith(".jpg")]
    ct_img_num = len(ct_img_name)
    print("content image number: ", ct_img_num)
    sl_img_name = [x for x in os.listdir(sl_img_path) if x.endswith(".jpg")]
    sl_img_num = len(sl_img_name)
    print("style image number: ", sl_img_num)
    # start training
    start_time = time.time()
    for i in range(1):
        itr = 0
        for ct_name in ct_img_name:
            if itr > 10:  # used to train on a small sample of MS COCO
                break
            sl_name = sl_img_name[itr % sl_img_num]
            _, loss_val, summary = sess.run([train_op, loss, merged],
                                            feed_dict={content_img: preprocess_image(coco_img_path + ct_name, height, width),
                                                       style_img: preprocess_image(sl_img_path + sl_name, height, width)})
            train_writer.add_summary(summary, itr * (i + 1))
            print('iteration', itr, 'loss =', loss_val)
            itr += 1
    end_time = time.time()
    print('Training completed in %ds' % (end_time - start_time))
    # save model
    main_model.save('./models/gen_model_v2_1.h5')
    # use images to test
    test_ct_img_path = './images/content/train-1.jpg'
    test_ct_img = preprocess_image(test_ct_img_path, height, width)
    test_sl_img_path = './images/style/starry_night.jpg'
    test_sl_img = preprocess_image(test_sl_img_path, height, width)  # use the style image path here
    # feed test images into model
    output = sess.run(outputs, feed_dict={content_img: test_ct_img, style_img: test_sl_img})
    output = deprocess_image(output)
    print('Output image shape:', output.shape[1:4])
    imsave('./images/autoencoder/test_v2_1.png', output[0])
and my loss function is defined as below:
#!/usr/bin/python
# -*- coding:utf8 -*-
import numpy as np
from keras import backend as K
import tensorflow as tf
# the gram matrix of an image tensor (feature-wise outer product)
def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram
def style_loss(featuremap_1, featuremap_2):
    assert K.ndim(featuremap_1) == 3
    assert K.ndim(featuremap_2) == 3
    g1 = gram_matrix(featuremap_1)
    g2 = gram_matrix(featuremap_2)
    channels = 3
    if K.image_data_format() == 'channels_first':
        size = featuremap_1.shape[1] * featuremap_1.shape[2]
    else:
        size = K.shape(featuremap_1)[0] * K.shape(featuremap_1)[1]
    size = K.cast(size, tf.float32)
    return K.sum(K.square(g1 - g2)) / (4. * (channels ** 2) * (size ** 2))
def content_loss(base, combination):
    return K.sum(K.square(combination - base))
So, you can see my loss value is squared using K.square(). How can it be a negative value?
This is the result of running my code: the loss decreases sharply into negative values, which seems impossible.
You're starting with ct_loss as a K.variable. Since K.variable creates a trainable variable, AdamOptimizer's minimize() is free to update that accumulator directly and drive it arbitrarily negative, which is why your combined loss sinks below zero. Just set it to the content loss instead:
ct_loss = content_loss(content_img_features, outputs_img_features)
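The same reasoning applies to sl_loss_temp, which is also created with K.variable(0.). A sketch of the style term accumulated as a plain tensor instead (using only names already defined in your script):
# Accumulate into a plain tensor so the optimizer cannot update the accumulator itself.
sl_loss = 0.
for layer_name in style_feature_layers:
    layer_features = vgg16_outputs_dict[layer_name]
    outputs_img_features = layer_features[1, :, :, :]
    style_img_features = layer_features[2, :, :, :]
    sl_loss += (alpha / len(style_feature_layers)) * style_loss(style_img_features,
                                                                outputs_img_features)
loss = ct_loss + sl_loss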